In [23]:
import geopandas as gpd
import overpass
import os
import pandas as pd

In [24]:
# Shared Overpass API client used by every query in this notebook.
# NOTE(review): timeout=900000 looks like a millisecond value; confirm which unit
# overpass.API expects for `timeout`.
overpassAPI = overpass.API(timeout=900000)

In [25]:
# Helper used only for evaluating how many heat power values exist
def replace_NotPowerValues_with_nan(gdf):
    """Blank out every object-typed cell that does not mention 'MW' or 'kW'.

    Only columns whose dtype is ``object`` are touched; all other columns are
    left as they are. Each cell of an object column is kept when its string
    form contains 'MW' or 'kW' and is replaced by ``pd.NA`` otherwise.

    The frame is modified in place and the same object is returned.
    """
    def _power_value_or_na(cell):
        text = str(cell)
        if 'MW' in text or 'kW' in text:
            return cell
        return pd.NA

    object_columns = [name for name in gdf.columns if gdf[name].dtype == object]
    for name in object_columns:
        gdf[name] = gdf[name].apply(_power_value_or_na)
    return gdf

In [26]:
def merge_gdfs_along_row_and_column(gdf1, gdf2):
    """Outer-join two frames on ``geometry`` and fold duplicated columns together.

    Columns that exist in both inputs leave the join twice: unchanged for
    ``gdf1`` and with a ``#`` suffix for ``gdf2``. Those pairs are collapsed by
    ``merge_columns_within_geodataframe`` into a fresh, initially empty
    GeoDataFrame, which is returned.
    """
    joined = pd.merge(gdf1, gdf2, how='outer', on='geometry', suffixes=("", "#"))
    return merge_columns_within_geodataframe(joined, gpd.GeoDataFrame())

In [27]:
def merge_columns_within_geodataframe(gdf, gdf_to_create):
    """Collapse ``name`` / ``name#`` column pairs of a merged frame into single columns.

    Columns whose name ends in ``#`` (the suffix given to right-hand columns by
    ``merge_gdfs_along_row_and_column``) are handled first, in column order.
    Each one is combined with its un-suffixed partner: values of the suffixed
    column win and its gaps are filled from the partner. A suffixed column
    that has no partner is stored on its own under the un-suffixed name.
    Once no suffixed column is left, the remaining columns are moved over one
    by one, starting with the last.

    Parameters
    ----------
    gdf : DataFrame or GeoDataFrame
        Merged frame. It is consumed: every processed column is dropped from
        it in place.
    gdf_to_create : DataFrame or GeoDataFrame
        Accumulator receiving the collapsed columns; modified in place.

    Returns
    -------
    The ``gdf_to_create`` object that was passed in. A frame without rows
    still hands its (empty) columns over, so the column layout is preserved.
    """
    # Loop on the remaining columns rather than on `gdf.empty`, which is also
    # True for a frame that has columns but no rows.
    while len(gdf.columns) > 0:
        column_names = list(gdf.columns)
        suffixed_name = next(
            (name for name in column_names
             if isinstance(name, str) and name.endswith('#')),
            None,
        )

        if suffixed_name is None:
            # Nothing left to pair up: move the trailing column over unchanged.
            last_name = column_names[-1]
            gdf_to_create.loc[:, last_name] = gdf[last_name]
            gdf.drop(last_name, axis=1, inplace=True)
            continue

        # Name without the suffix, i.e. everything before the first '#'.
        base_name = suffixed_name.split('#', 1)[0]

        if base_name in column_names:
            # rename() without inplace: with inplace=True the documented
            # return value is None, so the result must not be relied upon.
            combined = gdf[suffixed_name].combine_first(gdf[base_name]).rename(base_name)
            gdf_to_create.loc[:, base_name] = combined
            gdf.drop([suffixed_name, base_name], axis=1, inplace=True)
        else:
            # No partner column: keep the suffixed column's own data.
            gdf_to_create.loc[:, base_name] = gdf[suffixed_name]
            gdf.drop(suffixed_name, axis=1, inplace=True)

    return gdf_to_create

In [28]:
def filter_result(gdf, interesting_columns):
    """Return a copy of ``gdf`` reduced to the columns in ``interesting_columns``.

    Column order follows ``gdf``. Names in ``interesting_columns`` that do not
    exist in ``gdf`` are ignored. The input frame itself is not modified.
    """
    unwanted = [name for name in gdf.columns if name not in interesting_columns]

    # Work on a copy and drop in place so the caller's frame stays untouched.
    reduced = gdf.copy()
    reduced.drop(columns=unwanted, axis=1, inplace=True)
    return reduced

In [29]:
def transform_non_points_to_points(gdf):
    """Replace every non-point geometry by its ``representative_point()``.

    Rows whose geometry reports ``geom_type == 'Point'`` are left alone. The
    'geometry' column is updated in place and the same frame is returned.
    """
    for label, record in gdf.iterrows():
        shape = record['geometry']
        if shape.geom_type == 'Point':
            continue
        gdf.at[label, 'geometry'] = shape.representative_point()
    return gdf

In [30]:
def delete_rows_with_empty_values(gdf):
    """Remove rows that have a geometry but no value in any other column.

    A row is removed when its geometry is non-null and every column other than
    "geometry" is null. Rows with a null geometry are kept. The result is a
    new frame with a fresh 0..n-1 index; the input is not modified.
    """
    has_geometry = gdf.geometry.notnull()
    other_columns_all_null = gdf.drop("geometry", axis=1).isnull().all(axis=1)
    keep = ~(has_geometry & other_columns_all_null)
    return gdf.loc[keep].reset_index(drop=True)

In [31]:
def osm_query_for_tag_within_bavaria(tag):
    """Run an Overpass query for ``tag`` inside the area named "Bayern" (admin_level 4).

    ``tag`` is an Overpass selector such as ``nwr["key"="value"]``; it is
    inserted into the query unchanged.

    Returns a GeoDataFrame built from the returned features (CRS '4326'),
    without the rows that carry only a geometry, and with every non-point
    geometry replaced by a representative point. When the response contains
    no features, an empty GeoDataFrame with an empty geometry column is
    returned instead.
    """
    query = f'area[name="Bayern"][admin_level=4];{tag}(area);(._; >;);'
    response = overpassAPI.get(query, verbosity='geom')

    if not response['features']:
        return gpd.GeoDataFrame(geometry=[])

    features_gdf = gpd.GeoDataFrame.from_features(response, crs ='4326')
    tagged_gdf = delete_rows_with_empty_values(features_gdf)
    return transform_non_points_to_points(tagged_gdf)

In [32]:
# Column filter: the only columns kept from each query result
filter_columns = [
    # generator-level output columns
    'generator:output:steam',
    'generator:output:hot_water',
    'generator:output:heat',
    'generator:output:hot_air',
    # plant-level output columns
    'plant:output:hot_water',
    'plant:output:heat',
    'plant:output:steam',
    'plant:output:hot_air',
    # merge key used by merge_gdfs_along_row_and_column
    'geometry',
]


# Overpass selectors that are queried one after another; the order is kept
# because the results are merged in this sequence.
tag_sources = [
    # power plants in general
    'nw["power"="plant"]',
    # selectors on generator:* keys
    'nw["generator:source"="solar"]',
    'nwr["generator:source"="oil"]',
    'nwr["generator:source"="gasoline"]',
    'nwr["generator:source"="biomass"]',
    'nwr["generator:source"="biofuel"]',
    'nwr["generator:source"="geothermal"]',
    'nwr["generator:source"="waste"]',
    'nwr["generator:source"="thermal"]',
    'nwr["generator:method"="thermal"]',
    'nwr["generator:type"="solar_thermal_collector"]',
    'nwr["generator:type"="heat_pump"]',
    # selectors on plant:* keys
    'nw["plant:source"="solar"]',
    'nwr["plant:source"="oil"]',
    'nwr["plant:source"="gasoline"]',
    'nwr["plant:source"="biofuel"]',
    'nwr["plant:source"="geothermal"]',
    'nwr["plant:source"="waste"]',
    'nwr["plant:source"="thermal"]',
    'nwr["plant:method"="thermal"]',
    'nw["plant:source"="biomass"]',
    'nwr["plant:source:biomass"]',
    # name-based and industrial selectors
    'nwr["name"~"Geothermie"]',
    'nwr["industrial"="heating_station"]',
]

In [33]:
# Start from an empty frame that already has a geometry column, so the first
# merge on 'geometry' has a key to join on.
df_result = gpd.GeoDataFrame().set_geometry([])

# Query each selector, keep only the columns of interest and fold the result
# into the running frame.
for tag in tag_sources:
    df_osm = filter_result(osm_query_for_tag_within_bavaria(tag), filter_columns)
    df_result = merge_gdfs_along_row_and_column(df_result, df_osm)

  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value
  self.obj[key] = value


In [34]:
# Drop rows that carry a geometry but no value in any other column
df_result = delete_rows_with_empty_values(df_result)

In [35]:
# Add a constant 'Source' column marking every row as coming from OSM
df_result['Source'] = 'OSM'

In [36]:
# Print column names, non-null counts and dtypes of the merged result
df_result.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 192 entries, 0 to 191
Data columns (total 10 columns):
 #   Column                      Non-Null Count  Dtype   
---  ------                      --------------  -----   
 0   generator:output:hot_water  59 non-null     object  
 1   plant:output:hot_water      82 non-null     object  
 2   generator:output:heat       31 non-null     object  
 3   generator:output:steam      6 non-null      object  
 4   geometry                    192 non-null    geometry
 5   generator:output:hot_air    2 non-null      object  
 6   plant:output:hot_air        2 non-null      object  
 7   plant:output:steam          4 non-null      object  
 8   plant:output:heat           11 non-null     object  
 9   Source                      192 non-null    object  
dtypes: geometry(1), object(9)
memory usage: 15.1+ KB


In [38]:
# Display the merged result table
df_result

Unnamed: 0,generator:output:hot_water,plant:output:hot_water,generator:output:heat,generator:output:steam,geometry,generator:output:hot_air,plant:output:hot_air,plant:output:steam,plant:output:heat,Source
0,,yes,,,POINT (13.11807 48.67130),,,,,OSM
1,yes,,,,POINT (11.28332 47.94538),,,,,OSM
2,,yes,,,POINT (12.64437 47.87334),,,,,OSM
3,,,yes,,POINT (10.54386 49.66300),,,,,OSM
4,,,,,POINT (10.03098 48.42478),,,yes,,OSM
...,...,...,...,...,...,...,...,...,...,...
187,,1450 kW,,,POINT (11.88405 48.03504),,,,,OSM
188,,,,,POINT (11.98851 47.88139),,10 MW,,,OSM
189,,462 MW,,,POINT (11.06025 49.43830),,,,,OSM
190,yes,,,,POINT (13.24986 48.65868),,,,,OSM


In [37]:
save_path = 'data/sources'

# Create the target directory first so the export does not depend on it
# already existing (assumes to_file does not create missing folders — TODO confirm).
os.makedirs(save_path, exist_ok=True)

# NOTE(review): the recorded run shows a warning raised on the to_file line.
# The column names here are longer than 10 characters, so this is presumably
# the ESRI Shapefile field-name truncation; verify the column names in the
# written file before relying on them.
df_result.to_file(os.path.join(save_path, "results_osm_search.shp"), encoding='utf-8')

  df_result.to_file(os.path.join(save_path, "results_osm_search.shp"), encoding='utf-8')
