In [63]:
import pandas as pd
import networkx as nx
import numpy as np
import unicodedata
import pandas as pd
import requests
import geopandas as gpd
import osmnx as ox

pd.set_option('display.max_rows', 200)

In [6]:
df_grid = pd.read_excel('./jao_static_grid.xlsx', sheet_name='Lines', skiprows=[0])
df_grid = df_grid[[
    'NE_name', 'EIC_Code', 'TSO', 
    'Full_name', 'Full_name.1', 'Voltage_level(kV)', 
    'Resistance_R(Ω)', 'Reactance_X(Ω)', 'Susceptance_B(μS)', 'Length_(km)'
]]
df_grid = df_grid.rename(columns={
    'NE_name': 'name', 
    'EIC_Code': 'eic',
    'TSO': 'tso',
    'Voltage_level(kV)': 'voltage',
    'Resistance_R(Ω)': 'resistance',
    'Reactance_X(Ω)': 'reactance',
    'Susceptance_B(μS)': 'susceptance',
    'Length_(km)': 'length',
    'Full_name': 'substation_1',
    'Full_name.1': 'substation_2',
})

tso_zone_map = {
    '50HERTZ': 'DE', 
    'Amprion GmbH': 'DE', 
    'APG': 'AT', 
    'CEPS': 'CZ', 
    'Creos': 'DE', 
    'ELES': 'SI', 
    'ELIA': 'BE',
    'HOPS': 'HR', 
    'MAVIR': 'HU', 
    'PSE': 'PL', 
    'RTE': 'FR', 
    'SEPS': 'SK', 
    'TEL': 'RO', 
    'TENNET NL': 'NL',
    'TENNETGMBH': 'DE', 
    'TRANSNETBW': 'DE'
}

df_grid['zone'] = df_grid.apply(lambda row : tso_zone_map[row.tso], axis=1)

In [7]:
s1 = set(df_grid.substation_1.unique())
s1 = s1.union(df_grid.substation_2.unique())
substations = list(s1)

substation_zones = []
substation_tso = []
for s in substations:
    substation_zones.append(df_grid[(df_grid.substation_1 == s) | (df_grid.substation_2 == s)].iloc[0].zone)
    substation_tso.append(df_grid[(df_grid.substation_1 == s) | (df_grid.substation_2 == s)].iloc[0].tso)

df_jao_substations = pd.DataFrame(list(zip(substations.copy(), substation_zones, substation_tso)), columns =['name', 'zone', 'tso'])

substation_location_map = {}
for s in substations:
    substation_location_map[s] = None

In [8]:
df_scigrid_substations = pd.read_csv('vertices_eu_power_160718.csvdata', delimiter=',', quotechar="'")
df_scigrid_substations = df_scigrid_substations[df_scigrid_substations.typ == 'substation']
df_scigrid_substations = df_scigrid_substations.dropna(subset=['name'])
df_scigrid_substations = df_scigrid_substations[[
    'lon', 'lat', 'name', 'operator'
]]

In [9]:
scigrid_substations = df_scigrid_substations.name
scigrid_substations = list(map(str.lower, scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace('poste électrique de ', ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace('poste électrique du ', ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace('poste électrique des', ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("poste électrique d'", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace('poste de ', ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("poste d'", ''), scigrid_substations))

scigrid_substations = list(map(lambda s: s.replace("150 kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("220 kv-umspannwerk ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("220-kv-umspannwerk ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("220kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("380 kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("380-kv-umspannwerk ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("380kv-umspannwerk ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("400 kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("400kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("380kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("400/132 kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("400/132/50 kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("400/150 kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("400/150kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("400kv/150kv ", ''), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("ß", 'ss'), scigrid_substations))

scigrid_substations = list(map(lambda s: s.replace("á", 'a'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("é", 'e'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("í", 'i'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("ó", 'o'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("ö", 'o'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("ő", 'o'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("ú", 'u'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("ü", 'u'), scigrid_substations))
scigrid_substations = list(map(lambda s: s.replace("ű", 'u'), scigrid_substations))

scigrid_substations = list(map(lambda s: unicodedata.normalize('NFKD', s).encode('ascii','ignore').decode("utf-8"), scigrid_substations))

In [10]:
def replace_tail_space(s):
    if s[-1] == ' ':
        s = s[0:-1]
    if s[-1] == ' ':
        s = s[0:-1]
    if s[0] == ' ':
        s = s[1:]
    return s
        
substations = list(map(str.lower, substations))
substations = list(map(lambda s: s.replace(' (le)', ''), substations))
substations = list(map(lambda s: s.replace(' (la)', ''), substations))
substations = list(map(lambda s: s.replace(' (les)', ''), substations))
substations = list(map(lambda s: s.replace(' (les)', ''), substations))

substations = list(map(lambda s: s.replace(' (l )', ''), substations))
substations = list(map(lambda s: s.replace(' (poste)', ''), substations))
substations = list(map(lambda s: s.replace(' (poste 400 kv)', ''), substations))
substations = list(map(lambda s: s.replace(' (poste reseau)', ''), substations))
substations = list(map(lambda s: s.replace(' (poste blinde)', ''), substations))
substations = list(map(lambda s: s.replace(' (abzweig)', ''), substations))
substations = list(map(lambda s: s.replace(' (poste exterieur)', ''), substations))
substations = list(map(lambda s: s.replace(' (poste evacuation)', ''), substations))

substations = list(map(lambda s: s.replace("ß", 'ss'), substations))
substations = list(map(lambda s: s.replace("á", 'a'), substations))
substations = list(map(lambda s: s.replace("é", 'e'), substations))
substations = list(map(lambda s: s.replace("í", 'i'), substations))
substations = list(map(lambda s: s.replace("ó", 'o'), substations))
substations = list(map(lambda s: s.replace("ö", 'o'), substations))
substations = list(map(lambda s: s.replace("ő", 'o'), substations))
substations = list(map(lambda s: s.replace("ú", 'u'), substations))
substations = list(map(lambda s: s.replace("ü", 'u'), substations))
substations = list(map(lambda s: s.replace("ű", 'u'), substations))

substations = list(map(lambda s: s.replace("aftakking ", ''), substations))
substations = list(map(lambda s: s.replace(" 380kv", ''), substations))
substations = list(map(lambda s: s.replace(" 220kv", ''), substations))
substations = list(map(lambda s: s.replace("y-", ''), substations))

substations = list(map(lambda s: s.replace("ue", 'u'), substations))
substations = list(map(lambda s: s.replace("oe", 'o'), substations))
substations = list(map(lambda s: s.replace("ae", 'a'), substations))
substations = list(map(lambda s: s.replace("/nord", ''), substations))
substations = list(map(lambda s: s.replace("/sud", ''), substations))
substations = list(map(lambda s: s.replace("/west", ''), substations))
substations = list(map(lambda s: s.replace("/n", ''), substations))
substations = list(map(lambda s: s.replace("/w", ''), substations))
substations = list(map(lambda s: s.replace("/sw", ''), substations))
substations = list(map(lambda s: s.replace("/s", ''), substations))
substations = list(map(lambda s: s.replace("/o", ''), substations))
substations = list(map(lambda s: s.replace("  2", ''), substations))
substations = list(map(lambda s: s.replace(" 1", ''), substations))
substations = list(map(lambda s: s.replace(" 2", ''), substations))
substations = list(map(lambda s: s.replace("st ", ''), substations))

substations = list(map(lambda s: s.replace("emden/borssum", 'borssum'), substations))
substations = list(map(lambda s: s.replace("hamburgst", 'hamburg'), substations))
substations = list(map(lambda s: s.replace("karlsruhewest", 'karlsruhe'), substations))
substations = list(map(lambda s: s.replace("bruchsal/kandelweg", 'bruchsal'), substations))
substations = list(map(lambda s: s.replace("perleberg - putlitz", 'perleberg'), substations))
substations = list(map(lambda s: s.replace("remptendorf - grossschwabhausen", 'remptendorf'), substations))
substations = list(map(lambda s: s.replace("(neu)", ''), substations))

substations = list(map(lambda s: replace_tail_space(s), substations))

substations = list(map(lambda s: unicodedata.normalize('NFKD', s).encode('ascii','ignore').decode("utf-8"), substations))

In [11]:
s2 = set(scigrid_substations)
s2 = s2.intersection(substations)
print(len(s2))

229


In [12]:
set_scigrid = set(scigrid_substations)
set_jao = set(substations)
intersect = s2

scigrid_sub = set_scigrid.difference(intersect)
jao_sub = set_jao.difference(intersect)

matching_scigrid = []
matching_jao = []
for jao_s in jao_sub:
    match = [s for s in scigrid_sub if jao_s in s]
    if len(match) > 0:
        matching_scigrid.append(match[0])
        matching_jao.append(jao_s)
        
scigrid_sub = set(scigrid_sub).difference(matching_scigrid)
jao_sub = set(jao_sub).difference(matching_jao)

print(len(matching_scigrid))

395


In [13]:
def exact_match_scigrid(row, column):
    match = df_scigrid_substations[df_scigrid_substations['name_norm'] == row['name_norm']]
    if len(match) > 0:
        return match.iloc[0][column]
    return None

def loose_match_scigrid(row, column):
    if np.isnan(row[column]):
        if row['name_norm'] in matching_jao:
            i = matching_jao.index(row['name_norm'])
            match = df_scigrid_substations[df_scigrid_substations['name_norm'] == matching_scigrid[i]]
            return match.iloc[0][column]
        else:
            return None
    return row[column]
    
    
df_scigrid_substations.loc[:, 'name_norm'] = scigrid_substations
df_jao_substations.loc[:, 'name_norm'] = substations

df_jao_substations['lat'] = df_jao_substations.apply(lambda row: exact_match_scigrid(row, 'lat'), axis=1)
df_jao_substations['lon'] = df_jao_substations.apply(lambda row: exact_match_scigrid(row, 'lon'), axis=1)

df_jao_substations['lat'] = df_jao_substations.apply(lambda row: loose_match_scigrid(row, 'lat'), axis=1)
df_jao_substations['lon'] = df_jao_substations.apply(lambda row: loose_match_scigrid(row, 'lon'), axis=1)

In [14]:
# France
# https://odre.opendatasoft.com/explore/dataset/enceintes-de-poste-rte/export/?disjunctive.etat
df_missing_jao = df_jao_substations[df_jao_substations.lat.isnull()]

def exact_match_rte(row, column):
    if np.isnan(row[column]):
        match = df_rte[df_rte['NOM POSTE'] == row['name']]
        if len(match) > 0:
            return match.iloc[0][column]
        return None
    return row[column]

df_jao_france = df_missing_jao[df_missing_jao.zone == 'FR']
df_rte = pd.read_excel('enceintes-de-poste-rte.xlsx')
df_rte = df_rte.dropna(subset=['geo_point_2d'])

# exact match
s_fr_intersect = set(df_jao_france.name)
s_fr_intersect = s_fr_intersect.intersection(df_rte['NOM POSTE'])

df_rte['lat'] = df_rte.apply(lambda row: float(str(row['geo_point_2d']).split(', ')[0]), axis=1)
df_rte['lon'] = df_rte.apply(lambda row: float(str(row['geo_point_2d']).split(',')[1]), axis=1)

df_jao_substations['lat'] = df_jao_substations.apply(lambda row: exact_match_rte(row, 'lat'), axis=1)
df_jao_substations['lon'] = df_jao_substations.apply(lambda row: exact_match_rte(row, 'lon'), axis=1)

In [15]:
df_missing_jao = df_jao_substations[df_jao_substations.lat.isnull()]
display(df_missing_jao.groupby(['zone'])['zone'].count())

zone
AT    27
BE    67
CZ     3
DE    87
FR    13
HR     9
HU    13
NL    34
PL    35
RO    99
SI     5
SK    25
Name: zone, dtype: int64

In [16]:
substations_to_skip = ['gkmb', 'offshore windpark', 'romsee sncb', 'roman nord', 'gkn2', 'roo', 'rdk7', 'rdk8', ]
df_missing_jao_osm = df_missing_jao[~df_missing_jao.name_norm.isin(substations_to_skip)]
df_missing_jao_osm.loc[:, 'osm_find'] = ''

display(df_missing_jao_osm)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_missing_jao_osm.loc[:, 'osm_find'] = ''


Unnamed: 0,name,zone,tso,name_norm,lat,lon,osm_find
2,OLSZTYN-MATK,PL,PSE,olsztyn-matk,,,
3,Eindhoven,NL,TENNET NL,eindhoven,,,
4,Slatina,RO,TEL,slatina,,,
7,Cierny Vah,SK,SEPS,cierny vah,,,
13,JUPILLE,BE,ELIA,jupille,,,
...,...,...,...,...,...,...,...
1480,BERTRANGE,DE,Creos,bertrange,,,
1483,Schwoerstadt,DE,TRANSNETBW,schworstadt,,,
1490,Fantanele,RO,TEL,fantanele,,,
1491,DUVIGNEAU,FR,RTE,duvigneau,,,


In [23]:
for index, row in df_missing_jao_osm.iterrows():
    try:
        osm_find = ox.geocode_to_gdf(row['name_norm'] + ', ' + row['zone'])
        if len(osm_find) > 0:
            df_missing_jao_osm.loc[index, 'osm_find'] = osm_find.loc[0, 'display_name']
            df_missing_jao_osm.loc[index, 'lat'] = osm_find.loc[0, 'lat']
            df_missing_jao_osm.loc[index, 'lon'] = osm_find.loc[0, 'lon']
    except ValueError as ve:
        pass
    
display(df_missing_jao_osm)

Unnamed: 0,name,zone,tso,name_norm,lat,lon,osm_find
2,OLSZTYN-MATK,PL,PSE,olsztyn-matk,,,
3,Eindhoven,NL,TENNET NL,eindhoven,51.439265,5.478633,"Eindhoven, North Brabant, Netherlands"
4,Slatina,RO,TEL,slatina,44.429997,24.363184,"Slatina, Olt, Romania"
7,Cierny Vah,SK,SEPS,cierny vah,,,
13,JUPILLE,BE,ELIA,jupille,,,
20,ALEGRO CONVERTOR STATION,BE,ELIA,alegro convertor station,,,
27,Salzburg Elixhausen,AT,APG,salzburg elixhausen,47.861606,13.06207,"Evangelische Pfarrgemeinde A und HB Salzburg, ..."
28,Lacu Sarat,RO,TEL,lacu sarat,45.221858,28.180003,"Lacu Sărat, Măcin, Tulcea, Romania"
32,MAERLANT,BE,ELIA,marlant,,,
35,Eemshaven het Hogeland,NL,TENNET NL,eemshaven het hogeland,53.448474,6.849963,"Eemshaven, Het Hogeland, Groningen, Netherlands"


In [24]:
#backup
df_missing_jao_osm.to_excel("osm_backup_1.xlsx")

In [31]:
# with google maps
locations_to_double_check = [248, 347, 383, 390, 563]

df_missing_jao_osm.loc[248, 'lat'] = 51.3868123
df_missing_jao_osm.loc[248, 'lon'] = 6.6699285

df_missing_jao_osm.loc[347, 'lat'] = 52.2147341
df_missing_jao_osm.loc[347, 'lon'] = 19.1666693

df_missing_jao_osm.loc[383, 'lat'] = 50.1408201
df_missing_jao_osm.loc[383, 'lon'] = 14.779388

df_missing_jao_osm.loc[390, 'lat'] = 49.7795834
df_missing_jao_osm.loc[390, 'lon'] = 19.2098177

df_missing_jao_osm.loc[563, 'lat'] = 48.2456958
df_missing_jao_osm.loc[563, 'lon'] = 19.7546662

for index, row in df_missing_jao_osm.iterrows():
        df_jao_substations.loc[index, 'lat'] = row['lat']
        df_jao_substations.loc[index, 'lon'] = row['lon']

In [37]:
# fill in skipped substations
# google maps
# https://www.4coffshore.com/windfarms/belgium/seamade-(mermaid)-belgium-be07.html
# https://www.transnetbw.de/en/energy-market/congestion-management/congestion

df_jao_substations.loc[189, 'lat'] = 49.0150679
df_jao_substations.loc[189, 'lon'] = 8.3122858

df_jao_substations.loc[281, 'lat'] = 49.7696621
df_jao_substations.loc[281, 'lon'] = 6.1017144

df_jao_substations.loc[296, 'lat'] = 46.96116
df_jao_substations.loc[296, 'lon'] = 26.92647

df_jao_substations.loc[341, 'lat'] = 49.0157183
df_jao_substations.loc[341, 'lon'] = 8.1224769

df_jao_substations.loc[690, 'lat'] = 49.0157183
df_jao_substations.loc[690, 'lon'] = 8.1224769

df_jao_substations.loc[953, 'lat'] = 49.4445287
df_jao_substations.loc[953, 'lon'] = 8.4973346

df_jao_substations.loc[992, 'lat'] = 50.6106292
df_jao_substations.loc[992, 'lon'] = 5.6286262

df_jao_substations.loc[1346, 'lat'] = 51.680497
df_jao_substations.loc[1346, 'lon'] = 2.885694

In [41]:
df_missing_manual = df_jao_substations[df_jao_substations.lat.isnull()]

display(df_missing_manual)

Unnamed: 0,name,zone,tso,name_norm,lat,lon
2,OLSZTYN-MATK,PL,PSE,olsztyn-matk,,
7,Cierny Vah,SK,SEPS,cierny vah,,
13,JUPILLE,BE,ELIA,jupille,,
20,ALEGRO CONVERTOR STATION,BE,ELIA,alegro convertor station,,
32,MAERLANT,BE,ELIA,marlant,,
...,...,...,...,...,...,...
1444,V. Kapusany,SK,SEPS,v. kapusany,,
1469,PIQUAGE A PLUVIGNER,FR,RTE,piquage a pluvigner,,
1470,Craiova Nord,RO,TEL,craiova nord,,
1480,BERTRANGE,DE,Creos,bertrange,,


In [None]:
ox.settings.timeout = 3

for index, row in df_missing_manual.iterrows():
    try:
        tags = {
            'power_source': True,
            'substation': True,
            'electricity': True,
        }
        osm_find = ox.geometries_from_address(row['name'] + ', ' + row['zone'], tags)
        #osm_find = ox.overpass_request(query_data, pause=3, error_pause=3)
        if len(osm_find) > 0:
            display(osm_find.iloc[0])
            df_missing_manual.loc[index, 'osm_find'] = osm_find.iloc[0, 'name']
            df_missing_manual.loc[index, 'osmid'] = osm_find.iloc[0, 'osmid']
        else:
            print('Not found: ', row['name'], ', ', index)
    except ValueError as ve:
        print('Value error, ', row['name'], ', ', index)
    except requests.exceptions.RequestException as rt:
        print('Timeout, ', row['name'], ', ', index)

In [None]:
display(df_missing_manual)