This notebook is for snapping of substations to lines

In [1]:
import os
import matplotlib.pyplot as plt 
import geopandas as gpd
#import geoplot
import pandas as pd
import numpy as np
from shapely.geometry import LineString
from shapely.geometry import Point

# Africa shape data

In [2]:
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
africa = world.query('continent == "Africa"')
nigeriaplot = world.query('name == "Nigeria"')
#world

# Load data

In [3]:
# pathg = os.path.realpath("data")+'/africa_all_generators.geojson' # Generators are not required in base_network
paths = os.path.realpath("data/clean")+'/africa_all_buses_clean.geojson'
pathl = os.path.realpath("data/clean")+'/africa_all_lines_clean.geojson'

# generators = gpd.read_file(pathg).set_crs(epsg=4326, inplace=True)
substations = gpd.read_file(paths).set_crs(epsg=4326, inplace=True)
lines = gpd.read_file(pathl).set_crs(epsg=4326, inplace=True)

In [4]:
# Filter only Nigeria
lines_ng = lines[lines.loc[:,"country"] == "nigeria"].copy()
substations_ng = substations[substations.loc[:,"country"] == "nigeria"].copy()

In [5]:
# Assign to every line a start and end point

lines_ng["bounds"] = lines_ng["geometry"].boundary # create start and end point
# splits into coordinates
lines_ng["bus0_lon"] = lines_ng["bounds"].bounds.iloc[:,0]
lines_ng["bus0_lat"] = lines_ng["bounds"].bounds.iloc[:,1]
lines_ng["bus1_lon"] = lines_ng["bounds"].bounds.iloc[:,2]
lines_ng["bus1_lat"] = lines_ng["bounds"].bounds.iloc[:,3]

lines_ng['bus_0_coors']=lines_ng["bounds"].apply(lambda mp: mp[0])
lines_ng['bus_1_coors']=lines_ng["bounds"].apply(lambda mp: mp[1])

In [6]:
#lines_ng[lines_ng["length"] > 10000].sort_values("length").describe()
#substations_ng

# Case 1: Create bus dataframe from line.csv (not using existing substation dataframe)

### Create bus_id for each line ending

In [7]:
bus_s = gpd.GeoDataFrame(columns = substations_ng.columns)
bus_e = gpd.GeoDataFrame(columns = substations_ng.columns)

In [8]:
# Read information from line.csv
bus_s[["voltage", "lon", "lat", "geometry", "country"]] = lines_ng[["voltage", "bus0_lon", "bus0_lat", "bus_0_coors", "country"]] # line start points
bus_e[["voltage", "lon", "lat", "geometry", "country"]] = lines_ng[["voltage", "bus1_lon", "bus1_lat", "bus_1_coors", "country"]] # line end points
bus_all = bus_s.append(bus_e).reset_index(drop=True)

In [9]:
# Assign index to bus_id 
bus_all.loc[:,"bus_id"] = bus_all.index
bus_all

Unnamed: 0,bus_id,station_id,voltage,dc,symbol,under_construction,tag_substation,tag_area,lon,lat,country,geometry
0,0,,330000,,,,,,4.578499,7.801703,nigeria,POINT (4.57850 7.80170)
1,1,,330000,,,,,,8.568362,7.626947,nigeria,POINT (8.56931 7.62724)
2,2,,330000,,,,,,6.831211,9.972919,nigeria,POINT (6.83446 9.97342)
3,3,,132000,,,,,,7.863588,8.805520,nigeria,POINT (7.86359 8.80552)
4,4,,330000,,,,,,5.591301,6.409519,nigeria,POINT (5.59130 6.48736)
...,...,...,...,...,...,...,...,...,...,...,...,...
895,895,,132000,,,,,,3.390599,7.112278,nigeria,POINT (3.39060 7.10501)
896,896,,330000,,,,,,8.338921,5.071783,nigeria,POINT (8.33888 5.07178)
897,897,,132000,,,,,,6.659265,6.209402,nigeria,POINT (6.65927 6.18680)
898,898,,132000,,,,,,7.497822,5.803749,nigeria,POINT (7.49782 5.73658)


### V1. Create station_id at same bus location

- We saw that buses are not connected exactly at one point, they are usually connected to a substation "area" (analysed on maps)
- Create station_id at exactly the same location might therefore be not always correct
- Though as you can see below, it might be still sometime the case. Examples are **station 4** (2 lines with the same voltage connect at the same point) and **station 23** (4 lines with two different voltages connect at the same point)

#TODO: Filter out the generator lines - defined as going from generator to the next station which is connected to a load. Excluding generator lines make proably sense because they are not transmission expansion relevant. For now we simplify and include generator lines.


In [10]:
# If same location/geometry make station
bus_all_v1 = bus_all
bus_all_v1["station_id"] = bus_all_v1.groupby(["lon","lat"]).ngroup()

## INFO, case nigeria
# bus_id 0:899
# station_id 0:791
# Means, only a few bus_id's at same location
bus_all_v1

Unnamed: 0,bus_id,station_id,voltage,dc,symbol,under_construction,tag_substation,tag_area,lon,lat,country,geometry
0,0,141,330000,,,,,,4.578499,7.801703,nigeria,POINT (4.57850 7.80170)
1,1,694,330000,,,,,,8.568362,7.626947,nigeria,POINT (8.56931 7.62724)
2,2,428,330000,,,,,,6.831211,9.972919,nigeria,POINT (6.83446 9.97342)
3,3,639,132000,,,,,,7.863588,8.805520,nigeria,POINT (7.86359 8.80552)
4,4,234,330000,,,,,,5.591301,6.409519,nigeria,POINT (5.59130 6.48736)
...,...,...,...,...,...,...,...,...,...,...,...,...
895,895,50,132000,,,,,,3.390599,7.112278,nigeria,POINT (3.39060 7.10501)
896,896,661,330000,,,,,,8.338921,5.071783,nigeria,POINT (8.33888 5.07178)
897,897,370,132000,,,,,,6.659265,6.209402,nigeria,POINT (6.65927 6.18680)
898,898,551,132000,,,,,,7.497822,5.803749,nigeria,POINT (7.49782 5.73658)


In [11]:
# Add station_id to line dataframe
n_row = int(bus_all.shape[0]/2)  # row length
lines_ng_v1 = lines_ng.reset_index(drop=True)
lines_ng_v1['bus0'] = bus_all.loc[0:(n_row-1), ["bus_id"]]
lines_ng_v1['bus1'] = bus_all.loc[(n_row):, ["bus_id"]].reset_index(drop=True)

In [12]:
# display(lines_ng_v1)
# display(bus_all_v1)

In [13]:
# Removing the NaN 
bus_all_v1["dc"] = "False"
bus_all_v1["symbol"] = "False"
bus_all_v1["under_construction"] = "False"
bus_all_v1["tag_substation"] = "False"
bus_all_v1["tag_area"] = "False"


In [14]:

## Generate Files for base_network (CSV) 

### lines
outputfile_partial = os.path.join(os.getcwd(), "data", "base_network", "africa_all" + "_lines" + "_build_network") # Output file directory

if not os.path.exists(outputfile_partial):
    os.makedirs(os.path.dirname(outputfile_partial), exist_ok=True) #  create clean directoryif not already exist

lines_ng_v1.to_csv(outputfile_partial + ".csv")  # Generate CSV


#### buses
outputfile_partial = os.path.join(os.getcwd(), "data", "base_network", "africa_all" + "_buses" + "_build_network") # Output file directory

if not os.path.exists(outputfile_partial):
    os.makedirs(os.path.dirname(outputfile_partial), exist_ok=True) #  create clean directoryif not already exist
    
bus_all_v1.to_csv(outputfile_partial + ".csv")  # Generate CSV

# Inference (Quick and Dirty) - No Guarantee

In [15]:
cdf = bus_all_v1.to_crs("EPSG:3857")

In [16]:
from itertools import combinations
series = cdf.geometry
max_distance = 100

points_within = []
for i1,i2 in combinations(range(0,series.shape[0]-1),2):  #Iterate over all pairs/combinations of indices
    if 0<series[i1].distance(series[i2])<=max_distance:
        points_within.append([i1,i2])

In [17]:
# points_within
for points in points_within:
    fi = points[0]
    si = points[1]
    bus_all_v1.at[si, 'station_id'] = bus_all_v1.iloc[fi]['station_id']


    

In [18]:
bus_all_v1

Unnamed: 0,bus_id,station_id,voltage,dc,symbol,under_construction,tag_substation,tag_area,lon,lat,country,geometry
0,0,141,330000,False,False,False,False,False,4.578499,7.801703,nigeria,POINT (4.57850 7.80170)
1,1,694,330000,False,False,False,False,False,8.568362,7.626947,nigeria,POINT (8.56931 7.62724)
2,2,428,330000,False,False,False,False,False,6.831211,9.972919,nigeria,POINT (6.83446 9.97342)
3,3,639,132000,False,False,False,False,False,7.863588,8.805520,nigeria,POINT (7.86359 8.80552)
4,4,234,330000,False,False,False,False,False,5.591301,6.409519,nigeria,POINT (5.59130 6.48736)
...,...,...,...,...,...,...,...,...,...,...,...,...
895,895,48,132000,False,False,False,False,False,3.390599,7.112278,nigeria,POINT (3.39060 7.10501)
896,896,654,330000,False,False,False,False,False,8.338921,5.071783,nigeria,POINT (8.33888 5.07178)
897,897,370,132000,False,False,False,False,False,6.659265,6.209402,nigeria,POINT (6.65927 6.18680)
898,898,551,132000,False,False,False,False,False,7.497822,5.803749,nigeria,POINT (7.49782 5.73658)


In [19]:
bus_all_v1.groupby(['station_id']).count()

Unnamed: 0_level_0,bus_id,voltage,dc,symbol,under_construction,tag_substation,tag_area,lon,lat,country,geometry
station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1,1,1,1,1,1,1,1,1,1,1
1,3,3,3,3,3,3,3,3,3,3,3
2,1,1,1,1,1,1,1,1,1,1,1
4,2,2,2,2,2,2,2,2,2,2,2
5,2,2,2,2,2,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...
786,2,2,2,2,2,2,2,2,2,2,2
787,3,3,3,3,3,3,3,3,3,3,3
788,2,2,2,2,2,2,2,2,2,2,2
789,1,1,1,1,1,1,1,1,1,1,1


# Inference (Probably a better method) - For the Archives

This method will be implemented once cleaning is completed. See method in Powermap for how it is supposed to be done in the correct way

In [20]:
x = cdf.buffer(100)
neighbours = cdf.intersection(x)
display(neighbours)
# print all the nearby points
# display(x)

0       POINT (509676.133 871177.890)
1       POINT (953930.792 851579.415)
2      POINT (760808.496 1115885.964)
3       POINT (875370.579 984107.633)
4       POINT (622420.747 723718.024)
                    ...              
895     POINT (377439.776 792961.319)
896     POINT (928279.920 565327.081)
897     POINT (741306.011 690053.790)
898     POINT (834653.683 639662.525)
899     POINT (509513.706 871175.508)
Length: 900, dtype: geometry