# **Import library**

In [7]:
import geopandas as gpd
import pandas as pd
import osmnx as ox
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
import openpyxl

%matplotlib inline
# plt.style.use("seaborn")
plt.rcParams["figure.figsize"] = (16, 8)

In [8]:
import warnings
warnings.simplefilter("ignore")

# ***Data Preparation***

## **CSV Handling**

In [9]:
# Training table of zone-to-zone trips. Columns: sourceid, dstid, dow, mean_travel_time.
# NOTE(review): the unit of mean_travel_time is not stated in this notebook --
# presumably seconds; confirm against the dataset description.
df = pd.read_excel("../MEC_dataset/training_WeeklyAggregate.xlsx")
df.head()

Unnamed: 0,sourceid,dstid,dow,mean_travel_time
0,10,241,3,2334.43
1,10,612,5,1529.83
2,10,905,4,1390.04
3,10,407,7,157.91
4,10,603,4,1781.67


In [10]:
# Population density per local authority (la_name); merged onto the zone table
# later via the shared la_name column.
pop = pd.read_excel("../MEC_dataset/population_density.xlsx")
pop.head()

Unnamed: 0,la_name,population_density
0,Hillingdon,2671
1,Newham,9808
2,Lambeth,11998
3,Richmond upon Thames,3451
4,Southwark,11091


In [11]:
df.columns

Index(['sourceid', 'dstid', 'dow', 'mean_travel_time'], dtype='object')

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9830 entries, 0 to 9829
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   sourceid          9830 non-null   int64  
 1   dstid             9830 non-null   int64  
 2   dow               9830 non-null   int64  
 3   mean_travel_time  9830 non-null   float64
dtypes: float64(1), int64(3)
memory usage: 307.3 KB


In [13]:
df.describe()

Unnamed: 0,sourceid,dstid,dow,mean_travel_time
count,9830.0,9830.0,9830.0,9830.0
mean,491.0,490.849542,4.112818,1621.815974
std,283.781945,285.434568,1.999946,747.806487
min,0.0,0.0,1.0,86.42
25%,245.0,248.25,2.0,1064.5925
50%,491.0,479.0,4.0,1570.71
75%,737.0,750.0,6.0,2122.1425
max,982.0,982.0,7.0,4789.0


In [14]:
df.nunique()

sourceid             983
dstid                980
dow                    7
mean_travel_time    9638
dtype: int64

## **GeoJson Handling**

In [15]:
# Zone polygons with their attributes; MOVEMENT_ID is the key that the
# sourceid/dstid columns of the travel-time tables are joined on further down.
london = gpd.read_file("../MEC_dataset/london.json")

In [16]:
london.head()

Unnamed: 0,msoa_code,msoa_name,la_code,la_name,geoeast,geonorth,popeast,popnorth,area_km2,MOVEMENT_ID,DISPLAY_NAME,geometry
0,E02000508,Hillingdon 015,00AS,Hillingdon,506163,183536,505978,183811,2.7466,0,"Hillingdon, 00AS (0)","MULTIPOLYGON (((-0.47794 51.55485, -0.47665 51..."
1,E02000716,Newham 003,00BB,Newham,541978,186009,541870,185568,1.56517,1,"Newham, 00BB (1)","MULTIPOLYGON (((0.05255 51.56171, 0.05310 51.5..."
2,E02000747,Newham 034,00BB,Newham,539578,181317,539891,181438,2.08241,2,"Newham, 00BB (2)","MULTIPOLYGON (((0.01001 51.52181, 0.01003 51.5..."
3,E02000748,Newham 035,00BB,Newham,542500,181152,542439,181339,1.33175,3,"Newham, 00BB (3)","MULTIPOLYGON (((0.05392 51.51611, 0.05174 51.5..."
4,E02000749,Newham 036,00BB,Newham,541047,181103,540847,181294,1.41902,4,"Newham, 00BB (4)","MULTIPOLYGON (((0.03241 51.51704, 0.03179 51.5..."


In [17]:
london.nunique()

msoa_code       983
msoa_name       983
la_code          33
la_name          33
geoeast         970
geonorth        971
popeast         974
popnorth        970
area_km2        982
MOVEMENT_ID     983
DISPLAY_NAME    983
geometry        983
dtype: int64

In [18]:
london.crs
#"EPSG: 27700" Projected

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [19]:
london.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 983 entries, 0 to 982
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   msoa_code     983 non-null    object  
 1   msoa_name     983 non-null    object  
 2   la_code       983 non-null    object  
 3   la_name       983 non-null    object  
 4   geoeast       983 non-null    int64   
 5   geonorth      983 non-null    int64   
 6   popeast       983 non-null    int64   
 7   popnorth      983 non-null    int64   
 8   area_km2      983 non-null    float64 
 9   MOVEMENT_ID   983 non-null    object  
 10  DISPLAY_NAME  983 non-null    object  
 11  geometry      983 non-null    geometry
dtypes: float64(1), geometry(1), int64(4), object(6)
memory usage: 92.3+ KB


In [20]:
# MOVEMENT_ID is read from the GeoJSON as a string (object dtype); cast it so it can
# be joined against the integer sourceid/dstid columns of the travel-time tables.
london["MOVEMENT_ID"] = london["MOVEMENT_ID"].astype("int64")

# Centroids computed directly on a geographic CRS (EPSG:4326, degrees) are only
# approximate, and GeoPandas warns about it -- a warning this notebook silences.
# Compute them on the British National Grid (EPSG:27700) instead, then convert
# the resulting points back to the layer's own CRS so x/y remain lon/lat.
zone_centroids = london.geometry.to_crs("EPSG:27700").centroid.to_crs(london.crs)
london["x"] = zone_centroids.x
london["y"] = zone_centroids.y

In [21]:
london.head()

Unnamed: 0,msoa_code,msoa_name,la_code,la_name,geoeast,geonorth,popeast,popnorth,area_km2,MOVEMENT_ID,DISPLAY_NAME,geometry,x,y
0,E02000508,Hillingdon 015,00AS,Hillingdon,506163,183536,505978,183811,2.7466,0,"Hillingdon, 00AS (0)","MULTIPOLYGON (((-0.47794 51.55485, -0.47665 51...",-0.470593,51.540762
1,E02000716,Newham 003,00BB,Newham,541978,186009,541870,185568,1.56517,1,"Newham, 00BB (1)","MULTIPOLYGON (((0.05255 51.56171, 0.05310 51.5...",0.046567,51.555122
2,E02000747,Newham 034,00BB,Newham,539578,181317,539891,181438,2.08241,2,"Newham, 00BB (2)","MULTIPOLYGON (((0.01001 51.52181, 0.01003 51.5...",0.010071,51.513538
3,E02000748,Newham 035,00BB,Newham,542500,181152,542439,181339,1.33175,3,"Newham, 00BB (3)","MULTIPOLYGON (((0.05392 51.51611, 0.05174 51.5...",0.052078,51.511347
4,E02000749,Newham 036,00BB,Newham,541047,181103,540847,181294,1.41902,4,"Newham, 00BB (4)","MULTIPOLYGON (((0.03241 51.51704, 0.03179 51.5...",0.03118,51.51123


In [22]:
# plt.figure(figsize=(40,40),dpi=)
# ax = london.plot()
# london.apply(lambda x: ax.annotate(text=x.MOVEMENT_ID, xy=x.geometry.centroid.coords[0], ha="center"), axis=1)
# plt.show()

In [23]:
# Reprojected copy of the zones (EPSG:27700), used only for the quick plot below.
# NOTE(review): the name `ldn` is reused later for a different, geometry-free table.
ldn = london.to_crs("EPSG: 27700")
#"EPSG: 27700"

In [None]:
ldn.plot()

# ***OpenStreetMap (OSMnx) Initialization***

https://osmnx.readthedocs.io/en/stable/index.html

## Load saved geopackage

In [None]:
# fp = "https://github.com/mystique1011/ML_competition/blob/main/MEC_dataset/london.gpkg?raw=true"
# gdf_nodes = gpd.read_file(fp, layer="nodes").set_index("osmid")
# gdf_edges = gpd.read_file(fp, layer="edges").set_index(["u", "v", "key"])
# assert gdf_nodes.index.is_unique and gdf_edges.index.is_unique

# # convert the node/edge GeoDataFrames to a MultiDiGraph
# graph_attrs = {"crs": "epsg:4326", "simplified": False}
# G = ox.graph_from_gdfs(gdf_nodes, gdf_edges, graph_attrs)

## Download nodes from OpenStreetMap

In [24]:
# Download the drivable street network for London from OpenStreetMap.
# NOTE(review): this fetches live data, so G can differ between runs; routes that
# were pickled in an earlier session may then reference nodes/edges that are no
# longer in G. Consider loading the saved geopackage instead -- confirm.
G = ox.graph_from_place("London, England", network_type = "drive")

# fig, ax = ox.plot_graph(G)

In [None]:
# ox.io.save_graph_geopackage(G,"../MEC_dataset/london.gpkg")

In [None]:
# edge_centrality = nx.closeness_centrality(nx.line_graph(G))
# nx.set_edge_attributes(G, edge_centrality, "edge_centrality")
# ec = ox.plot.get_edge_colors_by_attr(G, "edge_centrality", cmap="inferno")
# fig, ax = ox.plot_graph(G, edge_color=ec, edge_linewidth=2, node_size=0)

## Important OSMnx Functions

In [25]:
# Prepare the graph for routing. Order matters: travel times are derived after
# speeds have been attached to the edges.
G = ox.utils_graph.remove_isolated_nodes(G)
# Adds the per-edge "speed_kph" attribute that the feature cells read later.
G = ox.speed.add_edge_speeds(G)
# Adds the per-edge "travel_time" attribute, used as a routing weight and as a feature.
G = ox.speed.add_edge_travel_times(G)

In [None]:
# def get_nodes(long,lat,edges):
#     if edges == "edges":
#         return ox.distance.nearest_edges(G, X=long, Y=lat)
#     else:
#         return ox.distance.nearest_nodes(G, X=long, Y=lat)

In [26]:
def get_route(dataframe, attr="nodes", weight="length", cpus=1):
    """Route every row of ``dataframe`` from its source node to its destination node on ``G``.

    Parameters
    ----------
    dataframe : table holding the origin/destination node-id columns.
    attr : "nodes" routes between the nearest-node columns
        (``node_id_n_src`` / ``node_id_n_dst``); any other value routes between
        the nearest-edge columns (``node_id_e_src`` / ``node_id_e_dst``).
    weight : edge attribute that the shortest path minimises.
    cpus : forwarded unchanged to ``ox.shortest_path``.

    Returns
    -------
    Whatever ``ox.shortest_path`` returns for the two node-id columns.
    NOTE(review): callers test the result with ``isna()``, so unreachable pairs
    presumably come back as ``None`` -- confirm against the OSMnx version in use.
    """
    if attr == "nodes":
        origin_col, target_col = "node_id_n_src", "node_id_n_dst"
    else:
        origin_col, target_col = "node_id_e_src", "node_id_e_dst"

    return ox.shortest_path(
        G,
        dataframe[origin_col],
        dataframe[target_col],
        weight=weight,
        cpus=cpus,
    )

In [45]:
def get_attr(route, feature=None):
    """Return the values of edge attribute ``feature`` along ``route`` in the graph ``G``.

    Thin wrapper around ``ox.utils_graph.get_route_edge_attributes``; ``route``
    and ``feature`` are passed through unchanged.
    """
    edge_values = ox.utils_graph.get_route_edge_attributes(G, route, feature)
    return edge_values

In [28]:
def get_attr_count(route, attribute):
    """Collect ``attribute`` from the edges of ``route`` that actually carry it.

    For each consecutive node pair the shortest (by "length") of the parallel
    edges in ``G`` is inspected; edges without ``attribute`` are skipped.
    Despite the name, the function returns the list of values, not a count --
    callers take ``len()`` of the result.
    """
    found = []
    for u, v in zip(route, route[1:]):
        parallel_edges = G.get_edge_data(u, v).values()
        shortest_edge = min(parallel_edges, key=lambda edge: edge["length"])
        if attribute in shortest_edge:
            found.append(shortest_edge[attribute])
    return found

## Extract

In [29]:
# Get osmid from London.json
london["node_id_n"] = ox.distance.nearest_nodes(G,london.centroid.x,london.centroid.y)
london[["node_id_e","nodes_id_2","to_drop"]] = ox.distance.nearest_edges(G,london.centroid.x,london.centroid.y)

# ***Feature Extraction***

## **Get important features from geospatial data**

In [30]:
pop.head()

Unnamed: 0,la_name,population_density
0,Hillingdon,2671
1,Newham,9808
2,Lambeth,11998
3,Richmond upon Thames,3451
4,Southwark,11091


In [31]:
# Attach the population density of each zone's local authority.
# The cell overwrites `london`, so it should be run once per session.
london = london.merge(pop, on="la_name", how="left")

In [32]:
london.head()

Unnamed: 0,msoa_code,msoa_name,la_code,la_name,geoeast,geonorth,popeast,popnorth,area_km2,MOVEMENT_ID,DISPLAY_NAME,geometry,x,y,node_id_n,node_id_e,nodes_id_2,to_drop,population_density
0,E02000508,Hillingdon 015,00AS,Hillingdon,506163,183536,505978,183811,2.7466,0,"Hillingdon, 00AS (0)","MULTIPOLYGON (((-0.47794 51.55485, -0.47665 51...",-0.470593,51.540762,6271776012,6307891023,7857342724,0,2671
1,E02000716,Newham 003,00BB,Newham,541978,186009,541870,185568,1.56517,1,"Newham, 00BB (1)","MULTIPOLYGON (((0.05255 51.56171, 0.05310 51.5...",0.046567,51.555122,6331732604,32636604,32636592,0,9808
2,E02000747,Newham 034,00BB,Newham,539578,181317,539891,181438,2.08241,2,"Newham, 00BB (2)","MULTIPOLYGON (((0.01001 51.52181, 0.01003 51.5...",0.010071,51.513538,662105375,662105375,662105365,0,9808
3,E02000748,Newham 035,00BB,Newham,542500,181152,542439,181339,1.33175,3,"Newham, 00BB (3)","MULTIPOLYGON (((0.05392 51.51611, 0.05174 51.5...",0.052078,51.511347,666992158,667024057,526010386,0,9808
4,E02000749,Newham 036,00BB,Newham,541047,181103,540847,181294,1.41902,4,"Newham, 00BB (4)","MULTIPOLYGON (((0.03241 51.51704, 0.03179 51.5...",0.03118,51.51123,282590709,135551775,283249668,0,9808


In [33]:
# Join the zone attributes onto every trip twice: once for the source zone and
# once for the destination zone. The cell overwrites `df`, so re-running it on
# the already-merged table will not reproduce the same result.
zone_only_columns = ["DISPLAY_NAME", "geometry", "msoa_code", "msoa_name", "la_code", "nodes_id_2", "to_drop"]
ldn = london.drop(columns=zone_only_columns)

# "msoa_code" is listed in both maps although it was dropped above; it is a no-op.
src_names = {
    "geoeast": "geoeast_src", "geonorth": "geonorth_src",
    "popeast": "popeast_src", "popnorth": "popnorth_src",
    "la_name": "la_name_src", "area_km2": "area_src", "msoa_code": "msoa_code_src",
    "x": "src_x", "y": "src_y",
    "node_id_n": "node_id_n_src", "node_id_e": "node_id_e_src",
    "population_density": "population_density_src",
}
dst_names = {
    "geoeast": "geoeast_dst", "geonorth": "geonorth_dst",
    "popeast": "popeast_dst", "popnorth": "popnorth_dst",
    "la_name": "la_name_dst", "area_km2": "area_dst", "msoa_code": "msoa_code_dst",
    "x": "dst_x", "y": "dst_y",
    "node_id_n": "node_id_n_dst", "node_id_e": "node_id_e_dst",
    "population_density": "population_density_dst",
}

src = df.merge(ldn, left_on="sourceid", right_on="MOVEMENT_ID", how="left").rename(columns=src_names)
dst = src.merge(ldn, left_on="dstid", right_on="MOVEMENT_ID", how="left").rename(columns=dst_names)

# The join key appears once per merge and is suffixed _x/_y by the second merge.
df = dst.drop(columns=["MOVEMENT_ID_x", "MOVEMENT_ID_y"])

df.head()

Unnamed: 0,sourceid,dstid,dow,mean_travel_time,la_name_src,geoeast_src,geonorth_src,popeast_src,popnorth_src,area_src,...,geoeast_dst,geonorth_dst,popeast_dst,popnorth_dst,area_dst,dst_x,dst_y,node_id_n_dst,node_id_e_dst,population_density_dst
0,10,241,3,2334.43,Newham,542413,182380,542450,182415,0.790802,...,530851,174285,530876,174289,0.584104,-0.118199,51.452418,33776678,33776696,11998
1,10,612,5,1529.83,Newham,542413,182380,542450,182415,0.790802,...,551347,192721,550534,191931,6.27943,0.184471,51.61299,1138048373,25760609,2320
2,10,905,4,1390.04,Newham,542413,182380,542450,182415,0.790802,...,535060,178641,535009,178663,0.662274,-0.056014,51.490606,104397683,270932211,11091
3,10,407,7,157.91,Newham,542413,182380,542450,182415,0.790802,...,544366,183515,544543,183531,0.796626,0.079911,51.532079,180624488,180624488,5933
4,10,603,4,1781.67,Newham,542413,182380,542450,182415,0.790802,...,532416,197315,532432,197391,1.28299,-0.087059,51.659039,11377862,11377862,4127


## **Calculate Displacement**

In [34]:
# Straight-line distance between source and destination, once from the geo*
# coordinates and once from the pop* coordinates.
# NOTE(review): coordinates look like British National Grid eastings/northings,
# so the result is presumably in metres -- confirm.
geo_src = df[["geoeast_src", "geonorth_src"]].to_numpy()
geo_dst = df[["geoeast_dst", "geonorth_dst"]].to_numpy()
df["geo_displacement"] = np.linalg.norm(geo_src - geo_dst, axis=1)

pop_src = df[["popeast_src", "popnorth_src"]].to_numpy()
pop_dst = df[["popeast_dst", "popnorth_dst"]].to_numpy()
df["pop_displacement"] = np.linalg.norm(pop_src - pop_dst, axis=1)

In [35]:
# Log and square-root transforms of the geo displacement; the +1 keeps the log
# finite when source and destination coincide (displacement 0).
geo_disp = df["geo_displacement"]
df["geo_displacement_log"] = np.log(geo_disp.values + 1)
df["geo_displacement_sqrt"] = np.sqrt(geo_disp)

## **Calculate Direction**

In [37]:
# Direction of travel in degrees: arctan2(east offset, north offset), i.e. the
# angle measured from grid north towards east, in the range (-180, 180].
north_delta = df["geonorth_dst"].to_numpy() - df["geonorth_src"].to_numpy()
east_delta = df["geoeast_dst"].to_numpy() - df["geoeast_src"].to_numpy()
df["direction"] = np.degrees(np.arctan2(east_delta, north_delta))

In [38]:
# dirs = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
# df["direction"] = np.round(direction / (360 / len(dirs)))

# **OSMnx features**

In [39]:
df.head()

Unnamed: 0,sourceid,dstid,dow,mean_travel_time,la_name_src,geoeast_src,geonorth_src,popeast_src,popnorth_src,area_src,...,dst_x,dst_y,node_id_n_dst,node_id_e_dst,population_density_dst,geo_displacement,pop_displacement,geo_displacement_log,geo_displacement_sqrt,direction
0,10,241,3,2334.43,Newham,542413,182380,542450,182415,0.790802,...,-0.118199,51.452418,33776678,33776696,11998,14114.137204,14141.759155,9.555003,118.802934,-124.997341
1,10,612,5,1529.83,Newham,542413,182380,542450,182415,0.790802,...,0.184471,51.61299,1138048373,25760609,2320,13665.746851,12486.204868,9.522721,116.900585,40.825036
2,10,905,4,1390.04,Newham,542413,182380,542450,182415,0.790802,...,-0.056014,51.490606,104397683,270932211,11091,8249.044187,8333.425766,9.017974,90.824249,-116.953333
3,10,407,7,157.91,Newham,542413,182380,542450,182415,0.790802,...,0.079911,51.532079,180624488,180624488,5933,2258.856791,2371.941188,7.723057,47.527432,59.836679
4,10,603,4,1781.67,Newham,542413,182380,542450,182415,0.790802,...,-0.087059,51.659039,11377862,11377862,4127,17972.04034,18017.793983,9.796628,134.059839,-33.797057


In [None]:
# Routes are based on Dijkstra's algorithm.
# Both columns store the route returned by get_route (not a distance): one
# minimising edge "length", the other minimising edge "travel_time".
df["route_length"] = get_route(df)
df["route_travel_time"] = get_route(df,weight="travel_time")

In [None]:
missing_route_length = df[df["route_length"].isna()]
len(missing_route_length)

In [None]:
missing_route_travel_time = df[df["route_travel_time"].isna()]
len(missing_route_travel_time)

## Missing route handling

In [None]:
# Fallback for pairs with no node-to-node route: retry between the nearest-edge
# node columns (node_id_e_*).
# Each route is a variable-length list of node ids. Assigning a raw list of such
# lists through .loc lets pandas/numpy try to interpret it as a 2-D array, which
# fails or misbehaves for ragged input; an object Series aligned on the row
# index stores exactly one route per row.
df.loc[missing_route_length.index, "route_length"] = pd.Series(
    get_route(missing_route_length, "edges"),
    index=missing_route_length.index,
    dtype=object,
)
df.loc[missing_route_travel_time.index, "route_travel_time"] = pd.Series(
    get_route(missing_route_travel_time, "edges", "travel_time"),
    index=missing_route_travel_time.index,
    dtype=object,
)

In [None]:
missing_route = df[df["route_length"].isna()]
len(missing_route)

In [None]:
missing_route_travel_time = df[df["route_travel_time"].isna()]
len(missing_route_travel_time)

In [None]:
# df["route_length"].to_pickle("./training_route_by_length.pkl")

In [40]:
# Load routes saved in an earlier session; they replace whatever the routing
# cells above produced.
# NOTE(review): pd.read_pickle can execute arbitrary code -- only load files you trust.
# NOTE(review): the two pickles live in different directories and the second has a
# generic name; confirm that route.pkl really holds the travel-time routes.
# NOTE(review): pickled routes are only valid for the graph they were computed on.
# G is downloaded afresh in this notebook, so stale routes may reference edges that
# no longer exist -- a possible cause of the AttributeError further down; confirm.
route_len = pd.read_pickle("./training_route_by_length.pkl")
route_tt = pd.read_pickle("../MEC_dataset/route.pkl")
df["route_length"] = route_len
df["route_travel_time"] = route_tt

## Extract Features

In [41]:
edge_attributes = ox.graph_to_gdfs(G, nodes=False).columns
print(edge_attributes)

Index(['osmid', 'name', 'highway', 'access', 'oneway', 'length', 'speed_kph',
       'travel_time', 'maxspeed', 'geometry', 'lanes', 'ref', 'bridge',
       'junction', 'tunnel', 'est_width', 'width', 'service', 'area'],
      dtype='object')


In [42]:
df.columns

Index(['sourceid', 'dstid', 'dow', 'mean_travel_time', 'la_name_src',
       'geoeast_src', 'geonorth_src', 'popeast_src', 'popnorth_src',
       'area_src', 'src_x', 'src_y', 'node_id_n_src', 'node_id_e_src',
       'population_density_src', 'la_name_dst', 'geoeast_dst', 'geonorth_dst',
       'popeast_dst', 'popnorth_dst', 'area_dst', 'dst_x', 'dst_y',
       'node_id_n_dst', 'node_id_e_dst', 'population_density_dst',
       'geo_displacement', 'pop_displacement', 'geo_displacement_log',
       'geo_displacement_sqrt', 'direction', 'route_length',
       'route_travel_time'],
      dtype='object')

In [None]:
# Features of the shortest-by-length route: total length, mean edge speed and
# total edge travel time, each rounded to an integer.
routes_by_length = df["route_length"]
df["shortest_road_distance"] = [round(sum(get_attr(route, "length"))) for route in routes_by_length]
df["maxspeed_length"] = [round(np.mean(get_attr(route, "speed_kph"))) for route in routes_by_length]
df["fastest_travel_time_by_length"] = [round(sum(get_attr(route, "travel_time"))) for route in routes_by_length]

In [56]:
r = route_tt[0]

In [55]:
# Rounded total length of the one-way edges along each shortest-by-length route.
# This is the by-length feature, so it must read route_length (the travel-time
# variant has its own cell) and must be stored, because the test table gets the
# same column.
# NOTE(review): get_attr raises AttributeError when a route references an edge
# that is not in G (e.g. routes pickled from a different graph download); confirm
# the pickled routes match the current graph if that error shows up here.
oneway_len = []
for route in df["route_length"]:
    # Explicit dtypes keep the boolean mask valid for a route without edges,
    # where the lists are empty and numpy would otherwise infer float64.
    oneway = np.asarray(get_attr(route, "oneway"), dtype=bool)
    length = np.asarray(get_attr(route, "length"), dtype=float)
    oneway_len.append(round(length[oneway].sum()))

df["oneway_length_by_length"] = oneway_len

AttributeError: 'NoneType' object has no attribute 'values'

In [57]:
get_attr(r, "oneway")

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 True,


In [None]:
# Features of the fastest (minimum travel_time) route: total length, mean edge
# speed and total edge travel time, each rounded to an integer.
routes_by_time = df["route_travel_time"]
df["road_distance_by_travel_time"] = [round(sum(get_attr(route, "length"))) for route in routes_by_time]
df["maxspeed_travel_time"] = [round(np.mean(get_attr(route, "speed_kph"))) for route in routes_by_time]
df["fastest_travel_time"] = [round(sum(get_attr(route, "travel_time"))) for route in routes_by_time]

In [None]:
# Rounded total length of the one-way edges along each fastest route.
oneway_len = []
for route in df["route_travel_time"]:
    # Explicit dtypes keep the boolean mask valid for a route without edges,
    # where the lists are empty and numpy would otherwise infer float64
    # (indexing with a float array raises IndexError).
    oneway = np.asarray(get_attr(route, "oneway"), dtype=bool)
    length = np.asarray(get_attr(route, "length"), dtype=float)
    oneway_len.append(round(length[oneway].sum()))

df["oneway_length_by_travel_time"] = oneway_len

# ***Categorical to Numerical***

In [None]:
# Encode the source/destination local-authority names as integers.
# With the default categories="auto" each column gets its own category list, so
# the same borough can receive different codes in la_name_src and la_name_dst
# (and again in any table encoded later) whenever the sets of names differ.
# A single list taken from the zone table gives every borough one stable code.
object_cols = ["la_name_src","la_name_dst"]
borough_names = sorted(london["la_name"].unique())
ordinal_encoder = OrdinalEncoder(categories=[borough_names] * len(object_cols))
df[["la_name_src_num","la_name_dst_num"]] = ordinal_encoder.fit_transform(df[object_cols])

In [None]:
df.head(3)

# ***Save DataFrame***

In [None]:
# df.to_csv("../MEC_dataset/training_dataset.csv", index=False)

# Solving for testing dataset

In [None]:
# Test table; the first CSV column is used as the index. It must provide the
# sourceid and dstid columns that the zone merge below joins on.
test = pd.read_csv("../MEC_dataset/testing_route.csv",index_col=0)

In [None]:
test.head()

In [None]:
# Same zone-attribute join as for the training table: once for the source zone,
# once for the destination zone.
ldn = london.drop(["DISPLAY_NAME","geometry","msoa_code","msoa_name","la_code","nodes_id_2","to_drop"],axis=1)

# population_density must be renamed here as well; otherwise the second merge
# produces population_density_x / population_density_y and the test table no
# longer has the population_density_src / population_density_dst columns that
# the training table has.
tsrc = test.merge(ldn, left_on = "sourceid", right_on = "MOVEMENT_ID", how = "left")
tsrc = tsrc.rename({"geoeast":"geoeast_src", "geonorth":"geonorth_src","popeast":"popeast_src","popnorth":"popnorth_src",
                  "la_name":"la_name_src","area_km2":"area_src", "msoa_code":"msoa_code_src", "x" : "src_x", "y" : "src_y",
                 "node_id_n" : "node_id_n_src", "node_id_e" : "node_id_e_src","population_density":"population_density_src"},axis = 1)

tdst = tsrc.merge(ldn, left_on = "dstid", right_on = "MOVEMENT_ID", how = "left")
tdst = tdst.rename({"geoeast":"geoeast_dst", "geonorth":"geonorth_dst","popeast":"popeast_dst","popnorth":"popnorth_dst",
                  "la_name":"la_name_dst","area_km2":"area_dst","msoa_code":"msoa_code_dst","x" : "dst_x", "y" : "dst_y",
                  "node_id_n" : "node_id_n_dst", "node_id_e" : "node_id_e_dst","population_density":"population_density_dst"},axis = 1)

# The join key appears once per merge and is suffixed _x/_y by the second merge.
test = tdst.drop(["MOVEMENT_ID_x","MOVEMENT_ID_y"],axis=1)

In [None]:
test.head()

In [None]:
# Straight-line distance between source and destination, once from the geo*
# coordinates and once from the pop* coordinates (same definition as for training).
geo_src = test[["geoeast_src", "geonorth_src"]].to_numpy()
geo_dst = test[["geoeast_dst", "geonorth_dst"]].to_numpy()
test["geo_displacement"] = np.linalg.norm(geo_src - geo_dst, axis=1)

pop_src = test[["popeast_src", "popnorth_src"]].to_numpy()
pop_dst = test[["popeast_dst", "popnorth_dst"]].to_numpy()
test["pop_displacement"] = np.linalg.norm(pop_src - pop_dst, axis=1)

In [None]:
# Log and square-root transforms of the geo displacement; the +1 keeps the log
# finite when source and destination coincide (displacement 0).
geo_disp = test["geo_displacement"]
test["geo_displacement_log"] = np.log(geo_disp.values + 1)
test["geo_displacement_sqrt"] = np.sqrt(geo_disp)

In [None]:
# Direction of travel in degrees: arctan2(east offset, north offset), i.e. the
# angle measured from grid north towards east, in the range (-180, 180].
# The training table stores this signed angle (its executed output contains
# negative values), so no "% 360" is applied here: wrapping only the test set
# would give the same direction different values in training and test.
xDiff = test.loc[:, "geonorth_dst"].values - test.loc[:, "geonorth_src"].values
yDiff = test.loc[:, "geoeast_dst"].values - test.loc[:, "geoeast_src"].values
test["direction"] = np.degrees(np.arctan2(yDiff,xDiff))

In [None]:
# Route every test pair between the nearest-node columns: once minimising edge
# "length", once minimising edge "travel_time". Both columns store routes.
test["route_length"] = get_route(test)
test["route_travel_time"] = get_route(test,weight="travel_time")

In [None]:
tmissing_route = test[test["route_length"].isna()]
len(tmissing_route)

In [None]:
tmissing_route = test[test["route_travel_time"].isna()]
len(tmissing_route)

In [None]:
# Fallback for pairs with no node-to-node route: retry between the nearest-edge
# node columns (node_id_e_*).
# The two route columns can be missing on different rows, so each one gets its
# own mask. The shared `tmissing_route` variable was last assigned from the
# travel-time column and must not be used to fill route_length.
tmissing_route_length = test[test["route_length"].isna()]
tmissing_route_travel_time = test[test["route_travel_time"].isna()]

# Routes are variable-length lists; an index-aligned object Series stores one
# route per row instead of letting pandas treat the nested list as a 2-D array.
test.loc[tmissing_route_length.index, "route_length"] = pd.Series(
    get_route(tmissing_route_length, "edges"),
    index=tmissing_route_length.index,
    dtype=object,
)
test.loc[tmissing_route_travel_time.index, "route_travel_time"] = pd.Series(
    get_route(tmissing_route_travel_time, "edges", "travel_time"),
    index=tmissing_route_travel_time.index,
    dtype=object,
)

In [None]:
tmissing_route = test[test["route_length"].isna()]
len(tmissing_route)

In [None]:
tmissing_route = test[test["route_travel_time"].isna()]
len(tmissing_route)

In [None]:
# test["route_length"].to_pickle("./testing_route_by_length.pkl")
# test["route_travel_time"].to_pickle("./testing_route_by_travel_time.pkl")

In [None]:
# Features of the shortest-by-length route: total length, mean edge speed and
# total edge travel time, each rounded to an integer.
routes_by_length = test["route_length"]
test["shortest_road_distance"] = [round(sum(get_attr(route, "length"))) for route in routes_by_length]
test["maxspeed_length"] = [round(np.mean(get_attr(route, "speed_kph"))) for route in routes_by_length]
test["fastest_travel_time_by_length"] = [round(sum(get_attr(route, "travel_time"))) for route in routes_by_length]

In [None]:
# Rounded total length of the one-way edges along each shortest-by-length route.
oneway_len = []
for route in test["route_length"]:
    # Explicit dtypes keep the boolean mask valid for a route without edges,
    # where the lists are empty and numpy would otherwise infer float64
    # (indexing with a float array raises IndexError).
    oneway = np.asarray(get_attr(route, "oneway"), dtype=bool)
    length = np.asarray(get_attr(route, "length"), dtype=float)
    oneway_len.append(round(length[oneway].sum()))

test["oneway_length_by_length"] = oneway_len

In [None]:
# Features of the fastest (minimum travel_time) route: total length, mean edge
# speed and total edge travel time, each rounded to an integer.
routes_by_time = test["route_travel_time"]
test["road_distance_by_travel_time"] = [round(sum(get_attr(route, "length"))) for route in routes_by_time]
test["maxspeed_travel_time"] = [round(np.mean(get_attr(route, "speed_kph"))) for route in routes_by_time]
test["fastest_travel_time"] = [round(sum(get_attr(route, "travel_time"))) for route in routes_by_time]

In [None]:
# Rounded total length of the one-way edges along each fastest route.
# The column is "route_travel_time" (not "route_traveltime"), and the values
# must be collected in the same list that is assigned to the table afterwards.
oneway_len_travel_time = []
for route in test["route_travel_time"]:
    # Explicit dtypes keep the boolean mask valid for a route without edges,
    # where the lists are empty and numpy would otherwise infer float64.
    oneway = np.asarray(get_attr(route, "oneway"), dtype=bool)
    length = np.asarray(get_attr(route, "length"), dtype=float)
    oneway_len_travel_time.append(round(length[oneway].sum()))

test["oneway_length_by_travel_time"] = oneway_len_travel_time

In [None]:
test.head()

In [None]:
# test.to_csv("../MEC_dataset/testing_dataset.csv")

# Visualization

In [None]:
# Overlay the two routes of the first training pair on the street graph:
# red = route minimising length, yellow = route minimising travel time.
routes = [df.loc[0,"route_length"],df.loc[0,"route_travel_time"]]
rc = ['r', 'y']
fig, ax = ox.plot_graph_routes(G, routes, route_colors=rc, route_linewidth=6, node_size=0)

In [None]:
len(get_attr_count(df.loc[0,"route_length"],"bridge"))