## Import

In [1]:
import sys
# Add the project's helper-module directory to the import path (presumably where the
# `mongo_db` and `convert_geometry` modules imported further down live).
# NOTE(review): absolute, machine-specific Windows path — the notebook will not find
# these modules on another machine; TODO derive it from the repository root instead.
sys.path.append(r'C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\src\facility_location_Bergen\custome_modules')

In [2]:
import warnings
from shapely.errors import ShapelyDeprecationWarning
# Silence ShapelyDeprecationWarning. The filter is registered process-wide, so it
# applies to every cell executed after this one.
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)

In [46]:
import pytz
import folium
import numpy as np
import pickle as pkl
from PIL import Image
import networkx as nx
import geopandas as gpd
import cartopy.crs as ccrs
from datetime import datetime
import matplotlib.pyplot as plt
import cartopy.io.img_tiles as cimgt
from urllib.request import urlopen, Request
from matplotlib.animation import FuncAnimation
from mongo_db import retrieve_database_and_collections
from networkx.drawing.nx_agraph import graphviz_layout
from convert_geometry import toMultiLineString, toExtremePoints

## Retrieve collection and convert to a dataframe

In [4]:
# Day to analyse, written DD_MM_YYYY; it is interpolated into the input file name
# in the loading cell.
# NOTE(review): the morning/midday/afternoon windows further down hard-code
# 2023-04-20, so changing `day` alone will not shift them.
day = "20_04_2023"

In [5]:
# Load the data for `day` into `gdf`.
# NOTE(review): despite the `.geojson` extension the file is read with pickle, so it
# must have been written with pickle. Only unpickle files you trust: unpickling can
# execute arbitrary code.
# NOTE(review): absolute, machine-specific path — TODO make it relative/configurable.
with open(rf"C:\Users\Marco\Documents\GitHub\GeoSpatial-analysis\facility-location-Bergen\data\03_primary\{day}.geojson", "rb") as f:
    gdf = pkl.load(f)

In [6]:
# Make the `geometry.extreme_point` column the active geometry of the frame.
gdf = gdf.set_geometry("geometry.extreme_point")

## Data preparation

### Find unique identifier for road segments

In [22]:
# For every API call time, print: the timestamp, the number of rows, the number of
# distinct extreme-point geometries, and how many rows exceed that distinct count.
for date in gdf["api_call_time"].unique():
    sub_gdf = gdf[gdf["api_call_time"] == date]
    key = sub_gdf["geometry.extreme_point"]
    n_rows, n_distinct = len(sub_gdf), len(key.unique())
    print(date, n_rows, n_distinct, n_rows - n_distinct)

2023-04-20 13:00:00+00:00 620 616 4
2023-04-20 13:05:00+00:00 622 618 4
2023-04-20 13:10:00+00:00 624 620 4
2023-04-20 13:15:00+00:00 626 622 4
2023-04-20 13:20:00+00:00 624 620 4
2023-04-20 13:25:00+00:00 625 621 4
2023-04-20 13:30:00+00:00 623 619 4
2023-04-20 13:35:00+00:00 620 616 4
2023-04-20 13:40:00+00:00 623 619 4
2023-04-20 13:45:00+00:00 623 619 4
2023-04-20 13:50:00+00:00 627 623 4
2023-04-20 13:55:00+00:00 621 617 4
2023-04-20 14:00:00+00:00 628 624 4
2023-04-20 14:05:00+00:00 624 620 4
2023-04-20 14:10:00+00:00 625 621 4
2023-04-20 14:15:00+00:00 623 619 4
2023-04-20 14:20:00+00:00 625 621 4
2023-04-20 14:25:00+00:00 622 618 4
2023-04-20 14:30:00+00:00 625 621 4
2023-04-20 14:35:00+00:00 622 618 4
2023-04-20 14:40:00+00:00 622 618 4
2023-04-20 14:45:00+00:00 623 619 4
2023-04-20 14:50:00+00:00 623 619 4
2023-04-20 14:55:00+00:00 626 622 4
2023-04-20 15:00:00+00:00 624 620 4
2023-04-20 09:00:00+00:00 619 615 4
2023-04-20 09:05:00+00:00 620 616 4
2023-04-20 09:10:00+00:00 62

In [43]:
# For every API call time, look at the extreme-point geometries that occur more than
# once and report whether the rows sharing a geometry carry different descriptions.
# When a shared geometry does NOT have differing descriptions, print the call time
# and stop scanning the remaining geometries of that call time.
for date in list(gdf["api_call_time"].unique()):
    sub_gdf = gdf[(gdf["api_call_time"] == date)]
    vc = sub_gdf["geometry.extreme_point"].value_counts(sort=False)
    repeated_geometries = vc[vc > 1].index
    sub_sub_gdf = sub_gdf[sub_gdf["geometry.extreme_point"].isin(repeated_geometries)]
    for geometry in sub_sub_gdf["geometry.extreme_point"].unique():
        same_geometry = sub_sub_gdf["geometry.extreme_point"] == geometry
        descriptions = sub_sub_gdf.loc[same_geometry, "description"].unique()
        descriptions_differ = len(descriptions) > 1
        print("the description field is different: ", descriptions_differ)
        if not descriptions_differ:
            print(date)
            break

the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different:  True
the description field is different

From the previous cells we observe that, within each API call, the tuple ("description", "geometry.extreme_point") is unique for each road segment. We can therefore use this tuple as a unique identifier for each road segment.

### Split gdf according to time

In [51]:
# Timezone-aware bounds of the morning window: 07:30 to 09:30 at UTC offset +02:00.
first_morning_date, last_morning_date = (
    datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S%z")
    for stamp in ("2023-04-20T07:30:00+02:00", "2023-04-20T09:30:00+02:00")
)

In [52]:
# Keep the rows whose API call time lies inside the morning window (both bounds inclusive).
gdf_morning = gdf.loc[gdf["api_call_time"].between(first_morning_date, last_morning_date)]

In [54]:
# Timezone-aware bounds of the midday window: 11:00 to 12:00 at UTC offset +02:00.
first_midday_date, last_midday_date = (
    datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S%z")
    for stamp in ("2023-04-20T11:00:00+02:00", "2023-04-20T12:00:00+02:00")
)

In [58]:
# Keep the rows whose API call time lies inside the midday window (both bounds inclusive).
gdf_midday = gdf.loc[gdf["api_call_time"].between(first_midday_date, last_midday_date)]

In [56]:
# Timezone-aware bounds of the afternoon window: 15:00 to 17:00 at UTC offset +02:00.
first_afternoon_date, last_afternoon_date = (
    datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S%z")
    for stamp in ("2023-04-20T15:00:00+02:00", "2023-04-20T17:00:00+02:00")
)

In [59]:
# Keep the rows whose API call time lies inside the afternoon window (both bounds inclusive).
gdf_afternoon = gdf.loc[gdf["api_call_time"].between(first_afternoon_date, last_afternoon_date)]

### Take average value in each road segment for each gdf

#### Morning

In [64]:
# List the column names of the full frame (presumably to pick the numeric fields
# that the next cell averages).
gdf.columns

Index(['sourceUpdated', 'api_call_time', 'description', 'length',
       'currentFlow.speed', 'currentFlow.speedUncapped',
       'currentFlow.freeFlow', 'currentFlow.jamFactor',
       'currentFlow.confidence', 'currentFlow.traversability', 'geometry.type',
       'currentFlow.jamTendency', 'currentFlow.junctionTraversability',
       'geometry.multi_line', 'geometry.extreme_point'],
      dtype='object')

In [105]:
# Average the numeric flow fields of the morning rows per road segment, a segment
# being identified by its (description, extreme-point geometry) pair.
# `dropna=False` keeps groups whose key contains a missing value; `sort=False`
# leaves the groups in order of first appearance.
gdf_morning_average = (
    gdf_morning
    .groupby(["description", "geometry.extreme_point"], sort=False, dropna=False)[
        [
            "length",
            "currentFlow.speed",
            "currentFlow.speedUncapped",
            "currentFlow.freeFlow",
            "currentFlow.jamFactor",
            "currentFlow.confidence",
        ]
    ]
    .mean()
    .reset_index()
)

In [107]:
# Display the per-segment morning averages.
gdf_morning_average

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence
0,Herland,"MULTIPOINT (5.53747 60.42335, 5.53903 60.42214)",374.0,12.300000,12.300000,16.111110,1.912,0.7536
1,Herland,"MULTIPOINT (5.53492 60.42177, 5.53757 60.42317)",313.0,15.344445,15.344445,15.000000,0.096,0.7708
2,Takvam,"MULTIPOINT (5.53492 60.42177, 5.51596 60.42212)",1173.0,19.066667,19.244445,19.444445,0.164,0.8996
3,Takvam,"MULTIPOINT (5.46050 60.42195, 5.51596 60.42212)",8122.0,11.344445,11.344445,12.222222,0.596,0.8144
4,Takvam,"MULTIPOINT (5.51612 60.42207, 5.51624 60.42231)",31.0,6.344444,6.344444,8.611112,2.144,0.7276
...,...,...,...,...,...,...,...,...
628,Salhus,"MULTIPOINT (5.26737 60.50590, 5.26773 60.50591)",20.0,7.777778,7.777778,7.777778,0.000,0.7072
629,Smedneset,"MULTIPOINT (5.26722 60.50718, 5.26710 60.50710)",12.0,4.166667,4.166667,4.166667,0.000,0.7000
630,Salhus,"MULTIPOINT (5.26710 60.50710, 5.26737 60.50590)",134.0,6.377778,6.377778,7.777778,1.572,0.7000
631,Smedneset,"MULTIPOINT (5.26737 60.50590, 5.26722 60.50718)",146.0,4.722222,4.722222,5.277778,0.800,0.7000


##### Check for the presence of NaN values

In [108]:
# Show the segments whose averaged uncapped speed is missing. The null mask is
# taken directly from the column instead of a row-wise `apply` over the whole frame.
gdf_morning_average[gdf_morning_average["currentFlow.speedUncapped"].isna()]

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence
189,,"MULTIPOINT (5.33799 60.32356, 5.34689 60.31863)",1374.0,,,7.222222,10.0,
300,Myraskjenet,"MULTIPOINT (5.21953 60.34025, 5.19267 60.34695)",3273.0,,,12.222222,10.0,
327,Nyborg,"MULTIPOINT (5.19267 60.34695, 5.19708 60.37056)",2727.0,,,12.222222,10.0,


##### Delete NaN values

In [109]:
# Drop the segments that have no averaged uncapped speed. The result is reassigned
# rather than mutated with `inplace=True`, so no existing object is modified.
# NOTE(review): every row listed by the NaN check above has jamFactor 10.0 —
# presumably closed/blocked roads; confirm that removing them is intended.
gdf_morning_average = gdf_morning_average.dropna(subset=["currentFlow.speedUncapped"])

In [111]:
# Confirm that no segment with a missing uncapped speed remains (expected: no rows).
# The null mask is taken directly from the column instead of a row-wise `apply`.
gdf_morning_average[gdf_morning_average["currentFlow.speedUncapped"].isna()]

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence


#### Midday

In [None]:
# List the column names of the full frame (presumably to pick the numeric fields
# that the next cell averages).
gdf.columns

Index(['sourceUpdated', 'api_call_time', 'description', 'length',
       'currentFlow.speed', 'currentFlow.speedUncapped',
       'currentFlow.freeFlow', 'currentFlow.jamFactor',
       'currentFlow.confidence', 'currentFlow.traversability', 'geometry.type',
       'currentFlow.jamTendency', 'currentFlow.junctionTraversability',
       'geometry.multi_line', 'geometry.extreme_point'],
      dtype='object')

In [114]:
# Average the numeric flow fields of the midday rows per road segment, a segment
# being identified by its (description, extreme-point geometry) pair.
# `dropna=False` keeps groups whose key contains a missing value; `sort=False`
# leaves the groups in order of first appearance.
gdf_midday_average = (
    gdf_midday
    .groupby(["description", "geometry.extreme_point"], sort=False, dropna=False)[
        [
            "length",
            "currentFlow.speed",
            "currentFlow.speedUncapped",
            "currentFlow.freeFlow",
            "currentFlow.jamFactor",
            "currentFlow.confidence",
        ]
    ]
    .mean()
    .reset_index()
)

In [115]:
# Display the per-segment midday averages.
gdf_midday_average

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence
0,Herland,"MULTIPOINT (5.53747 60.42335, 5.53903 60.42214)",374.0,12.863248,12.863248,16.111110,1.738462,0.720769
1,Herland,"MULTIPOINT (5.53492 60.42177, 5.53757 60.42317)",313.0,14.358975,14.358975,15.000000,0.346154,0.736154
2,Takvam,"MULTIPOINT (5.53492 60.42177, 5.51596 60.42212)",1173.0,19.209402,19.615385,19.444445,0.092308,0.870000
3,Takvam,"MULTIPOINT (5.51612 60.42207, 5.51624 60.42231)",31.0,5.619658,5.619658,8.611112,2.707692,0.706154
4,Takvam,"MULTIPOINT (5.46050 60.42195, 5.51596 60.42212)",8122.0,10.940172,10.940172,12.222222,0.876923,0.826154
...,...,...,...,...,...,...,...,...
620,Salhus,"MULTIPOINT (5.26710 60.50710, 5.26737 60.50590)",134.0,5.555556,5.555556,7.777778,2.500000,0.700000
621,Smedneset,"MULTIPOINT (5.26737 60.50590, 5.26722 60.50718)",146.0,4.722222,4.722222,5.277778,0.800000,0.700000
622,Smedneset,"MULTIPOINT (5.26722 60.50718, 5.26710 60.50710)",12.0,4.166667,4.166667,4.166667,0.000000,0.700000
623,Salhus,"MULTIPOINT (5.26737 60.50590, 5.26773 60.50591)",20.0,7.799145,7.799145,7.777778,0.000000,0.708462


##### Check for the presence of NaN values

In [117]:
# Show the segments whose averaged uncapped speed is missing. The null mask is
# taken directly from the column instead of a row-wise `apply` over the whole frame.
gdf_midday_average[gdf_midday_average["currentFlow.speedUncapped"].isna()]

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence
188,,"MULTIPOINT (5.33799 60.32356, 5.34689 60.31863)",1374.0,,,7.222222,10.0,
189,,"MULTIPOINT (5.33952 60.31717, 5.33961 60.31630)",106.0,,,0.0,10.0,
300,Myraskjenet,"MULTIPOINT (5.21953 60.34025, 5.19267 60.34695)",3273.0,,,12.222222,10.0,
325,Nyborg,"MULTIPOINT (5.19267 60.34695, 5.19708 60.37056)",2727.0,,,12.222222,10.0,


##### Delete NaN values

In [118]:
# Drop the segments that have no averaged uncapped speed. The result is reassigned
# rather than mutated with `inplace=True`, so no existing object is modified.
# NOTE(review): every row listed by the NaN check above has jamFactor 10.0 —
# presumably closed/blocked roads; confirm that removing them is intended.
gdf_midday_average = gdf_midday_average.dropna(subset=["currentFlow.speedUncapped"])

In [119]:
# Confirm that no segment with a missing uncapped speed remains (expected: no rows).
# The null mask is taken directly from the column instead of a row-wise `apply`.
gdf_midday_average[gdf_midday_average["currentFlow.speedUncapped"].isna()]

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence


#### Afternoon

In [120]:
# List the column names of the full frame (presumably to pick the numeric fields
# that the next cell averages).
gdf.columns

Index(['sourceUpdated', 'api_call_time', 'description', 'length',
       'currentFlow.speed', 'currentFlow.speedUncapped',
       'currentFlow.freeFlow', 'currentFlow.jamFactor',
       'currentFlow.confidence', 'currentFlow.traversability', 'geometry.type',
       'currentFlow.jamTendency', 'currentFlow.junctionTraversability',
       'geometry.multi_line', 'geometry.extreme_point'],
      dtype='object')

In [121]:
# Average the numeric flow fields of the afternoon rows per road segment, a segment
# being identified by its (description, extreme-point geometry) pair.
# `dropna=False` keeps groups whose key contains a missing value; `sort=False`
# leaves the groups in order of first appearance.
gdf_afternoon_average = (
    gdf_afternoon
    .groupby(["description", "geometry.extreme_point"], sort=False, dropna=False)[
        [
            "length",
            "currentFlow.speed",
            "currentFlow.speedUncapped",
            "currentFlow.freeFlow",
            "currentFlow.jamFactor",
            "currentFlow.confidence",
        ]
    ]
    .mean()
    .reset_index()
)

In [122]:
# Display the per-segment afternoon averages.
gdf_afternoon_average

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence
0,Herland,"MULTIPOINT (5.53747 60.42335, 5.53903 60.42214)",374.0,13.755556,13.755556,16.111110,1.244,0.7220
1,Herland,"MULTIPOINT (5.53492 60.42177, 5.53757 60.42317)",313.0,15.333334,15.366667,15.000000,0.148,0.8708
2,Takvam,"MULTIPOINT (5.53492 60.42177, 5.51596 60.42212)",1173.0,19.344445,19.744445,19.444445,0.040,0.9388
3,Takvam,"MULTIPOINT (5.51612 60.42207, 5.51624 60.42231)",31.0,5.766667,5.766667,8.611112,2.484,0.7172
4,Takvam,"MULTIPOINT (5.46050 60.42195, 5.51596 60.42212)",8122.0,11.044445,11.044445,12.222222,0.796,0.8792
...,...,...,...,...,...,...,...,...
654,Smedneset,"MULTIPOINT (5.26722 60.50718, 5.26710 60.50710)",12.0,4.122223,4.122223,4.166667,0.000,0.7016
655,Smedneset,"MULTIPOINT (5.26737 60.50590, 5.26722 60.50718)",146.0,4.244445,4.244445,5.277778,0.512,0.7216
656,Salhus,"MULTIPOINT (5.26710 60.50710, 5.26737 60.50590)",134.0,5.522223,5.522223,7.777778,2.460,0.7016
657,Salhus,"MULTIPOINT (5.26737 60.50590, 5.26773 60.50591)",20.0,7.611111,7.611111,7.777778,0.188,0.7180


##### Check for the presence of NaN values

In [124]:
# Show the segments whose averaged uncapped speed is missing. The null mask is
# taken directly from the column instead of a row-wise `apply` over the whole frame.
gdf_afternoon_average[gdf_afternoon_average["currentFlow.speedUncapped"].isna()]

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence
65,,"MULTIPOINT (5.28976 60.29086, 5.29016 60.29079)",24.0,,,0.0,10.0,
66,,"MULTIPOINT (5.29016 60.29079, 5.29048 60.29037)",58.0,,,0.0,10.0,
67,,"MULTIPOINT (5.29048 60.29037, 5.29046 60.29013)",27.0,,,0.0,10.0,
193,,"MULTIPOINT (5.33799 60.32356, 5.34689 60.31863)",1374.0,,,7.222222,10.0,
194,,"MULTIPOINT (5.33952 60.31717, 5.33961 60.31630)",106.0,,,0.0,10.0,
310,Myraskjenet,"MULTIPOINT (5.21953 60.34025, 5.19267 60.34695)",3273.0,,,12.222222,10.0,
334,Nyborg,"MULTIPOINT (5.19267 60.34695, 5.19708 60.37056)",2727.0,,,12.222222,10.0,


##### Delete NaN values

In [125]:
# Drop the segments that have no averaged uncapped speed. The result is reassigned
# rather than mutated with `inplace=True`, so no existing object is modified.
# NOTE(review): every row listed by the NaN check above has jamFactor 10.0 —
# presumably closed/blocked roads; confirm that removing them is intended.
gdf_afternoon_average = gdf_afternoon_average.dropna(subset=["currentFlow.speedUncapped"])

In [126]:
# Confirm that no segment with a missing uncapped speed remains (expected: no rows).
# The null mask is taken directly from the column instead of a row-wise `apply`.
gdf_afternoon_average[gdf_afternoon_average["currentFlow.speedUncapped"].isna()]

Unnamed: 0,description,geometry.extreme_point,length,currentFlow.speed,currentFlow.speedUncapped,currentFlow.freeFlow,currentFlow.jamFactor,currentFlow.confidence


## Build average graph

### Morning