![title](LogoEPL.jpg)
<b><p style='text-align: center;'> Algorithms in Data Science  </p> </b>

<b><p style='text-align: center;'> London Transport System Study </p> </b>
<b><p style='text-align: center;'> Commuters data from November 2009 </p> </b>

<i><p style='text-align: Center;'> Nima Farnoodian , Charles Rongione, Breno Tiburico</p> </i>

### <u>Charles</u>


#### <u>Libraries</u>

In [3]:
import folium
from math import sin, cos, sqrt, atan2, radians
from folium import plugins
import pandas as pd 
import numpy as np 
import scipy.stats as ss
import networkx as nx
import csv
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
import pickle
import geopandas as gpd
import os
import random

#### <u>Declare Functions</u>

In [4]:
def LineDrawer(location, tubes, m):
    import matplotlib
    
    color_dict = {}
    for i, j in zip(list(matplotlib.colors.cnames.items()), tubes['Tube Line'].unique()):
        color_dict[j] = i[0]
    
    
    for fro, to, line in zip(tubes['From Station'], tubes['To Station'], tubes['Tube Line']):
                         
            Lat_from = location.loc[fro]['Latitude']
            Long_from = location.loc[fro]['Longitude']
                         
            start = [Lat_from, Long_from]
            
                         
    
            Lat_to = location.loc[to]['Latitude']
            Long_to = location.loc[to]['Longitude']
                         
            end = [Lat_to, Long_to]
    
            folium.PolyLine(locations = [start, end], color = color_dict[line], smoot_factor = 5, opacity = 0.5).add_to(m)
            #plugins.AntPath([start, end]).add_to(m)

            
            
def Distances(Lat_from, Long_from, Lat_to, Long_to):
    R = 6373.0

    lat1 = radians(Lat_from)
    lon1 = radians(Long_from)
    lat2 = radians(Lat_to)
    lon2 = radians(Long_to)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    dist = R * c
        
    return dist





def LineDrawer2(location, tubes, m, color):
    
    for fro, to, in zip(tubes['station1'], tubes['station2']):
                         
            Lat_from = location.loc[fro]['Latitude']
            Long_from = location.loc[fro]['Longitude']
            start = [Lat_from, Long_from]
            
            
            
    
            Lat_to = location.loc[to]['Latitude']
            Long_to = location.loc[to]['Longitude']
            end = [Lat_to, Long_to]
            
            
            
            folium.PolyLine(locations = [start, end], smoot_factor = 5, opacity = 4, color = color, weight = 5).add_to(m)
            
            


def PointDrawer(location, tubes, m, color):
    
    for fro in (tubes['station1']):
                         
            Lat_from = location.loc[fro]['Latitude']
            Long_from = location.loc[fro]['Longitude']
                         
            start = [Lat_from, Long_from]
            
                         
    
            
    
            folium.CircleMarker(location = start, radius=10, color= color, fill_color=color).add_to(m).add_child(folium.Popup(str(fro+ '\n' +'Lat :' + str(round(Lat_from, 3))+ ('\n') + 'Lon :' + str(round(Long_from, 3))))).add_to(m)
         
    

def PointDrawer2(location, tubes, m, color):
    
    for fro in (tubes['station1']):
                         
            Lat_from = location.loc[fro]['Latitude']
            Long_from = location.loc[fro]['Longitude']
                         
            start = [Lat_from, Long_from]
            
                         
    
            
    
            folium.CircleMarker(location = start, radius=1, color= color, fill_color=color).add_to(m)
        
        
    for fro in (tubes['station2']):
                         
            Lat_from = location.loc[fro]['Latitude']
            Long_from = location.loc[fro]['Longitude']
                         
            end = [Lat_from, Long_from]
            
                         
    
            
    
            folium.CircleMarker(location = end, radius=1, color= color, fill_color='blue').add_to(m)
    
    
    
def PolygonDrawer(data, color, m):
    
    style = {'fillColor': color, 'color': '#000000'}
    
    folium.GeoJson(data = data["geometry"], style_function=lambda x:style).add_to(m)
            
        
  


def Percentilizer(df_to_subset, parameter_to_evaluate):
   
    p = parameter_to_evaluate
    test_df = df_to_subset
    
    
    test_df_90 = test_df[(test_df[p] >= np.percentile(test_df[p], 90))]
    test_df_80 = test_df[(test_df[p] >= np.percentile(test_df[p], 80)) & ((test_df[p] < np.percentile(test_df[p], 90)))]
    test_df_70 = test_df[(test_df[p] >= np.percentile(test_df[p], 70)) & ((test_df[p] < np.percentile(test_df[p], 80)))]
    test_df_60 = test_df[(test_df[p] >= np.percentile(test_df[p], 60)) & ((test_df[p] < np.percentile(test_df[p], 70)))]
    test_df_50 = test_df[(test_df[p] >= np.percentile(test_df[p], 50)) & ((test_df[p] < np.percentile(test_df[p], 60)))]
    test_df_40 = test_df[(test_df[p] >= np.percentile(test_df[p], 40)) & ((test_df[p] < np.percentile(test_df[p], 50)))]
    test_df_30 = test_df[(test_df[p] >= np.percentile(test_df[p], 30)) & ((test_df[p] < np.percentile(test_df[p], 40)))]
    test_df_20 = test_df[(test_df[p] >= np.percentile(test_df[p], 20)) & ((test_df[p] < np.percentile(test_df[p], 30)))]
    test_df_10 = test_df[(test_df[p] >= np.percentile(test_df[p], 10)) & ((test_df[p] < np.percentile(test_df[p], 20)))]
    test_df_00 = test_df[(test_df[p] <= np.percentile(test_df[p], 10))]
    

    df_list = (test_df_90, test_df_80, test_df_70, test_df_60, test_df_50, test_df_40, test_df_30, test_df_20, test_df_10, test_df_00)
    
    return df_list

#### <u>Import Data</u>


In [5]:
locations = pd.read_csv('London_stations_matched (1).csv')
locations = locations.set_index('Station')

tubes = pd.read_csv('tube_matched (1).csv')



In [1232]:
m = folium.Map(location=[lat_center, long_center], zoom_start=11, width=1000, height=800, control_scale=True)
m

#### <u>Map Design</u>

In [1230]:
lat_center, long_center = locations[['Latitude', 'Longitude']].mean()


map_circle = folium.Map(location=[lat_center, long_center], zoom_start=11, width=1000, height=800, control_scale=True)


for i,j, name in zip(locations['Latitude'], locations['Longitude'], locations.index):

    folium.CircleMarker(location=[i, j], radius=3, color='black', fill_color='blue') \
    .add_child(folium.Popup(str(name+ '\n' +'Lat :' + str(round(i, 3))+ ('\n') + 'Lon :' + str(round(j,3))))).add_to(map_circle)

LineDrawer(location = locations, tubes = tubes, m = map_circle)
map_circle

In [606]:

file  = open('tube_matched (1).csv', "r", encoding='utf8')
a=file.readlines()
G=nx.Graph()
for i in a[1:]:
    i=i[:-1]
    i=i.split(',')
    print(i)
    G.add_edge(i[2],i[3],weight=float(i[4]))



['0', 'Bakerloo', 'baker-street', 'regents-park', '0.8035911111386282']
['1', 'Bakerloo', 'charing-cross', 'embankment', '0.20487024933693085']
['2', 'Bakerloo', 'edgware-road-bakerloo', 'marylebone', '0.5251813229659411']
['3', 'Bakerloo', 'embankment', 'waterloo', '0.782531067418954']
['4', 'Bakerloo', 'harlesden', 'willesden-junction', '1.0411887202589505']
['5', 'Bakerloo', 'harrow-and-wealdstone', 'kenton', '1.8040553938363457']
['6', 'Bakerloo', 'kensal-green', 'queens-park', '1.4409834494349716']
['7', 'Bakerloo', 'kenton', 'south-kenton', '1.2612092071622054']
['8', 'Bakerloo', 'kilburn-park', 'maida-vale', '0.8219229421551929']
['9', 'Bakerloo', 'lambeth-north', 'elephant-and-castle', '0.8464435931625731']
['10', 'Bakerloo', 'maida-vale', 'warwick-avenue', '0.7854124277295627']
['11', 'Bakerloo', 'marylebone', 'baker-street', '0.4631963576754993']
['12', 'Bakerloo', 'north-wembley', 'wembley-central', '1.282676423112401']
['13', 'Bakerloo', 'oxford-circus', 'piccadilly-circus'

In [28]:
file = 'LondonCovid.csv'
fname = "Classeur3.geojson"

gdf = gpd.read_file(fname)

df = pd.read_csv(file, sep = ';')
a = list(df['covid_19_deaths_per_thousand'])
a=[float(i.replace(',','.'))for i in a]
df['covid_19_deaths_per_thousand']=a


gdf['covid_19_deaths_per_thousand'] = df['covid_19_deaths_per_thousand']
gdf.to_file("output.json", driver="GeoJSON")


In [29]:
gdf.head()

Unnamed: 0,MSOA11CD,geometry,covid_19_deaths_per_thousand
0,E02000001,"MULTIPOLYGON (((-0.09679 51.52325, -0.09772 51...",0.34459
1,E02000002,"MULTIPOLYGON (((0.14809 51.59678, 0.14757 51.5...",0.259067
2,E02000003,"MULTIPOLYGON (((0.15063 51.58306, 0.15102 51.5...",0.271469
3,E02000004,"MULTIPOLYGON (((0.18508 51.56480, 0.18514 51.5...",1.366535
4,E02000005,"MULTIPOLYGON (((0.14988 51.56807, 0.14924 51.5...",0.488806


In [1023]:
from shapely.geometry import Point, Polygon

p1 = Point(51.337, -0.114)
poly = gdf['geometry'][0]

poly.contains(p1)
poly.boundary.distance(p1)

72.85614134600459

In [1042]:
gdf2 = gpd.GeoDataFrame(
    strees, geometry=gpd.points_from_xy(strees.longitude, strees.latitude))
p1 = gdf2['geometry'][0]

podicti = {}

In [1042]:
check = []
for i in range(len(gdf['geometry'])):
    poly = gdf['geometry'][i]
    check.append(p1.within(poly))
    
check.index(True)
#gdf['geometry'][check.index(True)]

255

In [1062]:
poly_dict = {}
c=0
for i in range(len(gdf2)):
    p1 = gdf2['geometry'][i]
    check = False
    
    for k in range(len(gdf)):
        if p1.within(gdf['geometry'][k]):
            
            poly_dict[i] = [gdf['covid_19_deaths_per_thousand'][k], k]
            check = True
            
    if check == False:
        poly_dict[i] = [2.5, 1000]
            
            


In [1063]:
poly_dict
covid = []

for station in poly_dict:
    
    covid.append(poly_dict[station][0])

gdf2['covid_19_deaths_per_thousand'] = covid    
    

    

In [1066]:
gdf2.to_csv('stress_death.csv')

In [1240]:
m = folium.Map(location=[lat_center, long_center], zoom_start=11, width=1000, height=800, control_scale=True)
df_list = Percentilizer(df_to_subset = gdf, parameter_to_evaluate = 'covid_19_deaths_per_thousand')
color_list = ['#00ff00', '#80ff00', '#aaff00', '#d5ff00', '#ffff00', '#ffd500', '#ffaa00', '#ff8000', '#ff5500', '#ff0000']
color_list.reverse()

for df, color in zip(df_list, color_list):
    
    PolygonDrawer(data = df, color = color, m = m)

for i,j, name in zip(locations['Latitude'], locations['Longitude'], locations.index):

    folium.CircleMarker(location=[i, j], radius=3, color='white', fill_color='blue') \
    .add_child(folium.Popup(str(name+ '\n' +'Lat :' + str(round(i, 3))+ ('\n') + 'Lon :' + str(round(j,3))))).add_to(m)

In [1241]:
m

In [1125]:
styledbrown = {'fillColor': '#ff0000', 'color': '#000000'}
stylelbrown = {'fillColor': '#ffa500', 'color': '#000000'}
styledarkorange = {'fillColor': '#C97401', 'color': '#000000'}
styleyellow = {'fillColor': '#ffff00', 'color': '#000000'}
stylegreen = {'fillColor': '#00ff00', 'color': '#000000'}


m = folium.Map(location=[lat_center, long_center], zoom_start=11, width = 2000, height = 2000)
folium.GeoJson(data=gdfdbrown["geometry"], style_function=lambda x:styledbrown).add_to(m)
folium.GeoJson(data=gdflbrown["geometry"], style_function=lambda x:styledarkorange).add_to(m)
folium.GeoJson(data=gdfdarkorange["geometry"], style_function=lambda x:stylelbrown).add_to(m)
folium.GeoJson(data=gdfyellow["geometry"], style_function=lambda x:styleyellow).add_to(m)
folium.GeoJson(data=gdfgreen["geometry"], style_function=lambda x:stylegreen).add_to(m)




    




In [802]:
locations.head()
journeys = pd.read_csv('Journey_Matched (1).csv')
tubes.head()



Unnamed: 0.1,Unnamed: 0,Tube Line,From Station,To Station,Distance km
0,0,Bakerloo,baker-street,regents-park,0.803591
1,1,Bakerloo,charing-cross,embankment,0.20487
2,2,Bakerloo,edgware-road-bakerloo,marylebone,0.525181
3,3,Bakerloo,embankment,waterloo,0.782531
4,4,Bakerloo,harlesden,willesden-junction,1.041189


In [1088]:
sample_size = len(journeys)

day_of_the_week = list(journeys['daytype'].sample(sample_size))
period_of_the_day =  list(journeys['Time_category'].sample(sample_size))
station1 = list(tubes['From Station'][:sample_size])
station2 = list(tubes['To Station'][:sample_size])

speed_mean = abs(np.random.normal(loc=50, scale=10, size=sample_size))
speed_std = abs(np.random.normal(loc=10, scale=5, size=sample_size))
speed_var = [i**2 for i in speed_std]
speed_out = abs(np.random.normal(loc=50, scale=100, size=sample_size))
speed_max = abs(np.random.normal(loc=80, scale=20, size=sample_size))
speed_min = abs(np.random.normal(loc=10, scale=20, size=sample_size))

volume_mean = abs(np.random.normal(loc=500, scale=10, size=sample_size))
volume_std = abs(np.random.normal(loc=50, scale=5, size=sample_size))
volume_var = [i**2 for i in speed_std]
volume_out = abs(np.random.normal(loc=800, scale=100, size=sample_size))
volume_max = abs(np.random.normal(loc=1000, scale=20, size=sample_size))
volume_min = abs(np.random.normal(loc=100, scale=20, size=sample_size))

JNYTYP = list(journeys['JNYTYP'].sample(sample_size))
FinalProduct = list(journeys['FinalProduct'].sample(sample_size))
DFare = list(journeys['DFare'].sample(sample_size))

test_df = pd.DataFrame(zip(day_of_the_week, period_of_the_day, station1, station2, speed_mean, speed_std, speed_var, speed_out, speed_max,
                          speed_min, volume_mean, volume_std, volume_var, volume_out, volume_max, volume_min, JNYTYP, FinalProduct, DFare))
test_df.columns = ['day_of_the_week', 'period_of_the_day', 'station1', 'station2', 'speed_mean', 'speed_std', 'speed_var', 'speed_out', 'speed_max',
                  'speed_min', 'volume_mean', 'volume_std', 'volume_var', 'volume_out', 'volume_max', 'volume_min', 'JNYTYP', 'FinalProduct', 'DFare']



In [1089]:
df_map = pd.read_csv('Journey_Matched_formap.csv')
df_map = df_map.drop([0,1])
df_map['journey_avgspeed(km/h)'] = df_map['journey_avgspeed(km/h)'].astype(float)
df_map['steps_list'] = df_map['steps_list'].astype(float)


In [1231]:
df_list = Percentilizer(df_to_subset = df_map, parameter_to_evaluate = 'steps_list')


test_map =folium.Map(location=[lat_center, long_center], zoom_start=11, width=1000, height=800, control_scale=True)

color_list = ['#00ff00', '#80ff00', '#aaff00', '#d5ff00', '#ffff00', '#ffd500', '#ffaa00', '#ff8000', '#ff5500', '#ff0000']
color_list.reverse()

for tube, c in zip(df_list, color_list):

    LineDrawer2(location = locations, tubes = tube, m = test_map, color = c)
    
test_map

## <u> Loading Network and printing shortest/safest path</u>

In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from datetime import datetime
    
import pickle
with open('New Full_Transportation_Network.directed', 'rb') as network_file:
    Full_Transportation_Network = pickle.load(network_file)
# After config_dictionary is read from file
#print(config_dictionary)


In [1138]:
col=['downo', 'daytype','SubSystem','StartStn','EndStation','EntTime','EntTimeHHMM','ExTime','DFare','RouteID','FinalProduct','JourneyLength','Time_category','stations_count','stations_list','journey_dist(km)','journey_dur(hh)','journey_avgspeed(km/h)']
jr_data=pd.read_csv('Journey_Matched_Bystation.csv',usecols=col)

In [14]:
def time_category(time):
    # time is a string
    temptime=''
    timeInt=int(time.split(':')[0])
    if timeInt>=24:
        timeInt=timeInt-24
        temptime=str(timeInt)+':'+time.split(':')[1]
        time=temptime
    
    date_time_str = time
    date_time_obj = datetime.strptime(date_time_str, '%H:%M')
    time_24=datetime.strptime('19:00', '%H:%M')
    time_5=datetime.strptime('5:00', '%H:%M')
    time_8=datetime.strptime('8:00', '%H:%M')
    time_11=datetime.strptime('11:00', '%H:%M')
    time_12=datetime.strptime('12:00', '%H:%M')
    time_15=datetime.strptime('15:00', '%H:%M')
    time_16=datetime.strptime('16:00', '%H:%M')
    time_17=datetime.strptime('17:00', '%H:%M')
    time_19=datetime.strptime('19:00', '%H:%M')
    time_21=datetime.strptime('21:00', '%H:%M')
    time_24=datetime.strptime('00:00', '%H:%M')
    pod=''
    if time_5<date_time_obj<=time_8:
        pod='Early Morning'
    if time_8<date_time_obj<=time_11:
        pod='Morning'
    if time_11<date_time_obj<=time_12:
        pod='Late Morning'
    if time_12<date_time_obj<=time_15:
        pod='Early Afternoon'
    if time_15<date_time_obj<=time_16:
        pod='Afternoon'
    if time_16<date_time_obj<=time_17:
        pod='Late Afternoon'    
    if time_17<date_time_obj<=time_19:
        pod='Early Evening'
    if time_19<date_time_obj<=time_21:
        pod='Evening'
    if time_21<date_time_obj:
        pod='Late Evening'
    if time_24<=date_time_obj<=time_5:
        pod='Mid night'
    return pod

In [15]:
def get_groups(temp,cols,interest):
    
    cols.append(interest)
    interest_Group=temp.groupby(by=cols).count()['downo']
    vals=temp.groupby(by=cols).count()['downo'].values
    group={}
    for i in range(len(interest_Group)):
        #print(temp.groupby(by=cols).count()['downo'].index[i][2], vals[i])
        group[temp.groupby(by=cols).count()['downo'].index[i][2]]=vals[i]
    return group


In [16]:
def get_avg_time_lenght(Group_time):
    avg_time_length={}
    for idx in range(len(Group_time.index)):
        group=Group_time.index[idx]
        avg_time_length[group[2]]=Group_time[idx]
    return avg_time_length

In [17]:
import networkx as nx
import csv

file  = open('tube_matched (1).csv', "r", encoding='utf8')
a=file.readlines()
G=nx.Graph()
for i in a[1:]:
    i=i[:-1]
    i=i.split(',')
    #print(i)
    G.add_edge(i[2],i[3],weight=float(i[4]))

In [18]:
def distance(G, source,target,weight):
    path=nx.shortest_path(G, source=source,target =target, weight=weight)
    distance=0
    for idx in range(len(path)-1):
        source=path[idx]
        target=path[idx+1]
        distance+=G[source][target]['weight']
    return distance,path

In [19]:
stress_death=pd.read_csv('stress_death.csv',index_col='station')
stress_death.drop('Unnamed: 0',axis=1,inplace=True)
stress_death.head()

Unnamed: 0_level_0,stress,latitude,longitude,geometry,covid_19_deaths_per_thousand
station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
acton-central,859,51.508758,-0.26343,POINT (-0.263430199363473 51.5087577801896),0.376553
willesden-junction,3657,51.532234,-0.243909,POINT (-0.2439091432974829 51.5322339634936),0.210504
kensal-green,2578,51.530524,-0.224713,POINT (-0.224713384073025 51.53052421415789),0.225327
queens-park,3900,51.534313,-0.204798,POINT (-0.2047981493867569 51.53431265057009),0.467344
kilburn-high-road,1016,51.537268,-0.191113,POINT (-0.19111310573361 51.5372677411075),0.135722


In [20]:
stationPosition=pd.read_csv('London_stations_matched (1).csv', index_col='Station')

In [1152]:
##Stress csv file that has information about covid death cases must be already calculated 
#Network G that has all distances must be already calculated 

all_finalProduct=list(jr_data.groupby(by='FinalProduct').count()['downo'].index)
all_time_parts=list(jr_data.groupby(by='Time_category').count()['downo'].index)
all_day=list(jr_data.groupby(by='daytype').count()['downo'].index)

Full_Transportation_Network=nx.DiGraph()
for node in Transportation_Network:
    for neighbor in Transportation_Network[node]:
        _,path=distance(G,node,neighbor,'weight')
        #print(path)
        for idx in range(len(path)-1):
            node_from=path[idx]
            node_to=path[idx+1]
            if (node_from,node_to) not in Full_Transportation_Network.edges:
                trip_no=Transportation_Network[node][neighbor]['trip_no']
                subsystem=Transportation_Network[node][neighbor]['subsystem']
                full_jr_time=float(Transportation_Network[node][neighbor]['length'])
                jr_length=full_jr_time/(len(path)-1)
                jr_dist=float(G[node_from][node_to]['weight'])
                #r_stations_count=1
                Covid_weight=stress_death.loc[node_to]['covid_19_deaths_per_thousand']
                jr_avgspeed=float(Transportation_Network[node][neighbor]['avgspeed']/(len(path)-1))
                
                Full_Transportation_Network.add_edge(node_from,node_to,subsystem=subsystem,trip_no=trip_no,
                                                Covid_weight=Covid_weight,length=jr_length, dist=jr_dist,avgspeed=jr_avgspeed)                
                
                # Final Product
                FinalProduct_dist=Transportation_Network[node][neighbor]['FD']
                for FP in all_finalProduct:
                    Full_Transportation_Network[node_from][node_to]['FP_'+FP]=FinalProduct_dist.get(FP,0)
                    
                # Stress per parts of time Category  
                Time_category_dist=Transportation_Network[node][neighbor]['TD']
                for TD in all_time_parts:
                    Full_Transportation_Network[node_from][node_to]['Stress_'+TD]=Time_category_dist.get(TD,0)
                    
                # Average Journey Time per parts of time Category     
                avg_length_per_time=Transportation_Network[node][neighbor]['avg_length_per_time']
                for TD in all_time_parts:
                    Full_Transportation_Network[node_from][node_to]['Avglength_'+TD]=avg_length_per_time.get(TD,full_jr_time)/(len(path)-1)
                
                # Stress per days of the week
                day_dist=Transportation_Network[node][neighbor]['DD']
                for day in all_day:
                    Full_Transportation_Network[node_from][node_to]['DayStress_'+day]=day_dist.get(day,0)
                
                
                '''
                station_stress[node_in_between]={'strees':0, 'latitude':0, 'longitude':0 }
                station_stress[node_in_between]['strees']=Transportation_Network[node][neighbor]['trip_no']
                station_stress[node_in_between]['latitude']=stationPosition.loc[node_in_between]['Latitude']
                station_stress[node_in_between]['longitude']=stationPosition.loc[node_in_between]['Longitude']
                '''
            else:
                full_jr_time=float(Transportation_Network[node][neighbor]['length'])
                jr_length=full_jr_time/(len(path)-1)
                #print(node_from)
                #print(node_to)
                Full_Transportation_Network[node_from][node_to]['trip_no']+=Transportation_Network[node][neighbor]['trip_no']
                # add subsystems
                sbsystems=list(Full_Transportation_Network[node_from][node_to]['subsystem'])+[Transportation_Network[node][neighbor]['subsystem']]
                Full_Transportation_Network[node_from][node_to]['subsystem']=set(sbsystems)
                # length update
                updated_lenght=(Full_Transportation_Network[node_from][node_to]['length']+
                                float(Transportation_Network[node][neighbor]['length']/(len(path)-1)))/2
                Full_Transportation_Network[node_from][node_to]['length']=updated_lenght
                #update average speed
                updated_avg=(Full_Transportation_Network[node_from][node_to]['avgspeed']
                 +float(Transportation_Network[node][neighbor]['avgspeed']/(len(path)-1)))/2
                Full_Transportation_Network[node_from][node_to]['avgspeed']=updated_avg
                
                
                # update findal products
                FinalProduct_dist=Transportation_Network[node][neighbor]['FD']
                for FP in all_finalProduct:
                    Full_Transportation_Network[node_from][node_to]['FP_'+FP]+=FinalProduct_dist.get(FP,0)
                
                # Stress per parts of time Category  
                Time_category_dist=Transportation_Network[node][neighbor]['TD']
                for TD in all_time_parts:
                    Full_Transportation_Network[node_from][node_to]['Stress_'+TD]+=Time_category_dist.get(TD,0)
                    
                # Average Journey Time per parts of time Category     
                avg_length_per_time=Transportation_Network[node][neighbor]['avg_length_per_time']
                for TD in all_time_parts:
                    Full_Transportation_Network[node_from][node_to]['Avglength_'+TD]=(Full_Transportation_Network[node_from][node_to]['Avglength_'+TD]+(avg_length_per_time.get(TD,full_jr_time)/(len(path)-1)))/2
                    
                # Stress per days of the week
                day_dist=Transportation_Network[node][neighbor]['DD']
                for day in all_day:
                    Full_Transportation_Network[node_from][node_to]['DayStress_'+day]+=day_dist.get(day,0)
# the rest of the code is in the next cell                

In [1153]:
# the following code is to add Covid_stress_Lenght Function and setting latitude and longitude
for edge in list(Full_Transportation_Network.edges):
    node_from=edge[0]
    node_to=edge[1]
    for time in all_time_parts:
        length=Full_Transportation_Network[node_from][node_to]['Avglength_'+time]
        Stress=Full_Transportation_Network[node_from][node_to]['Stress_'+time]
        Covid_weight=Full_Transportation_Network[node_from][node_to]['Covid_weight']
        Covid_risk=Stress*Covid_weight+1
        Full_Transportation_Network[node_from][node_to]['Covid_risk_'+time]=Covid_risk
        Full_Transportation_Network[node_from][node_to]['risk_lenght_'+time]=length*Covid_risk
nodes={}
for i in Full_Transportation_Network:
    positions={'latitude':0,"longitude":0}
    positions['latitude']=stationPosition.loc[i]['Latitude']
    positions['longitude']=stationPosition.loc[i]['Longitude']
    nodes[i]=positions
for node in nodes:
    Full_Transportation_Network.nodes[node]['latitude'] = nodes[node]['latitude']
    Full_Transportation_Network.nodes[node]['longitude'] = nodes[node]['longitude']

### Shortest and safest path

In [21]:
def Shortest_path(G, Place_from,Place_To,time):
     
    #time: as string in 24 hour setting. Like 14:54
    #Computing based on risk_length function
    #In the coordinate, the left is 'latitude' and the right is 'longitude'
    Coordinate_low_risk={}
    Coordinate_shortest_path={}
    df_safe_path=pd.DataFrame(columns=['station1', 'station2'])
    df_shortest_path=pd.DataFrame(columns=['station1', 'station2'])
    timecat=time_category(time)
    weight='risk_lenght_'+timecat
    #weight='Covid_risk_'+timecat
    path_risk=nx.shortest_path(G, source=Place_from,target =Place_To, weight=weight)
    distance=0
    timelength=0
    Avgrisk=0
    source_list=[]
    target_list=[]
    risk=[]
    for idx in range(len(path_risk)-1):
        source=path_risk[idx]
        #Coordinate_low_risk[source]=(stress_death.loc[source]['latitude'],stress_death.loc[source]['longitude'])
        target=path_risk[idx+1]
        source_list.append(source)
        target_list.append(target)
        distance+=G[source][target]['dist']
        timelength+=G[source][target]['Avglength_'+timecat]
        risk.append(G[source][target]['Covid_risk_'+timecat])
        Avgrisk+=G[source][target]['Covid_risk_'+timecat]
        
    #Coordinate_low_risk[path_risk[-1]]=(stress_death.loc[path_risk[-1]]['latitude'],stress_death.loc[path_risk[-1]]['longitude'])
    Avgrisk=Avgrisk/(len(path_risk)-1)
    df_safe_path['station1']=source_list
    df_safe_path['station2']=target_list
    
    print('Safe and Short Route between', Place_from, 'and', Place_To, 'at',timecat+'.')
    print('\tRoute:', path_risk)
    print('\tDistance:', np.round(distance,3))
    print('\tTime Length:', np.round(timelength,3))
    print('\tAverage Contamination Risk:', np.round(Avgrisk,3))
    
    
    
    #Computing based on shortest_path length (Time length)
    weight='Avglength_'+timecat
    shortest_path=nx.shortest_path(G, source=Place_from,target =Place_To, weight=weight)
    Sdistance=0
    Stimelength=0
    SAvgrisk=0
    source_list=[]
    target_list=[]
    for idx in range(len(shortest_path)-1):
        source=shortest_path[idx]
        #Coordinate_shortest_path[source]=(stress_death.loc[source]['latitude'],stress_death.loc[source]['longitude'])
        target=shortest_path[idx+1]
        source_list.append(source)
        target_list.append(target)
        Sdistance+=G[source][target]['dist']
        Stimelength+=G[source][target]['Avglength_'+timecat]
        SAvgrisk+=G[source][target]['Covid_risk_'+timecat]
    #Coordinate_shortest_path[source]=(stress_death.loc[path_risk[-1]]['latitude'],stress_death.loc[path_risk[-1]]['longitude'])
    SAvgrisk=SAvgrisk/(len(shortest_path)-1)
    df_shortest_path['station1']=source_list
    df_shortest_path['station2']=target_list
    print('Shortest Route between', Place_from, 'and', Place_To, 'at',timecat+'.')
    print('\tRoute:', shortest_path)
    print('\tDistance:', np.round(Sdistance,3))
    print('\tTime Length:', np.round(Stimelength,3))
    print('\tAverage Contamination Risk:', np.round(SAvgrisk,3))
    
    details={}
    details['Low_risk_path']={'Distance':distance, 'Time_lenght':timelength, 'Average_Risk':Avgrisk, 'pairs' :df_safe_path}
    details['Shortest_path']={'Distance':Sdistance, 'Time_lenght':Stimelength, 'Average_Risk':SAvgrisk, 'pairs' :df_shortest_path}
    return details

In [22]:
details=Shortest_path(Full_Transportation_Network, 'oxford-circus','finchley-central','16:35')

Safe and Short Route between oxford-circus and finchley-central at Late Afternoon.
	Route: ['oxford-circus', 'tottenham-court-road', 'farringdon', 'st-pancras', 'kentish-town', 'tufnell-park', 'archway', 'highgate', 'east-finchley', 'finchley-central']
	Distance: 14.247
	Time Length: 43.238
	Average Contamination Risk: 353.851
Shortest Route between oxford-circus and finchley-central at Late Afternoon.
	Route: ['oxford-circus', 'tottenham-court-road', 'farringdon', 'st-pancras', 'kentish-town', 'tufnell-park', 'archway', 'highgate', 'east-finchley', 'finchley-central']
	Distance: 14.247
	Time Length: 43.238
	Average Contamination Risk: 353.851


In [23]:
details['Shortest_path']['pairs']

Unnamed: 0,station1,station2
0,oxford-circus,tottenham-court-road
1,tottenham-court-road,farringdon
2,farringdon,st-pancras
3,st-pancras,kentish-town
4,kentish-town,tufnell-park
5,tufnell-park,archway
6,archway,highgate
7,highgate,east-finchley
8,east-finchley,finchley-central


### <u>Showing the map</u>

In [61]:
#'green-park','st-johns-wood',':00'
details=Shortest_path(Full_Transportation_Network, 'green-park','st-johns-wood','21:00')

safe = details['Low_risk_path']['pairs']
shortest =details['Shortest_path']['pairs']

locations.loc['st-johns-wood']

Safe and Short Route between green-park and st-johns-wood at Evening.
	Route: ['green-park', 'oxford-circus', 'regents-park', 'baker-street', 'st-johns-wood']
	Distance: 4.52
	Time Length: 14.596
	Average Contamination Risk: 204.011
Shortest Route between green-park and st-johns-wood at Evening.
	Route: ['green-park', 'bond-street', 'baker-street', 'st-johns-wood']
	Distance: 3.844
	Time Length: 12.107
	Average Contamination Risk: 439.714


OS X           526742
OS Y           183352
Latitude      51.5349
Longitude   -0.174065
Zone                2
Postcode      NW8 6DN
Name: st-johns-wood, dtype: object

In [64]:
safe_shortest_map = folium.Map(location=[51, 0.8], zoom_start=10, width = 2000, height = 2000)

color_list = ['#00ff00', '#80ff00', '#aaff00', '#d5ff00', '#ffff00', '#ffd500', '#ffaa00', '#ff8000', '#ff5500', '#ff0000']
color_list.reverse()
df_list = Percentilizer(df_to_subset = gdf, parameter_to_evaluate = 'covid_19_deaths_per_thousand')


for df, color in zip(alldata, color_list):
    

    LineDrawer2(location = locations, tubes = df, m = safe_shortest_map, color = color)
    
    #PointDrawer(location = locations, tubes = df, m = safe_shortest_map, color = 'blue')


   # LineDrawer2(location = locations, tubes = shortest, m = safe_shortest_map, color = '#ff0000')

    #PointDrawer(location = locations, tubes = shortest, m = safe_shortest_map, color = 'blue')




folium.CircleMarker(location = [51.5068, -0.142478], radius=10, color= 'blue').add_to(safe_shortest_map)
folium.CircleMarker(location = [51.5349, -0.174065], radius=10, color= 'blue').add_to(safe_shortest_map)
safe_shortest_map

In [44]:
import pickle
with open('path.directed', 'rb') as network_file:

    alldata = pickle.load(network_file)
    
alldata[0]

Unnamed: 0,station1,station2,stress
0,green-park,oxford-circus,354
1,oxford-circus,regents-park,354
2,regents-park,baker-street,354
3,baker-street,st-johns-wood,354


In [45]:
safe_shortest_map = folium.Map(location=[51, 0.8], zoom_start=10, width = 2000, height = 2000)

color_list = ['#00ff00', '#80ff00', '#aaff00', '#d5ff00', '#ffff00', '#ffd500', '#ffaa00', '#ff8000', '#ff5500', '#ff0000']
color_list.reverse()
#df_list = Percentilizer(df_to_subset = , parameter_to_evaluate = 'covid_19_deaths_per_thousand')


for df, color in zip(alldata, color_list):
    
    PolygonDrawer(data = df, color = color, m = safe_shortest_map)


LineDrawer2(location = locations, tubes = safe, m = safe_shortest_map, color = '#00ff00')

PointDrawer(location = locations, tubes = safe, m = safe_shortest_map, color = 'blue')


LineDrawer2(location = locations, tubes = shortest, m = safe_shortest_map, color = '#ff0000')

PointDrawer(location = locations, tubes = shortest, m = safe_shortest_map, color = 'blue')





safe_shortest_map

KeyError: 'geometry'