# Define number of crashes and trips per link

In this notebook:

(1) The trips passing through each node of the biketrackcarall network are counted.

(2) The number of crashes and trips passing through each potential link is counted.

(3) The number of crashes and trips passing through the links of the existing infrastructure is counted.

## Parameters

In [None]:
%run -i "../parameters/parameters.py"

## Setup

In [None]:
%run -i path.py
%run -i setup.py

%load_ext watermark
%watermark -n -v -m -g -iv

## Functions

In [None]:
%run -i functions.py

## Import the data

In [None]:
# Load the networks for the selected city from its data folder.
# The 'biketrackcarall' export is read three times: once through csv_to_ox
# (used below for nearest-node lookups) and twice through csv_to_ig, giving
# two independent graph objects (one of them is later annotated with trip counts).

network_dir = PATH["data"] + placeid + "/"

G_biketrackcarall_OX = csv_to_ox(network_dir, placeid, 'biketrackcarall')
G_biketrackcarall_ALL = csv_to_ig(network_dir, placeid, 'biketrackcarall')
G_biketrackcarall = csv_to_ig(network_dir, placeid, 'biketrackcarall')
G_biketrackcarall_simplified = csv_to_ig(network_dir, placeid, 'biketrackcarall_simplified')

# Existing bicycle infrastructure (full and simplified versions)
G_biketrack = csv_to_ig(network_dir, placeid, 'biketrack')
G_biketracksimplified = csv_to_ig(network_dir, placeid, 'biketrack_simplified')

In [None]:
# Load the origin-destination (OD) trips.
# The 'Unnamed: 0' column is dropped (presumably an index column written by an
# earlier to_csv call -- confirm against the script that produced OD_data.csv).
city_dir = PATH["data"] + placeid + "/"

OD = pd.read_csv(city_dir + "OD_data.csv").drop(columns=['Unnamed: 0'])

# Load the 2019 crash records
accidents_softmobility = pd.read_csv(city_dir + 'accidents_softmobility2019.csv')


## (1) Create a df with the number of trips passing through each node of the biketrackcarall network

### Get shortest paths

In [None]:
# Randomly pick "howmany" OD trips and snap both endpoints of each trip to the
# nearest node of the biketrackcarall network.

howmany = 30000 # number of trips requested. Make sure you exclude at least M trips (e.g. 500 trips) to evaluate the trip coverage on unseen trips
s = 10 # random seed to order randomly the trips. Make sure you provided the same random seed to the function calculate_tripscovered()

# Shuffle the row positions of OD reproducibly, then keep the first "howmany"
ids = list(np.arange(len(OD)))
random.seed(s)
random.shuffle(ids)
selected_ids = ids[:howmany]

# One (origin node, destination node) tuple per selected trip
routenodepairs = []
n_selected = len(selected_ids)

for j, i in enumerate(selected_ids, start=1):

    print(j,'/',n_selected)
    origin = [OD['O_lat'].iloc[i], OD['O_lon'].iloc[i]]
    destination = [OD['D_lat'].iloc[i], OD['D_lon'].iloc[i]]
    nO = ox.distance.get_nearest_node(G_biketrackcarall_OX, origin)
    nD = ox.distance.get_nearest_node(G_biketrackcarall_OX, destination)
    routenodepairs.append((nO, nD))
    clear_output(wait=True)

### Snap the shortest paths on the road network
The following cell creates a network based on the "biketrackcarall" network, selecting all the edges crossed at least once by a trip.

In [None]:
# Route every selected trip on the network, collect the vertices visited by
# at least one shortest path, and build the subgraph induced by those vertices.
tot_e = []
bike_e = []
GT_indices = set()        # distinct vertex indices visited by any trip
GT_indices_list = []      # same indices, one entry per (trip, vertex) visit
n_routes = len(routenodepairs)

for i, poipair in enumerate(routenodepairs):

    print(i+1,'/',n_routes)
    origin_id, destination_id = poipair[0], poipair[1]
    # Translate node ids into igraph vertex indices
    poipair_ind = (
        G_biketrackcarall_ALL.vs.find(id = origin_id).index,
        G_biketrackcarall_ALL.vs.find(id = destination_id).index,
    )
    # Vertices on the weighted shortest path (each vertex once per trip)
    sp = set(G_biketrackcarall_ALL.get_shortest_paths(*poipair_ind, weights = "weight", output = "vpath")[0])
    GT_indices.update(sp)
    GT_indices_list.extend(sp)
    clear_output(wait=True)

# Subgraph induced by all visited vertices
GT = G_biketrackcarall_ALL.induced_subgraph(GT_indices)

tot_e.append(len(GT.es))

### Snap the shortest paths on the road network and weight the network with the frequency of usage of links
The following cell creates a network based on the "biketrackcarall" network, selecting all the edges crossed at least once by a trip. It also adds an edge attribute labeled "n_trips" with the number of trips that cross the edge.

In [None]:
# Count how many of the selected trips traverse each edge of G_biketrackcarall
# and store the count on the edge as the attribute 'n_trips'.
tot_e = []
bike_e = []

# Trips per edge, keyed by str(edge index); only edges used at least once appear
used_links = {}

# Start every edge at 0 so that the attribute always exists and unused edges
# carry a number instead of None (also covers the case of no routed trips).
G_biketrackcarall.es['n_trips'] = [0] * len(G_biketrackcarall.es)

for i,poipair in enumerate(routenodepairs):
    print(i+1,'/',len(routenodepairs))
    # Translate node ids into igraph vertex indices
    poipair_ind = (G_biketrackcarall.vs.find(id = poipair[0]).index, G_biketrackcarall.vs.find(id = poipair[1]).index)
    # Edge indices on the weighted shortest path; the set ensures an edge is
    # counted at most once per trip
    sp = set(G_biketrackcarall.get_shortest_paths(poipair_ind[0], poipair_ind[1], weights = "weight", output = "epath")[0])
    for n in sp:
        key = str(n)
        # Constant-time dict lookup (no list of keys rebuilt per edge)
        used_links[key] = used_links.get(key, 0) + 1
        G_biketrackcarall.es[n]['n_trips'] = used_links[key]

    clear_output(wait=True)

In [None]:
# Count, for every node of the network, how many of the selected trips pass through it

# One record per node id: coordinates plus a trip counter starting at zero.
# The sign of y is flipped on the way out (the crash-matching cells negate the
# latitude the other way round, so the graph presumably stores -latitude -- confirm).
trips_nodes = {
    vv['id']: {'x': vv['x'], 'y': -vv['y'], 'trips': 0}
    for vv in G_biketrackcarall.vs
}

n_routes = len(routenodepairs)

for i, poipair in enumerate(routenodepairs):
    GT_indices = set()  # not read in this cell; the notebook-level name is reset on every pass
    print(i+1,'/',n_routes)
    source_idx = G_biketrackcarall.vs.find(id = poipair[0]).index
    target_idx = G_biketrackcarall.vs.find(id = poipair[1]).index
    poipair_ind = (source_idx, target_idx)
    # Vertices on the weighted shortest path, each counted once per trip
    sp = set(G_biketrackcarall.get_shortest_paths(source_idx, target_idx, weights = "weight", output = "vpath")[0])

    for n in sp:
        node_id = G_biketrackcarall.vs[n]['id']
        trips_nodes[node_id]['trips'] += 1

    clear_output(wait=True)

In [None]:
# Save the trips per node to a csv file.
# The table is built directly from trips_nodes (one row per node id, in the
# dict's insertion order) instead of copying it row by row with progress output.

trips_per_node_df = pd.DataFrame({
    'ids': list(trips_nodes.keys()),
    'lat': [node['y'] for node in trips_nodes.values()],
    'lon': [node['x'] for node in trips_nodes.values()],
    'trips': [node['trips'] for node in trips_nodes.values()],
})

# NOTE(review): the file name hard-codes "seed10" while the seed itself is the
# variable `s` set in the sampling cell -- keep the two in sync if `s` changes.
trips_per_node_df.to_csv(PATH['data'] + placeid + "/" + 'trips_per_node-seed10.csv')

## (2) Count the number of crashes and trips passing through each potential link

In [None]:
# Load the seed points: one integer node id per line of the seed-points file

delta = 300  # selects which seed-points file is read (presumably the seed spacing in metres -- confirm)

seed_points_path = PATH["data"] + placeid + "/" + placeid + '_seed-points_delta' + str(delta) + '.csv'

with open(seed_points_path) as f:
    # int() ignores surrounding whitespace, including the trailing newline
    nnids = list(map(int, f))

len(nnids)

In [None]:
# Pair up the seed points on the biketrackcarall network.
# The result is iterated below as (poipair, poipair_distance) tuples, where
# poipair holds the two node ids of a potential link.

poipairs = poipairs_by_distance(G_biketrackcarall_ALL, nnids, True)

# Number of potential links
print(len(poipairs))

### Calculate the number of crashes for each routed link

In [None]:
# Count, for every potential link (pair of seed points), the crashes that fall
# within a 50 m buffer around the link's shortest path on the network.
start = time.time()
poipairs_list = list()   # [poipair, poipair_distance, crash count] per link
poipairs_dict = dict()   # "id0,id1" -> crash count

# Build the crash points once: they do not depend on the link being processed.
# The latitude is negated, as in the original per-link test (presumably to
# match the sign convention of the coverage geometry -- confirm in functions.py).
accident_points = [
    Point(lon, -lat)
    for lon, lat in zip(accidents_softmobility['longitude'], accidents_softmobility['latitude'])
]

middle_start = time.time()

# iterate over all poipairs (each poipair has a link in GT_abstract)
for i, (poipair, poipair_distance) in enumerate(poipairs, start=1):

    # Progress report: on screen and in a small status file that is overwritten each time
    elapsed_last = np.round((time.time()-middle_start),2)
    elapsed_tot = np.round((time.time()-start)/60,2)
    print(i,'/',len(poipairs))
    print('Time last poipair: ', elapsed_last, ' s')
    print('Tot time: ', elapsed_tot,' minutes')
    with open("Accidents&Trips.txt", "w") as text_file:
        text_file.write(str(i)+'/'+str(len(poipairs))+' - Time last poipair: '+str(elapsed_last)+ ' s' + ' - Tot time: '+str(elapsed_tot) + ' minutes')
    middle_start = time.time()

    # Vertices on the weighted shortest path between the two seed points
    poipair_ind = (G_biketrackcarall_ALL.vs.find(id = poipair[0]).index, G_biketrackcarall_ALL.vs.find(id = poipair[1]).index)
    sp = set(G_biketrackcarall_ALL.get_shortest_paths(poipair_ind[0], poipair_ind[1], weights = "weight", output = "vpath")[0])

    # Subgraph induced by the path vertices
    G_temp = G_biketrackcarall_ALL.induced_subgraph(sp)

    # compute the cover area of the link (with a buffer = 50 m)
    covered_area,cov = calculate_coverage_edges(G_temp, buffer_m = 50, return_cov = True)

    # count how many accidents are located in the covered area
    acc_counter = sum(1 for point in accident_points if point.within(cov))

    # add to a list poipairs, how many accidents the link covers
    poipairs_list.append([poipair,poipair_distance,acc_counter])

    # add to a dict poipairs (as key), how many accidents the link covers
    poipairs_dict[str(poipair[0])+','+str(poipair[1])] = acc_counter

    clear_output(wait=True)


end = time.time()
print('Total time: ', np.round((end-start)/60,2),' minutes')

Save the accidents per link data

In [None]:
# Turn the "id0,id1" -> crash count mapping into two parallel columns
mp = list(poipairs_dict.keys())
ma = list(poipairs_dict.values())

data = {'poipairs' : mp, 'accidents' : ma}
df = pd.DataFrame(data)

# NOTE(review): poi_distance is not defined in this notebook (the matching
# trips file below is named with `delta`); it must come from the parameters or
# setup scripts -- confirm it exists and holds the intended value.
path_file = (
    PATH["data"] + placeid + "/" + placeid
    + '_ONLYaccidentsrouting_counter' + str(poi_distance) + '.csv'
)
df.to_csv(path_file)

### Calculate the number of trips per link
To do this, we consider the nodes on the shortest path of a given link and sum the number of trips passing through these nodes.

In [None]:
# define a dict using the dataframe 'trips_per_node_df': str(node id) -> trips through the node
#
# The two columns are read directly. Reading whole rows with .iloc[i] would
# coerce each mixed int/float row to a single float dtype, so node ids would
# round-trip through float and the trip counts would come back as floats.
trips_per_node_dict = {
    str(int(node_id)): n_trips
    for node_id, n_trips in zip(trips_per_node_df['ids'], trips_per_node_df['trips'])
}

# Show only the size; the full mapping has one entry per network node
len(trips_per_node_dict)

For each link, sum the number of trips passing through the nodes on its shortest path

In [None]:
# For every potential link, add up the per-node trip counts over the nodes of
# its shortest path. A trip contributes once for each path node it passes
# through, so the value is a sum over nodes, not a number of distinct trips.
poipairs_dict = {}
n_pairs = len(poipairs)

for c, (poipair, poipair_distance) in enumerate(poipairs, start=1):

    print(c,'/',n_pairs)

    source_idx = G_biketrackcarall_ALL.vs.find(id = poipair[0]).index
    target_idx = G_biketrackcarall_ALL.vs.find(id = poipair[1]).index
    poipair_ind = (source_idx, target_idx)
    # Distinct vertices on the weighted shortest path
    sp = set(G_biketrackcarall_ALL.get_shortest_paths(source_idx, target_idx, weights = "weight", output = "vpath")[0])

    counter = sum(
        trips_per_node_dict[str(G_biketrackcarall_ALL.vs[n]['id'])]
        for n in sp
    )
    link_key = str(poipair[0])+','+str(poipair[1])
    poipairs_dict[link_key] = counter

    clear_output(wait=True)


### Save the trips per link data

In [None]:
# Turn the "id0,id1" -> trips mapping into two parallel columns
mp = list(poipairs_dict.keys())
mt = list(poipairs_dict.values())

data = {'poipairs' : mp, 'trips' : mt}
df = pd.DataFrame(data)

# The file name carries the seed-point parameter `delta`
path_file = (
    PATH["data"] + placeid + "/" + placeid
    + '_ONLYtrips_counter' + str(delta) + '.csv'
)

df.to_csv(path_file)

## (3) Count the number of crashes and trips passing through the links of the existing infrastructure

In [None]:
# CRASHES

# calculate the number of CRASHES for each link of the existing bike network:
# crashes located within a 50 m buffer around the link

start = time.time()
poipairs_list = list()
poipairs_dict = dict()   # str(edge position) -> crash count

n_links = len(G_biketrack.es)

# Build the crash points once: they do not depend on the link being processed.
# The latitude is negated, as in the original per-link test (presumably to
# match the sign convention of the coverage geometry -- confirm in functions.py).
accident_points = [
    Point(lon, -lat)
    for lon, lat in zip(accidents_softmobility['longitude'], accidents_softmobility['latitude'])
]

# Edge-less copy of the bike network, built once; each link is added to a
# fresh copy of it instead of deep-copying the full network for every edge.
G_empty = copy.deepcopy(G_biketrack)
G_empty.es.delete()

middle_start = time.time()

# iterate over all the edges of the existing bike network
for i, e in enumerate(G_biketrack.es):

    # Progress report: on screen and in a small status file that is overwritten each time
    elapsed_last = np.round((time.time()-middle_start),2)
    elapsed_tot = np.round((time.time()-start)/60,2)
    print(i,'/',n_links)
    print('Time last poipair: ', elapsed_last, ' s')
    print('Tot time: ', elapsed_tot,' minutes')
    with open("Accidents&Trips.txt", "w") as text_file:
        # the denominator is the number of bike-track edges (not len(poipairs))
        text_file.write(str(i)+'/'+str(n_links)+' - Time last poipair: '+str(elapsed_last)+ ' s' + ' - Tot time: '+str(elapsed_tot) + ' minutes')
    middle_start = time.time()

    # Network with all the vertices but only the current edge
    G_temp = G_empty.copy()
    G_temp.add_edge(e.source,e.target,weight=e['weight'],osmid = e['osmid'])

    # compute the cover area of the link (with a buffer = 50 m)
    covered_area,cov = calculate_coverage_edges(G_temp, buffer_m = 50, return_cov = True)

    # count how many accidents are located in the covered area
    acc_counter = sum(1 for point in accident_points if point.within(cov))

    poipairs_dict[str(i)] = acc_counter

    clear_output(wait=True)

end = time.time()
print('Total time: ', np.round((end-start)/60,2),' minutes')

In [None]:
# save the accidents per link data (links of the existing bike network)
# poipairs_dict maps str(edge position) -> crash count
mp = list(poipairs_dict.keys())
ma = list(poipairs_dict.values())

data = {'link' : mp, 'accidents' : ma}
df = pd.DataFrame(data)

# Fixed: the original expression contained "placeid + + '...'", which applies a
# unary plus to a string and raises TypeError before anything is written.
path_file = PATH["data"] + placeid + "/" + placeid + '_ONLYaccidentsrouting_EXISTING_counter.csv'

df.to_csv(path_file)

To count the number of trips through a link of the existing bike network, we sum the number of trips passing through the source and target nodes.

In [None]:
# Trips per link of the existing bike network: trips through the link's source
# node plus trips through its target node, keyed by str(edge position).
poipairs_dict = {}

for c,e in enumerate(G_biketrack.es):

    print(c,'/',len(G_biketrack.es))

    # e.source / e.target are vertex indices of G_biketrack, so the node ids
    # must be read from G_biketrack itself (the original read them from
    # G_carall, which is not loaded in this notebook).
    source_id = G_biketrack.vs[e.source]['id']
    target_id = G_biketrack.vs[e.target]['id']

    poipairs_dict[str(c)] = trips_per_node_dict[str(source_id)] + trips_per_node_dict[str(target_id)]

    clear_output(wait=True)

In [None]:
# save the trips per link data (links of the existing bike network)
# poipairs_dict maps str(edge position) -> trips
mp = list(poipairs_dict.keys())
mt = list(poipairs_dict.values())

data = {'link' : mp, 'trips' : mt}
df = pd.DataFrame(data)

path_file = (
    PATH["data"] + placeid + "/" + placeid
    + '_ONLYtrips_EXISTING_counter.csv'
)

df.to_csv(path_file)