# Step 4 - Preprocessing bicycle count data
## Project: Data-driven bicycle networks

This notebook embeds bicycle counts onto the street network of Copenhagen.

## Preliminaries

### Parameters

In [1]:
# Notebook-wide debug switch, set before the parameters script is run.
debug = False # If True, will produce plots and/or verbose output to double-check
# `%run -i` executes the script inside this notebook's namespace, so the names it
# defines become available to the cells below (and it can see `debug`).
%run -i "../parameters/parameters.py"

Loaded parameters.



### Setup

In [None]:
# Run the helper scripts inside this notebook's namespace (-i) so the names they
# define are shared with the cells below.
# NOTE(review): path.py presumably defines the PATH dictionary used later — confirm.
%run -i path.py
# NOTE(review): the generic setup.py is disabled and setupCPH.py is run instead —
# presumably the Copenhagen-specific setup providing the ox / nx / pd imports; confirm.
#%run -i setup.py
%run -i setupCPH.py

# Record the execution environment for reproducibility:
# -n date, -v Python/IPython versions, -m machine info, -g git hash,
# -iv versions of the imported packages.
%load_ext watermark
%watermark -n -v -m -g -iv

### Functions

In [3]:
# Load the project's helper functions into this notebook's namespace.
# NOTE(review): ox_to_csv and compress_file, called further down, are presumably
# defined in functions.py — confirm.
%run -i functions.py

Loaded functions.



### Load street network graph from Copenhagen

In [None]:
# Read the previously saved car street network of Copenhagen from its GraphML file.
carall_graphml_file = '../../bikenwgrowth_external/data/copenhagen/bikedata/G_carall_graphml.graphml'
G_carall = ox.load_graphml(carall_graphml_file)

## Embedding bicycle count data into the Copenhagen street network graph

### Load bicycle counts and assign to nearest edges in the Copenhagen street network

In [None]:
# Read the bicycle counting table and keep only the records of the year 2019.
count_df = pd.read_csv("../../bikenwgrowth_external/data/copenhagen/bikedata/bicycle_counts.csv")
count_df = count_df[count_df["year"] == 2019]

# Fallback value used later for edges that never receive a count:
# the mean of all 2019 bicycle counts.
bcount_unassigned = count_df.bicycle_count.mean()

# Start every edge at 0.0, which marks it as "no count assigned yet".
nx.set_edge_attributes(G_carall, 0.0, name='bcount')

# Snap each counting location to its nearest edge and store the count there.
# If several locations map to the same edge, the last one processed wins.
for pos in range(len(count_df)):
    station = count_df.iloc[pos]
    nearest = ox.distance.get_nearest_edge(G_carall, [station.lat, station.long])
    nx.set_edge_attributes(G_carall, {nearest: {'bcount': station.bicycle_count}})

bcount_dict = nx.get_edge_attributes(G_carall, "bcount")

# Export the annotated network, both as-is and in simplified form.
city_dir = PATH["data"] + 'copenhagen' + "/"
ox_to_csv(G_carall, city_dir, 'copenhagen', 'carall')
ox_to_csv(ox.simplify_graph(G_carall), city_dir, 'copenhagen', 'carall', "_simplified")

### Apply generalisations for missing edge weights

In [None]:
# Generalise the sparse bicycle counts to every edge of the car network:
#   1. an edge that already carries a count (bcount != 0.0) keeps it;
#   2. an uncounted edge whose street name has at least one counted edge gets
#      the mean count of that street;
#   3. every remaining edge gets `bcount_unassigned` (the mean of all 2019
#      counts, computed when the counts were loaded).

# Unpack the edge table of the car network.
with zipfile.ZipFile("../../bikenwgrowth_external/data/copenhagen/copenhagen_carall_edges.zip", 'r') as zip_ref:
    zip_ref.extractall("../../bikenwgrowth_external/data/copenhagen/")

edges = pd.read_csv("../../bikenwgrowth_external/data/copenhagen/copenhagen_carall_edges.csv")

# Mean count per street name, computed over the edges that have a count.
# groupby drops rows without a name, so unnamed edges get no street mean.
street_means = (
    edges[edges.bcount != 0.0]
    .groupby('name')['bcount']
    .mean()
    .to_frame()
    .rename(columns={"bcount": "mean_group_bcount"})
)

# Left join: one row per original edge, with NaN in `mean_group_bcount` where
# the street has no counted edge.
result = pd.merge(edges, street_means, on='name', how='left')
result.to_csv("../../bikenwgrowth_external/data/copenhagen/copenhagen_carall_edges.csv")
compress_file("../../bikenwgrowth_external/data/copenhagen/", "copenhagen_carall_edges")

# Streets without any counted edge fall back to the global default.
# A vectorised fillna replaces the former element-wise chained assignment
# (`df[col].iloc[i] = ...`), which triggers SettingWithCopyWarning and has no
# effect when pandas Copy-on-Write is enabled.
result['mean_group_bcount'] = result['mean_group_bcount'].fillna(bcount_unassigned)
group_counts = result['mean_group_bcount'].to_numpy()

# Write the generalised counts back onto the graph.
# NOTE(review): this pairs row i of the edge CSV with the i-th edge yielded by
# G_carall.edges(keys=True); it assumes the CSV was written in that same edge
# order (presumably by ox_to_csv) — confirm.
# Iterating with keys=True matters: without keys, only edge key 0 of each
# (u, v) pair was ever read or written, so parallel edges kept a count of 0.0.
filled_counts = {}
for i, (u, v, key) in enumerate(G_carall.edges(keys=True)):
    if G_carall.edges[u, v, key]['bcount'] == 0.0:
        filled_counts[(u, v, key)] = group_counts[i]
nx.set_edge_attributes(G_carall, filled_counts, 'bcount')

# Mean bicycle count over all edges after generalisation.
bcount_dict = nx.get_edge_attributes(G_carall, "bcount")
bcount_mean = statistics.mean([float(count) for count in bcount_dict.values()])

with open('../../bikenwgrowth_external/data/copenhagen/bikedata/mean_bcount.pkl', 'wb') as f:
    pickle.dump(bcount_mean, f)

# Persist the updated network: CSV exports (full and simplified), GraphML,
# and the per-edge count dictionary.
ox_to_csv(G_carall, PATH["data"] + 'copenhagen' + "/", 'copenhagen', 'carall')
ox_to_csv(ox.simplify_graph(G_carall), PATH["data"] + 'copenhagen' + "/", 'copenhagen', 'carall', "_simplified")
ox.save_graphml(G_carall, "../../bikenwgrowth_external/data/copenhagen/bikedata/G_carall_graphml.graphml")

with open('../../bikenwgrowth_external/data/copenhagen/bikedata/edges_dict_bcount.pkl', 'wb') as f:
    pickle.dump(bcount_dict, f)