# Step 3 - Preprocessing population density data
## Project: Data-driven bicycle networks

This notebook embeds population density counts into the street network of Copenhagen.

## Preliminaries

### Parameters

In [1]:
# Diagnostics toggle; it is set before the parameter script runs.
debug = False # If True, will produce plots and/or verbose output to double-check
# Execute the shared parameter script inside this notebook's namespace (-i), so the
# names it defines are available to the cells below.
# NOTE(review): snapthreshold, used further down, is presumably defined there - confirm.
%run -i "../parameters/parameters.py"

Loaded parameters.



### Setup

In [None]:
# Run the project setup scripts inside the notebook namespace (-i). Keep the order:
# later cells rely on names these scripts provide.
# NOTE(review): presumably path.py defines PATH, and setupCPH.py imports ox, nx, pd,
# pickle and haversine - confirm against those files.
%run -i path.py
# The generic setup script is disabled here; the Copenhagen-specific one below is used instead.
#%run -i setup.py
%run -i setupCPH.py

# Print date, Python version, machine info, git hash and imported-package versions
# so the run can be reproduced.
%load_ext watermark
%watermark -n -v -m -g -iv

### Functions

In [3]:
# Load the project's helper functions into the notebook namespace (-i).
# NOTE(review): ox_to_csv, called in the last cell, is presumably defined here - confirm.
%run -i functions.py

Loaded functions.



### Load street network graph from Copenhagen

In [None]:
# Load the Copenhagen street network from GraphML. Below it serves as the reference
# graph for deciding which population density points lie close enough to the network to keep.
G_carall = ox.load_graphml('../../bikenwgrowth_external/data/copenhagen/bikedata/G_carall_graphml.graphml')

In [None]:
# Load the pickled collection of network graphs; Gs is indexed by network type name below.
# SECURITY NOTE: pickle.load can execute arbitrary code, so only load this file if it
# comes from a trusted source (e.g. it was produced by an earlier step of this project).
with open("../../bikenwgrowth_external/data/copenhagen/bikedata/networks.pkl", 'rb') as f:
    Gs = pickle.load(f)

## Embedding population density data into the Copenhagen street network graph

### Load and filter data

In [None]:
# Load the population density grid for Denmark and give the X/Y/Z columns descriptive names.
# rename() returns a new frame; assigning it avoids in-place mutation.
pop_den_df = pd.read_csv("../../bikenwgrowth_external/data/copenhagen/bikedata/dnk_pd_2019_1km_ASCII_XYZ.csv")
pop_den_df = pop_den_df.rename(columns = {'X':'long', 'Y':'lat','Z':'pop_den'})

# pop_den_df holds population densities for the whole of Denmark. Keep only the points
# whose nearest node in G_carall is at most snapthreshold metres away, i.e. the points
# that lie on or next to the Copenhagen street network.
CPH = []  # positional indices of the rows to keep

for i, (_, row) in enumerate(pop_den_df.iterrows()):
    # Materialise the row once instead of calling .iloc[i] for every field.
    point = [row.lat, row.long]
    n = ox.distance.get_nearest_node(G_carall, point)
    if haversine(point, (G_carall.nodes[n]["y"], G_carall.nodes[n]["x"]), unit="m") <= snapthreshold:
        CPH.append(i)

# Positional selection keeps the original index labels, column names and dtypes,
# and still yields a frame with the expected columns when no point qualifies.
pop_den_df = pop_den_df.iloc[CPH]

### Assign each edge in the network graphs the value of the nearest population density point

In [None]:
networktypes = ['biketrack',
 'carall',
 'bikeable',
 'biketrackcarall']


def _nearest_pop_den(lat, long, density_sites):
    """Return the density value of the site closest to (lat, long).

    density_sites is a list of ((lat, long), pop_den) pairs. Distances are
    haversine distances in metres. On an exact tie the later site wins, which
    matches the previous dict-keyed-by-distance implementation.
    """
    best_dist = None
    best_value = None
    for coords, value in density_sites:
        dist = haversine([lat, long], coords, unit="m")
        if best_dist is None or dist <= best_dist:
            best_dist, best_value = dist, value
    return best_value


for networktype in networktypes:
    print(networktype)
    G_net = Gs[networktype]

    # Snap every density point to its nearest node of this network and keep the point
    # only if that node is within snapthreshold metres. If several points snap to the
    # same node, the last one wins.
    snapped_density = {}
    for _, row in pop_den_df.iterrows():
        point = [row.lat, row.long]
        n = ox.distance.get_nearest_node(G_net, point)
        if haversine(point, (G_net.nodes[n]["y"], G_net.nodes[n]["x"]), unit="m") <= snapthreshold:
            snapped_density[n] = row.pop_den
    print(len(snapped_density), "nodes carry a population density value")

    # Without any snapped point there is nothing to assign; fail with a clear message
    # instead of the opaque "min() arg is an empty sequence".
    if not snapped_density and G_net.number_of_edges() > 0:
        raise ValueError("No population density point lies within snapthreshold of network '" + networktype + "'")

    # Coordinates of the snapped nodes do not change per edge, so look them up once.
    density_sites = [((G_net.nodes[n]["y"], G_net.nodes[n]["x"]), value)
                     for n, value in snapped_density.items()]

    # Default for every edge; overwritten below.
    nx.set_edge_attributes(G_net, 0.0, 'pop_den')

    # An edge takes the density of the site nearest to its source node. The result
    # depends only on the source node, so compute it once per node. Iterating with
    # keys=True makes sure parallel edges (key != 0) receive the value as well.
    density_at_node = {}
    edge_density = {}
    for a, b, k in G_net.edges(keys=True):
        if a not in density_at_node:
            density_at_node[a] = _nearest_pop_den(G_net.nodes[a]["y"], G_net.nodes[a]["x"], density_sites)
        edge_density[(a, b, k)] = {"pop_den": density_at_node[a]}
    nx.set_edge_attributes(G_net, edge_density)

    # Export the annotated network: CSV (raw and simplified) and GraphML.
    ox_to_csv(G_net, PATH["data"] + 'copenhagen' + "/", 'copenhagen', networktype)
    ox_to_csv(ox.simplify_graph(G_net), PATH["data"] + 'copenhagen' + "/", 'copenhagen', networktype, "_simplified")
    ox.save_graphml(G_net, "../../bikenwgrowth_external/data/copenhagen/bikedata/G_"+networktype+"_graphml.graphml")
    