# Ingest and Clean Data
## MSDS 452 Final Project -- Sarah Rodenbeck & Mike Soukup

### Import Necessary Libraries

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import networkx as nx
import pygraphviz
from networkx.drawing.nx_agraph import graphviz_layout
import holoviews as hv
import hvplot.networkx as hvnx
import matplotlib.pyplot as plt
# Lift pandas' cap on displayed columns so wide frames are shown in full.
pd.options.display.max_columns = None

### Streamlined hvnx_plot function

In [220]:
def hvnx_plot(G,title,pos,nodelabels=0,edge_color = 'lightgrey'):
    """Draw a directed graph with hvplot, tagging nodes with degree/neighbor info.

    Before drawing, every node of ``G`` is annotated *in place* with four
    attributes: ``in-degree``, ``in_neighbors``, ``out-degree`` and
    ``out_neighbors``. The two neighbor attributes are comma-separated
    strings of the predecessor / successor node names.

    Neighbors are converted to ``str`` before sorting, so the listing is in
    lexicographic order (e.g. ``'10'`` sorts before ``'2'``).

    Parameters
    ----------
    G
        Graph to draw. ``G.predecessors`` and ``G.successors`` are called, so
        it must be a directed networkx graph. Its node attributes are mutated.
    title
        Title applied to the plot.
    pos
        Node positions, forwarded unchanged to ``hvnx.draw``.
    nodelabels
        When equal to ``1``, a layer of node-index labels is overlaid on the
        graph. Any other value draws the graph without labels.
    edge_color
        Forwarded unchanged to ``hvnx.draw`` as ``edge_color``.

    Returns
    -------
    The object returned by ``hvnx.draw`` (with options applied), or that
    object overlaid with an ``hv.Labels`` layer when ``nodelabels == 1``.
    """
    for n in G.nodes():
        # Always stringify neighbors: the previous check looked at the type of
        # ``n`` itself, so any non-``int`` node with non-string neighbors made
        # ``", ".join`` raise TypeError.
        incoming = ", ".join(sorted(str(x) for x in G.predecessors(n)))
        outgoing = ", ".join(sorted(str(x) for x in G.successors(n)))
        # Attribute insertion order is kept as before.
        G.nodes[n]['in-degree'] = G.in_degree(n)
        G.nodes[n]['in_neighbors'] = incoming
        G.nodes[n]['out-degree'] = G.out_degree(n)
        G.nodes[n]['out_neighbors'] = outgoing

    graph = hvnx.draw(G, pos,node_color='maroon',edge_color=edge_color,edge_cmap = plt.cm.RdYlGn_r)
    graph.opts(
        title=title,
        edge_hover_line_color='green',
        edge_selection_color='green',
        node_hover_fill_color='red',
        node_selection_color='red',
        arrowhead_length=0.001,
        # Node size grows with in-degree; the +1 keeps in-degree-0 nodes visible.
        node_size=1+hv.dim('in-degree'),
        width=1000,
        height=1500,
    )
    if nodelabels==1:
        labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
        graph=(graph * labels.opts(xoffset=0, yoffset=-5,text_color='lightgrey'))
    return graph


### Read in data and create graph

In [221]:
# Link table: the first 8 lines of the file are skipped before the header row
# (presumably TNTP metadata -- confirm against the raw file).
net = pd.read_csv('RawData/ChicagoSketch_net.tntp', sep='\t', skiprows=8)
net = net.rename(columns=lambda name: name.strip().lower())
# '~' and ';' columns are discarded (they look like TNTP row delimiters).
net = net.drop(columns=['~', ';'])

# Flow table, with headers normalised the same way.
flow = pd.read_csv('RawData/ChicagoSketch_flow.tntp', sep='\t')
flow = flow.rename(columns=lambda name: name.strip().lower())

# Keep only links present in both tables, matching (init_node, term_node)
# against (from, to).
net = net.merge(
    flow,
    how='inner',
    left_on=['init_node', 'term_node'],
    right_on=['from', 'to'],
)

# Directed graph with one edge per row, carrying the listed columns as
# edge attributes.
edge_attributes = [
    'capacity', 'length', 'free_flow_time', 'b', 'power',
    'speed', 'toll', 'link_type', 'volume', 'cost',
]
G = nx.from_pandas_edgelist(
    net,
    source='init_node',
    target='term_node',
    edge_attr=edge_attributes,
    create_using=nx.DiGraph,
)


### Initial plot of graph

In [222]:
# First look at the network, with node positions computed by Graphviz.
hvnx_plot(
    G,
    title="Chicago Network",
    pos=graphviz_layout(G),
)

### Spatially plotted network with edge color gradient for volume

In [223]:
# Node table -> two-column frame (node, coords), where coords is an (X, Y) tuple.
node_pos = pd.read_csv('RawData/ChicagoSketch_node.tntp', sep='\t')
node_pos = node_pos.assign(
    coords=list(zip(node_pos['X'], node_pos['Y']))
)[['node', 'coords']]

# Mapping node -> (X, Y), usable as a layout for plotting.
xy = node_pos.set_index('node')['coords'].to_dict()

In [224]:
# One 'volume' value per edge, in the same order G.edges() yields them,
# used as the per-edge colour value.
weight = [G[u][v]['volume'] for u, v in G.edges()]
hvnx_plot(G, title="Chicago Network", pos=xy, edge_color=weight)

### Explore attributes of network

In [229]:
# Summary statistics (count, mean, std, min, quartiles, max) for the merged link/flow table.
net.describe()

Unnamed: 0,init_node,term_node,capacity,length,free_flow_time,b,power,speed,toll,link_type,from,to,volume,cost
count,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0
mean,608.398644,608.398644,15836.610169,2.778227,3.38259,0.15,4.0,0.0,0.0,1.646102,608.398644,608.398644,2399.298662,3.666383
std,217.19414,217.19414,20200.472433,2.42286,2.969652,7.273194e-15,0.0,0.0,0.0,0.868133,217.19414,217.19414,2661.518472,3.116103
min,1.0,1.0,500.0,0.061,0.0,0.15,4.0,0.0,0.0,1.0,1.0,1.0,0.0,0.034507
25%,480.0,480.0,2500.0,0.86267,0.0,0.15,4.0,0.0,0.0,1.0,480.0,480.0,488.845,0.034507
50%,635.0,635.0,5000.0,2.74568,3.6,0.15,4.0,0.0,0.0,1.0,635.0,635.0,1507.921298,3.910887
75%,781.0,781.0,49500.0,3.189982,4.9375,0.15,4.0,0.0,0.0,3.0,781.0,781.0,3470.007065,5.427562
max,933.0,933.0,49500.0,38.3558,24.92,0.15,4.0,0.0,0.0,3.0,933.0,933.0,22380.62,25.755935
