In [1]:
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '/Users/rtealwitter/Github/OpenStreets/code')


import data
import models
from itertools import islice
import matplotlib.colors as colors
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
import time
import networkx as nx
import geopandas as gpd
import pickle
import torch
import numpy as np
import torch.nn.functional as F
import pandas as pd
import copy
import momepy
import math
import json

from xgboost import XGBClassifier    


In [32]:
class Static:
    """Load-once resources shared by every ``State``.

    The class is used purely as a namespace: every statement below runs once,
    when the class body is executed, and the results are read elsewhere as
    ``Static.<name>``.  Note that plain loop variables used in this body
    (``u``, ``v``, ``edges``, ``key``, ``edge``, ``link``) also end up as class
    attributes; they are leftovers, not part of the intended interface.

    Attributes read by the helper functions in this notebook:
        weather, data_constant: inputs to ``data.get_X_day``.
        links: GeoDataFrame read from ``data/links.json``.
        graph: undirected ``nx.Graph`` built from ``links``.
        links_to_edges: link ``OBJECTID`` -> list of ``(u, v)`` node pairs.
        dual_graph: graph unpickled from ``data/dual_graph.pkl``.
        link_to_capacity: link ``OBJECTID`` -> int taken from ``Number_Tra``
            (1 when the value is missing or NaN).
        link_to_length: link ``OBJECTID`` -> ``SHAPE_Leng`` value.
        osid_indices: ``OBJECTID`` of links whose ``SegmentID`` appears in the
            open-streets file.
        device: CUDA device when available, otherwise CPU.
        bst: XGBoost classifier loaded from ``saved_models/xgb.json``.
    """
    # Things we only need to load once
    years = ['2013', '2014', '2015']
    weather = data.preprocess_weather(years)
    links = gpd.read_file('data/links.json')
    data_constant = data.prepare_links(links)
    graph = momepy.gdf_to_nx(links, directed=True)
    # Record, for every link OBJECTID, the node pairs whose edge data carries it.
    links_to_edges = {}
    for u,v,_ in graph.edges:
        edges = graph.get_edge_data(u,v)
        for key in edges:
            edge = edges[key]
            links_to_edges.setdefault(edge['OBJECTID'], []).append((u,v))
    graph = nx.Graph(graph) # convert from multigraph to graph
    # Close the file handle deterministically instead of leaking it.
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open('data/dual_graph.pkl', 'rb') as _dual_graph_file:
        dual_graph = pickle.load(_dual_graph_file)
    del _dual_graph_file  # keep the temporary handle out of the class namespace
    # links and capacity
    link_to_capacity = dict(zip(links['OBJECTID'], links['Number_Tra']))
    link_to_length = dict(zip(links['OBJECTID'], links['SHAPE_Leng']))
    # Missing (None) or NaN capacities fall back to 1; everything else is
    # truncated to int.  Only values are replaced, so iterating the dict while
    # assigning is safe.
    for link in link_to_capacity:
        if link_to_capacity[link] is None or math.isnan(float(link_to_capacity[link])):
            link_to_capacity[link] = 1
        else:
            link_to_capacity[link] = int(link_to_capacity[link])

    openstreets = gpd.read_file('data/Open_Streets_Locations.csv')

    # True for every row of `links` whose SegmentID is listed in the open-streets file.
    mask = np.isin(links['SegmentID'], openstreets['segmentidt'])

    osid_indices = list(links[mask]['OBJECTID'])
    
    # LUCAS
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bst = XGBClassifier(n_estimators=20, max_depth=6, learning_rate=0.3, objective='binary:logistic')
    bst.load_model('saved_models/xgb.json')

    # Alternative predictor, currently disabled.  Code elsewhere in this
    # notebook checks hasattr(Static, 'model') before hasattr(Static, 'bst'),
    # so enabling these lines switches the predictor that is used.
    #model = models.ScalableRecurrentGCN(node_features = 127, hidden_dim_sequence=[1024,512,768,256,128,64,64]).to(device)
    #model.load_state_dict(torch.load('saved_models/best_scalable_rgnn.pt', map_location=device))
    #model.eval()
    #for p in model.parameters(): p.requires_grad = False


In [39]:
# Helper functions for the state
def k_shortest_paths(graph, source, target, k):
    """Return at most ``k`` paths from ``source`` to ``target``.

    Paths are taken, in order, from ``nx.shortest_simple_paths`` using the
    ``'expected_time'`` edge attribute as the weight; only the first ``k``
    items of that generator are consumed.
    """
    ranked_paths = nx.shortest_simple_paths(
        graph, source, target, weight='expected_time'
    )
    first_k = islice(ranked_paths, k)
    return list(first_k)

def normalize_weights(weights):
    """Turn path costs into shares that sum to 1, favouring cheap paths.

    Each cost is replaced by its reciprocal (so the smallest cost gets the
    largest share) and the reciprocals are divided by their total.
    A cost of 0 raises ``ZeroDivisionError``.
    """
    reciprocals = [1.0 / cost for cost in weights]
    total = sum(reciprocals)
    shares = []
    for value in reciprocals:
        shares.append(value / total)
    return shares

def redistribute_flow(graph, source, target, flow_day, flow_link, k=5):
    """Spread ``flow_link`` over up to ``k`` shortest ``source``->``target`` paths.

    Each candidate path is costed by summing the ``'expected_time'`` of its
    edges; the costs are converted to shares with ``normalize_weights`` and
    every edge on a path has ``share * flow_link`` added to its entry in
    ``flow_day`` (keyed by the edge's ``'OBJECTID'``).

    ``flow_day`` is modified in place and also returned.

    Returns:
        (flow_day, no_path): ``no_path`` is True when ``source`` and ``target``
        are not connected in ``graph``, in which case nothing is changed.
    """
    if nx.has_path(graph, source, target):
        routes = list(k_shortest_paths(graph, source, target, k))
        # Total expected travel time of each route, in route order.
        route_costs = [
            sum(graph[a][b]['expected_time'] for a, b in zip(route, route[1:]))
            for route in routes
        ]
        for route, share in zip(routes, normalize_weights(route_costs)):
            for a, b in zip(route, route[1:]):
                flow_day[graph.edges[(a, b)]['OBJECTID']] += share * flow_link
        return flow_day, False

    return flow_day, True

def remove_one_link(remove_this_link, flow_day, graph, k=5):
    """Remove one link from ``graph`` and reroute its flow onto other edges.

    Args:
        remove_this_link: link id; looked up in ``Static.links_to_edges`` and
            in both per-direction flow dicts.
        flow_day: dict with ``'increasing_order'`` and ``'decreasing_order'``
            entries, each mapping link id -> flow.  Modified in place.
        graph: graph supporting ``has_edge`` / ``remove_edge``.  Modified in place.
        k: number of alternative paths handed to ``redistribute_flow``.

    Returns:
        (flow_day, graph, done): ``done`` is True when the link carried no flow
        in either direction, or when at least one removed edge left its two
        end nodes disconnected.
    """
    # done if no flow in either direction or no path without this edge
    edges = Static.links_to_edges[remove_this_link]
    flow_link = (flow_day['increasing_order'][remove_this_link]
                 + flow_day['decreasing_order'][remove_this_link])
    no_flow = flow_link == 0
    no_path = False
    for u, v in edges:
        # The same node pair can be listed more than once; only act the first time.
        if not graph.has_edge(u, v):
            continue
        graph.remove_edge(u, v)
        # NOTE(review): both directions receive the combined flow of the link,
        # which looks like it double-counts the rerouted traffic - confirm intent.
        for order in ['increasing_order', 'decreasing_order']:
            # Fix: store the updated per-direction dict back under its order
            # key.  The previous code wrote the whole dict into the removed
            # link's own flow entry, making the dict contain itself and
            # turning that entry into a non-numeric value.
            flow_day[order], no_path_now = redistribute_flow(
                graph, u, v, flow_day[order], flow_link, k=k
            )
            no_path = no_path or no_path_now

    if no_path: print('no path!')
    if no_flow: print('no flow!')
    return flow_day, graph, no_path or no_flow

def calculate_traffic(remaining_links, flows_day):
    """Return one traffic-density value per link, in sorted link order.

    For each link the flows in both directions are divided by
    ``capacity * length`` (capacity and length come from
    ``Static.link_to_capacity`` / ``Static.link_to_length``).  When both
    directions carry flow of the same sign, each direction is assumed to use
    half of the capacity.  A link with no flow contributes ``0``.
    """
    densities = []
    for link in sorted(remaining_links):
        forward = flows_day['increasing_order'][link]
        backward = flows_day['decreasing_order'][link]
        lanes = Static.link_to_capacity[link]
        length = Static.link_to_length[link]

        density = 0
        # Get density of traffic per lane
        if forward * backward > 0:
            # assume half traffic lanes in each direction
            half_lane_length = lanes / 2 * length
            density += forward / half_lane_length + backward / half_lane_length
        elif forward != 0:
            density += forward / (lanes * length)
        elif backward != 0:
            density += backward / (lanes * length)

        densities.append(density)
    return densities

class State:
    """Street network for one day after closing ``removed_links``.

    Building a State reroutes the flow of every removed link, builds the edge
    list of the remaining dual graph, assembles the node features and scores
    the result.

    Attributes set by ``__init__``:
        tradeoff: weight of the collision term in ``value`` (traffic gets
            ``1 - tradeoff``).
        day, removed_links, flows_month: stored as passed in.
        remaining_links: ``remaining_links`` without anything in ``removed_links``.
        flows_day: per-direction flows for ``day`` restricted to links that
            were not removed.
        is_done: True when any removal reported "no flow" or "no path".
        edges: long tensor of shape (2, E) on ``Static.device``.
        node_features: float tensor with a leading axis of size 1 on
            ``Static.device``.
        value, total_flow, total_probability: see ``calculate_value``.
    """

    # The two travel directions stored for every day of flows.
    _ORDERS = ('increasing_order', 'decreasing_order')
    # Normalisation constants (the original notes say "normalize from random
    # day"); scores are expressed per NORMALIZED_SCALE.
    TRAFFIC_NORMALIZER = 1754308
    MODEL_PROBABILITY_NORMALIZER = 7000
    BST_PROBABILITY_NORMALIZER = 6653
    NORMALIZED_SCALE = 1000

    def __init__(self, day, removed_links, remaining_links, flows_month, tradeoff=.5):
        self.tradeoff = tradeoff
        self.day = day
        self.removed_links = removed_links
        self.flows_month = flows_month
        removed = set(removed_links)  # O(1) membership instead of scanning the list
        self.remaining_links = [x for x in remaining_links if x not in removed]
        # done if flows are 0 or if there is no path without the removed links
        self.flows_day, self.is_done = self.remove_links_from_flows()        
        self.edges = self.remove_links_from_edges().to(Static.device)
        self.node_features = self.remove_links_from_node_features().to(Static.device)
        self.value, self.total_flow, self.total_probability = self.calculate_value()

    def remove_links_from_flows(self):
        """Reroute the flow of every removed link and drop those links.

        Works on a copy of the day's flows so that ``self.flows_month`` - which
        callers may share between several states - is never modified.

        Returns:
            (flows, is_done): ``flows`` maps each direction to
            ``{link: flow}`` for links not in ``self.removed_links``;
            ``is_done`` is True when any ``remove_one_link`` call reported done.
        """
        # Subset graph to nodes connected to remaining links and removed links
        graph = Static.graph.copy()
        month_day = self.flows_month[str(self.day)]
        # remove_one_link updates the per-direction dicts in place, so give it
        # private copies of them (other keys, if any, are passed through).
        flow_day = dict(month_day)
        for order in self._ORDERS:
            flow_day[order] = dict(month_day[order])
        is_done = False
        for remove_this_link in self.removed_links:
            flow_day, graph, is_done_now = remove_one_link(remove_this_link, flow_day, graph)
            is_done = is_done or is_done_now
        removed = set(self.removed_links)
        flow_day_remaining = {
            order: {k: v for k, v in flow_day[order].items() if k not in removed}
            for order in self._ORDERS
        }
        return flow_day_remaining, is_done
        
    def remove_links_from_edges(self): 
        """Return the dual-graph edges among the remaining links as a (2, E) long tensor.

        Nodes are relabelled to consecutive integers in sorted order before the
        edge list is extracted.
        """
        # We could use from torch_geometric.utils.convert import from_networkx
        # to convert the graph to a torch_geometric.data.Data object
        # The problem is that it doesn't preserve the node order so we'd need to
        # add the data to the networkx graph and
        # the best way seems like using set_node_attributes which takes a dictionary
        # and turning pandas dataframe into a dictionary takes way longer than relabeling
        dual_graph = Static.dual_graph.subgraph(self.remaining_links).copy()
        assert 0 not in dual_graph.nodes # check we're not already relabeled
        dual_graph = nx.convert_node_labels_to_integers(dual_graph, ordering='sorted')
        assert 0 in dual_graph.nodes # check the relabeling worked        
        # reshape(-1, 2) keeps the (E, 2) layout even when there are no edges,
        # so the transposed result is always (2, E).
        edge_array = np.array(list(dual_graph.edges)).reshape(-1, 2)
        return torch.tensor(edge_array).long().T

    def remove_links_from_node_features(self):
        """Build the feature tensor for the remaining links.

        Rows of ``Static.data_constant`` for the remaining links are combined
        with weather and the day's flows by ``data.get_X_day``; the result is
        returned as a float tensor with a new leading axis of size 1.
        """
        data_constant = Static.data_constant[Static.data_constant['OBJECTID'].isin(self.remaining_links)]
        X = data.get_X_day(data_constant, Static.weather, self.flows_day, self.day)
        return torch.tensor(X.values).float().unsqueeze(0)
    
    def calculate_value(self):
        """Score the state.

        Returns:
            (value, total_flow, total_probability) where ``total_flow`` is the
            normalised sum of ``calculate_traffic``, ``total_probability`` is
            the normalised sum of ``get_probabilities`` and
            ``value = (1 - tradeoff) * total_flow + tradeoff * total_probability``.

        Raises:
            RuntimeError: if ``Static`` has neither a ``model`` nor a ``bst``
                attribute, so no probability can be computed.
        """
        # get total flow
        traffic = calculate_traffic(self.remaining_links, self.flows_day)
        total_flow = sum(traffic) / self.TRAFFIC_NORMALIZER * self.NORMALIZED_SCALE
        # get total probability of collision
        probabilities = get_probabilities(self.node_features, self.edges)
        if hasattr(Static, 'model'):
            total_probability = (probabilities.sum().item()
                                 / self.MODEL_PROBABILITY_NORMALIZER * self.NORMALIZED_SCALE)
        elif hasattr(Static, 'bst'):
            total_probability = (probabilities.sum()
                                 / self.BST_PROBABILITY_NORMALIZER * self.NORMALIZED_SCALE)
        else:
            # Previously this fell through and failed later with an unbound
            # local variable; fail with a clear message instead.
            raise RuntimeError("Static defines neither 'model' nor 'bst'; cannot score the state")
        print('traffic', total_flow)
        print('probability', total_probability)
        return (1-self.tradeoff) * total_flow + self.tradeoff * total_probability, total_flow, total_probability

def subset_flows(flows_month, remaining_links):
    """Return a copy of ``flows_month`` restricted to ``remaining_links``.

    For every day, only the ``'increasing_order'`` and ``'decreasing_order'``
    entries are kept, each filtered down to the links in ``remaining_links``.
    The input is not modified.
    """
    keep = set(remaining_links)
    directions = ('increasing_order', 'decreasing_order')
    return {
        day: {
            direction: {
                link: flow
                for link, flow in flows_month[day][direction].items()
                if link in keep
            }
            for direction in directions
        }
        for day in flows_month
    }

def new_state(years = ['2013', '2014', '2015'],
              months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11']):
    """Create a fresh ``State`` for the first day of a random month.

    A year and a month are drawn with ``np.random.choice`` (seed NumPy's
    global RNG beforehand for reproducible draws), the matching
    ``flows/flow_<year>_<month>.pickle`` file is loaded and restricted to the
    links in ``Static.links``, and a ``State`` with no removed links is built.

    Args:
        years: candidate years (strings).  Not modified.
        months: candidate two-digit months (strings).  Not modified.

    Returns:
        State for ``'<year>-<month>-01'`` with every link remaining.
    """
    year, month = np.random.choice(years, 1)[0], np.random.choice(months, 1)[0]
    remaining_links = list(Static.links['OBJECTID'])
    day = f'{year}-{month}-01'
    # Use a context manager so the file handle is closed promptly.
    # NOTE: pickle can execute arbitrary code; only load trusted flow files.
    with open(f'flows/flow_{year}_{month}.pickle', 'rb') as flow_file:
        flows_month = pickle.load(flow_file)
    flows_month = subset_flows(flows_month, remaining_links)
    return State(day, [], remaining_links, flows_month)

def get_probabilities(node_features, edges):
    """Return the predicted class-1 score for every row of ``node_features``.

    ``Static.model`` is used when it exists (softmax over axis 1 of the
    squeezed model output, column 1); otherwise ``Static.bst`` is used
    (``predict_proba`` on the squeezed features, column 1) and ``edges`` is
    ignored.

    Args:
        node_features: tensor of features; size-1 axes are squeezed away
            before prediction.
        edges: edge index passed through to ``Static.model``.

    Raises:
        RuntimeError: if ``Static`` has neither a ``model`` nor a ``bst``
            attribute.  (Previously this case silently returned ``None``.)
    """
    if hasattr(Static, 'model'):
        output = Static.model(node_features, edges).squeeze()
        return F.softmax(output, dim=1)[:,1]
    if hasattr(Static, 'bst'):
        # predict_proba is handed a NumPy array, so move the tensor to the CPU first.
        features = node_features.squeeze().cpu().numpy()
        return Static.bst.predict_proba(features)[:,1]
    raise RuntimeError("Static defines neither 'model' nor 'bst'; cannot compute probabilities")
    


# Build the starting state: a randomly chosen year/month with no links removed.
# Constructing the State prints its normalised 'traffic' and 'probability' scores.
current_state = new_state()

traffic 1001.400830541625
probability 900.2002444855705
