In [1]:
%matplotlib inline

# Path management
import os
from pathlib import Path

# The notebook lives one level below the project root: take the parent of the
# current working directory and make it the working directory, so that
# project-relative imports and paths used in later cells resolve.
notebook_dir = Path(os.path.abspath(''))
main_dir = str(notebook_dir.parents[0])
os.chdir(main_dir)
print(f'main dir: {main_dir}')

main dir: /Users/pablo/OneDrive/data-science/github/transportAI


In [2]:
# Internal modules
from src import transportAI as tai

# External modules
import sys
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

main dir: /Users/pablo/OneDrive/data-science/github/transportAI


In [3]:
# =============================================================================
# 2) NETWORKS FACTORY
# =============================================================================

# Network analysed in this notebook. Alternatives left disabled by the author:
# 'SiouxFalls', 'Eastern-Massachusetts', 'Berlin-Friedrichshain',
# 'Berlin-Mitte-Center'.
network_name = 'Barcelona'

# Reporter of estimation results; its output folder is named after the network
estimation_reporter = tai.writer.Reporter(
    foldername=network_name,
    seed=2022,
)

## Read data from the TNTP repository

In [4]:
# Folder (relative to the project root) holding the TNTP network files
tntp_networks_folder = os.getcwd() + "/input/public/networks/github/"

links_df = tai.reader.read_tntp_linkdata(folderpath=tntp_networks_folder,
                                         subfoldername=network_name)

# Key each link by (init_node, term_node, '0'); the third element is the
# constant string '0' for every link.
links_df['link_key'] = [
    (init_node, term_node, '0')
    for init_node, term_node in zip(links_df['init_node'], links_df['term_node'])
]

## Build network

In [5]:
network_generator = tai.factory.NetworkGenerator()

# Adjacency matrix derived from the link keys, then the network itself
link_keys = links_df['link_key'].tolist()
A = network_generator.generate_adjacency_matrix(links_keys=link_keys)

tntp_network = network_generator.build_network(
    A=A,
    network_name=network_name,
)


Creating Barcelona network

Nodes: 930, Links: 2522


### Link performance functions

In [6]:
# Assemble the BPR link performance parameters, one row per link, and register
# them on the network. Column mapping from the TNTP link table:
# alpha <- b, beta <- power, tf <- free_flow_time, k <- capacity.
# NOTE(review): 'link_key' is taken from tntp_network.links_dict while the other
# columns come from links_df; this presumably relies on both having the same
# length and row order -- confirm.
bpr_parameters_df = pd.DataFrame({'link_key': tntp_network.links_dict.keys(),
                                  'alpha': links_df.b,
                                  'beta': links_df.power,
                                  'tf': links_df.free_flow_time,
                                  'k': links_df.capacity
                                  })

tntp_network.set_bpr_functions(bprdata=bpr_parameters_df)

### Exogenous link attributes

In [7]:
# Exogenous link attributes taken directly from the TNTP link table
link_features_df = links_df[['link_key', 'length', 'speed', 'link_type', 'toll']]

linkdata_generator = tai.factory.LinkDataGenerator()

# Synthetic continuous features 'c', 'w' and 's', simulated with range (0, 1)
synthetic_features_df = linkdata_generator.simulate_features(
    links=tntp_network.links,
    features_Z=['c', 'w', 's'],
    option='continuous',
    range=(0, 1),
)

# Sparse features, labelled 'k0', 'k1', ... and simulated with range (-1, 1)
n_sparse_features = 1
sparse_features_labels = [f'k{idx}' for idx in range(n_sparse_features)]

sparse_features_df = linkdata_generator.simulate_features(
    links=tntp_network.links,
    features_Z=sparse_features_labels,
    option='continuous',
    range=(-1, 1),
)

# Attach both simulated feature tables to the TNTP features via the link key
for extra_features_df in (synthetic_features_df, sparse_features_df):
    link_features_df = link_features_df.merge(extra_features_df, on='link_key')

# Load features data
tntp_network.load_features_data(linkdata=link_features_df)

### Utility function

In [8]:
# Utility specification: features_Y=['tt'] and features_Z=['c', 's'].
# Disabled alternatives kept from the author's experiments:
#   features_Z: [] or ['s', 'c']
#   initial_values: {'tt': -0.5, 'c': -4, 's': -2},
#                   {'tt': -1, 'c': -6, 's': -2},
#                   {'tt': -1.4, 'c': -6.4}
true_theta = {'tt': -1, 'c': -6, 's': -3}

utility_parameters = tai.estimation.Parameters(
    features_Y=['tt'],
    features_Z=['c', 's'],
    true_values=true_theta,
)

utility_function = tai.estimation.UtilityFunction(utility_parameters)

# Register the sparse features as additional parameters of the utility function
utility_function.add_sparse_features(Z=sparse_features_labels)

### OD matrix

In [9]:
# Read the O-D matrix from the TNTP files of the selected network and load it
# into the network object
od_folder = os.getcwd() + "/input/public/networks/github/"

Q = tai.reader.read_tntp_od(
    folderpath=od_folder,
    subfoldername=network_name,
)

tntp_network.load_OD(Q=Q)

Reading Q from external file
Matrix Q (110, 110) read in 0.1[s]
184679.561 trips were loaded among 7922 o-d pairs


## Paths

In [1]:
# Load k-shortest paths (k=3) into the network
paths_generator = tai.factory.PathsGenerator()
paths_generator.load_k_shortest_paths(
    network=tntp_network,
    k=3,
)

NameError: name 'tai' is not defined

### Equilibrator

In [None]:
# Equilibrium solver settings ('fw' with a grid line search, congested mode).
# A `path_size_correction=20` option was left disabled by the author.
equilibrium_options = {
    'uncongested_mode': False,
    'max_iters': 100,
    'method': 'fw',
    'iters_fw': 100,
    'search_fw': 'grid',
}

equilibrator = tai.equilibrium.LUE_Equilibrator(
    network=tntp_network,
    utility_function=utility_function,
    **equilibrium_options,
)

### Synthetic traffic counts

In [11]:
# Simulate link counts with zero-mean, zero-variance noise and coverage=0.75
# (presumably the share of links that get an observed count -- confirm), then
# attach them to the network. The second return value is not used.
noise_free = {'mu_x': 0, 'sd_x': 0}

counts, _ = linkdata_generator.simulate_counts(
    network=tntp_network,
    equilibrator=equilibrator,
    noise_params=noise_free,
    coverage=0.75,
)

tntp_network.load_traffic_counts(counts=counts)


Generating synthetic link counts via Frank-Wolfe

SUE via fw (max iters: 100)

Equilibrium gaps: ['1E-02', '2E-04', '3E-05']
Initial Fisk Objective: -257,713,281.64
Final Fisk Objective: -203,952,328.08
Improvement Fisk Objective: 20.86%
Final gap: 3E-05. Acc. bound: 1E-04. Time: 605.4 [s]
Ratio of counts versus capacity: 83276.7%
Proportion of links over capacity: 54.5%
Normalized RMSE: 0.0


## Descriptive statistics

### Network topology

In [2]:
# Summary table for the list of networks under study (a single one here)
networks_to_summarize = [tntp_network]
tai.descriptive_statistics.summary_table_networks(networks_to_summarize)

NameError: name 'tai' is not defined

### Links data

In [3]:
# Link-level data stored on the network: written to 'links_data.csv' through
# the reporter and displayed as the cell output
summary_table_links_df = tntp_network.Z_data

estimation_reporter.write_table(
    df=summary_table_links_df,
    filename='links_data.csv',
    float_format='%.3f',
)

summary_table_links_df

NameError: name 'tntp_network' is not defined

In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) of the link-level data
summary_table_links_df.describe()

Unnamed: 0,link_type,alpha,beta,tf,k,length,speed,toll,c,w,s,k0
count,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0,2522.0
mean,2.792228,8.789544e-12,4.245316,0.645347,1.0,0.645347,0.0,0.0,0.499005,0.502402,0.495623,-0.005301
std,3.336189,1.029939e-10,3.607161,1.650823,0.0,1.650823,0.0,0.0,0.293243,0.290215,0.290686,0.571338
min,1.0,0.0,0.0,0.051429,1.0,0.051429,0.0,0.0,0.001002,0.000465,0.000416,-0.998784
25%,1.0,1.96715e-67,4.21725,0.188571,1.0,0.188571,0.0,0.0,0.246293,0.249529,0.238235,-0.486158
50%,1.0,1.087306e-18,4.446,0.373333,1.0,0.373333,0.0,0.0,0.496151,0.498352,0.492623,-0.001136
75%,1.0,7.010272e-18,4.924,0.778333,1.0,0.778333,0.0,0.0,0.760901,0.754625,0.752403,0.496352
max,9.0,2.176649e-09,16.83,55.0,1.0,55.0,0.0,0.0,0.999948,0.999605,0.999842,0.998854


In [16]:
# =============================================================================
# BENCHMARK PREDICTIONS
# =============================================================================

# Benchmark 1: naive prediction using the mean of the counts, passed as a
# column vector
counts_column = np.array(list(counts.values()))[:, np.newaxis]

mean_counts_prediction_loss, mean_count_benchmark_model = tai.estimation.mean_count_prediction(
    x_bar=counts_column)

print(f'\nObjective function under mean count prediction: {round(mean_counts_prediction_loss, 1):,}')

# Benchmark 2: naive prediction using the uncongested network
equilikely_prediction_loss, x_eq_equilikely = tai.estimation.loss_counts_uncongested_network(
    network=tntp_network,
    equilibrator=equilibrator,
    counts=tntp_network.counts_vector,
    utility_function=utility_function,
)

print(f'Objective function under equilikely route choices: {round(equilikely_prediction_loss, 1):,}')


Objective function under mean count prediction: 4,336,962,541.3
Objective function under equilikely route choices: 270,160,496.7


In [17]:
# =============================================================================
# 5) BILEVEL OPTIMIZATION
# =============================================================================

# Stage 1 ("norefined"): outer optimizer with method 'ngd', eta=1e-1.
# A `path_size_correction=1` option was left disabled by the author.
outer_optimizer_norefined = tai.estimation.LUE_OuterOptimizer(
    method='ngd',
    iters=1,  # 10
    eta=1e-1,
)

learner_norefined = tai.estimation.LUE_Learner(
    equilibrator=equilibrator,
    outer_optimizer=outer_optimizer_norefined,
    utility_function=utility_function,
    network=tntp_network,
    name='norefined',
)

# Stage 2 ("refined"): outer optimizer with method 'lm'.
# Disabled alternatives: method='gauss-newton', method='ngd' with eta=1e-2,
# and path_size_correction=1.
outer_optimizer_refined = tai.estimation.LUE_OuterOptimizer(
    method='lm',
    iters=10,
)

learner_refined = tai.estimation.LUE_Learner(
    network=tntp_network,
    equilibrator=equilibrator,
    outer_optimizer=outer_optimizer_refined,
    utility_function=utility_function,
    name='refined',
)

## Estimation

In [None]:
print('\nStatistical Inference with no refined solution')

# Run the no-refined learner: 10 bilevel iterations, hypothesis tests against
# h0=0 at alpha=0.05, with per-iteration reporting enabled
(learning_results_norefined,
 inference_results_norefined,
 best_iter_norefined) = learner_norefined.statistical_inference(
    h0=0,
    bilevel_iters=10,
    alpha=0.05,
    iteration_report=True,
)

# Parameter estimates at the best iteration of this stage
theta_norefined = learning_results_norefined[best_iter_norefined]['theta']


Statistical Inference with no refined solution

Bilevel optimization for Barcelona network 

Iteration : 1/10

Initial theta: {'tt': '0.0E+00', 'c': '0.0E+00', 's': '0.0E+00', 'k0': '0.0E+00'}

SUE via fw (max iters: 100)

Equilibrium gaps: ['0E+00']
Initial Fisk Objective: -527,830.05
Final Fisk Objective: -527,830.05
Improvement Fisk Objective: 0.00%
Final gap: 0E+00. Acc. bound: 1E-04. Time: 250.4 [s]
Initial objective: 270,160,497
Initial RMSE: 378.0
Initial Normalized RMSE: 0.454

Iteration : 2/10

Learning params via ngd (1 iters, eta = 1.0E-01)

theta: {'tt': '-1.0E-01', 'c': '-1.2E-05', 's': '1.6E-07', 'k0': '5.5E-06'}
Current ratio theta: 8546.0467
time: 1654.1[s]

SUE via fw (max iters: 100)

Equilibrium gaps: ['3E-03', '6E-05']
Initial Fisk Objective: -26,891,439.62
Final Fisk Objective: -20,003,722.58
Improvement Fisk Objective: 25.61%
Final gap: 6E-05. Acc. bound: 1E-04. Time: 444.1 [s]

Time current iteration: 2098.4 [s]
Current objective_value: 156,795,952
Current objec

In [None]:
print('\nStatistical Inference with refined solution')

# Start the refined stage from the best estimates of the no-refined stage
learner_refined.utility_function.parameters.initial_values = theta_norefined

(learning_results_refined,
 inference_results_refined,
 best_iter_refined) = learner_refined.statistical_inference(
    h0=0,
    bilevel_iters=10,
    alpha=0.05,
    iteration_report=True,
)

In [None]:
# =============================================================================
# 6) REPORTS
# =============================================================================

# Results at the best iteration of each stage
best_result_norefined = learning_results_norefined[best_iter_norefined]
best_result_refined = learning_results_refined[best_iter_refined]

estimation_reporter.add_items_report(
    theta_norefined=theta_norefined,
    theta_refined=best_result_refined['theta'],
    best_objective_norefined=best_result_norefined['objective'],
    best_objective_refined=best_result_refined['objective'],
    mean_count=mean_count_benchmark_model,
    mean_counts_prediction_loss=mean_counts_prediction_loss,
    equilikely_prediction_loss=equilikely_prediction_loss,
)

# Summary with most relevant options, prediction error, initial parameters, etc
estimation_reporter.write_estimation_report(
    network=tntp_network,
    learners=[learner_norefined, learner_refined],
    linkdata_generator=linkdata_generator,
    utility_function=utility_function,
)

# Tables with the learning results of both stages
estimation_reporter.write_learning_tables(
    results_norefined=learning_results_norefined,
    results_refined=learning_results_refined,
    network=tntp_network,
    utility_function=utility_function,
    simulated_data=True,
)

# Tables with the inference results of both stages
estimation_reporter.write_inference_tables(
    results_norefined=inference_results_norefined,
    results_refined=inference_results_refined,
    float_format='%.3f',
)

## Visualizations

In [None]:
# Optional global figure size (left disabled by the author):
# plt.rcParams['figure.figsize'] = [8, 6]

# Convergence: loss and estimates over iterations, split by stage
results_df = tai.descriptive_statistics.get_loss_and_estimates_over_iterations(
    results_norefined=learning_results_norefined,
    results_refined=learning_results_refined,
)

estimation_folder = estimation_reporter.dirs['estimation_folder']
is_norefined = results_df['stage'] == 'norefined'
is_refined = results_df['stage'] == 'refined'
outer_methods = [
    outer_optimizer_norefined.options['method'],
    outer_optimizer_refined.options['method'],
]

fig = tai.visualization.Artist().convergence(
    results_norefined_df=results_df[is_norefined],
    results_refined_df=results_df[is_refined],
    simulated_data=True,
    filename='convergence_' + tntp_network.key,
    methods=outer_methods,
    theta_true=utility_function.parameters.true_values,
    folder=estimation_folder,
)

fig.savefig(
    estimation_folder + '/' + 'bilevel_optimization_convergence.pdf',
    pad_inches=0.1,
    bbox_inches="tight",
)

plt.show()

plt.close(fig)

In [None]:
# Distribution of errors across link counts

# Predicted link flows at the best iteration of each stage, as column vectors.
# Each stage must be indexed with its OWN best iteration: the no-refined
# results were previously looked up with `best_iter_refined`, which selects the
# wrong iteration (or fails if that iteration does not exist in the no-refined
# results).
best_x_norefined = np.array(list(learning_results_norefined[best_iter_norefined]['x'].values()))[:, np.newaxis]
best_x_refined = np.array(list(learning_results_refined[best_iter_refined]['x'].values()))[:, np.newaxis]

# Left panel: no-refined stage; right panel: refined stage (shared y-axis)
fig, axs = plt.subplots(1, 2, sharey=True, tight_layout=True, figsize=(8, 4))

for axi, best_x in zip(axs, (best_x_norefined, best_x_refined)):
    # Histogram of the per-link errors returned by tai.estimation.error_by_link
    axi.hist(tai.estimation.error_by_link(observed_counts=tntp_network.counts_vector,
                                          predicted_counts=best_x))
    axi.tick_params(axis='x', labelsize=16)
    axi.tick_params(axis='y', labelsize=16)

fig.savefig(estimation_reporter.dirs['estimation_folder'] + '/' + 'distribution_predicted_count_error.pdf',
            pad_inches=0.1, bbox_inches="tight")

plt.show()

plt.close(fig)

In [None]:
# Heatmap O-D matrix

# Dense view of the O-D matrix (densified first if it is a sparse matrix)
od_source = tntp_network.Q
od_matrix = od_source.toarray() if hasattr(od_source, 'toarray') else np.asarray(od_source)
rows, cols = od_matrix.shape

# Long-format table with one row per (origin, destination) pair, built in one
# shot instead of appending row by row with `.loc` (which is quadratic in the
# number of O-D pairs and produces mixed dtypes). Node labels are 1-based and
# rows follow the same row-major order as a nested origin/destination loop.
origin_idx, destination_idx = np.indices((rows, cols))

od_df = pd.DataFrame({
    'origin': origin_idx.ravel() + 1,
    'destination': destination_idx.ravel() + 1,
    'trips': od_matrix.ravel(),
})

od_df.origin = od_df.origin.astype(int)
od_df.destination = od_df.destination.astype(int)

od_pivot_df = od_df.pivot_table(index='origin', columns='destination', values='trips')

# Draw on the axes created here rather than on whichever axes are current
fig, ax = plt.subplots(figsize=(8, 6))
ax = sns.heatmap(od_pivot_df, linewidth=0.5, cmap="Blues", ax=ax)

# Save before showing, as the other plotting cells do: with some backends the
# figure is no longer reliably available for saving once plt.show() returns
fig.savefig(estimation_reporter.dirs['estimation_folder'] + '/' + 'heatmap_OD_matrix.pdf',
            pad_inches=0.1, bbox_inches="tight")

plt.show()

plt.close(fig)