In [None]:
# `from __future__` imports must be the first statement, otherwise Python raises a SyntaxError.
from __future__ import division
from matplotlib import *
%matplotlib inline
import pandas as pd
import numpy as np
import csv
from matplotlib import pyplot as plt
import networkx as nx
import geopandas as gpd

import shapely
import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
from scipy.stats import linregress, spearmanr, chi2


### download the shapefile 
https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html   
choose the States map

In [None]:
# Load the US state boundaries; they are drawn as the background of every map below.
us = gpd.read_file(
    'Origin_and_Destination_Survey_DB1BCoupon_2014_1/us-state-boundaries.geojson'
)

In [None]:
# Preview the first five rows of the state-boundary table.
us.head(n=5)

download the US airports IATA codes at https://davidmegginson.github.io/ourairports-data/   
choose airports.csv

In [None]:
# Build two lookups from the OurAirports table, restricted to US airports:
#   codes: IATA code    -> municipality (city name)
#   pos:   municipality -> [longitude, latitude]
cod = pd.read_csv('Origin_and_Destination_Survey_DB1BCoupon_2014_1/airports.csv', sep=',')
cod = cod[cod['iso_country'] == 'US']
# Rows without an IATA code cannot be matched to the coupon data, and rows
# without a municipality would yield NaN city names that cannot be used as
# node keys. `.copy()` makes `cod` an independent frame so that adding the
# 'pos' column below does not write into a slice of the raw table.
cod = cod.dropna(subset=['iata_code', 'municipality']).copy()
codes = dict(zip(cod['iata_code'], cod['municipality']))
cod['pos'] = cod[['longitude_deg', 'latitude_deg']].values.tolist()
# When several airports share a municipality, the last one listed gives the
# city's position (later dict entries overwrite earlier ones).
pos = dict(zip(cod['municipality'], cod['pos']))

### download the origin-destination air travels from    
https://transtats.bts.gov/PREZIP/Origin_and_Destination_Survey_DB1BCoupon_{year}_{quarter}.zip  
replace {year} and {quarter} in the URL

In [None]:
# Columns of the DB1B coupon file that are kept for the analysis.
od_columns = ['Year','Origin','OriginCountry','OriginState','Dest','DestCountry','DestState','Passengers','Distance']
# `usecols` avoids loading the unused columns of this large file into memory.
df = pd.read_csv(
    'Origin_and_Destination_Survey_DB1BCoupon_2014_1/Origin_and_Destination_Survey_DB1BCoupon_2014_1.csv',
    usecols=od_columns,
)
# `usecols` returns columns in file order; restore the order listed above.
df = df[od_columns]
# Keep only coupons whose origin and destination are both known US airports.
# `.copy()` makes the result an independent frame, so adding columns to `df`
# later does not trigger SettingWithCopyWarning.
df = df[df['Origin'].isin(codes) & df['Dest'].isin(codes)].copy()

In [None]:
# Translate the origin and destination IATA codes into city names via `codes`.
for iata_column, city_column in (('Origin', 'OrigCity'), ('Dest', 'DestCity')):
    df[city_column] = df[iata_column].apply(codes.__getitem__)

In [None]:
# Preview the first five coupons, now with the city-name columns.
df.head(n=5)

sum all trips from i to j

In [None]:
# Total number of passengers on each directed (origin city, destination city) pair.
coupons_by_pair = df.groupby(['OrigCity', 'DestCity'])
entr = coupons_by_pair['Passengers'].sum()


In [None]:
# Every city that appears as an origin or as a destination becomes a network node.
origin_cities = set(df.OrigCity.unique())
destination_cities = set(df.DestCity.unique())
airports = origin_cities | destination_cities

### define fraction of passengers from origin traveling to each destination

In [None]:
# Edge list of the origin-destination matrix: one row per (OrigCity, DestCity)
# pair with the summed number of passengers.
OD_matrix = entr.to_frame().reset_index()
# Total number of passengers leaving each origin city. Keeping 'OrigCity' as a
# column (as_index=False) lets the merge below match on it, and the shared
# 'Passengers' column name produces the _x/_y suffixes that are renamed.
tot_outflows = OD_matrix.groupby('OrigCity', as_index=False)['Passengers'].sum()
OD_matrix = OD_matrix.merge(tot_outflows, left_on='OrigCity', right_on='OrigCity').rename(columns={'Passengers_x': 'Passengers','Passengers_y': 'outPassengers'})
# P_ij: fraction of the passengers leaving origin i that travel to destination j.
OD_matrix['fraction_passengers'] = OD_matrix['Passengers'] / OD_matrix['outPassengers']

### define the effective distance

Effective distance is defined as $d_{ij} = 1 - \log P_{ij}$,  
where $P_{ij}$ is the fraction of passengers leaving $i$ that travel to $j$.

In [None]:
# Effective distance d_ij = 1 - log(P_ij): the smaller the fraction of
# passengers going from i to j, the "longer" the link.
OD_matrix['effective_length'] = 1 - np.log(OD_matrix['fraction_passengers'])

### draw network of passengers flows

here, airports are nodes, link weights are the number of passengers

In [None]:
# Directed passenger-flow network: one node per city, one edge per
# origin-destination pair, weighted by the number of passengers.
G = nx.DiGraph()
G.add_nodes_from(airports)
flow_edges = zip(OD_matrix['OrigCity'], OD_matrix['DestCity'], OD_matrix['Passengers'])
for origin_city, dest_city, passengers in flow_edges:
    G.add_edge(origin_city, dest_city, weight=passengers)

In [None]:
# Map of the passenger-flow network: cities as black dots, links drawn with a
# line width proportional to the number of passengers.
fig, ax = plt.subplots(figsize=(12, 8))
us.plot(ax=ax, facecolor='#faedcd')

widths = nx.get_edge_attributes(G, 'weight')
nodelist = G.nodes()

nx.draw_networkx_nodes(
    G, pos,
    nodelist=nodelist,
    node_size=3,
    node_color='black',
    alpha=0.7,
)
# Passenger counts are scaled down so that the busiest links stay readable.
nx.draw_networkx_edges(
    G, pos,
    edgelist=widths.keys(),
    width=np.array(list(widths.values())) / 80000,
    edge_color='#219ebc',
    alpha=1,
    arrows=False,
)

# Frame the map on the longitude/latitude window used throughout the notebook.
ax.set_ylim(10, 80)
ax.set_xlim(-180, -60)
ax.axis('off')

### draw network with effective length between airports

Here the link weights are the effective distances.

In [None]:
# Rebuild G as the effective-distance network: same nodes and links as the
# flow network, but each edge weight is now the effective length.
G = nx.DiGraph()
G.add_nodes_from(airports)
length_edges = zip(OD_matrix['OrigCity'], OD_matrix['DestCity'], OD_matrix['effective_length'])
for origin_city, dest_city, eff_length in length_edges:
    G.add_edge(origin_city, dest_city, weight=eff_length)

In [None]:
# Map of the effective-distance network: cities as black dots, links drawn with
# a line width proportional to their effective length.
fig, ax = plt.subplots(figsize=(12, 8))
us.plot(ax=ax, facecolor='#faedcd')

widths = nx.get_edge_attributes(G, 'weight')
nodelist = G.nodes()

nx.draw_networkx_nodes(
    G, pos,
    nodelist=nodelist,
    node_size=3,
    node_color='black',
    alpha=0.7,
)
# Effective lengths are scaled down to obtain thin, readable lines.
nx.draw_networkx_edges(
    G, pos,
    edgelist=widths.keys(),
    width=np.fromiter(widths.values(), dtype=float) / 500,
    edge_color='#f28482',
    alpha=1,
    arrows=False,
)

# Frame the map on the longitude/latitude window used throughout the notebook.
ax.set_ylim(10, 80)
ax.set_xlim(-180, -60)
ax.axis('off')

### estimate arrival times from given epidemic source using effective distance  
use networkx.shortest_path_length

arrival times are estimated from the shortest path length measured on the effective distance network  
$\Large D_{ij} = \min_\Gamma \lambda(\Gamma_{ij})$  
where $\Gamma_{ij}$ is the path from i to j on the effective distance network and $\lambda$ is the path length

In [None]:
from networkx.algorithms.shortest_paths.generic import shortest_path

In [None]:
# Decide the source of the epidemic.
source_1 = 'New York'

# Arrival times are estimated as the shortest-path length from the source on
# the effective-distance network G (edge attribute 'weight'). The result maps
# each reachable city to its estimated arrival time.
arrival_times_1 = dict(nx.shortest_path_length(G, source=source_1, weight='weight'))

G_at = nx.DiGraph()
G_at.add_nodes_from(airports)
# Only cities reachable from the source have an arrival time. Build the node
# list and the colour array from the same sequence so that every node gets
# the colour of its own arrival time.
nodelist_at = [city for city in G_at.nodes() if city in arrival_times_1]
# Use plt.cm (as for ScalarMappable below) instead of relying on a bare `cm`
# that is only in scope through `from matplotlib import *`.
cmap = plt.cm.Accent
max_at = 20 # fixed colour-scale maximum; alternative: max(arrival_times_1.values())
node_colors = cmap(np.array([arrival_times_1[city] for city in nodelist_at], dtype=float)/max_at)

fig,ax = plt.subplots(figsize=(12,8))

us.plot(ax=ax,facecolor='#faedcd')
nx.draw_networkx_nodes(G_at,pos,
                       nodelist=nodelist_at,
                       node_size=20,
                       node_color=node_colors,
                       alpha=0.7,
                       ax=ax);

# Mark the epidemic source with a green star.
ax.scatter(pos[source_1][0],pos[source_1][1],marker='*',s=150,color='green')

ax.set_ylim(10,80)
ax.set_xlim(-180,-60)
plt.axis('off')
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin = 0, vmax=max_at))
# The mappable is not attached to any Axes, so the Axes to take space from
# must be passed explicitly (recent matplotlib versions raise otherwise).
fig.colorbar(sm,ax=ax,shrink=0.4,label='arrival time',pad=-.01)

## travel bans  
create new column with new flows cut by 50% from source to all destinations  
recompute effective length  
see difference in arrival times

In [None]:
# Fraction of the trips leaving the epidemic source that is banned (value from 0 to 1).
# With ban_fraction = 1 the source's links get an infinite effective length.
ban_fraction = 0.5
# Flows out of the source are multiplied by (1 - ban_fraction); all other
# flows are unchanged. The denominator stays the original total outflow, so
# the banned trips lower P_ij for the source instead of being renormalised away.
remaining_share = np.where(OD_matrix['OrigCity'] == source_1, 1 - ban_fraction, 1.0)
OD_matrix['fraction_passengers_with_ban'] = remaining_share * OD_matrix['Passengers'] / OD_matrix['outPassengers']
# Effective distance d_ij = 1 - log(P_ij), recomputed with the reduced flows.
OD_matrix['effective_length_with_ban'] = 1 - np.log(OD_matrix['fraction_passengers_with_ban'])


In [None]:

# Effective-distance network under the travel ban: same nodes and links,
# weighted by the effective length recomputed with the reduced flows.
G2 = nx.DiGraph()
G2.add_nodes_from(airports)
ban_edges = zip(OD_matrix['OrigCity'], OD_matrix['DestCity'], OD_matrix['effective_length_with_ban'])
for origin_city, dest_city, eff_length in ban_edges:
    G2.add_edge(origin_city, dest_city, weight=eff_length)

In [None]:
# The source of the epidemic is the same as before (source_1), so that the
# arrival times with and without the ban can be compared.

# Arrival times under the travel ban: shortest-path length from the source on
# the banned effective-distance network G2 (edge attribute 'weight').
arrival_times_2 = dict(nx.shortest_path_length(G2, source=source_1, weight='weight'))

G_at = nx.DiGraph()
G_at.add_nodes_from(airports)
# Only cities reachable from the source have an arrival time. Build the node
# list and the colour array from the same sequence so that every node gets
# the colour of its own arrival time.
nodelist_at = [city for city in G_at.nodes() if city in arrival_times_2]
node_colors = cmap(np.array([arrival_times_2[city] for city in nodelist_at], dtype=float)/max_at)

fig,ax = plt.subplots(figsize=(12,8))

us.plot(ax=ax,facecolor='#faedcd')
nx.draw_networkx_nodes(G_at,pos,
                       nodelist=nodelist_at,
                       node_size=20,
                       node_color=node_colors,
                       alpha=0.7,
                       ax=ax);

# Mark the epidemic source with a green star.
ax.scatter(pos[source_1][0],pos[source_1][1],marker='*',s=150,color='green')

ax.set_ylim(10,80)
ax.set_xlim(-180,-60)
plt.axis('off')
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin = 0, vmax=max_at))
# The mappable is not attached to any Axes, so the Axes to take space from
# must be passed explicitly (recent matplotlib versions raise otherwise).
fig.colorbar(sm,ax=ax,shrink=0.4,label='arrival time',pad=-.01)

### plot the two arrival times , with vs without travel bans
scatter plot

In [None]:
# Compare, city by city, the arrival time without and with the travel ban.
fig,ax=plt.subplots()
# Dashed diagonal: points above it are cities reached later under the ban.
ax.plot([0,25],[0,25],lw=1,ls='--',zorder=0);
# Pair the two arrival times by city name; the two dictionaries are not
# guaranteed to list the cities in the same order.
common_cities = [city for city in arrival_times_1 if city in arrival_times_2]
ax.scatter([arrival_times_1[city] for city in common_cities],
           [arrival_times_2[city] for city in common_cities],
           s=10)
plt.xlabel('arrival times')
plt.ylabel('arrival times with travel bans')

### plot difference in arrival times with banned fraction of trips from the source  
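How much are arrival times delayed when a fraction $w$ of the flows out of the source is banned?  
From Gautreau et al:  
$\Large \langle t_{arr,TR} \rangle - \langle t_{arrival} \rangle \simeq -\log(1-w)$  
where $w$ is the banned fraction (`ban_fraction`), so $1-w$ is the fraction of trips that remain.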

In [None]:
# Delay in arrival time caused by the travel ban, one point per city.
at1, at2 = arrival_times_1, arrival_times_2
# Subtract city by city: the two dictionaries are not guaranteed to list the
# cities in the same order, so pairing their values positionally would mix
# up different cities.
common_cities = [city for city in at1 if city in at2]
delays = np.array([at2[city] - at1[city] for city in common_cities])
plt.scatter(np.arange(len(delays)), delays)
w = ban_fraction
# Horizontal line at -log(1 - w), where 1 - w is the fraction of trips that remain.
plt.axhline(-np.log(1-w))
plt.xlabel('city index')
plt.ylabel('delay in arrival time')

### now try placing the epidemic source in another airport and see the differences in arrival times  
1) try peripheral vs central nodes  
2) east coast vs west coast  
3) islands