In [None]:
from matplotlib import *
from __future__ import division
%matplotlib inline
import pandas as pd
import numpy as np
import csv
from matplotlib import pyplot as plt
import networkx as nx
import geopandas as gpd

import shapely
import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
from scipy.stats import linregress, spearmanr, chi2


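Download the US state boundaries from https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html and choose the States map.
The next cell reads the boundaries from `Origin_and_Destination_Survey_DB1BCoupon_2014_1/us-state-boundaries.geojson`, so save (or convert) the file to that path.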

In [None]:
# Load the US state boundaries into a GeoDataFrame; used as the background of every map below.
us = gpd.read_file('Origin_and_Destination_Survey_DB1BCoupon_2014_1/us-state-boundaries.geojson')

In [None]:
# Preview the first rows of the boundaries table.
us.head()

Download the airport table with IATA codes from https://davidmegginson.github.io/ourairports-data/ and choose `airports.csv`.
The next cell keeps only US airports that have an IATA code.

In [None]:
# Load the airport table and keep US airports that have an IATA code.
cod = pd.read_csv('Origin_and_Destination_Survey_DB1BCoupon_2014_1/airports.csv', sep=',')
# .copy() gives an independent frame, so adding the 'pos' column below does not
# write into a slice of the raw table (avoids SettingWithCopyWarning).
cod = cod[(cod['iso_country'] == 'US') & cod['iata_code'].notna()].copy()

# IATA code -> municipality name.
codes = dict(zip(cod['iata_code'], cod['municipality']))

# [longitude, latitude] per airport, i.e. x/y order for plotting.
cod['pos'] = cod[['longitude_deg', 'latitude_deg']].values.tolist()
# Municipality -> [longitude, latitude]. When several airports share a
# municipality, the last row wins (plain dict semantics).
pos = dict(zip(cod['municipality'], cod['pos']))

Download the origin-destination air travel data (DB1B Coupon) from
https://transtats.bts.gov/PREZIP/Origin_and_Destination_Survey_DB1BCoupon_{year}_{quarter}.zip
This notebook reads the file for year 2014, quarter 1.

In [None]:
# Columns of the coupon table that this notebook keeps.
coupon_columns = ['Year','Origin','OriginCountry','OriginState','Dest','DestCountry','DestState','Passengers','Distance']

# Parse only the needed columns instead of loading the whole (large) CSV and
# discarding most of it; the trailing [coupon_columns] restores the listed
# column order, because `usecols` keeps the order found in the file.
df = pd.read_csv(
    'Origin_and_Destination_Survey_DB1BCoupon_2014_1/Origin_and_Destination_Survey_DB1BCoupon_2014_1.csv',
    usecols=coupon_columns,
)[coupon_columns]

# Keep coupons whose origin and destination are both known IATA codes.
# .copy() so that later column assignments do not target a slice.
df = df[df.Origin.isin(codes) & df.Dest.isin(codes)].copy()

In [None]:
# Translate airport codes into city (municipality) names.
# `assign` builds a new frame instead of writing into the filtered `df`, which
# avoids SettingWithCopyWarning and keeps the cell safe to re-run. Every code is
# a key of `codes` (rows were filtered with isin(codes)), so `map` finds them all.
df = df.assign(
    OrigCity=df['Origin'].map(codes),
    DestCity=df['Dest'].map(codes),
)

In [None]:
# Preview the filtered coupon table with the added city columns.
df.head()

In [None]:
# Total passengers for each directed (origin city, destination city) pair;
# the result is a Series indexed by (OrigCity, DestCity).
entr = df.groupby(['OrigCity','DestCity'])['Passengers'].sum()


In [None]:

# Distribution of total passengers per origin-destination pair.
# Axis labels and a title are added so the figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(entr, bins=30)
ax.set_xlabel('passengers per origin-destination pair')
ax.set_ylabel('number of pairs')
ax.set_title('Distribution of passenger flows');



In [None]:
# Every city that appears as an origin or as a destination.
origin_cities = set(df['OrigCity'].unique())
destination_cities = set(df['DestCity'].unique())
airports = origin_cities | destination_cities

In [None]:
# Origin-destination table: one row per directed city pair with its passenger total.
OD_matrix = entr.to_frame().reset_index()

# Total passengers leaving each origin city (indexed by OrigCity).
# The deprecated `axis=0` argument of groupby is dropped; rows are the default.
tot_outflows = OD_matrix.groupby('OrigCity')[['Passengers']].sum().fillna(0)

# Attach the origin's total outflow to every row. Renaming before the join
# replaces the implicit '_x'/'_y' suffixes with an explicit column name.
OD_matrix = OD_matrix.merge(
    tot_outflows.rename(columns={'Passengers': 'outPassengers'}),
    left_on='OrigCity',
    right_index=True,
)

# Share of the origin's outgoing passengers that travel to this destination.
OD_matrix['fraction_passengers'] = OD_matrix['Passengers'] / OD_matrix['outPassengers']

In [None]:
# Effective length of each link: 1 - ln(fraction of the origin's passengers on it).
# A larger share of the outflow gives a shorter effective length.
OD_matrix['effective_length'] = 1 - np.log(OD_matrix['fraction_passengers'])

### Draw the network of passenger flows

In [None]:
# Directed passenger-flow graph: one node per city, one edge per origin/destination pair.
G = nx.DiGraph()
G.add_nodes_from(airports)

# Edge weight = passengers / 80000; the scaled value is used directly as a line width when drawing.
G.add_weighted_edges_from(
    (orig_city, dest_city, passengers / 80000)
    for orig_city, dest_city, passengers in zip(
        OD_matrix['OrigCity'], OD_matrix['DestCity'], OD_matrix['Passengers']
    )
)

In [None]:
# Edge -> scaled passenger flow, used as the drawn line width.
widths = nx.get_edge_attributes(G, 'weight')
nodelist = G.nodes()

fig, ax = plt.subplots(figsize=(12, 8))

# State boundaries as the background layer.
us.plot(ax=ax, facecolor='#faedcd')

# Cities as small black dots at their [lon, lat] position.
nx.draw_networkx_nodes(
    G, pos,
    nodelist=nodelist,
    node_size=3,
    node_color='black',
    alpha=0.7,
);

# Flows as undirected-looking lines; edges and widths come from the same dict, so they stay aligned.
nx.draw_networkx_edges(
    G, pos,
    edgelist=list(widths),
    width=list(widths.values()),
    edge_color='#219ebc',
    alpha=1,
    arrows=False,
);

# Fixed longitude/latitude window for the map.
ax.set_xlim(-180, -60)
ax.set_ylim(10, 80)
ax.axis('off')

### draw network with effective length between airports

In [None]:
# Rebuild G with the effective length as the edge weight (replaces the passenger-flow graph).
G = nx.DiGraph()
G.add_nodes_from(airports)
G.add_weighted_edges_from(
    zip(OD_matrix['OrigCity'], OD_matrix['DestCity'], OD_matrix['effective_length'])
)

In [None]:
# Edge -> effective length; drawn widths are these values divided by 500.
widths = nx.get_edge_attributes(G, 'weight')
nodelist = G.nodes()

fig, ax = plt.subplots(figsize=(12, 8))

# State boundaries as the background layer.
us.plot(ax=ax, facecolor='#faedcd')

# Cities as small black dots at their [lon, lat] position.
nx.draw_networkx_nodes(
    G, pos,
    nodelist=nodelist,
    node_size=3,
    node_color='black',
    alpha=0.7,
);

# Links as lines; edges and widths come from the same dict, so they stay aligned.
nx.draw_networkx_edges(
    G, pos,
    edgelist=list(widths),
    width=np.array(list(widths.values()), dtype=float) / 500,
    edge_color='#f28482',
    alpha=1,
    arrows=False,
);

# Fixed longitude/latitude window for the map.
ax.set_xlim(-180, -60)
ax.set_ylim(10, 80)
ax.axis('off')

### find effective distance from given epidemic source

In [None]:
from networkx.algorithms.shortest_paths.generic import shortest_path
from networkx.classes.function import path_weight

In [None]:
#decide the source of the epidemic
source_1 = 'Los Alamos'

#compute arrival times with the effective distance
# (weighted shortest-path length from the source to every reachable city)
arrival_times_1 = nx.shortest_path_length(G,source=source_1,weight='weight')

G_at = nx.DiGraph()
G_at.add_nodes_from(airports)
# Draw exactly the cities that have an arrival time, in the dict's own order.
# The dict is not in G_at.nodes() order and omits unreachable cities, so using
# G_at.nodes() here would paint colours on the wrong nodes (or fail on a length mismatch).
nodelist_at = list(arrival_times_1)
# plt.cm is used explicitly; a bare `cm` only exists through `from matplotlib import *`.
cmap = plt.cm.Accent
max_at = 20 #max(arrival_times.values())  -- fixed upper end of the colour scale
node_colors_1 = cmap(
    np.fromiter((arrival_times_1[city] for city in nodelist_at), dtype=float) / max_at
)

fig,ax = plt.subplots(figsize=(12,8))

us.plot(ax=ax,facecolor='#faedcd')
nx.draw_networkx_nodes(G_at,pos,
                       nodelist=nodelist_at,
                       node_size=20,
                       node_color=node_colors_1,
                       alpha=0.7,
                       ax=ax);

# Mark the epidemic source with a green star.
ax.scatter(pos[source_1][0],pos[source_1][1],marker='*',s=150,color='green')

ax.set_ylim(10,80)
ax.set_xlim(-180,-60)
ax.axis('off')
# The ScalarMappable is not attached to any Axes, so the colorbar must be told
# which Axes to take space from; recent matplotlib raises otherwise.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin = 0, vmax=max_at))
fig.colorbar(sm,ax=ax,shrink=0.4,label='arrival time',pad=-.01);

In [None]:
#decide the source of the epidemic
source_2 = 'New York'

#compute arrival times with the effective distance
# (weighted shortest-path length from the source to every reachable city)
arrival_times_2 = nx.shortest_path_length(G,source=source_2,weight='weight')

G_at = nx.DiGraph()
G_at.add_nodes_from(airports)
# Draw exactly the cities that have an arrival time, in the dict's own order.
# The dict is not in G_at.nodes() order and omits unreachable cities, so using
# G_at.nodes() here would paint colours on the wrong nodes (or fail on a length mismatch).
nodelist_at = list(arrival_times_2)
# `cmap` and `max_at` are the colour scale defined in the previous cell.
node_colors_2 = cmap(
    np.fromiter((arrival_times_2[city] for city in nodelist_at), dtype=float) / max_at
)

fig,ax = plt.subplots(figsize=(12,8))

us.plot(ax=ax,facecolor='#faedcd')
nx.draw_networkx_nodes(G_at,pos,
                       nodelist=nodelist_at,
                       node_size=20,
                       node_color=node_colors_2,
                       alpha=0.7,
                       ax=ax);

# Mark the epidemic source with a green star.
ax.scatter(pos[source_2][0],pos[source_2][1],marker='*',s=150,color='green')

ax.set_ylim(10,80)
ax.set_xlim(-180,-60)
ax.axis('off')
# The ScalarMappable is not attached to any Axes, so the colorbar must be told
# which Axes to take space from; recent matplotlib raises otherwise.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin = 0, vmax=max_at))
fig.colorbar(sm,ax=ax,shrink=0.4,label='arrival time',pad=-.01);

In [None]:
# Show the effective distances computed from source_1 (city -> weighted shortest-path length).
arrival_times_1

In [None]:
# Compare, city by city, the arrival time from source_1 (x) with the one from source_2 (y).
fig,ax=plt.subplots()
# Dashed diagonal: cities on this line are equally far from both sources.
ax.plot([0,25],[0,25],lw=1,ls='--',zorder=0);

# Only cities reached from both sources can be compared. Building both dicts
# from the same sorted key list guarantees that the i-th x and y values belong
# to the same city, even if the two reachable sets differ.
common_cities = sorted(set(arrival_times_1) & set(arrival_times_2))
at1 = {city: arrival_times_1[city] for city in common_cities}
at2 = {city: arrival_times_2[city] for city in common_cities}
# Pass plain lists: dict views are not sequences that matplotlib can convert reliably.
ax.scatter(list(at1.values()),list(at2.values()),s=10,color='#84a59d',zorder=1);

# x shows source_1 and y shows source_2 (the labels were previously swapped).
ax.set_xlabel(source_1)
ax.set_ylabel(source_2);