In [None]:
from matplotlib import *
from __future__ import division
%matplotlib inline
import pandas as pd
import numpy as np
import csv
from matplotlib import pyplot as plt
import skmob
from skmob.utils import utils, constants
import geopandas as gpd
from skmob.models.epr import DensityEPR
import shapely
import warnings
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
from scipy.stats import linregress, spearmanr, chi2


In [None]:
pip install scikit-mobility

## get the data

In [None]:
# Location of the New York counties (2011) tessellation, taken from skmob's constants.
url = skmob.utils.constants.NY_COUNTIES_2011

# Read the tiles and rename the id column to the 'tile_ID' spelling used
# by the rest of the notebook.
tessellation = gpd.read_file(url)
tessellation = tessellation.rename(columns={'tile_id': 'tile_ID'})

# Outline-only map of the tiles: white fill, thin grey borders, no axes.
outline_style = dict(facecolor='w', edgecolor='grey', lw=0.2)
tessellation.plot(**outline_style)
plt.axis('off')

In [None]:
# Read the mobility flows into a FlowDataFrame; origin/destination ids are
# matched against the 'tile_ID' column of the tessellation.
flows_source = skmob.utils.constants.NY_FLOWS_2011
fdf = skmob.FlowDataFrame.from_file(
    flows_source,
    tessellation=tessellation,
    tile_id='tile_ID',
    sep=",",
)

# Peek at the first rows to check what was loaded.
print(fdf.head())

### plot the empirical flows

In [None]:
# Centroid of every tile, keyed by tile id. It is built here so that this
# cell runs on a fresh kernel: no earlier cell defines `points`.
points = dict(zip(tessellation.tile_ID, tessellation.centroid))

# Draw one straight segment per origin-destination pair between the two
# centroids; the line width is the flow scaled down by 100000.
for orig, dest, flow in zip(fdf['origin'], fdf['destination'], fdf['flow']):
    start, end = points[orig], points[dest]
    plt.plot([start.x, end.x], [start.y, end.y], '#c1121f', linewidth=flow / 100000)

plt.axis('off')

## build the Gravity model

In [None]:
from skmob.models import gravity

# Build the centroids dictionary (tile id -> centroid point).
# The tessellation is read again from `url`, so the merge below always starts
# from a frame that has no outflow column yet, even when this cell is re-run.
tessellation = gpd.read_file(url).rename(columns={'tile_id': 'tile_ID'})
tessellation['center'] = tessellation.centroid
points = dict(zip(tessellation.tile_ID, tessellation.center))

# Compute the total outflows from each location of the tessellation,
# excluding self loops (rows whose origin equals their destination).
non_self_flows = fdf[fdf['origin'] != fdf['destination']]
tot_outflows = non_self_flows.groupby(by='origin')[['flow']].sum().fillna(0)

# Attach the outflows to the tiles under the column name held in constants.TOT_OUTFLOW.
tessellation = tessellation.merge(tot_outflows, left_on='tile_ID', right_on='origin').rename(columns={'flow': constants.TOT_OUTFLOW})

# Instantiate a singly constrained Gravity model.
gravity_singly = gravity.Gravity(gravity_type='singly constrained')

# Instantiate a globally constrained Gravity model.
# NOTE(review): the variable is named `gravity_doubly` although the model type
# is 'globally constrained'; the name is kept so other cells are unaffected.
gravity_doubly = gravity.Gravity(gravity_type='globally constrained')

print(gravity_singly)


### fit gravity model with exponential deterrence function

In [None]:
# Gravity model, globally constrained, with an exponential deterrence function.
gravity_fit_exp = gravity.Gravity(
    gravity_type='globally constrained',
    deterrence_func_type="exponential",
)
print(gravity_fit_exp)  # model as instantiated, before fitting

# Fit the model parameters from the real fluxes.
gravity_fit_exp.fit(fdf, relevance_column='population')

# Generate synthetic flows over the tessellation with the fitted model.
exp_generate_options = dict(
    tile_id_column='tile_ID',
    tot_outflows_column='tot_outflow',
    relevance_column='population',
    out_format='flows',
)
synth_fdf_exp = gravity_fit_exp.generate(tessellation, **exp_generate_options)
print(synth_fdf_exp.head())


In [None]:
# Print the exponential model again, now after `fit`, to inspect its parameters.
print(str(gravity_fit_exp))

### fit gravity model with power law deterrence function

In [None]:
# Gravity model, globally constrained, with a power-law deterrence function.
gravity_fit_pow = gravity.Gravity(
    gravity_type='globally constrained',
    deterrence_func_type="power law",
)
print(gravity_fit_pow)  # model as instantiated, before fitting

# Fit the model parameters from the real fluxes.
gravity_fit_pow.fit(fdf, relevance_column='population')

# Generate synthetic flows over the tessellation with the fitted model.
pow_generate_options = dict(
    tile_id_column='tile_ID',
    tot_outflows_column='tot_outflow',
    relevance_column='population',
    out_format='flows',
)
synth_fdf_pow = gravity_fit_pow.generate(tessellation, **pow_generate_options)
print(synth_fdf_pow.head())


In [None]:
# Print the power-law model again, now after `fit`, to inspect its parameters.
print(str(gravity_fit_pow))

### plot the fitted gravity model flows

In [None]:
# Side-by-side maps: observed flows and the flows generated by the two
# fitted gravity models (exponential and power-law deterrence).
fig, axes = plt.subplots(ncols=3, nrows=1, figsize=(12, 4))

# (panel title, flows to draw), in left-to-right order.
panels = [
    ('data', fdf),
    ('exp', synth_fdf_exp),
    ('pow', synth_fdf_pow),
]

for ax, (title, flows) in zip(axes.flat, panels):
    # One segment between the centroids of each origin-destination pair;
    # the line width is the flow scaled down by 100000.
    for orig, dest, flow in zip(flows['origin'], flows['destination'], flows['flow']):
        start, end = points[orig], points[dest]
        ax.plot([start.x, end.x], [start.y, end.y], '#c1121f', linewidth=flow / 100000)
    # Title and axis visibility belong to the panel, so they are set once,
    # outside the row loop (and also when a frame has no rows).
    ax.set_title(title)
    ax.axis('off')


In [None]:
# Fill routes that are missing from the original dataset, setting them to 0.
places = set(fdf.origin.unique()).union(fdf.destination.unique())

# Observed (origin, destination) pairs, collected in a single pass so that
# each candidate pair is a set lookup instead of a scan of the whole frame.
observed_pairs = set(zip(fdf['origin'], fdf['destination']))

# Self pairs are skipped: internal mobility is removed below, so a missing
# self loop must not be re-introduced as a zero-flow row.
missing_flows = [
    [0, ori, dest]
    for ori in places
    for dest in places
    if ori != dest and (ori, dest) not in observed_pairs
]

# Remove internal mobility (rows whose origin equals their destination).
fdf_base = fdf[fdf.origin != fdf.destination]
synth_fdf_exp = synth_fdf_exp[synth_fdf_exp.origin != synth_fdf_exp.destination]
synth_fdf_pow = synth_fdf_pow[synth_fdf_pow.origin != synth_fdf_pow.destination]

# Add the missing routes to the original dataset for comparison.
nr = pd.DataFrame(missing_flows, columns=['flow', 'origin', 'destination'])
fdf_new = pd.concat([fdf_base, nr])


In [None]:
# Put the observed and the synthetic flows in the same (origin, destination)
# order so their flow columns can be compared value by value.
sort_keys = ['origin', 'destination']
fdf_new = pd.DataFrame(fdf_new.sort_values(by=sort_keys))
synth_fdf_exp = synth_fdf_exp.sort_values(by=sort_keys)
synth_fdf_pow = synth_fdf_pow.sort_values(by=sort_keys)

In [None]:
# Row counts of the observed flows and of the exponential-model flows.
tuple(len(frame.flow) for frame in (fdf_new, synth_fdf_exp))

### model selection

In [None]:
# Common part of commuters between the observed flows and each gravity model.
cpc = skmob.measures.evaluation.common_part_of_commuters
for label, modelled in (('pow', synth_fdf_pow), ('exp', synth_fdf_exp)):
    print(label, cpc(fdf_new.flow.values, modelled.flow.values))

In [None]:
# Pearson correlation, read from the r value of a linear regression of the
# modelled flows on the observed flows.
for label, modelled in (('pow', synth_fdf_pow), ('exp', synth_fdf_exp)):
    regression = linregress(fdf_new.flow.values, modelled.flow.values)
    print(label, regression.rvalue)


In [None]:
# Log-log scatter of observed flows against the exponential gravity model;
# the dashed black diagonal is the y = x reference line.
fig, ax = plt.subplots(figsize=(4, 4))
observed = fdf_new.flow.values
modelled = synth_fdf_exp.flow.values
print(linregress(observed, modelled).rvalue)
ax.loglog(observed, modelled, 'o', lw=0, markersize=1)
ax.loglog([1, 100000], [1, 100000], lw=1, color='k', ls='--')
ax.set_xlabel('data')
ax.set_ylabel('gravity model exp')

## Radiation model

In [None]:
from skmob.models import radiation

# Radiation model: flows are generated straight from the tessellation
# (no fit call is made in this cell).
rad_model = radiation.Radiation()
rad_generate_options = dict(
    tile_id_column='tile_ID',
    tot_outflows_column='tot_outflow',
    relevance_column='population',
    out_format='flows',
)
synth_fdf_rad = rad_model.generate(tessellation, **rad_generate_options)

# Keep a plain DataFrame ordered by (origin, destination).
rad_sorted = synth_fdf_rad.sort_values(by=['origin', 'destination'])
synth_fdf_rad = pd.DataFrame(rad_sorted)


In [None]:
# Routes between known places that the radiation output does not contain
# get a zero flow. The modelled pairs are collected once, so each candidate
# pair is a set lookup instead of a scan of the whole frame.
modelled_pairs = set(zip(synth_fdf_rad['origin'], synth_fdf_rad['destination']))
missing_flows = [
    [0, ori, dest]
    for ori in places
    for dest in places
    if (ori, dest) not in modelled_pairs
]

# Add the missing routes to the radiation modelled flows, for comparison.
nr = pd.DataFrame(missing_flows, columns=['flow', 'origin', 'destination'])
synth_fdf_new = pd.concat([synth_fdf_rad, nr])

# Remove internal mobility, then restore the (origin, destination) order:
# the zero rows were appended at the end, while the evaluation cells compare
# the flow values position by position against the sorted observed flows.
synth_fdf_rad = synth_fdf_new[synth_fdf_new.origin != synth_fdf_new.destination]
synth_fdf_rad = synth_fdf_rad.sort_values(by=['origin', 'destination'])

In [None]:
# Side-by-side maps: observed flows, exponential gravity model flows and
# radiation model flows.
fig, axes = plt.subplots(ncols=3, nrows=1, figsize=(12, 4))

# (panel title, flows to draw), in left-to-right order.
panels = [
    ('data', fdf),
    ('gravity exp', synth_fdf_exp),
    ('radiation', synth_fdf_rad),
]

for ax, (title, flows) in zip(axes.flat, panels):
    # One segment between the centroids of each origin-destination pair;
    # the line width is the flow scaled down by 100000.
    for orig, dest, flow in zip(flows['origin'], flows['destination'], flows['flow']):
        start, end = points[orig], points[dest]
        ax.plot([start.x, end.x], [start.y, end.y], '#c1121f', linewidth=flow / 100000)
    # Title and axis visibility belong to the panel, so they are set once,
    # outside the row loop (and also when a frame has no rows).
    ax.set_title(title)
    ax.axis('off')


### model selection

In [None]:
# Common part of commuters: radiation model versus exponential gravity model.
cpc = skmob.measures.evaluation.common_part_of_commuters
for label, modelled in (('rad', synth_fdf_rad), ('exp', synth_fdf_exp)):
    print(label, cpc(fdf_new.flow.values, modelled.flow.values))

In [None]:
# Pearson correlation, read from the r value of a linear regression of the
# modelled flows on the observed flows.
for label, modelled in (('rad', synth_fdf_rad), ('exp', synth_fdf_exp)):
    regression = linregress(fdf_new.flow.values, modelled.flow.values)
    print(label, regression.rvalue)

In [None]:
# Log-log scatter of observed flows against the radiation model;
# the dashed black diagonal is the y = x reference line.
fig, ax = plt.subplots(figsize=(4, 4))
ax.loglog(fdf_new.flow.values, synth_fdf_rad.flow.values, 'o', lw=0, markersize=1)
ax.loglog([1, 100000], [1, 100000], lw=1, color='k', ls='--')
ax.set_xlabel('data')
ax.set_ylabel('radiation model')