# Mobility Data Science 

## Modules

In [None]:
# Data Wrangling
import pandas as pd

# Geospatial Data Wrangling 
import geopandas as gpd
from shapely.geometry import Point, LineString

# Data Visualization
import matplotlib.pyplot as plt 
import matplotlib.ticker as ticker
%pylab inline

# Mobility Data Analysis 
import skmob

# Basic Utilities 
import warnings
warnings.filterwarnings('ignore')

### `scikit-mobility`

A module to analyze mobility data, suited for working with:

- **trajectories** composed of latitude/longitude points (e.g. GPS data)
- **fluxes** of movements between places (e.g. OD matrix)

## Data 

### `TrajDataFrame`

Each row describes one point of a trajectory and contains the following columns:

- `lat` - latitude of the point
- `lng` - longitude of the point
- `datetime` - date and time of the point

For multi-user data sets, there are two *optional* columns:

- `uid` - identifier of the user to whom the trajectory belongs
- `tid` - identifier for the trajectory

A `TrajDataFrame` can be created from:

- a Python list or *numpy* array
- a Python dictionary
- a *pandas* `DataFrame`
- a text file

In [10]:
# Build a TrajDataFrame from a plain Python list.
# Each record is [user, latitude, longitude, timestamp].
data_list = [
    [1, 39.984094, 116.319236, '2008-10-23 13:53:05'],
    [1, 39.984198, 116.319322, '2008-10-23 13:53:06'],
    [1, 39.984224, 116.319402, '2008-10-23 13:53:11'],
    [1, 39.984211, 116.319389, '2008-10-23 13:53:16'],
]

# A list carries no column names, so the mandatory columns are identified
# by position through the latitude, longitude and datetime arguments.
# Column 0 is not mapped here, so it keeps its positional name in the result.
tdf = skmob.TrajDataFrame(
    data_list,
    latitude=1,
    longitude=2,
    datetime=3,
)
print(type(tdf))
tdf

<class 'skmob.core.trajectorydataframe.TrajDataFrame'>


Unnamed: 0,0,lat,lng,datetime
0,1,39.984094,116.319236,2008-10-23 13:53:05
1,1,39.984198,116.319322,2008-10-23 13:53:06
2,1,39.984224,116.319402,2008-10-23 13:53:11
3,1,39.984211,116.319389,2008-10-23 13:53:16


In [11]:
# Wrap the same records in a pandas DataFrame whose column names
# do not all match the names a TrajDataFrame expects.
data_df = pd.DataFrame(
    data=data_list,
    columns=['user', 'latitude', 'lng', 'hour'],
)

# Show the concrete type, then preview the first rows.
print(type(data_df))
data_df.head()

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,user,latitude,lng,hour
0,1,39.984094,116.319236,2008-10-23 13:53:05
1,1,39.984198,116.319322,2008-10-23 13:53:06
2,1,39.984224,116.319402,2008-10-23 13:53:11
3,1,39.984211,116.319389,2008-10-23 13:53:16


Note that:
- the column names in `data_df` do not all match the names that `TrajDataFrame` requires (`lat`, `lng`, `datetime`)
- you must therefore specify the names of the mandatory columns using the arguments `latitude`, `longitude` and `datetime`

In [12]:
# Create a TrajDataFrame from a DataFrame.
# The column names of `data_df` differ from the names a TrajDataFrame uses,
# so each source column is mapped by name through the constructor arguments.
tdf = skmob.TrajDataFrame(
    data_df,
    latitude='latitude',  # source column 'latitude' becomes `lat`
    longitude='lng',      # already named `lng`; stated explicitly instead of relying on the default
    datetime='hour',      # source column 'hour' becomes `datetime`
    user_id='user',       # source column 'user' becomes the optional `uid`
)

print(type(tdf))
tdf.head()

<class 'skmob.core.trajectorydataframe.TrajDataFrame'>


Unnamed: 0,uid,lat,lng,datetime
0,1,39.984094,116.319236,2008-10-23 13:53:05
1,1,39.984198,116.319322,2008-10-23 13:53:06
2,1,39.984224,116.319402,2008-10-23 13:53:11
3,1,39.984211,116.319389,2008-10-23 13:53:16


In [31]:
# Load the tessellation (the polygons that partition the territory) from a shapefile.
# `gpd.read_file` is the reader geopandas recommends over `GeoDataFrame.from_file`;
# both return a GeoDataFrame.
# NOTE(review): the path below is absolute and specific to the author's machine —
# point it at your local copy of NUTS_3.shp before running.
tessellation = gpd.read_file(
    "/Users/Pit/GitHub/DigitalEpidemiologyProject/Data/Shapefiles/EGM19_Italy/NUTS_3.shp"
)

In [32]:
# Display the tessellation; for a frame this long the notebook shows only the first and last rows.
tessellation

Unnamed: 0,inspireId,beginLifes,ICC,NUTS_CODE,NUTS_LABEL,TAA,Shape_Leng,Shape_Area,geometry
0,_EG.EGM.NUTS3:4ae95bfc-24fc-4d37-9359-5777084f...,2019-02-07,IT,ITI16,Livorno,2,0.127188,0.001205,"POLYGON ((10.31323 42.35093, 10.31426 42.35072..."
1,_EG.EGM.NUTS3:e0d27089-03c2-4c41-8ce1-daf8ee28...,2019-02-07,IT,ITG13,Messina,2,4.391351,0.320924,"POLYGON ((15.54770 38.30078, 15.55342 38.29912..."
2,_EG.EGM.NUTS3:38b58736-ee49-4357-a2c6-2812a7ec...,2019-02-07,IT,ITG19,Siracusa,2,3.764754,0.213670,"POLYGON ((14.92275 37.41228, 14.94277 37.40493..."
3,_EG.EGM.NUTS3:a2b24ea9-5686-436e-9444-8306b217...,2019-02-07,IT,ITG12,Palermo,2,0.099764,0.000523,"POLYGON ((13.18325 37.63532, 13.15886 37.62598..."
4,_EG.EGM.NUTS3:fc3f1bd2-1cf1-4b37-8318-c53cee09...,2019-02-07,IT,ITG14,Agrigento,2,4.631894,0.307148,"POLYGON ((13.01888 37.74009, 13.01975 37.74233..."
...,...,...,...,...,...,...,...,...,...
161,_EG.EGM.NUTS3:5a0a269f-ee37-41bf-b017-05b2c7e6...,2019-02-07,IT,ITH41,Pordenone,2,3.122070,0.264557,"POLYGON ((12.50321 46.40480, 12.51954 46.39812..."
162,_EG.EGM.NUTS3:4c861902-ae75-47db-8ac0-ec4e4a7e...,2019-02-07,IT,ITC14,Verbano-Cusio-Ossola,2,3.286720,0.263018,"POLYGON ((8.44572 46.46372, 8.45001 46.46184, ..."
163,_EG.EGM.NUTS3:9a6beda6-a32c-4fc7-93d9-ed4e261a...,2019-02-07,IT,ITH20,Trento,2,6.830553,0.720968,"POLYGON ((11.82553 46.52180, 11.82476 46.51387..."
164,_EG.EGM.NUTS3:6352ebe4-13cc-43ed-bbc4-112edbf5...,2019-02-07,IT,ITH33,Belluno,2,4.807178,0.428822,"POLYGON ((12.47990 46.67725, 12.48127 46.67701..."


In [34]:
# Build a FlowDataFrame by reading the flows from a comma-separated file
# and attaching the tessellation to it. `tile_id` names the tessellation
# column used to identify each tile.
fdf = skmob.FlowDataFrame.from_file(
    "data/NUTS3_flows.csv",
    tessellation=tessellation,
    tile_id='NUTS_LABEL',
    sep=",",
)

In [35]:
# Preview the first rows of the flows: each row holds a flow value with its origin and destination.
fdf.head()

Unnamed: 0,flow,origin,destination
0,10000,Torino,Milano
1,1000,Torino,Roma
2,1000,Torino,Genova


In [36]:
# The tessellation is an attribute of the FlowDataFrame.
# In the output, the column passed as `tile_id` ('NUTS_LABEL') appears under the name 'tile_ID'.
fdf.tessellation.head() 

Unnamed: 0,inspireId,beginLifes,ICC,NUTS_CODE,tile_ID,TAA,Shape_Leng,Shape_Area,geometry
0,_EG.EGM.NUTS3:4ae95bfc-24fc-4d37-9359-5777084f...,2019-02-07,IT,ITI16,Livorno,2,0.127188,0.001205,"POLYGON ((10.31323 42.35093, 10.31426 42.35072..."
1,_EG.EGM.NUTS3:e0d27089-03c2-4c41-8ce1-daf8ee28...,2019-02-07,IT,ITG13,Messina,2,4.391351,0.320924,"POLYGON ((15.54770 38.30078, 15.55342 38.29912..."
2,_EG.EGM.NUTS3:38b58736-ee49-4357-a2c6-2812a7ec...,2019-02-07,IT,ITG19,Siracusa,2,3.764754,0.21367,"POLYGON ((14.92275 37.41228, 14.94277 37.40493..."
3,_EG.EGM.NUTS3:a2b24ea9-5686-436e-9444-8306b217...,2019-02-07,IT,ITG12,Palermo,2,0.099764,0.000523,"POLYGON ((13.18325 37.63532, 13.15886 37.62598..."
4,_EG.EGM.NUTS3:fc3f1bd2-1cf1-4b37-8318-c53cee09...,2019-02-07,IT,ITG14,Agrigento,2,4.631894,0.307148,"POLYGON ((13.01888 37.74009, 13.01975 37.74233..."


In [1]:
# Plot the tessellation attached to the FlowDataFrame; `popup_features` lists the
# tessellation columns to show for each tile (presumably in a popup — confirm in the skmob docs).
# NOTE(review): this cell needs `fdf` from the FlowDataFrame cell above. The recorded
# NameError means it was last run in a kernel where `fdf` was not defined —
# re-run the notebook from top to bottom.
fdf.plot_tessellation(popup_features=['tile_ID']) 

NameError: name 'fdf' is not defined