In [37]:
from pyspainmobility import Mobility, Zones
import pandas as pd 

In [None]:
# getting 1 week of mobility data. In this case, we download the data from March 10 to March 16 
mobility_data = Mobility(version=2, zones='municipalities', start_date='2022-03-10', end_date='2022-03-16')
# and we extract the OD matrices 
mobility_data.get_od_data()

In [24]:
# now we load the parquet file produced into a pandas dataframe
# !ALERT! adjust the path by looking at the output of the previous cell
od = pd.read_parquet('/Users/mluca/data/Viajes_municipios_2022-03-10_2022-03-16_v2.parquet')


In [25]:
# similarly, we download the zones concerning municipalities using the Zones module
zones = Zones ( zones = 'districts', version=2)
zones = zones.get_zone_geodataframe()
# optional, remap the CRS 
zones = zones.to_crs('4326')

Zones already downloaded. Reading the files....
File /Users/mluca/data/distritos_2.geojson already exists. Loading it...


In [26]:
# we remove flows from and to outside spain
od = od[od['id_origin']!='externo']
od = od[od['id_destination']!='externo']

# and we aggregate the flows without caring about the hour. Also, we discard the total lenght of trips and we keep only the number of trips
od = od.groupby(['date','id_origin','id_destination'])['n_trips'].sum()
od = pd.DataFrame(od).reset_index()

In [27]:
# we obtain something like this
od

Unnamed: 0,date,id_origin,id_destination,n_trips
0,2022-03-10,01001,01002,13.945
1,2022-03-10,01001,01009_AM,390.185
2,2022-03-10,01001,01017_AM,138.615
3,2022-03-10,01001,01028_AM,4.786
4,2022-03-10,01001,01047_AM,16.110
...,...,...,...,...
2141197,2022-03-16,PT187,39075,1.000
2141198,2022-03-16,PT187,41091,1.000
2141199,2022-03-16,PT187,45168,1.000
2141200,2022-03-16,PT187,50034_AM,3.551


In [28]:
# now, before joining the dataframes, we reduce the number of information needed within the zoning dataset
# we do it by keeping only the latitude and longitude of the centroid of the areas instead of keeping the entire
# geometry
zones['x'] = zones['geometry'].centroid.x
zones['y'] = zones['geometry'].centroid.y
zones = zones[['id','x','y']]
zones.set_index('id', inplace=True)


  zones['x'] = zones['geometry'].centroid.x

  zones['y'] = zones['geometry'].centroid.y


In [29]:
# now we add two columns about the x and y of origin and destination into the od dataframe 
# the only purpose is to plot them on a map
od = od.set_index('id_origin').join(zones).reset_index()
od.rename(columns={'x':'x_origin','y':'y_origin'}, inplace=True)
od = od.set_index('id_destination').join(zones).reset_index()
od.rename(columns={'x':'x_destination','y':'y_destination'}, inplace=True)

# we remove zones that have not been matched
od = od[~od['x_origin'].isna()]
od = od[~od['x_destination'].isna()]

In [34]:
# now we split weekend and weekdays and, as the purpose of this notebook is just to showcase 
# functionalities of the library, we take the average flows for weekend and for weekdays and we plot them 
# on two maps 

weekends = od[od['date'].isin(['2022-03-15','2022-03-16'])]
weekdays = od[~od['date'].isin(['2022-03-15','2022-03-16'])]

weekends = weekends.groupby(['id_origin','id_destination']).sum().reset_index().drop(columns=['date'])
weekdays = weekdays.groupby(['id_origin','id_destination']).sum().reset_index().drop(columns=['date'])

In [35]:
weekends

Unnamed: 0,id_origin,id_destination,n_trips,x_origin,y_origin,x_destination,y_destination
0,01001,01002,28.246,-5.022530,85.658142,-5.943351,86.050964
1,01001,01004_AM,2.478,-2.511265,42.829071,-3.076240,43.150024
2,01001,01009_AM,688.823,-5.022530,85.658142,-4.861977,85.766321
3,01001,01010,3.459,-2.511265,42.829071,-3.078197,43.075564
4,01001,01017_AM,326.009,-5.022530,85.658142,-4.871228,85.400379
...,...,...,...,...,...,...,...
183838,50903,50252,3.646,-0.738987,41.694659,-1.202184,41.948576
183839,50903,50272,80.692,-1.477973,83.389318,-2.000553,83.428175
183840,50903,50288,204.077,-1.477973,83.389318,-1.723361,83.577767
183841,50903,50298,57.949,-1.477973,83.389318,-1.635445,83.816035


In [36]:
weekdays

Unnamed: 0,id_origin,id_destination,n_trips,x_origin,y_origin,x_destination,y_destination
0,01001,01002,45.410,-12.556325,214.145354,-14.858376,215.127410
1,01001,01004_AM,4.648,-2.511265,42.829071,-3.076240,43.150024
2,01001,01009_AM,1910.178,-12.556325,214.145354,-12.154943,214.415801
3,01001,01017_AM,768.991,-12.556325,214.145354,-12.178071,213.500947
4,01001,01028_AM,71.843,-12.556325,214.145354,-13.437359,213.004185
...,...,...,...,...,...,...,...
329514,50903,50252,29.777,-2.216960,125.083977,-3.606551,125.845728
329515,50903,50272,157.166,-3.694934,208.473296,-5.001384,208.570437
329516,50903,50288,390.856,-3.694934,208.473296,-4.308401,208.944418
329517,50903,50298,113.002,-3.694934,208.473296,-4.088613,209.540088
