# Mobility Trend based on Google Data

----

<div style="text-align: right"> Songphol Arrewijit, FCAS<br>12 April 2021</div>

**Data Source:**

* Google's Community Mobility Report (https://www.google.com/covid19/mobility/)

### Package and Data Import

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.patches as patch
import matplotlib.ticker as mtick
pd.options.mode.chained_assignment = None

In [2]:
# Downloaded from Google Website
google_raw = pd.read_csv('input/google_Global_Mobility_Report_20210402.csv', low_memory=False)
google_raw = google_raw.rename(
    columns={'country_region':'Country',
             'retail_and_recreation_percent_change_from_baseline':'Retail/Recreation',
             'grocery_and_pharmacy_percent_change_from_baseline':'Grocery/Pharmacy',
             'parks_percent_change_from_baseline':'Parks',
             'transit_stations_percent_change_from_baseline':'PublicTransport',
             'residential_percent_change_from_baseline':'Residential',
            'workplaces_percent_change_from_baseline':'Workplace'})
categories = ['Retail/Recreation', 'Grocery/Pharmacy', 'Parks', 'PublicTransport', 'Residential', 'Workplace']
google_raw[categories] = google_raw[categories]/100
google_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4838804 entries, 0 to 4838803
Data columns (total 15 columns):
country_region_code    object
Country                object
sub_region_1           object
sub_region_2           object
metro_area             object
iso_3166_2_code        object
census_fips_code       float64
place_id               object
date                   object
Retail/Recreation      float64
Grocery/Pharmacy       float64
Parks                  float64
PublicTransport        float64
Workplace              float64
Residential            float64
dtypes: float64(7), object(8)
memory usage: 553.8+ MB


### Data Cleaning

In [3]:
print('Last Date: %s' % np.max(google_raw['date']))

Last Date: 2021-03-31


In [4]:
google_raw['date'] = pd.to_datetime(google_raw['date'], format='%Y-%m-%d')

# Remove partial months
time_filter = (google_raw['date'] <= pd.to_datetime('2021-03-31'))

# Remove region/city level details
country_level_filter = (google_raw['sub_region_1']).isna() & (google_raw['metro_area']).isna() 
google_all = google_raw[country_level_filter & time_filter]
google_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 55263 entries, 0 to 4831695
Data columns (total 15 columns):
country_region_code    54852 non-null object
Country                55263 non-null object
sub_region_1           0 non-null object
sub_region_2           0 non-null object
metro_area             0 non-null object
iso_3166_2_code        0 non-null object
census_fips_code       0 non-null float64
place_id               55263 non-null object
date                   55263 non-null datetime64[ns]
Retail/Recreation      54277 non-null float64
Grocery/Pharmacy       54226 non-null float64
Parks                  54052 non-null float64
PublicTransport        54387 non-null float64
Workplace              55104 non-null float64
Residential            53934 non-null float64
dtypes: datetime64[ns](1), float64(7), object(7)
memory usage: 6.7+ MB


In [5]:
# Namibia's Country Code [NA] seems to have been misrecognized as "NaN".
google_all.loc[google_all['country_region_code'].isna(), "country_region_code"] = "NA"
google_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 55263 entries, 0 to 4831695
Data columns (total 15 columns):
country_region_code    55263 non-null object
Country                55263 non-null object
sub_region_1           0 non-null object
sub_region_2           0 non-null object
metro_area             0 non-null object
iso_3166_2_code        0 non-null object
census_fips_code       0 non-null float64
place_id               55263 non-null object
date                   55263 non-null datetime64[ns]
Retail/Recreation      54277 non-null float64
Grocery/Pharmacy       54226 non-null float64
Parks                  54052 non-null float64
PublicTransport        54387 non-null float64
Workplace              55104 non-null float64
Residential            53934 non-null float64
dtypes: datetime64[ns](1), float64(7), object(7)
memory usage: 6.7+ MB


In [6]:
google_all['YearMonth'] = google_all['date'].dt.month_name() + " " + google_all['date'].dt.year.astype(int).astype(str)
ym_list = google_all['YearMonth'].unique()
print(ym_list)

['February 2020' 'March 2020' 'April 2020' 'May 2020' 'June 2020'
 'July 2020' 'August 2020' 'September 2020' 'October 2020' 'November 2020'
 'December 2020' 'January 2021' 'February 2021' 'March 2021']


In [7]:
# 3-letter ISO Alpha code is necessary for Plotly Express Cholopleth
iso_map = pd.read_csv('input/iso_codes.csv').rename(columns={'ISO_2':'country_region_code', 'ISO_3':'iso_alpha'})
google_all = google_all.merge(iso_map, how='left', on='country_region_code')

* Aggregation to monthly

In [8]:
google_all_agg = google_all.groupby(['Country', 'iso_alpha', 'YearMonth']).agg(
    Workplace = ('Workplace', np.mean),
    RetailRecreation = ('Retail/Recreation', np.mean),
    GroceryPharmacy = ('Grocery/Pharmacy', np.mean),
    PublicTransport = ('PublicTransport', np.mean)
).reset_index().rename(columns={
    'Workplace': 'Workplace Mobility',
    'RetailRecreation': 'Retail/Recreation Mobility',
    'GroceryPharmacy': 'Grocery/Pharmacy Mobility',
    'PublicTransport': 'Public Transport Mobility'
})

In [9]:
google_all_agg.describe()

Unnamed: 0,Workplace Mobility,Retail/Recreation Mobility,Grocery/Pharmacy Mobility,Public Transport Mobility
count,1873.0,1859.0,1851.0,1859.0
mean,-0.208603,-0.226881,-0.049178,-0.268617
std,0.151433,0.215211,0.186965,0.222847
min,-0.769,-0.903333,-0.786154,-0.881333
25%,-0.290968,-0.352022,-0.138226,-0.41871
50%,-0.204667,-0.207742,-0.039,-0.274839
75%,-0.109286,-0.074425,0.030323,-0.111613
max,0.261613,0.561935,0.801613,0.700968


### Choropleth Visualization using Plotly Express

In [14]:
hover_data0={'iso_alpha':False,
             'YearMonth':False,
             'Workplace Mobility':':+.0%',
             'Retail/Recreation Mobility':':+.0%',
             'Grocery/Pharmacy Mobility':':+.0%',
             'Public Transport Mobility': ':+.0%'}

time_order = {'YearMonth': list(ym_list)}

fig = px.choropleth(google_all_agg, locations="iso_alpha", color="Workplace Mobility",
                    title="Change in Mobility Trend around the World",
                    hover_name="Country", hover_data=hover_data0,
                    animation_group="YearMonth", animation_frame="YearMonth", category_orders=time_order,
                    range_color=[-0.8, 0.3],
                    projection="natural earth",
                    color_continuous_scale=px.colors.sequential.YlOrBr,
                    width=1000, height=700)

fig.add_annotation(text="Author: Songphol Arrewijit (<a href='https://github.com/jpactuario/GoogleMobility/'>GitHub</a>)<br>Source: <a href='https://www.google.com/covid19/mobilit'>COVID-19 Google Mobility Data</a>",
                  xref="paper", yref="paper", align="left",
                  x=0.0, y=1.08, showarrow=False)

fig.update_layout(geo = dict(showcountries = True))

fig.update_layout(coloraxis_colorbar=dict(
    title="Workplace Mobility Change",
    tickformat = "+.0%",
    dtick=0.1
))

fig.show()