In [None]:
%matplotlib inline
import os
from timeit import default_timer as timer
import pandas as pd
from datetime import datetime, timedelta
from glob import glob
from pathlib import Path
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# --- Where the data lives and which slice of it is analysed ---
path_to_data = '/scratch/spf248/covid/data'
start_date = '01/01/2020'   # lower bound of the date window used by clean_dates
end_date = '04/16/2020'     # upper bound of the date window used by clean_dates
source = 'cuebiq'
country = 'ID'

# --- Column names used to link homes, admin units and cities ---
admin_id = 'ADM4_PCODE'
city_id = 'UC_NM_MN'

# Columns that are split into two quantile groups after geocoding
demographics = ['wealth_index', 'population_density']

# --- Sample filters applied in the figures ---
q_days = 0.5     # users are kept when their active-day count reaches this quantile
min_pings = 20   # user-days are kept when they have strictly more pings than this

In [None]:
def clean_dates(df,start_date=start_date,end_date=end_date):
    """Return the rows of ``df`` whose ``date`` lies in ``[start_date, end_date]``.

    Parameters
    ----------
    df : DataFrame with a ``date`` column (date strings or datetimes).
    start_date, end_date : anything ``pd.to_datetime`` accepts; both bounds
        are inclusive.

    Returns
    -------
    A new DataFrame (the input is left untouched) whose ``date`` column has
    been converted to datetimes. Rows whose date cannot be parsed are dropped.
    """
    first_day=pd.to_datetime(start_date)
    last_day=pd.to_datetime(end_date)
    # errors='coerce' maps malformed or out-of-range dates to NaT instead of
    # raising. The window below then discards them, so the result depends only
    # on the requested window and never on the year in which the code is run.
    dates=pd.to_datetime(df['date'],errors='coerce')
    # Comparisons against NaT are False, so unparseable rows fall out here.
    in_window=(dates>=first_day)&(dates<=last_day)
    out=df[in_window].copy()
    out['date']=dates[in_window]
    return out

def get_coords(df):
    """Unpack the ``point`` column into ``latitude`` and ``longitude`` columns.

    Each entry of ``df['point']`` must support ``entry['latitude']`` and
    ``entry['longitude']``.

    Returns a new DataFrame without the ``point`` column; the input frame is
    not modified.
    """
    # assign() builds a new frame, so the caller's frame keeps its columns.
    # drop(columns=...) is used because pandas 2.0 no longer accepts `axis`
    # as a positional argument.
    return df.assign(
        latitude=df['point'].apply(lambda x:x['latitude']),
        longitude=df['point'].apply(lambda x:x['longitude']),
    ).drop(columns='point')

# User Activity Over Time

In [None]:
print('Import')
start = timer()
# Every *.parquet file in the folder is read and the parts are stacked into one frame.
data_dir = Path(path_to_data, source, 'processed', country, 'n_pings_id_date_hour')
n_pings_id_date_hour = pd.concat(
    [pd.read_parquet(parquet_file) for parquet_file in data_dir.glob('*.parquet')]
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Process')
start = timer()
# Restrict the hourly ping counts to the configured date window.
n_pings_id_date_hour = n_pings_id_date_hour.pipe(clean_dates)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Group')
start = timer()
# Per date: number of distinct users and total number of pings.
n_pings_users_date = n_pings_id_date_hour.groupby('date').agg(
    cuebiq_id=('cuebiq_id', 'nunique'),
    n_pings=('n_pings', 'sum'),
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Figure')

# Two y-axes sharing the date axis: users on the left, pings per user on the right.
fig, ax1 = plt.subplots(figsize=(8, 5))
ax2 = ax1.twinx()

# Left axis: distinct users per day.
data1 = n_pings_users_date['cuebiq_id'].truncate(after=end_date)
ax1.plot(data1.index, data1.values, linewidth=1, color='black')

# Right axis: total pings divided by distinct users, per day.
data2 = (n_pings_users_date['n_pings'] / n_pings_users_date['cuebiq_id']).truncate(after=end_date)
ax2.plot(data2.index, data2.values, linewidth=1, color='b', linestyle='dashed')

# Identical tick styling on both axes.
for axis in (ax1, ax2):
    axis.tick_params(which='both', direction='in', pad=3)
    axis.locator_params(axis='y', nbins=6)

ax1.set_xlabel('')
ax1.set_ylabel('Number of users', fontweight='bold', color='black')
ax2.set_ylabel('Average number of pings per user', fontweight='bold', color='b')
fig.autofmt_xdate()

plt.savefig(
    os.path.join(path_to_data+'/../fig', country, 'activity-per-day.pdf'),
    bbox_inches='tight',
)

# Home Presence

In [None]:
print('Import')
start = timer()
# Every *.parquet file in the folder is read and the parts are stacked into one frame.
data_dir = Path(path_to_data, source, 'processed', country, 'n_pings_id_personal_date_hour')
n_pings_id_personal_date_hour = pd.concat(
    [pd.read_parquet(parquet_file) for parquet_file in data_dir.glob('*.parquet')]
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Process')
start = timer()
# First restrict to the date window, then unpack `point` into latitude/longitude.
n_pings_id_personal_date_hour = (
    n_pings_id_personal_date_hour
    .pipe(clean_dates)
    .pipe(get_coords)
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Import')
start = timer()
# Every *.parquet file in the folder is read and the parts are stacked into one frame.
data_dir = Path(path_to_data, source, 'processed', country, 'primary_home')
primary_home = pd.concat(
    [pd.read_parquet(parquet_file) for parquet_file in data_dir.glob('*.parquet')]
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Process')
start = timer()
# Unpack the `point` column into latitude/longitude columns.
primary_home = primary_home.pipe(get_coords)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Merge')
start = timer()
# Step by step:
#  1. inner-join personal-area pings with primary homes on user and exact
#     coordinates, which keeps only rows located at the user's primary home;
#  2. drop the `perfect_match` column (by keyword: pandas 2.0 no longer accepts
#     a positional `axis`);
#  3. relabel that ping count as `n_home`;
#  4. attach the all-location ping count of the same user, date and hour.
n_pings_id_home_date_hour=(
    n_pings_id_personal_date_hour
    .merge(primary_home,on=['cuebiq_id','latitude','longitude'])
    .drop(columns=['perfect_match'])
    .rename(columns={'n_pings':'n_home'})
    .merge(n_pings_id_date_hour,on=['cuebiq_id','date','hour'])
)
print("Done in", round(timer()-start), "sec")

In [None]:
print('Compute share of users whose pings are all in their personal area')
start = timer()
# Daily totals per user: pings at home and pings anywhere.
n_pings_home_id_date = (
    n_pings_id_home_date_hour
    .groupby(['date', 'cuebiq_id'])[['n_home', 'n_pings']]
    .sum()
)
# Fraction of the day's pings that were recorded at home.
n_pings_home_id_date['pct_home'] = (
    n_pings_home_id_date['n_home'] / n_pings_home_id_date['n_pings']
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Days of Activity')
start = timer()
# Number of distinct dates on which each user appears in the home-presence table.
ndays = (
    n_pings_home_id_date
    .reset_index()
    .groupby('cuebiq_id')['date']
    .nunique()
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Figure')
fig, ax = plt.subplots(figsize=(8, 5))

# Users whose active-day count reaches the q_days quantile.
active_ids = ndays[ndays >= ndays.quantile(q_days)].index
# User-days with strictly more than min_pings pings ...
enough_pings = n_pings_home_id_date['n_pings'] > min_pings
# ... belonging to one of the active users.
is_active = n_pings_home_id_date.index.get_level_values('cuebiq_id').isin(active_ids)

# Per date: share of eligible user-days whose pings were all at home.
share_by_date = (
    n_pings_home_id_date[enough_pings & is_active]
    .groupby('date')['pct_home']
    .apply(lambda x: (x >= 1).sum() / x.count())
)
share_by_date.plot(ax=ax, linewidth=1, color='k')

ax.locator_params(axis='y', nbins=6)
ax.tick_params(which='both', direction='in', pad=3)
ax.set_xlabel('')
ax.set_ylabel('Share of users staying at home', fontweight='bold')
fig.autofmt_xdate()
plt.savefig(
    os.path.join(path_to_data+'/../fig', country, 'share-of-users-staying-at-home.pdf'),
    bbox_inches='tight',
)

# Match Homes to Demographics

In [None]:
print('Import admin data')
start = timer()
# Admin-unit geometries and attributes.
admin = gpd.read_file(os.path.join(path_to_data, 'admin', country, 'admin.geojson'))
# City <-> admin-unit lookup, reduced to one row per admin unit (first occurrence kept).
admin2city = (
    pd.read_excel(os.path.join(path_to_data, 'admin', country, 'city2adminGHSL.xlsx'))
    .loc[:, [city_id, admin_id]]
    .drop_duplicates(subset=admin_id)
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Merge admin and home data')
start = timer()
# Turn home coordinates into point geometries (longitude is x, latitude is y).
primary_home=gpd.GeoDataFrame(primary_home,geometry=gpd.points_from_xy(primary_home.longitude,primary_home.latitude))
primary_home.crs='epsg:4326'
# Attach to each home the attributes of the admin polygon it intersects.
# The spatial test is deliberately left at sjoin's default ('intersects'):
# the keyword selecting it was renamed from `op` to `predicate` and `op` was
# later removed, so naming either one breaks on some geopandas versions.
geocoded_home=gpd.sjoin(primary_home[['cuebiq_id','geometry']],admin)
# Left join keeps homes whose admin unit has no city in the lookup table.
geocoded_home=geocoded_home.merge(admin2city,on=admin_id,how='left')
geocoded_home['population_density']=geocoded_home['total_population'].divide(geocoded_home['a_km2'])
# Split every demographic column at its median into groups labelled 1 and 2.
for col in demographics:
    print(col)
    geocoded_home[col+'_quantile']=pd.qcut(geocoded_home[col],2,labels=[1,2])
print("Done in", round(timer()-start), "sec")

# Home Presence by Demographic Groups

In [None]:
print('Merge Home Indicator With Demographics')
start = timer()
# Columns carried over from the geocoded homes: user id, city and quantile groups.
demog_cols = ['cuebiq_id', city_id] + [x + '_quantile' for x in demographics]
n_pings_home_id_date_demog = (
    n_pings_home_id_date
    .reset_index()
    .merge(geocoded_home[demog_cols], on=['cuebiq_id'])
)
print("Done in", round(timer() - start), "sec")

In [None]:
print('Figure')
fig,ax=plt.subplots(figsize=(8,5))

# Users whose active-day count reaches the q_days quantile (computed once).
active_ids=ndays[ndays>=ndays.quantile(q_days)].index

# Reference curve: all eligible users.
n_pings_home_id_date[
    (n_pings_home_id_date['n_pings']>min_pings)&
    (n_pings_home_id_date.index.get_level_values('cuebiq_id').isin(active_ids))
].groupby('date')['pct_home'].apply(lambda x:(x>=1).sum()/x.count()).plot(
    ax=ax,linewidth=1,color='k',label='All users')

# Same eligibility filter and the same ">= 1" stay-at-home criterion as the
# reference curve, so that all lines of the figure are directly comparable.
# The grouped shares do not depend on the loop variable, hence computed once.
share_by_wealth=n_pings_home_id_date_demog[
    (n_pings_home_id_date_demog['n_pings']>min_pings)&
    (n_pings_home_id_date_demog['cuebiq_id'].isin(active_ids))
].groupby(['wealth_index_quantile','date'])['pct_home'].apply(
    lambda x:(x>=1).sum()/x.count())
palette=sns.color_palette("coolwarm", 2)

# One curve per wealth group (1 and 2 are the wealth_index_quantile labels).
for i,(val,name) in enumerate(zip([1,2],['Low wealth users','Highest wealth users'])):
    share_by_wealth.loc[val].plot(ax=ax,linewidth=1,color=palette[i],label=name)

ax.tick_params(which='both',direction='in',pad=3)
ax.locator_params(axis='y',nbins=6)
ax.set_xlabel('')
ax.set_ylabel('Share of users staying at home',fontweight='bold')
ax.legend(loc='best')
fig.autofmt_xdate()
plt.savefig(os.path.join(path_to_data+'/../fig',country,'share-of-users-staying-at-home-by-wealth.pdf'),bbox_inches='tight')

In [None]:
print('Figure')
fig,ax=plt.subplots(figsize=(8,5))

# Users whose active-day count reaches the q_days quantile (computed once).
active_ids=ndays[ndays>=ndays.quantile(q_days)].index

# Reference curve: all eligible users. The ">= 1" criterion matches the one
# used for the city curves below, so all lines are directly comparable.
n_pings_home_id_date[
    (n_pings_home_id_date['n_pings']>min_pings)&
    (n_pings_home_id_date.index.get_level_values('cuebiq_id').isin(active_ids))
].groupby('date')['pct_home'].apply(lambda x:(x>=1).sum()/x.count()).plot(
    ax=ax,linewidth=1,color='k',label='All users')

# The three cities with the most geocoded homes. The city column is taken from
# the `city_id` setting, so this cell stays in sync with the column that the
# demographics merge selects.
top_cities=geocoded_home[city_id].value_counts().index[:3]

# The grouped shares do not depend on the loop variable, hence computed once.
share_by_city=n_pings_home_id_date_demog[
    (n_pings_home_id_date_demog['n_pings']>min_pings)&
    (n_pings_home_id_date_demog['cuebiq_id'].isin(active_ids))
].groupby([city_id,'date'])['pct_home'].apply(
    lambda x:(x>=1).sum()/x.count())
palette=sns.color_palette("hls", len(top_cities))

# One curve per top city.
for i,name in enumerate(top_cities):
    share_by_city.loc[name].plot(ax=ax,linewidth=1,color=palette[i],label='Users in '+name)

ax.tick_params(which='both',direction='in',pad=3)
ax.locator_params(axis='y',nbins=6)
ax.set_xlabel('')
ax.set_ylabel('Share of users staying at home',fontweight='bold')
ax.legend(loc='best')
fig.autofmt_xdate()
plt.savefig(os.path.join(path_to_data+'/../fig',country,'share-of-users-staying-at-home-by-city.pdf'),bbox_inches='tight')