In [33]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
from scipy.stats import ttest_ind
import numpy as np
# Import from the public scipy.stats namespace; scipy.stats.stats is a
# deprecated private module and importing from it emits a DeprecationWarning.
from scipy.stats import pearsonr
from IPython.display import display, Markdown, Latex
plt.style.use('ggplot')
from datetime import timedelta,datetime
from dateutil.parser import parse

  from scipy.stats.stats import pearsonr


In [35]:
# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.options.display.max_columns = None

### Load mobility data files

### Load/Clean Simple Heatwave Data

In [39]:
# Heatwave table; later cells read its `location`, `datetime`, `control_day`,
# `thi`, `t2m` and `heat_ID` columns.
heatwave_csv = 'THI_heatwave_control_90.csv'
heatwave_control = pd.read_csv(heatwave_csv)

In [41]:
# Number of distinct locations in the heatwave table. dropna=False keeps a
# missing location (if any) counted as one value, as len(unique()) would.
heatwave_control['location'].nunique(dropna=False)

374

In [43]:
# Convert both date columns from text to datetime64. pd.to_datetime is
# vectorised and, unlike dateutil's parse(), also accepts values that are
# already datetimes, so re-running this cell does not raise.
for date_column in ('control_day', 'datetime'):
    heatwave_control[date_column] = pd.to_datetime(heatwave_control[date_column])

In [45]:
# Summary statistics of `datetime` minus `control_day` for every row
# (a positive gap means the control day precedes `datetime`).
heat_minus_control = heatwave_control['datetime'].sub(heatwave_control['control_day'])
heat_minus_control.describe()

count                          5438
mean     12 days 07:33:36.476645825
std      10 days 03:13:21.598469822
min              -42 days +00:00:00
25%                 7 days 00:00:00
50%                 7 days 00:00:00
75%                14 days 00:00:00
max                70 days 00:00:00
dtype: object

In [47]:
# Store the `datetime` minus `control_day` gap as its own column.
heat_day = heatwave_control['datetime']
control_day = heatwave_control['control_day']
heatwave_control['diff'] = heat_day - control_day

In [49]:
# Tabulate how often each gap value occurs, ordered by gap length.
gap_counts = heatwave_control['diff'].value_counts()
temp = gap_counts.to_frame().sort_index()

In [51]:
# Give the single count column a readable label.
temp = temp.set_axis(['Number of Instances'], axis='columns')

### Merge Mobility with Heat Data

In [58]:
# Mobility table; later cells use its `location`, `date`, `mob_index`,
# `holiday` and `makeup_day` columns.
mobility_csv = 'mobility_data_clean/bd_mobility_china.csv'
mobility = pd.read_csv(mobility_csv)

In [60]:
# Cast the date column to text ahead of the date parsing in the next cell.
date_text = mobility['date'].astype(str)
mobility['date'] = date_text

In [62]:
# Parse the YYYYMMDD strings into datetime64 with an explicit format.
# pd.to_datetime is vectorised, replacing the per-row lambda that sliced the
# string and built datetime objects one at a time.
mobility['date'] = pd.to_datetime(mobility['date'], format='%Y%m%d')

In [65]:
# Attach the heatwave attributes to the mobility record of the same location
# and day. The default inner join keeps only matching rows, which excludes
# cities that lack mobility data.
heat_columns = ['control_day', 'thi', 't2m', 'location', 'datetime', 'heat_ID']
mobility_heat = pd.merge(
    mobility,
    heatwave_control[heat_columns],
    left_on=['location', 'date'],
    right_on=['location', 'datetime'],
)

In [69]:
# Locations with missing mobility data:
# ['taizishantianranlinbaohuqu, gansusheng',
#      'zhongnongfashandanmachang, gansusheng',
#      'taiwansheng, taiwansheng',
#      'xianggangtebiexingzhengqu, xianggangtebiexingzhengqu',
#      'baiyangshi, xinjiangweiwuerzizhiqu',
#      'huyangheshi, xinjiangweiwuerzizhiqu',
#      'xinxingshi, xinjiangweiwuerzizhiqu']

In [71]:
# Look up the mobility record of each control day for the same location.
# Overlapping column names get the `_heat` (left) and `_control` (right) suffixes.
control_columns = ['location', 'date', 'mob_index', 'holiday', 'makeup_day']
mobility_heat = pd.merge(
    mobility_heat,
    mobility[control_columns],
    left_on=['location', 'control_day'],
    right_on=['location', 'date'],
    suffixes=['_heat', '_control'],
)

In [74]:
# Mobility index on the heat day minus the index on its control day.
heat_mobility = mobility_heat['mob_index_heat']
control_mobility = mobility_heat['mob_index_control']
mobility_heat['mobility_difference'] = heat_mobility.sub(control_mobility)

In [77]:
# Day of week of the heat day (0 = Monday ... 6 = Sunday), computed with the
# vectorised .dt accessor instead of a per-row Python loop.
mobility_heat['day_of_week'] = mobility_heat['date_heat'].dt.dayofweek
# Weekend flag: 1 for Saturday (5) or Sunday (6), 0 otherwise.
mobility_heat['weekend'] = mobility_heat['day_of_week'].isin([5, 6]).astype(int)

In [80]:
# Renumber the rows from 0 after the merges, discarding the old index.
mobility_heat = mobility_heat.reset_index(drop=True)

In [83]:
# Distribution of the heat-minus-control mobility difference for all merged pairs.
mobility_heat.loc[:, 'mobility_difference'].describe()

count    5298.000000
mean        0.134225
std         0.483444
min        -1.863700
25%        -0.096625
50%         0.061750
75%         0.325075
max         4.885300
Name: mobility_difference, dtype: float64

In [88]:
# Keep only pairs where neither the heat day nor the control day is flagged
# as a holiday or a make-up day (all four flags equal 0).
day_flags = ['holiday_heat', 'holiday_control', 'makeup_day_heat', 'makeup_day_control']
is_regular_pair = mobility_heat[day_flags].eq(0).all(axis=1)
mobility_heat = mobility_heat[is_regular_pair]

In [90]:
# Summary statistics of the frame's columns after the holiday / make-up-day filter.
mobility_heat.describe()

Unnamed: 0,date_heat,mob_index_heat,dayofweek,holiday_heat,makeup_day_heat,control_day,thi,t2m,datetime,date_control,mob_index_control,holiday_control,makeup_day_control,mobility_difference,day_of_week,weekend
count,4889,4889.0,4889.0,4889.0,4889.0,4889,4889.0,4889.0,4889,4889,4889.0,4889.0,4889.0,4889.0,4889.0,4889.0
mean,2023-07-18 20:34:42.389036544,7.199639,3.94416,0.0,0.0,2023-07-06 12:11:55.729187840,35.372838,31.606965,2023-07-18 20:34:42.389036544,2023-07-06 12:11:55.729187840,7.054292,0.0,0.0,0.145348,2.94416,0.267539
min,2023-05-04 00:00:00,3.22,1.0,0.0,0.0,2023-05-04 00:00:00,11.363968,12.379485,2023-05-04 00:00:00,2023-05-04 00:00:00,3.4585,0.0,0.0,-1.387,0.0,0.0
25%,2023-07-10 00:00:00,6.5414,2.0,0.0,0.0,2023-06-29 00:00:00,30.793822,30.022495,2023-07-10 00:00:00,2023-06-29 00:00:00,6.4409,0.0,0.0,-0.086,1.0,0.0
50%,2023-07-23 00:00:00,7.1841,4.0,0.0,0.0,2023-07-08 00:00:00,36.121507,32.320057,2023-07-23 00:00:00,2023-07-08 00:00:00,7.0162,0.0,0.0,0.0632,3.0,0.0
75%,2023-08-04 00:00:00,7.8121,6.0,0.0,0.0,2023-07-19 00:00:00,40.989366,33.999104,2023-08-04 00:00:00,2023-07-19 00:00:00,7.627,0.0,0.0,0.3163,5.0,1.0
max,2023-09-28 00:00:00,18.7065,7.0,0.0,0.0,2023-09-21 00:00:00,51.377591,42.959206,2023-09-28 00:00:00,2023-09-21 00:00:00,14.3527,0.0,0.0,4.8853,6.0,1.0
std,,1.064732,1.957599,0.0,0.0,,6.952879,4.226089,,,0.991782,0.0,0.0,0.466521,1.957599,0.442721


In [93]:
# Distribution of the mobility difference once holiday and make-up-day
# samples have been excluded.
mobility_heat.loc[:, 'mobility_difference'].describe()

count    4889.000000
mean        0.145348
std         0.466521
min        -1.387000
25%        -0.086000
50%         0.063200
75%         0.316300
max         4.885300
Name: mobility_difference, dtype: float64

In [96]:
# List the column labels of the merged frame before export.
mobility_heat.columns

Index(['city', 'adm2', 'adm1', 'date_heat', 'mob_index_heat', 'dayofweek',
       'holiday_heat', 'makeup_day_heat', 'location', 'control_day', 'thi',
       't2m', 'datetime', 'heat_ID', 'date_control', 'mob_index_control',
       'holiday_control', 'makeup_day_control', 'mobility_difference',
       'day_of_week', 'weekend'],
      dtype='object')

In [99]:
# Drop `city` and `dayofweek`, then write the remaining columns to CSV.
# to_csv's default also writes the row index as the first column.
export_frame = mobility_heat.drop(columns=['city', 'dayofweek'])
export_frame.to_csv('THI_mobility_heat_clean_90.csv')