### Generates daily matrices with mobility adjusted commuter flows
Output: Pickle file (dictionary of sparce matrices, by date)


In [8]:
import pandas as pd, numpy as np
import re, pickle,copy, scipy
from datetime import datetime, timedelta
from scipy.sparse import csr_matrix


In [9]:
# FIPS information
fips = pd.read_excel('fips_data.xlsx')[1:]
fips['lat'] = fips.apply(lambda x: float(re.sub('–','-',str(x['Latitude'])[:-1])),axis=1)
fips['lon'] = fips.apply(lambda x: float(re.sub('–','-',str(x['Longitude'])[:-1])),axis=1)
fips['fips'] = fips['FIPS']

# state name to abbreviation
statecode = pd.read_csv('statecode.csv')
statecode.columns = ['name','State']
fips = pd.merge(fips,statecode,on='State')
fips.head()

# selecting USA mobility reports
gmr = pd.read_csv('Global_Mobility_Report.csv')
mr = gmr[gmr['country_region_code']=='US'].copy()
del gmr
mr['datetime'] = mr.apply(lambda x: datetime.strptime(x['date'],'%Y-%m-%d'),axis=1)

# importing flows data
flow = pickle.load(open('flow.pickle', 'rb'))
all_fips = [x for x in sorted(set(flow['sfips'])) if x//1000 != 72]
print(len(all_fips))

# selecting statewide data
statemr = mr[mr['sub_region_2'].isnull()].fillna('NA')
statemr['unique'] = statemr.apply(lambda x: '%s_%s' %(str(x['sub_region_1']),str(x['datetime'])),axis=1)
statemr = statemr.set_index('unique')['workplaces_percent_change_from_baseline'].to_dict()


3142


In [10]:
# creating DataFrame
mobility_work = pd.DataFrame()
mobility_work['fips'] = all_fips


# merging with state name
mobility_work = pd.merge(mobility_work,fips[['fips','name']],how='left',on='fips')
mobility_work.columns = ['fips','state']

# cleaning
mobility_work = mobility_work.fillna('NA')
mobility_work = mobility_work.replace(to_replace ='Dist. of Columbia', 
                 value ='NA') 


# creating baseline statelevel mobility metrics
for date in set(mr['datetime']):
    date = str(date)
    mobility_work[date] = mobility_work.swifter.progress_bar(False).apply(lambda x: statemr['%s_%s' %(str(x['state']),date)],axis=1)

# converting to dicitonary
state_work = mobility_work.set_index('fips').to_dict('index')


#### Matching county names to FIPS codes and cleaning

In [11]:
to_concat = []
states = mr.groupby('sub_region_1')
for state, group in states:
    state_fips = fips[fips['name']==state][['FIPS','County\xa0[2]','name']]
    try:
        state_fips.columns = ['fips','county','state_name']
        group['county'] = group.apply(lambda x: re.sub(' County','',str(x['sub_region_2'])),axis=1)
        group['county'] = group.apply(lambda x: re.findall(re.compile('[\w\. ]+'),str(x['county']))[0],axis=1)
        group['county'] = group.apply(lambda x: re.sub(' Parish','',str(x['county'])),axis=1)
        state_fips['county'] = state_fips.apply(lambda x: re.sub(' Parish','',str(x['county'])),axis=1)
        state_fips['county'] = state_fips.apply(lambda x: re.findall(re.compile('[\w\. ]+'),str(x['county']))[0],axis=1)
        df = pd.merge(group,state_fips,how='right',on='county')
        to_concat.append(df)
    except:
        pass
    
mobility = pd.concat(to_concat)
mobility = mobility[~mobility['date'].isnull()].copy()
mobility['datetime'] = mobility.apply(lambda x: datetime.strptime(x['date'],'%Y-%m-%d'),axis=1)
mobility = mobility[['sub_region_1','workplaces_percent_change_from_baseline','fips','datetime']].dropna(axis=0)

fips_mobility = copy.deepcopy(state_work)
for index,row in mobility.iterrows():
    if row['fips'] in state_work:
        if state_work[row['fips']]['state'] == row['sub_region_1']:
            fips_mobility[row['fips']][str(row['datetime'])] = row['workplaces_percent_change_from_baseline']
            
mobility_df = pd.DataFrame(fips_mobility).transpose()
mobility_df = mobility_df.drop('state',axis=1)

# remove any fips starting with 72
flow = flow[(flow['sfips']//1000 != 72)&(flow['efips']//1000 != 72)]
all_fips = sorted(set(flow['sfips']))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [12]:
flow['identical'] = flow.apply(lambda x: 1 if x['sfips']==x['efips'] else 0,axis=1)
fipindex = pd.Series({x:all_fips.index(x) for x in all_fips})

def create_commuting_matrix(df,scores,date):
    #df['mobility_score'] = scores[date][np.array(df['sfips'])].values
    df['mobility_score'] = scores[str(date)][np.array(df['sfips'])].values
    
    # for different counties
    df['weight'] = (1-df['identical'].values)*(df['commuter%'].values*(1+df['mobility_score'].values/100))
    
    # for within county
    df['weight'] = df['weight'].values + (df['identical'].values)*(df['commuter%'].values - (1-df['commuter%'].values)*(df['mobility_score']/100))
    df = df[['sfips','efips','weight','mobility_score']].copy()
    
    M = np.zeros((len(fipindex),len(fipindex)))
    M[fipindex[df['efips'].values].values,fipindex[df['sfips'].values].values] = df['weight'].values
    return(scipy.sparse.csr_matrix(M))

dates = sorted(set(mr['datetime']))
date_conversion = {x:x for x in dates}
date = dates[-1]
all_dates = dates.copy()

#Redundant - execute in other notebooks
'''while date <= datetime(2020,4,29):
    date = date + timedelta(days=1)
    date_conversion[date] = dates[-1]
    all_dates.append(date)'''
    
matrices = {}
for date in all_dates:
    matrices[date] = create_commuting_matrix(flow,mobility_df,date_conversion[date])


In [13]:
dates

[Timestamp('2020-02-15 00:00:00'),
 Timestamp('2020-02-16 00:00:00'),
 Timestamp('2020-02-17 00:00:00'),
 Timestamp('2020-02-18 00:00:00'),
 Timestamp('2020-02-19 00:00:00'),
 Timestamp('2020-02-20 00:00:00'),
 Timestamp('2020-02-21 00:00:00'),
 Timestamp('2020-02-22 00:00:00'),
 Timestamp('2020-02-23 00:00:00'),
 Timestamp('2020-02-24 00:00:00'),
 Timestamp('2020-02-25 00:00:00'),
 Timestamp('2020-02-26 00:00:00'),
 Timestamp('2020-02-27 00:00:00'),
 Timestamp('2020-02-28 00:00:00'),
 Timestamp('2020-02-29 00:00:00'),
 Timestamp('2020-03-01 00:00:00'),
 Timestamp('2020-03-02 00:00:00'),
 Timestamp('2020-03-03 00:00:00'),
 Timestamp('2020-03-04 00:00:00'),
 Timestamp('2020-03-05 00:00:00'),
 Timestamp('2020-03-06 00:00:00'),
 Timestamp('2020-03-07 00:00:00'),
 Timestamp('2020-03-08 00:00:00'),
 Timestamp('2020-03-09 00:00:00'),
 Timestamp('2020-03-10 00:00:00'),
 Timestamp('2020-03-11 00:00:00'),
 Timestamp('2020-03-12 00:00:00'),
 Timestamp('2020-03-13 00:00:00'),
 Timestamp('2020-03-

In [14]:
pickle.dump(matrices,open('matrices.pickle','wb'))