### Converts US Census commuting table to DataFrame used by the Spread Model
Output: `flow.pickle`, a pickled DataFrame with the columns `sfips`, `efips`, `flow` and `commuter%`

In [1]:
import numpy as np, pandas as pd
import re, swifter, pickle

#### Removes any rows without two FIPS codes

In [2]:
# Load the raw commuting-flow table.
# No pre-filter on the 'nannan' placeholder is applied here: any such row
# fails the correct_format() check below, and the previous pre-filtered
# frame was overwritten before it was ever read.
flows = pd.read_excel('flows.xlsx')

# Compiled once instead of on every call; the raw string avoids the invalid
# escape-sequence warning the plain '\d' literal triggers.
_FIPS_PREFIX = re.compile(r'\d{5,7}')


def correct_format(x):
    """Classify a FIPS value as 'correct' or 'incorrect'.

    A value is 'correct' when it is a string that starts with at least five
    digits. Anything else, including non-string values such as NaN or None,
    is 'incorrect'.

    NOTE(review): match() only anchors at the start of the string, so text
    with trailing characters after the leading digits is still accepted and
    the upper bound of 7 is not enforced. Switch to fullmatch() if exactly
    5-7 digits is the intent.

    Args:
        x: The value to check (normally a string FIPS code).

    Returns:
        The string 'correct' or 'incorrect'.
    """
    if isinstance(x, str) and _FIPS_PREFIX.match(x):
        return 'correct'
    return 'incorrect'
    
# Keep only the rows whose destination (e_FIPS) and origin (s_FIPS) codes both
# pass correct_format(). The check runs column-wise on each Series rather
# than row-wise over the whole frame, and no scratch columns are written to
# `flows`, which is discarded at the end of this cell anyway.
e_ok = flows['e_FIPS'].apply(correct_format) == 'correct'
s_ok = flows['s_FIPS'].apply(correct_format) == 'correct'
valid = flows[e_ok & s_ok].reset_index(drop=True)

# FIPS codes are stored as floats from here on (so '01001' becomes 1001.0).
flow = pd.DataFrame({
    'flow': valid['flow'],
    'sfips': valid['s_FIPS'].astype(float),
    'efips': valid['e_FIPS'].astype(float),
})

# Release the raw table and the intermediates; only `flow` is used below.
del flows, valid, e_ok, s_ok

#### Combining duplicates

In [3]:
# Aggregating duplicates: collapse repeated (sfips, efips) pairs into a single
# row whose `flow` is the sum over the duplicates. Rows come out sorted by
# (sfips, efips) with a fresh 0..n-1 index and the columns sfips, efips, flow.
#
# NOTE: pandas' sum() skips missing flow values, whereas summing a group with
# Python's built-in sum() would yield NaN for any pair containing one.
flow = (
    flow.groupby(['sfips', 'efips'], as_index=False)['flow']
    .sum()
    .astype(float)  # keep every column float64, as the dict/transpose build did
)

# Sorted list of the distinct origin FIPS codes (missing values excluded).
all_fips = sorted(flow['sfips'].dropna().unique().tolist())

#### Calculating the share of commuters from A who travel to B (stored in `commuter%` as a fraction of A's total outflow, not multiplied by 100)

In [4]:
# flow_dict[origin][destination] -> flow for that pair.
# Walk the three columns in lockstep instead of building a Series per row
# with iterrows(); iterating the underlying arrays keeps the values as NumPy
# scalars, as iterrows() did.
flow_dict = {}
for origin, dest, count in zip(
    flow['sfips'].to_numpy(),
    flow['efips'].to_numpy(),
    flow['flow'].to_numpy(),
):
    flow_dict.setdefault(origin, {})[dest] = count

# flow_total[origin] -> total outgoing flow summed over all destinations.
# No try/except here: a failure should surface immediately rather than be
# printed and leave `flow_total` missing an origin that is looked up later.
flow_total = {
    origin: sum(dests.values()) for origin, dests in flow_dict.items()
}

def commuter_percent(f1, f2):
    """Return the share of origin f1's total flow that goes to destination f2.

    Looks the pair up in the module-level `flow_dict` and divides by the
    origin's entry in `flow_total`.

    Args:
        f1: Origin FIPS code.
        f2: Destination FIPS code.

    Returns:
        flow_dict[f1][f2] / flow_total[f1], or 0 when the origin is unknown
        or has no recorded flow to f2.
    """
    destinations = flow_dict.get(f1)
    if destinations is None or f2 not in destinations:
        return 0
    return destinations[f2] / flow_total[f1]
    
def _row_commuter_percent(row):
    """Adapt commuter_percent() to a single row of `flow`."""
    return commuter_percent(row['sfips'], row['efips'])


# One value per (origin, destination) row, computed through swifter's apply.
flow['commuter%'] = flow.swifter.apply(_row_commuter_percent, axis=1)

HBox(children=(IntProgress(value=0, description='Dask Apply', max=16, style=ProgressStyle(description_width='i…




#### Exporting as Pickle File

In [5]:
# Serialize the finished table. The context manager guarantees the file
# handle is flushed and closed, even if pickling raises.
with open("flow.pickle", "wb") as out_file:
    pickle.dump(flow, out_file)
del flow