In [72]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Create a mapping for FIPS codes, county names, states, and state IDs

In [123]:
# Two-letter state abbreviations; each one selects a per-state PM CSV
# (data/pm_unagg/pm_conc_data_<state>.csv) in the cells below.
states = ['CA', 'OR', 'AZ', 'NV', 'TX', 'FL', 'NY', 'PA', 'IL', 'OH', 'GA', 'NC', 'MI']

In [3]:
# County-level lookup (columns fips, name, state in the output shown below).
fips_state = pd.read_csv("data/fips-by-state.csv", encoding='unicode_escape')

# State-level lookup: drop spaces from the header names and trim whitespace
# from the two text columns so the join keys compare cleanly.
fips_county = pd.read_csv("data/us-state-ansi-fips.csv").rename(
    columns=lambda header: header.replace(' ', '')
)
fips_county = fips_county.assign(
    stusps=fips_county['stusps'].str.strip(),
    stname=fips_county['stname'].str.strip(),
)

# Attach the state name and state FIPS (`st`) to every county row; `stusps`
# duplicates `state` after the join, so it is dropped.
merged_index = fips_state.merge(
    fips_county, left_on="state", right_on="stusps"
).drop(columns="stusps")
merged_index

Unnamed: 0,fips,name,state,stname,st
0,1001,Autauga County,AL,Alabama,1
1,1003,Baldwin County,AL,Alabama,1
2,1005,Barbour County,AL,Alabama,1
3,1007,Bibb County,AL,Alabama,1
4,1009,Blount County,AL,Alabama,1
...,...,...,...,...,...
3138,56037,Sweetwater County,WY,Wyoming,56
3139,56039,Teton County,WY,Wyoming,56
3140,56041,Uinta County,WY,Wyoming,56
3141,56043,Washakie County,WY,Wyoming,56


In [63]:
# Persist the FIPS lookup table to the working directory; the DataFrame index
# is written as the first column (pandas default).
merged_index.to_csv("merged_index.csv")

### Create helper functions

In [33]:
def get_state_name_from_fips(statefips):
    """Look up the `state` value in `merged_index` for a state FIPS code.

    Despite the function name, the result comes from the `state` column
    (two-letter abbreviations such as 'AL' in the table displayed above),
    not from `stname`.

    Parameters
    ----------
    statefips
        Value compared with ``==`` against ``merged_index['st']``.

    Returns
    -------
    The most frequent `state` value among the matching rows.

    Raises
    ------
    IndexError
        If no row of `merged_index` has this `st` value.
    """
    matching_states = merged_index.loc[merged_index['st'] == statefips, 'state']
    frequency = matching_states.value_counts()
    return frequency.index[0]

def get_county_name_from_fips(countyfips):
    """Look up the county `name` in `merged_index` for a county FIPS code.

    Parameters
    ----------
    countyfips
        Value compared with ``==`` against ``merged_index['fips']``.

    Returns
    -------
    The `name` of the first matching row (e.g. 'Autauga County' for 1001 in
    the table displayed above).

    Raises
    ------
    IndexError
        If no row of `merged_index` has this `fips` value.
    """
    matching_names = merged_index.loc[merged_index['fips'] == countyfips, 'name']
    return matching_names.iloc[0]

### Load in the Data

In [122]:
def plot_daily_pm_by_county(data):
    """Plot the per-county daily mean of `ds_pm_pred` for one state and save it.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns `date`, `countyfips`, `statefips`,
        `ds_pm_pred` and `ds_pm_stdd`. The frame is not modified.

    Side effects
    ------------
    Writes the figure to ``visualizations/daily_pm_<state>_2011`` (the
    directory is not created here) and closes the figure afterwards, so no
    empty figure is left behind for the notebook to render.

    Raises
    ------
    IndexError
        Propagated from the lookups when `data` is empty or its state FIPS
        code is not present in `merged_index`.
    """
    # Cast on a copy so the caller's DataFrame keeps its original dtype.
    # Presumably the string cast makes seaborn treat each county as its own
    # categorical hue rather than a numeric scale -- confirm if changing.
    data = data.assign(countyfips=data['countyfips'].astype(str))

    # The most frequent state FIPS code in the frame labels the whole plot.
    state = get_state_name_from_fips(data['statefips'].value_counts().index[0])

    # 'std' (string) selects the same groupby std that pandas used for
    # np.std, without the deprecation warning newer pandas emits for the
    # NumPy callable.
    daily = (
        data.groupby(['date', 'countyfips'])
        .agg({'ds_pm_pred': 'mean', 'ds_pm_stdd': 'std'})
        .reset_index()
    )

    fig, ax = plt.subplots()
    sns.lineplot(data=daily, x="date", y="ds_pm_pred", hue="countyfips", ax=ax)
    ax.legend(fontsize=4)
    ax.set(title="Intra-day Average PM2.5 Measurements for Counties in " + state)

    # Keep every tenth tick so the date labels stay legible, then rotate them.
    ax.set_xticks(ax.get_xticks()[::10])
    ax.tick_params(axis='x', rotation=90)

    fig.savefig(f'visualizations/daily_pm_{state}_2011')
    plt.close(fig)

def plot_and_save_daily_pm(states):
    """Render and save the daily PM plot for each state in `states`.

    For every entry, reads ``data/pm_unagg/pm_conc_data_<state>.csv`` (first
    CSV column used as the index) and hands the frame to
    `plot_daily_pm_by_county`.

    Parameters
    ----------
    states : iterable of str
        Values substituted into the CSV file name, one plot per value.
    """
    csv_paths = (f"data/pm_unagg/pm_conc_data_{abbr}.csv" for abbr in states)
    for csv_path in csv_paths:
        plot_daily_pm_by_county(pd.read_csv(csv_path, index_col=[0]))

In [121]:
# Render and save one figure per entry of the notebook-level `states` list.
# The function requires this argument; calling it bare raises TypeError.
plot_and_save_daily_pm(states)

<Figure size 640x480 with 0 Axes>

### Combine Data

In [166]:
def combine_pm_data_unagg():
    """Stack the per-state PM CSVs into a single DataFrame.

    Reads ``data/pm_unagg/pm_conc_data_<state>.csv`` for every entry of the
    notebook-level `states` list, using the first CSV column as the index,
    and concatenates the frames in list order.

    Returns
    -------
    pandas.DataFrame
        The row-wise concatenation of all per-state frames.
    """
    per_state_frames = []
    for abbr in states:
        per_state_frames.append(
            pd.read_csv(f"data/pm_unagg/pm_conc_data_{abbr}.csv", index_col=0)
        )
    # State/county label columns are not attached here; county_name and
    # state_name are added after aggregation further down the notebook.
    return pd.concat(per_state_frames)

def aggregate_pm_data_unagg(pm_all_2011_unagg):
    """Collapse PM rows to one row per (year, statefips, countyfips).

    Parameters
    ----------
    pm_all_2011_unagg : pandas.DataFrame
        Must provide `year`, `statefips`, `countyfips` and `ds_pm_pred`.

    Returns
    -------
    pandas.DataFrame
        Columns `year`, `statefips`, `countyfips`, then `ds_pm_pred` (group
        mean) and `ds_pm_stdd` (group standard deviation of `ds_pm_pred`,
        pandas' default sample std, so single-row groups yield NaN).
    """
    group_keys = ['year', 'statefips', 'countyfips']
    county_year_stats = pm_all_2011_unagg.groupby(group_keys).agg(
        ds_pm_pred=('ds_pm_pred', 'mean'),
        ds_pm_stdd=('ds_pm_pred', 'std'),
    )
    return county_year_stats.reset_index()

In [167]:
# Stack the per-state CSVs, then collapse them to one row per
# (year, statefips, countyfips) with the mean and std of ds_pm_pred.
pm_all_2011_unagg = combine_pm_data_unagg()
pm_all_2011_agg = aggregate_pm_data_unagg(pm_all_2011_unagg)

In [169]:
demographic_data = pd.read_csv("data/demographic_data.csv")
# Keep only the text before the first comma of each 'County Name'. The
# vectorised .str accessor leaves missing values as NaN, whereas indexing each
# split result in a list comprehension raises TypeError on them.
# NOTE(review): presumably the raw values look like "<county>, <state>"; if so,
# the state part is discarded here and names are no longer unique across
# states -- confirm before joining on this column.
demographic_data['County Name'] = demographic_data['County Name'].str.split(",").str[0]

In [173]:
# Label every aggregated row with the county name looked up from its FIPS code.
pm_all_2011_agg['county_name'] = pm_all_2011_agg['countyfips'].map(
    get_county_name_from_fips
)

Some counties may be missing because of the variables I queried from the ACS data. **TODO:** explore the ACS data features in more depth. For now, we can work with the counties that are represented.

In [174]:
def merge_datasets(pm, dem):
    """Inner-join PM data with demographic data on the county name.

    Parameters
    ----------
    pm : pandas.DataFrame
        Must provide a `county_name` column.
    dem : pandas.DataFrame
        Must provide `County Name`, `state` and `county` columns.

    Returns
    -------
    pandas.DataFrame
        Rows whose `county_name` equals a `County Name` in `dem`, without the
        `state`, `county` and `County Name` columns.

    NOTE(review): the join key is the county name alone. If `dem` holds
    same-named counties from several states, one PM row will pair with each
    of them -- verify against the demographic file.
    """
    return pm.merge(
        dem, left_on="county_name", right_on="County Name"
    ).drop(columns=['state', 'county', 'County Name'])

In [178]:
# Join PM and demographic data, then append the state label looked up from
# each row's state FIPS code as the last column.
merged = merge_datasets(pm_all_2011_agg, demographic_data).assign(
    state_name=lambda frame: frame['statefips'].map(get_state_name_from_fips)
)

In [179]:
# Save the merged PM + demographic table; the DataFrame index is written as
# the first column (pandas default).
merged.to_csv("data/merged.csv")