In [None]:
# general imports
import numpy as np
import pandas as pd
from urllib.request import urlopen
import json

import plotly.express as px

# Loading EPA Data

In [None]:
# Read the EPA daily NO2 summaries (one CSV per year) into one DataFrame per
# year. The tuple order below matches the order of the target names.
air_data_21, air_data_20, air_data_19 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./local_datasets/air_quality/daily_summaries/daily_no2_42602_2021.csv",
        "./local_datasets/air_quality/daily_summaries/daily_no2_42602_2020.csv",
        "./local_datasets/air_quality/daily_summaries/daily_no2_42602_2019.csv",
    )
)

## Augmenting EPA Data

The EPA daily summary stores the FIPS code split into its components (a state code and a county code). For ease of use, I'll add the combined code to each DataFrame as a `fips` column.

In [None]:
def FIPS_function(row):
    """Build the 5-character county FIPS code for one EPA daily-summary row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must provide the keys 'State Code' and 'County Code'.

    Returns
    -------
    str
        The state code zero-padded to 2 characters followed by the county
        code zero-padded to 3 characters, e.g. 6 and 37 -> '06037'.

    Raises
    ------
    KeyError
        If either key is missing from ``row``.
    """
    def _padded(code, width):
        # A whole-number float (e.g. 6.0, which shows up when the column or
        # the row has been upcast to float) would otherwise stringify as
        # '6.0' and corrupt the code, so drop the fractional part first.
        if isinstance(code, float) and code.is_integer():
            code = int(code)
        return str(code).zfill(width)

    return _padded(row['State Code'], 2) + _padded(row['County Code'], 3)
# NO2 unit conversion (ppb -> ug/m^3), kept for reference
def no2_mass_by_vol(ppb):
    """Convert an NO2 concentration from ppb to micrograms per cubic meter.

    Multiplies the input by a fixed factor of 1.88.
    NOTE(review): 1.88 is presumably the NO2 ppb -> ug/m^3 factor at
    standard conditions (about 25 C, 1 atm) -- confirm against the source.

    Parameters
    ----------
    ppb : number or array-like supporting multiplication by a float
        Concentration in parts per billion.

    Returns
    -------
    Same kind as the input, scaled to ug/m^3.
    """
    return 1.88 * ppb


In [None]:
# Add two derived columns to every yearly frame:
#   'fips'      - the full county FIPS code built from the state and county codes
#   'Mean ugm3' - the daily arithmetic mean converted to micrograms per cubic meter
# One loop replaces three copy-pasted stanzas so the years cannot drift apart.
for air_data in (air_data_19, air_data_20, air_data_21):
    # FIPS_function takes a row, so it can be handed to apply() directly.
    air_data['fips'] = air_data.apply(FIPS_function, axis=1)
    # The conversion is a plain scalar multiply, so it works on the whole
    # column at once; this avoids a slow Python-level call per row.
    air_data['Mean ugm3'] = no2_mass_by_vol(air_data['Arithmetic Mean'])
# Exporting for GitHub upload

#

In [None]:
# Write each augmented yearly frame to its CSV (row index omitted).
for yearly_frame, out_path in (
    (air_data_20, "./data/air_quality/no2/daily_no2_2020_with_FIPS.csv"),
    (air_data_19, "./data/air_quality/no2/daily_no2_2019_with_FIPS.csv"),
    (air_data_21, "./data/air_quality/no2/daily_no2_2021_with_FIPS.csv"),
):
    yearly_frame.to_csv(out_path, index=False)

## Can we make a single dataset for these?

In [None]:
# Number of rows in the 2020 frame (displayed as the cell output).
air_data_20.shape[0]

150k+ rows is a lot; we might have to get clever with how we handle this.

## Setting up US county map

Documentation for Plotly's county choropleth maps is available [here](https://plotly.com/python/choropleth-maps/).

In [None]:
# Download the GeoJSON that supplies the geometry for US counties
# (see the plotly choropleth docs for details).
# TODO: consider caching the parsed result locally (e.g. pickling it) so the
# file is not fetched again on every run.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.loads(response.read())
# Each feature's `id` in this file is a FIPS code.


In [None]:
# Adapted from the plotly county-choropleth example to show where the
# monitoring sites are: counties are matched to the GeoJSON through the
# 'fips' column and shaded by the 'POC' column on a fixed 0-12 Viridis scale.
# NOTE(review): 'POC' appears to be used only to make counties with data
# visible, not as a quantity of interest -- confirm.
fig = px.choropleth(
    air_data_20,
    geojson=counties,
    locations='fips',
    color='POC',
    color_continuous_scale="Viridis",
    range_color=(0, 12),
    scope="usa",
).update_layout(autosize=False, width=800, height=800)  # fixed 800x800 canvas
fig.show()