# Local Covid-19 Metrics

## Gather Datasets

In [1]:

# init
import pandas as pd
import requests
import matplotlib.pyplot as plt
from IPython.display import display, HTML
from pandas import DataFrame

# set parameters
days_back = 120
# seconds to wait on each HTTP call; requests blocks indefinitely without one
request_timeout = 30


def _fetch_json_frame(url):
    """Download the JSON document at ``url`` and parse it with ``pd.read_json``.

    Raises ``requests.HTTPError`` for a 4xx/5xx response, so an error page is
    never handed to the JSON parser, and a ``requests`` timeout exception when
    the server does not answer within ``request_timeout`` seconds.
    """
    from io import StringIO

    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()
    # Passing a literal JSON string to pd.read_json is deprecated; a file-like
    # wrapper is accepted by every pandas version.
    return pd.read_json(StringIO(response.text))


# get content from web APIs
us = _fetch_json_frame('https://covidtracking.com/api/v1/us/daily.json')
pa = _fetch_json_frame('https://covidtracking.com/api/v1/states/pa/daily.json')
nj = _fetch_json_frame('https://covidtracking.com/api/v1/states/nj/daily.json')

# we can only get the past 7 days for Bucks County with this API
bucks_data = _fetch_json_frame('https://localcoviddata.com/covid19/v1/cases/newYorkTimes?zipCode=19067&daysInPast=7')
# keep only the history list of the first county entry in the response
bucks_data = bucks_data.counties[0].get("historicData")
bucks_data = DataFrame(bucks_data, columns=['date','deathCt','positiveCt'])
bucks_data['date'] = pd.to_datetime(bucks_data['date'], format="%Y-%m-%d")

# derive positive and death for bucks
# diff(periods=-1) subtracts the following row from each row.
# NOTE(review): presumably the rows are newest-first, which would make this the
# day-over-day increase - confirm the ordering returned by the API.
diff = bucks_data[['deathCt','positiveCt']].diff(periods=-1)
# the suffixes separate the running totals (_caller) from the deltas (_other)
bucks = bucks_data.join(diff, lsuffix='_caller', rsuffix='_other')
bucks = bucks.rename(columns={'deathCt_caller':'death_total'
                      ,'positiveCt_caller':'positive_total'
                      ,'deathCt_other':'death'
                      ,'positiveCt_other':'positive'})

# massage datasets

# make function for the covid tracking api since they are all the same
def clean_covidtracking_api_data(df):
    """Trim a covidtracking.com daily frame to the columns used here and add rates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``date``, ``positiveIncrease``,
        ``totalTestResultsIncrease``, ``hospitalizedCurrently``, ``death``,
        ``deathIncrease`` and ``positive``. ``date`` values must render as
        ``YYYYMMDD`` when converted to ``str``. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``date`` (datetime), ``positive``,
        ``tests``, ``hospitalized``, ``death_total``, ``death``,
        ``positive_total``, ``pos_rate``, ``death_rate`` and
        ``death_rate_est``. A rate is ``NaN`` where its denominator is zero.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Subset and rename in one step. The mapping is applied simultaneously, so
    # 'positiveIncrease' -> 'positive' does not clash with
    # 'positive' -> 'positive_total'.
    df = df[['date','positiveIncrease','totalTestResultsIncrease'
           ,'hospitalizedCurrently','death','deathIncrease','positive']
           ].rename(columns={'positiveIncrease':'positive'
                              ,'totalTestResultsIncrease': 'tests'
                              ,'hospitalizedCurrently': 'hospitalized'
                              ,'death': 'death_total'
                              ,'deathIncrease': 'death'
                              ,'positive': 'positive_total'})

    # Harmonize date column
    df['date'] = pd.to_datetime(df['date'].astype(str), format="%Y%m%d")

    # Derived Columns
    # A rate with a zero denominator is undefined. Mask the zeros to NaN so the
    # result is NaN rather than +/-inf, which would poison later aggregates.
    tests = df['tests'].where(df['tests'] != 0)
    cases_total = df['positive_total'].where(df['positive_total'] != 0)

    # Positivity Rate: new positives as a percentage of new test results
    df['pos_rate'] = df['positive'] / tests * 100

    # Death Rate: cumulative deaths as a percentage of cumulative positives
    df['death_rate'] = (df['death_total'] / cases_total) * 100
    # same rate with the cumulative positive count scaled up by a factor of 10
    df['death_rate_est'] = (df['death_total'] / (cases_total * 10)) * 100

    return df

# run each covidtracking frame through the shared cleaner
pa, nj, us = (clean_covidtracking_api_data(frame) for frame in (pa, nj, us))

# Make compact summary dataset
summary_columns = ['date','positive','pos_rate','hospitalized']
a = pa[summary_columns]
b = nj[summary_columns]
c = bucks[['date','positive']]

# line PA and NJ up on the date index; the suffixes keep their columns apart
pa_by_date = a.set_index('date')
nj_by_date = b.set_index('date')
d = pa_by_date.join(nj_by_date, lsuffix='_pa', rsuffix='_nj')
# e = a.set_index('date').join(c.set_index('date'))

# todo: massage bucks county dataset, fix date type, add + count delta

# todo: harmonize all four datasets

# todo: combine most important datapoints into one dataframe for all four entities



In [3]:
# NOTE(review): `d` is built by joining frames already indexed by 'date', so it
# has no 'date' column left; the disabled line below would have to join on `d`
# directly instead of calling d.set_index('date').
#e = d.set_index('date').join(c.set_index('date'))
# render the joined PA/NJ summary frame as this cell's output
d

Unnamed: 0_level_0,positive_pa,pos_rate_pa,hospitalized_pa,positive_nj,pos_rate_nj,hospitalized_nj
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-08-06,807,5.197733,663.0,374,0.865200,754.0
2020-08-05,705,5.342528,639.0,357,1.616701,784.0
2020-08-04,854,5.708556,656.0,356,100.000000,470.0
2020-08-03,565,4.708333,585.0,264,1.411614,738.0
2020-08-02,654,5.340083,564.0,321,0.652704,695.0
...,...,...,...,...,...,...
2020-03-10,2,100.000000,,4,30.769231,
2020-03-09,4,100.000000,,5,55.555556,
2020-03-08,2,100.000000,,2,6.060606,
2020-03-07,2,100.000000,,3,100.000000,


In [None]:
# change political entity dataset to select visualizations
# (stopgap for now until we have better organized data)
# Every cell below reads `political_entity`; while this assignment stayed
# commented out they all failed with a NameError.
political_entity = pa

# view what we have for bucks
# (kept as the last expression so the notebook still renders it as the output)
bucks


In [None]:

# rows labelled 0 through 6 of the case and testing columns
row_labels = list(range(7))
case_columns = ['date','positive','tests','pos_rate']
display(political_entity.loc[row_labels, case_columns])


## Show Hospitalization and Death Rates

In [None]:

# rows labelled 0 through 6 of the hospitalization and death columns
row_labels = list(range(7))
outcome_columns = ['date','hospitalized','death','death_rate','death_rate_est']
display(political_entity.loc[row_labels, outcome_columns])


In [None]:

# One line per metric on a shared axis. For each column: reverse the row order,
# then keep the last `days_back` entries of the reversed data.
for metric in ('positive', 'hospitalized', 'death'):
    column_values = political_entity.loc[:, [metric]].to_numpy()
    recent_values = column_values[::-1][-days_back:]
    plt.plot(recent_values, label=metric)

plt.legend()
plt.show()


In [None]:
# 'death' column on its own: reverse the row order, then keep the last
# `days_back` entries of the reversed data
death_values = political_entity.loc[:, ['death']].to_numpy()
recent_deaths = death_values[::-1][-days_back:]
plt.plot(recent_deaths, label='death')

plt.legend()
plt.show()

In [None]:
# 'pos_rate' column on its own: reverse the row order, then keep the last
# `days_back` entries of the reversed data
rate_values = political_entity.loc[:, ['pos_rate']].to_numpy()
recent_rates = rate_values[::-1][-days_back:]
plt.plot(recent_rates, label='pos_rate')

plt.legend()
plt.show()