In [2]:
import pandas as pd
from source.getData import subtractDays

# Keep the query window dynamic: anchor it on the day this cell is run.
today = pd.Timestamp("today").strftime("%Y-%m-%d")
# Subtract 31 days rather than 30: the query below uses a strict `date >`
# comparison, so the extra day is what makes the last 30 days come back.
# NOTE(review): subtractDays lives in source.getData (not shown here);
# presumably it returns a "%Y-%m-%d" string -- confirm.
last30Days = subtractDays(today, 31)

url = f'https://healthdata.gov/resource/j8mb-icvb.json?$where=date>\"{last30Days}\"&$limit=10000&$order=date%20DESC'
df = pd.read_json(url)

# Per-row count of positive results: the reported count on rows whose outcome
# is 'Positive', 0 on every other row, so a plain per-state sum yields the
# number of positive tests.
is_positive = df['overall_outcome'] == 'Positive'
positive_results = df['new_results_reported'].where(is_positive, 0)

# Aggregate every state in a single pass instead of re-filtering the frame
# once per state. sort=False keeps the states in order of first appearance
# (the same order df.state.unique() gives), and groupby skips rows whose
# state is missing instead of failing on an empty selection.
state_columns = ['state', 'state_name', 'new_results_reported', 'positive_cases', 'positivity_rate']
state_df = (
    df.assign(positive_cases=positive_results)
    .groupby('state', sort=False)
    .agg(
        state_name=('state_name', 'first'),
        new_results_reported=('new_results_reported', 'sum'),
        positive_cases=('positive_cases', 'sum'),
    )
    .reset_index()
)
# positivity_rate = positive tests / tests performed, expressed as a percentage.
state_df['positivity_rate'] = (
    state_df['positive_cases'] / state_df['new_results_reported']
) * 100
# Pin the column order explicitly so the displayed table layout is stable.
state_df = state_df[state_columns]

# The 10 states with the highest test positivity rate
# (positive tests / tests performed) for tests performed in the last 30 days.
state_df.sort_values(by='positivity_rate', ascending=False).head(10)

Unnamed: 0,state,state_name,new_results_reported,positive_cases,positivity_rate
30,VI,U.S. Virgin Islands,177,50,28.248588
50,MO,Missouri,20913,3865,18.481327
25,PR,Puerto Rico,21679,3972,18.321878
28,SD,South Dakota,9744,1673,17.16954
37,GU,Guam,1604,256,15.9601
42,WA,Washington,34762,4922,14.159139
35,WY,Wyoming,5006,581,11.606073
51,NM,New Mexico,17450,1944,11.140401
45,HI,Hawaii,19391,1994,10.283121
44,NV,Nevada,22114,2227,10.070544
