In [5]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# CovidActNow API Key
from config import api_key
# Census API Key
from config import census_key

In [6]:
# Create State population dataframe

# Specify the Census URL
census_url = "https://api.census.gov/data/2021/pep/population?"
# Build Census Pops by state query URL
census_query_url = f"{census_url}get=DENSITY_2021,POP_2021,NAME,STATE&for=state:*&key={census_key}"

In [7]:
census_query_url

'https://api.census.gov/data/2021/pep/population?get=DENSITY_2021,POP_2021,NAME,STATE&for=state:*&key=1323acd93c160d42f2cd9cd97339cf3439681469'

In [8]:
# Make state pops data request and save to a dataframe
pops_response = requests.get(census_query_url)
pops_json = pops_response.json()
row_counter = len(pops_json)
pops_data = []
for row in range(1, row_counter):
    pops_data.append(pops_json[row])
pops_data_df = pd.DataFrame(pops_data, columns = pops_json[0])

JSONDecodeError: Expecting value: line 2 column 1 (char 1)

In [None]:
pops_data_df.head()

In [12]:
pops_response.url

'https://api.census.gov/data/invalid_key.html'

In [None]:
# Create new dataframe with only the needed data columns
reduced_pops_data_df = pops_data_df[["NAME", "POP_2021"]]

In [None]:
reduced_pops_data_df.head()

In [None]:
# Create dataframe that contains state name and ID
state_id = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY",
            "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH",
            "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
state_list = ["Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware", 
              "District of Columbia", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas",
              "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi",
              "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico", "New York", 
              "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Puerto Rico", "Rhode Island", 
              "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", 
              "West Virginia", "Wisconsin", "Wyoming"]
state_id_df = pd.DataFrame({"NAME" : state_list,
                            "State" : state_id})

In [None]:
state_id_df.head()

In [None]:
# Combine the data into a single dataset
state_pops_data_df = pd.merge(state_id_df, reduced_pops_data_df, on = "NAME", how = "left", sort = False)

In [None]:
state_pops_data_df

In [None]:
# Create Covid dataframe

# Specify the Covid URL
covid_url = "https://api.covidactnow.org/v2/states.timeseries.csv?apiKey="
# Build CovidActNow query URL
covid_query_url = f"{covid_url}{api_key}"

In [None]:
covid_query_url

In [None]:
# Make Covid data request and save to a CSV file
covid_url_response = requests.get(covid_query_url)
covid_url_content = covid_url_response.content
csv_file = open('covid_data.csv', 'wb')
csv_file.write(covid_url_content)
csv_file.close()

In [None]:
# Path to the Covid data file
covid_data_path = "covid_data.csv"
# Read the CSV file and create a dataframe
covid_data_df = pd.read_csv(covid_data_path)

In [None]:
covid_data_df

In [None]:
# Create new dataframe with only the needed data columns
clean_covid_df = covid_data_df[["date" , 
                                "state", 
                                "actuals.cases", 
                                "actuals.newCases", 
                                "actuals.deaths", 
                                "actuals.newDeaths", 
                                "actuals.positiveTests", 
                                "actuals.negativeTests",  
                                "actuals.hospitalBeds.capacity", 
                                "actuals.hospitalBeds.currentUsageCovid",
                                "actuals.icuBeds.capacity", 
                                "actuals.icuBeds.currentUsageCovid",
                                "actuals.vaccinesDistributed", 
                                "actuals.vaccinesAdministered",
                                "actuals.vaccinationsInitiated", 
                                "actuals.vaccinationsCompleted",  
                                "actuals.vaccinationsAdditionalDose"]]

In [None]:
clean_covid_df

In [None]:
# Rename columns for easier viewing
clean_covid_df = clean_covid_df.rename(columns={"date" : "Date", 
                                                "state" : "State", 
                                                "actuals.cases" : "Total_Cases",
                                                "actuals.newCases" : "New_Cases", 
                                                "actuals.deaths" : "Total_Deaths",
                                                "actuals.newDeaths" : "New_Deaths", 
                                                "actuals.positiveTests" : "Total_Positives",
                                                "actuals.negativeTests" : "Total_Negatives", 
                                                "actuals.hospitalBeds.capacity": "Total_Bed",
                                                "actuals.hospitalBeds.currentUsageCovid" : "Current_Bed",
                                                "actuals.icuBeds.capacity" : "Total_ICU", 
                                                "actuals.icuBeds.currentUsageCovid" : "Current_ICU",
                                                "actuals.vaccinesDistributed" : "Vaccine_Dist", 
                                                "actuals.vaccinesAdministered" : "Vaccine_Admin",
                                                "actuals.vaccinationsInitiated" : "Vaccine_Int", 
                                                "actuals.vaccinationsCompleted" : "Vaccine_Complete",
                                                "actuals.vaccinationsAdditionalDose" : "Add_Dose"})

In [None]:
clean_covid_df

In [None]:
# Filter for the data from 12/31/20 to most recent data
clean_covid_df = clean_covid_df.loc[(clean_covid_df["Date"] > "2020-12-30") &
                                    (clean_covid_df["Date"] < "2022-01-01"), :]

In [None]:
clean_covid_df

In [None]:
# Create "Year", "Month" and "Day" columns based on the "Date" column
clean_covid_df["Year"] = pd.DatetimeIndex(clean_covid_df["Date"]).year
clean_covid_df["Month"] = pd.DatetimeIndex(clean_covid_df["Date"]).month
clean_covid_df["Day"] = pd.DatetimeIndex(clean_covid_df["Date"]).day

In [None]:
# Move "Year", "Month" and "Day" columns to the front of the dataframe
year_column = clean_covid_df.pop("Year")
month_column = clean_covid_df.pop("Month")
day_column = clean_covid_df.pop("Day")
clean_covid_df.insert(0, "Year", year_column)
clean_covid_df.insert(1, "Month", month_column)
clean_covid_df.insert(2, "Day", day_column)

In [None]:
#Replace NaN values with space
clean_covid_df = clean_covid_df.fillna(0)

In [None]:
clean_covid_df

In [None]:
monthly_data1_df = clean_covid_df[["State",
                                   "Year", 
                                   "Month", 
                                   "Day", 
                                   "Total_Cases", 
                                   "Total_Deaths", 
                                   "Total_Positives",
                                   "Total_Negatives", 
                                   "Vaccine_Dist",
                                   "Vaccine_Admin", 
                                   "Vaccine_Int",
                                   "Vaccine_Complete", 
                                   "Add_Dose"]]

In [None]:
monthly_data1_df

In [None]:
# Sort the data using "State"(ascending), "Year"(ascending), "Month"(ascending)  and "Day" (descending) columns 
sorted_monthly_data1_df = monthly_data1_df.sort_values(by = ["State", "Year", "Month", "Day"], 
                                                         ascending = [True, True, True, False])


In [None]:
sorted_monthly_data1_df

In [None]:
# Keep only the data for the last day of each month, remove all other days
final_monthly_data1_df = sorted_monthly_data1_df.drop_duplicates(subset = ["State", "Year", "Month"],
                                                                  keep = "first").reset_index(drop = True)

In [None]:
final_monthly_data1_df

In [None]:
final_monthly_data1_df[["Monthly Cases", "Monthly Deaths", "Monthly Positive", "Monthly Negatives", 
                       "Monthly Vaccine Completed"]]=""

In [None]:
final_monthly_data1_df

In [None]:
row_counter = len(final_monthly_data1_df)
row_counter

In [None]:
for x in range(1, row_counter):
    final_monthly_data1_df.iloc[x, 13] = final_monthly_data1_df.iloc[x, 4] - final_monthly_data1_df.iloc[x-1, 4]
    final_monthly_data1_df.iloc[x, 14] = final_monthly_data1_df.iloc[x, 5] - final_monthly_data1_df.iloc[x-1, 5]
    final_monthly_data1_df.iloc[x, 15] = final_monthly_data1_df.iloc[x, 6] - final_monthly_data1_df.iloc[x-1, 6]
    final_monthly_data1_df.iloc[x, 16] = final_monthly_data1_df.iloc[x, 7] - final_monthly_data1_df.iloc[x-1, 7]
    final_monthly_data1_df.iloc[x, 17] = final_monthly_data1_df.iloc[x, 11] - final_monthly_data1_df.iloc[x-1, 11]

In [None]:
 final_monthly_data1_df

In [None]:
sorted_monthly_data1_df.to_csv("Final_COVID_Data.csv")