In [None]:
# pip install psycopg2-binary in mlenv
# Create config.py file with postgres password as db_password
# Create postgres database called COVID_Risk_Final_Project on POstgreSQL 13 server

In [1]:
# Import the requests library.
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from config import connection_string
import pandas as pd
import psycopg2 as pg
from flask import Flask, jsonify, render_template
from functools import reduce
import numpy as np

In [2]:
# Read vaccination data into dataframe
vax_data = 'https://group3data.s3.us-west-2.amazonaws.com/COVID-19_Vaccinations_in_the_United_States_County.csv'
vax_data_df = pd.read_csv(vax_data)
vax_data_df

Unnamed: 0,Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,...,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI,Metro_status,Series_Complete_Pop_Pct_UR_Equity,Series_Complete_12PlusPop_Pct_UR_Equity,Series_Complete_18PlusPop_Pct_UR_Equity,Series_Complete_65PlusPop_Pct_UR_Equity
0,11/06/2021,21097,44,Harrison County,KY,43.2,8163,8163.0,50.7,7772,...,C,11.0,12.0,12.0,12.0,Non-metro,7.0,8.0,8.0,8.0
1,11/06/2021,18105,44,Monroe County,IN,54.7,81158,81155.0,61.0,76819,...,B,8.0,8.0,8.0,8.0,Metro,4.0,4.0,4.0,4.0
2,11/06/2021,46023,44,Charles Mix County,SD,47.3,4399,4398.0,59.8,4119,...,C,11.0,12.0,12.0,12.0,Non-metro,7.0,8.0,8.0,8.0
3,11/06/2021,41045,44,Malheur County,OR,42.2,12893,12884.0,51.1,12170,...,D,15.0,16.0,16.0,16.0,Non-metro,7.0,8.0,8.0,8.0
4,11/06/2021,39127,44,Perry County,OH,36.9,13339,13339.0,43.5,12823,...,C,10.0,11.0,11.0,12.0,Metro,2.0,3.0,3.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1079793,12/13/2020,45045,51,Greenville County,SC,0.0,0,0.0,0.0,0,...,C,,,,,Metro,,,,
1079794,12/13/2020,05145,51,White County,AR,0.0,0,0.0,0.0,0,...,C,,,,,Non-metro,,,,
1079795,12/13/2020,55025,51,Dane County,WI,0.0,0,0.0,0.0,0,...,A,,,,,Metro,,,,
1079796,12/13/2020,18109,51,Morgan County,IN,0.0,0,0.0,0.0,0,...,A,,,,,Metro,,,,


In [3]:
drop_list_vax = ['MMWR_week', 'Recip_County', 'Recip_State', 'SVI_CTGY', 'Series_Complete_Pop_Pct_SVI', 
                 'Series_Complete_12PlusPop_Pct_SVI', 'Series_Complete_18PlusPop_Pct_SVI', 
                 'Series_Complete_65PlusPop_Pct_SVI', 'Metro_status', 'Series_Complete_Pop_Pct_UR_Equity', 
                 'Series_Complete_12PlusPop_Pct_UR_Equity', 'Series_Complete_18PlusPop_Pct_UR_Equity', 
                 'Series_Complete_65PlusPop_Pct_UR_Equity', 'Administered_Dose1_Recip_18Plus', 'Series_Complete_12Plus', 
                 'Series_Complete_12PlusPop_Pct', 'Administered_Dose1_Recip', 'Administered_Dose1_Recip_12Plus', 
                 'Administered_Dose1_Recip_12PlusPop_Pct', 'Administered_Dose1_Recip_65Plus']

In [4]:
# Remove drop_list columns from df
vax_data_df.drop(drop_list_vax, axis=1, inplace=True)
vax_data_df.head()

Unnamed: 0,Date,FIPS,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct
0,11/06/2021,21097,43.2,8163,7772,53.2,2708,79.4,94.0,48.1,59.1,84.9
1,11/06/2021,18105,54.7,81158,76819,61.3,17297,86.0,98.7,57.9,64.7,89.0
2,11/06/2021,46023,47.3,4399,4119,63.2,1409,81.6,96.3,59.9,78.3,90.0
3,11/06/2021,41045,42.2,12893,12170,53.7,4168,81.1,97.8,47.2,59.9,88.0
4,11/06/2021,39127,36.9,13339,12823,46.4,4555,74.2,98.6,38.7,48.6,77.6


In [5]:
# Convert date column to datetime
vax_data_df['Date'] = pd.to_datetime(vax_data_df['Date'])

In [6]:
# Set each column to a string value so we can concatonate them together 
vax_data_df['FIPS'] = vax_data_df['FIPS'].astype('str')
vax_data_df['Date'] = vax_data_df['Date'].astype('str')

In [7]:
# Concatonate the date column and the fips column to create fips_date
vax_data_df['fips_date'] = vax_data_df['FIPS'] + vax_data_df['Date']
vax_data_df

Unnamed: 0,Date,FIPS,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,fips_date
0,2021-11-06,21097,43.2,8163,7772,53.2,2708,79.4,94.0,48.1,59.1,84.9,210972021-11-06
1,2021-11-06,18105,54.7,81158,76819,61.3,17297,86.0,98.7,57.9,64.7,89.0,181052021-11-06
2,2021-11-06,46023,47.3,4399,4119,63.2,1409,81.6,96.3,59.9,78.3,90.0,460232021-11-06
3,2021-11-06,41045,42.2,12893,12170,53.7,4168,81.1,97.8,47.2,59.9,88.0,410452021-11-06
4,2021-11-06,39127,36.9,13339,12823,46.4,4555,74.2,98.6,38.7,48.6,77.6,391272021-11-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1079793,2020-12-13,45045,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,450452020-12-13
1079794,2020-12-13,05145,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,051452020-12-13
1079795,2020-12-13,55025,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,550252020-12-13
1079796,2020-12-13,18109,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,181092020-12-13


In [8]:
# Sort by fips_date
vax_data_df.sort_values(by=['fips_date'])

Unnamed: 0,Date,FIPS,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,fips_date
1079754,2020-12-13,01001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,010012020-12-13
1075917,2020-12-14,01001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,010012020-12-14
1071696,2020-12-15,01001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,010012020-12-15
1067633,2020-12-16,01001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,010012020-12-16
1065713,2020-12-17,01001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,010012020-12-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2952,2021-11-06,UNK,0.0,163735,158310,0.0,39580,0.0,92.6,0.0,0.0,0.0,UNK2021-11-06
2941,2021-11-06,UNK,0.0,379701,333208,0.0,101590,0.0,93.6,0.0,0.0,0.0,UNK2021-11-06
2664,2021-11-06,UNK,0.0,15721,14428,0.0,1714,0.0,0.0,0.0,0.0,0.0,UNK2021-11-06
2742,2021-11-06,UNK,0.0,2160207,2095932,0.0,533935,0.0,58.1,0.0,0.0,0.0,UNK2021-11-06


In [9]:
# Eliminate '0' from fips_date columns that begin with '0'
cols_to_check = ['fips_date']
for col in cols_to_check:
    vax_data_df[col] = vax_data_df[col].apply(lambda x : x[1:] if x.startswith("0") else x)

In [10]:
vax_data_df.sort_values(by=['fips_date'])

Unnamed: 0,Date,FIPS,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,fips_date
1079237,2020-12-13,10001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,100012020-12-13
1073653,2020-12-14,10001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,100012020-12-14
1070840,2020-12-15,10001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,100012020-12-15
1069915,2020-12-16,10001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,100012020-12-16
1063782,2020-12-17,10001,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,100012020-12-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2941,2021-11-06,UNK,0.0,379701,333208,0.0,101590,0.0,93.6,0.0,0.0,0.0,UNK2021-11-06
353,2021-11-06,UNK,0.0,33674,32944,0.0,8929,0.0,96.5,0.0,0.0,0.0,UNK2021-11-06
1550,2021-11-06,UNK,0.0,9206,9004,0.0,2436,0.0,96.4,0.0,0.0,0.0,UNK2021-11-06
2742,2021-11-06,UNK,0.0,2160207,2095932,0.0,533935,0.0,58.1,0.0,0.0,0.0,UNK2021-11-06


In [11]:
# Read NYT data into dataframe
nyt_data = 'https://group3data.s3.us-west-2.amazonaws.com/us-counties.csv'
nyt_data_df = pd.read_csv(nyt_data)
nyt_data_df

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0
...,...,...,...,...,...,...
1888090,2021-11-05,Sweetwater,Wyoming,56037.0,7695,91.0
1888091,2021-11-05,Teton,Wyoming,56039.0,5243,14.0
1888092,2021-11-05,Uinta,Wyoming,56041.0,3901,27.0
1888093,2021-11-05,Washakie,Wyoming,56043.0,1780,35.0


In [12]:
# Convert the date column in the NYT data to a datetime object using pd.to_datetime
nyt_data_df['date'] = pd.to_datetime(nyt_data_df['date'])
nyt_data_df.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [13]:
# 1. Remove the data for dates before 7-31-20 and after 10-22-21
# 2. Bin the date column by weeks ending every friday
# 3. Sum the cases and deaths columns for each week binned

# Step 1:
nyt_data_filtered = nyt_data_df[nyt_data_df['date'] >= '2020-07-25']
nyt_data_filtered = nyt_data_filtered[nyt_data_filtered['date'] <= '2021-10-22']
nyt_data_filtered

Unnamed: 0,date,county,state,fips,cases,deaths
366652,2020-07-25,Autauga,Alabama,1001.0,932,21.0
366653,2020-07-25,Baldwin,Alabama,1003.0,2662,18.0
366654,2020-07-25,Barbour,Alabama,1005.0,552,4.0
366655,2020-07-25,Bibb,Alabama,1007.0,318,2.0
366656,2020-07-25,Blount,Alabama,1009.0,637,1.0
...,...,...,...,...,...,...
1842600,2021-10-22,Sweetwater,Wyoming,56037.0,7414,79.0
1842601,2021-10-22,Teton,Wyoming,56039.0,5119,14.0
1842602,2021-10-22,Uinta,Wyoming,56041.0,3781,26.0
1842603,2021-10-22,Washakie,Wyoming,56043.0,1676,33.0


In [14]:
# Step 2:
# Step 3:

nyt_data_grouped = nyt_data_filtered.groupby(['fips', pd.Grouper(key='date', freq='W-FRI')]).agg({'cases':'sum','deaths':'sum'}).reset_index()
nyt_data_grouped

Unnamed: 0,fips,date,cases,deaths
0,1001.0,2020-07-31,6817,147.0
1,1001.0,2020-08-07,7503,150.0
2,1001.0,2020-08-14,8531,158.0
3,1001.0,2020-08-21,9009,161.0
4,1001.0,2020-08-28,9484,161.0
...,...,...,...,...
208977,78030.0,2021-09-24,24720,297.0
208978,78030.0,2021-10-01,24982,304.0
208979,78030.0,2021-10-08,25204,308.0
208980,78030.0,2021-10-15,25290,311.0


In [15]:
# Convert fips to integer
columns = ['fips']
nyt_data_grouped[columns] = nyt_data_grouped[columns].applymap(np.int64)
nyt_data_grouped

Unnamed: 0,fips,date,cases,deaths
0,1001,2020-07-31,6817,147.0
1,1001,2020-08-07,7503,150.0
2,1001,2020-08-14,8531,158.0
3,1001,2020-08-21,9009,161.0
4,1001,2020-08-28,9484,161.0
...,...,...,...,...
208977,78030,2021-09-24,24720,297.0
208978,78030,2021-10-01,24982,304.0
208979,78030,2021-10-08,25204,308.0
208980,78030,2021-10-15,25290,311.0


In [16]:
# Set each column to a string value so we can concatonate them together 
nyt_data_grouped['fips'] = nyt_data_grouped['fips'].astype('str')
nyt_data_grouped['date'] = nyt_data_grouped['date'].astype('str')
nyt_data_grouped['fips'].dtype

dtype('O')

In [17]:
# Concatonate the date column and the fips column to create fips_date
nyt_data_grouped['fips_date'] = nyt_data_grouped['fips'] + nyt_data_grouped['date']
nyt_data_grouped

Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001,2020-07-31,6817,147.0,10012020-07-31
1,1001,2020-08-07,7503,150.0,10012020-08-07
2,1001,2020-08-14,8531,158.0,10012020-08-14
3,1001,2020-08-21,9009,161.0,10012020-08-21
4,1001,2020-08-28,9484,161.0,10012020-08-28
...,...,...,...,...,...
208977,78030,2021-09-24,24720,297.0,780302021-09-24
208978,78030,2021-10-01,24982,304.0,780302021-10-01
208979,78030,2021-10-08,25204,308.0,780302021-10-08
208980,78030,2021-10-15,25290,311.0,780302021-10-15


In [18]:
# Read Hospital data (healthdata.gov) into dataframe
hospital_data = 'https://group3data.s3.us-west-2.amazonaws.com/COVID-19_Reported_Patient_Impact_and_Hospital_Capacity_by_Facility.csv'
hospital_data_df = pd.read_csv(hospital_data)
hospital_data_df

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,previous_day_admission_pediatric_covid_confirmed_7_day_coverage,previous_day_admission_adult_covid_suspected_7_day_coverage,previous_day_admission_pediatric_covid_suspected_7_day_coverage,previous_week_personnel_covid_vaccinated_doses_administered_7_day,total_personnel_covid_vaccinated_doses_none_7_day,total_personnel_covid_vaccinated_doses_one_7_day,total_personnel_covid_vaccinated_doses_all_7_day,previous_week_patients_covid_vaccinated_doses_one_7_day,previous_week_patients_covid_vaccinated_doses_all_7_day,is_corrected
0,140158,2021/10/15,IL,140158,INSIGHT HOSPITAL AND MEDICAL CENTER CHICAGO,2525 S MICHIGAN AVE,CHICAGO,60616.0,Short Term,17031.0,...,7,7,7,0.0,,,,0.0,,False
1,450162,2021/10/15,TX,450162,GRACE SURGICAL HOSPITAL,2412 50TH ST,LUBBOCK,79412.0,Short Term,48303.0,...,7,7,7,,,,,,,False
2,451318,2021/10/15,TX,451318,STONEWALL MEMORIAL HOSPITAL DISTRICT,821 NORTH BROADWAY,ASPERMONT,79502.0,Critical Access Hospitals,48433.0,...,7,7,7,,,,,,,False
3,050769,2021/10/08,CA,050769,HOAG ORTHOPEDIC INSTITUTE,16250 SAND CANYON AVENUE,IRVINE,92618.0,Short Term,6059.0,...,7,7,7,,,,,,,False
4,291500,2021/10/08,NV,291500,NATHAN ADELSON HOSPICE,4141 UNIVERSITY CENTER DR,LAS VEGAS,89119.0,Short Term,32003.0,...,7,7,7,,,,,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324743,100291,2020/08/07,FL,100291,STEWARD REGIONAL MEDICAL CENTER,250 NORTH WICKHAM ROAD,MELBOURNE,32935.0,Short Term,12009.0,...,7,7,7,,,,,,,False
324744,250096,2020/07/31,MS,250096,MERIT HEALTH RANKIN,350 CROSSGATES BLVD,BRANDON,39042.0,Short Term,28121.0,...,0,0,0,,,,,,,False
324745,241345,2020/07/31,MN,241345,MAYO CLINIC HEALTH SYSTEM-WASECA,501 NORTH STATE STREET,WASECA,56093.0,Critical Access Hospitals,27161.0,...,7,7,7,,,,,,,False
324746,271311,2020/08/07,MT,271311,BIG SANDY MEDICAL CENTER,166 MONTANA AVE E,BIG SANDY,59520.0,Critical Access Hospitals,30015.0,...,1,1,1,,,,,,,False


In [19]:
# Create list of columns to drop
drop_list_hospitals = ['hospital_pk', 'state', 'ccn', 'hospital_name', 'address', 'city', 'zip', 'hospital_subtype', 
                       'is_metro_micro', 'total_beds_7_day_avg', 'all_adult_hospital_beds_7_day_avg', 
                       'all_adult_hospital_inpatient_beds_7_day_avg', 'inpatient_beds_used_7_day_avg', 
                       'all_adult_hospital_inpatient_bed_occupied_7_day_avg', 
                       'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', 
                       'total_adult_patients_hospitalized_confirmed_covid_7_day_avg', 
                       'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', 
                       'total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', 'inpatient_beds_7_day_avg', 
                       'total_icu_beds_7_day_avg', 'total_staffed_adult_icu_beds_7_day_avg', 'icu_beds_used_7_day_avg', 
                       'staffed_adult_icu_bed_occupancy_7_day_avg', 
                       'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg', 
                       'staffed_icu_adult_patients_confirmed_covid_7_day_avg', 
                       'total_patients_hospitalized_confirmed_influenza_7_day_avg', 
                       'icu_patients_confirmed_influenza_7_day_avg', 
                       'total_patients_hospitalized_confirmed_influenza_and_covid_7_day_avg', 
                       'all_adult_hospital_inpatient_beds_7_day_coverage', 
                       'inpatient_beds_used_7_day_coverage', 'all_adult_hospital_inpatient_bed_occupied_7_day_coverage', 
                       'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', 
                       'total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', 
                       'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', 
                       'total_pediatric_patients_hospitalized_confirmed_covid_7_day_coverage', 
                       'inpatient_beds_7_day_coverage', 'total_icu_beds_7_day_coverage', 
                       'total_staffed_adult_icu_beds_7_day_coverage', 'icu_beds_used_7_day_coverage', 
                       'staffed_adult_icu_bed_occupancy_7_day_coverage', 
                       'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage', 
                       'staffed_icu_adult_patients_confirmed_covid_7_day_coverage', 
                       'total_patients_hospitalized_confirmed_influenza_7_day_coverage', 
                       'icu_patients_confirmed_influenza_7_day_coverage', 
                       'total_patients_hospitalized_confirmed_influenza_and_covid_7_day_coverage', 
                       'previous_day_admission_adult_covid_confirmed_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_18-19_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_20-29_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_30-39_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_40-49_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_50-59_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_60-69_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_70-79_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_80+_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', 
                       'previous_day_admission_pediatric_covid_confirmed_7_day_sum', 
                       'previous_day_covid_ED_visits_7_day_sum', 'previous_day_admission_adult_covid_suspected_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_18-19_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_20-29_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_30-39_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_40-49_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_50-59_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_60-69_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_70-79_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_80+_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_unknown_7_day_sum', 
                       'previous_day_admission_pediatric_covid_suspected_7_day_sum', 
                       'previous_day_total_ED_visits_7_day_sum', 'previous_day_admission_influenza_confirmed_7_day_sum', 
                       'geocoded_hospital_address', 'hhs_ids', 
                       'previous_day_admission_adult_covid_confirmed_7_day_coverage', 
                       'previous_day_admission_pediatric_covid_confirmed_7_day_coverage', 
                       'previous_day_admission_adult_covid_suspected_7_day_coverage', 
                       'previous_day_admission_pediatric_covid_suspected_7_day_coverage', 
                       'previous_week_personnel_covid_vaccinated_doses_administered_7_day', 
                       'total_personnel_covid_vaccinated_doses_none_7_day', 
                       'total_personnel_covid_vaccinated_doses_one_7_day', 
                       'total_personnel_covid_vaccinated_doses_all_7_day', 
                       'previous_week_patients_covid_vaccinated_doses_one_7_day', 
                       'previous_week_patients_covid_vaccinated_doses_all_7_day', 'is_corrected', 
                       'total_beds_7_day_coverage', 'all_adult_hospital_beds_7_day_coverage',
                       'total_patients_hospitalized_confirmed_influenza_7_day_sum', 
                       'icu_patients_confirmed_influenza_7_day_sum', 
                       'total_patients_hospitalized_confirmed_influenza_and_covid_7_day_sum'
]

In [20]:
# Remove drop_list columns from df
hospital_data_df.drop(drop_list_hospitals, axis=1, inplace=True)
hospital_data_df.head()

Unnamed: 0,collection_week,fips_code,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum
0,2021/10/15,17031.0,350.0,350.0,252.0,160.0,160.0,7.0,7.0,0.0,0.0,252.0,28.0,28.0,16.0,16.0,7.0,7.0
1,2021/10/15,48303.0,268.0,268.0,261.0,34.0,34.0,0.0,0.0,0.0,0.0,261.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2021/10/15,48433.0,77.0,77.0,63.0,0.0,0.0,0.0,0.0,0.0,0.0,63.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2021/10/08,6059.0,245.0,245.0,133.0,126.0,126.0,0.0,0.0,0.0,0.0,133.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2021/10/08,32003.0,266.0,266.0,266.0,191.0,191.0,11.0,11.0,0.0,0.0,266.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Convert the date column in the hospital data to a datetime object using pd.to_datetime
hospital_data_df['collection_week'] = pd.to_datetime(hospital_data_df['collection_week'])
# Check to verify the date is now a datetime object
hospital_data_df.dtypes

collection_week                                                                  datetime64[ns]
fips_code                                                                               float64
total_beds_7_day_sum                                                                    float64
all_adult_hospital_beds_7_day_sum                                                       float64
all_adult_hospital_inpatient_beds_7_day_sum                                             float64
inpatient_beds_used_7_day_sum                                                           float64
all_adult_hospital_inpatient_bed_occupied_7_day_sum                                     float64
total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum               float64
total_adult_patients_hospitalized_confirmed_covid_7_day_sum                             float64
total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum           float64
total_pediatric_patients_hospitalized_co

In [22]:
# Sort the hospital data by date
hospital_data_df = hospital_data_df.sort_values(by='collection_week')
hospital_data_df

# The hospital data is binned by collection week ending on every friday

Unnamed: 0,collection_week,fips_code,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum
324747,2020-07-31,13013.0,266.0,,,199.0,,0.0,,-999999.0,,266.0,0.0,,0.0,,,
319367,2020-07-31,21015.0,1062.0,1062.0,1062.0,949.0,949.0,-999999.0,-999999.0,0.0,0.0,1062.0,126.0,108.0,109.0,109.0,-999999.0,-999999.0
319371,2020-07-31,12099.0,3332.0,2710.0,2710.0,1873.0,1859.0,266.0,266.0,0.0,0.0,3206.0,287.0,287.0,178.0,178.0,41.0,41.0
319374,2020-07-31,19049.0,102.0,126.0,84.0,-999999.0,9.0,4.0,-999999.0,0.0,0.0,84.0,0.0,0.0,0.0,0.0,0.0,0.0
319375,2020-07-31,6037.0,824.0,404.0,341.0,296.0,296.0,5.0,0.0,0.0,0.0,341.0,20.0,20.0,18.0,18.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10378,2021-10-22,37005.0,42.0,42.0,42.0,26.0,26.0,-999999.0,-999999.0,0.0,0.0,42.0,0.0,0.0,0.0,0.0,0.0,0.0
10377,2021-10-22,48039.0,15.0,15.0,9.0,4.0,4.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0
6377,2021-10-22,20019.0,154.0,154.0,126.0,10.0,10.0,0.0,0.0,0.0,0.0,126.0,0.0,0.0,0.0,0.0,0.0,0.0
10372,2021-10-22,21125.0,1421.0,864.0,864.0,658.0,658.0,158.0,158.0,0.0,0.0,864.0,336.0,336.0,229.0,229.0,71.0,71.0


In [23]:
# Group and sort hospital_data_df by fips_code
hospital_data_df = hospital_data_df.groupby(['fips_code', pd.Grouper(key='collection_week', freq='W-FRI')]).agg({'total_beds_7_day_sum':'sum','all_adult_hospital_beds_7_day_sum':'sum', 'all_adult_hospital_inpatient_beds_7_day_sum':'sum', 'inpatient_beds_used_7_day_sum':'sum', 'all_adult_hospital_inpatient_bed_occupied_7_day_sum':'sum', 
                                                                                                                       'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'sum', 'total_adult_patients_hospitalized_confirmed_covid_7_day_sum': 'sum', 'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'sum', 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum':'sum', 'inpatient_beds_7_day_sum':'sum', 'total_icu_beds_7_day_sum':'sum', 
                                                                                                                       'total_staffed_adult_icu_beds_7_day_sum':'sum', 'icu_beds_used_7_day_sum':'sum', 'staffed_adult_icu_bed_occupancy_7_day_sum':'sum', 'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum':'sum', 'staffed_icu_adult_patients_confirmed_covid_7_day_sum':'sum'}).reset_index()
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum
0,1001.0,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,-999999.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0
1,1001.0,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,-999999.0,-999999.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0
2,1001.0,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,-999999.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0
3,1001.0,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0
4,1001.0,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0
159867,78020.0,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0
159868,78020.0,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0
159869,78020.0,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0


In [24]:
# Convert fips to integer
columns = ['fips_code']
hospital_data_df[columns] = hospital_data_df[columns].applymap(np.int64)
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum
0,1001,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,-999999.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0
1,1001,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,-999999.0,-999999.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0
2,1001,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,-999999.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0
3,1001,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0
4,1001,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0
159867,78020,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0
159868,78020,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0
159869,78020,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0


In [25]:
# Convert the hospital date and fips_code column to strings
hospital_data_df['collection_week'] = hospital_data_df['collection_week'].astype('str')
hospital_data_df['fips_code'] = hospital_data_df['fips_code'].astype('str')

In [26]:
# Concat the date and fips_code for new columns fips_date
hospital_data_df['fips_date'] = hospital_data_df['fips_code'] + hospital_data_df['collection_week']
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum,fips_date
0,1001,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,-999999.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,10012020-07-31
1,1001,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,-999999.0,-999999.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,10012020-08-07
2,1001,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,-999999.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,10012020-08-14
3,1001,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,10012020-08-21
4,1001,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,10012020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,780202021-09-24
159867,78020,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,780202021-10-01
159868,78020,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,780202021-10-08
159869,78020,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,780202021-10-15


In [27]:
# Convert all negative values to 0
num = hospital_data_df._get_numeric_data()
num[num < 0] = 0

In [28]:
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum,fips_date
0,1001,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,10012020-07-31
1,1001,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,0.0,0.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,10012020-08-07
2,1001,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,0.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,10012020-08-14
3,1001,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,10012020-08-21
4,1001,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,10012020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,780202021-09-24
159867,78020,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,780202021-10-01
159868,78020,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,780202021-10-08
159869,78020,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,780202021-10-15


In [29]:
f = hospital_data_df['fips_date'].unique()

In [30]:
x = nyt_data_grouped['fips_date'].unique()

In [31]:
# Create list of fips_date values that only appear in one data set
remove_list = list(set(x).symmetric_difference(set(f)))
remove_list

['180472020-08-14',
 '22802021-07-16',
 '380332020-09-25',
 '132892021-09-17',
 '132892021-05-28',
 '280732021-07-02',
 '132692021-02-19',
 '181192021-05-14',
 '311152021-09-17',
 '720832021-09-24',
 '221072020-12-25',
 '211532021-07-16',
 '370732021-09-17',
 '370292021-04-02',
 '10752020-11-06',
 '540312021-10-22',
 '721232021-07-02',
 '470232021-09-10',
 '133072021-03-05',
 '130492020-12-04',
 '51012021-01-15',
 '211052021-01-22',
 '290572021-10-01',
 '380052021-05-07',
 '483812021-08-27',
 '720732021-06-18',
 '721152020-11-06',
 '22802021-03-26',
 '210432020-09-18',
 '290412021-01-15',
 '371432021-05-07',
 '260832020-11-06',
 '21882020-12-11',
 '191732020-08-21',
 '471532020-09-11',
 '60032021-02-05',
 '460412020-09-18',
 '720572021-10-22',
 '210892020-08-14',
 '470612021-06-04',
 '371992021-05-07',
 '516852021-07-02',
 '511932021-04-16',
 '212352021-09-03',
 '720272020-10-23',
 '180132020-12-18',
 '720292021-01-15',
 '180152021-07-23',
 '510372020-12-11',
 '201392021-07-16',
 '1713

In [32]:
# Eliminate rows with fips_date values that do not appear in counties dataset
hospital_data_df = hospital_data_df[~hospital_data_df['fips_date'].isin(remove_list)]
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum,fips_date
0,1001,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,10012020-07-31
1,1001,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,0.0,0.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,10012020-08-07
2,1001,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,0.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,10012020-08-14
3,1001,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,10012020-08-21
4,1001,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,10012020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,780202021-09-24
159867,78020,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,780202021-10-01
159868,78020,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,780202021-10-08
159869,78020,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,780202021-10-15


In [33]:
# Shorten column names to avoid truncating in postgres (confirmed=con, suspected=sus, pediatric=ped)
hospital_data_df.rename({'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum', 
                         'total_adult_patients_hospitalized_confirmed_covid_7_day_sum':'total_adult_patients_hospitalized_con_covid_7_day_sum', 
                         'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum', 
                         'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum':'total_ped_patients_hospitalized_con_covid_7_day_sum', 
                         'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum':'staffed_icu_adult_patients_con_and_sus_covid_7_day_sum', 
                         'staffed_icu_adult_patients_confirmed_covid_7_day_sum':'staffed_icu_adult_patients_con_covid_7_day_sum'}, axis=1, inplace=True)
hospital_data_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum,total_adult_patients_hospitalized_con_covid_7_day_sum,total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum,total_ped_patients_hospitalized_con_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_con_and_sus_covid_7_day_sum,staffed_icu_adult_patients_con_covid_7_day_sum,fips_date
0,1001,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,10012020-07-31
1,1001,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,0.0,0.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,10012020-08-07
2,1001,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,0.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,10012020-08-14
3,1001,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,10012020-08-21
4,1001,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,10012020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,780202021-09-24
159867,78020,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,780202021-10-01
159868,78020,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,780202021-10-08
159869,78020,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,780202021-10-15


In [34]:
# Eliminate rows with fips_date values that do not appear in hospitals dataset
nyt_data_grouped = nyt_data_grouped[~nyt_data_grouped['fips_date'].isin(remove_list)]
nyt_data_grouped

Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001,2020-07-31,6817,147.0,10012020-07-31
1,1001,2020-08-07,7503,150.0,10012020-08-07
2,1001,2020-08-14,8531,158.0,10012020-08-14
3,1001,2020-08-21,9009,161.0,10012020-08-21
4,1001,2020-08-28,9484,161.0,10012020-08-28
...,...,...,...,...,...
208912,78020,2021-09-24,2258,21.0,780202021-09-24
208913,78020,2021-10-01,2293,21.0,780202021-10-01
208914,78020,2021-10-08,2317,21.0,780202021-10-08
208915,78020,2021-10-15,2317,21.0,780202021-10-15


In [35]:
nyt_data_grouped.dtypes

fips          object
date          object
cases          int64
deaths       float64
fips_date     object
dtype: object

In [36]:
# Convert float data types to integers in NYT data set
columns = ['deaths']
nyt_data_grouped[columns] = nyt_data_grouped[columns].applymap(np.int64)
nyt_data_grouped

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001,2020-07-31,6817,147,10012020-07-31
1,1001,2020-08-07,7503,150,10012020-08-07
2,1001,2020-08-14,8531,158,10012020-08-14
3,1001,2020-08-21,9009,161,10012020-08-21
4,1001,2020-08-28,9484,161,10012020-08-28
...,...,...,...,...,...
208912,78020,2021-09-24,2258,21,780202021-09-24
208913,78020,2021-10-01,2293,21,780202021-10-01
208914,78020,2021-10-08,2317,21,780202021-10-08
208915,78020,2021-10-15,2317,21,780202021-10-15


In [37]:
# Convert NYT date column to datetime data type
nyt_data_grouped['date'] = pd.to_datetime(nyt_data_grouped['date'])
nyt_data_grouped

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001,2020-07-31,6817,147,10012020-07-31
1,1001,2020-08-07,7503,150,10012020-08-07
2,1001,2020-08-14,8531,158,10012020-08-14
3,1001,2020-08-21,9009,161,10012020-08-21
4,1001,2020-08-28,9484,161,10012020-08-28
...,...,...,...,...,...
208912,78020,2021-09-24,2258,21,780202021-09-24
208913,78020,2021-10-01,2293,21,780202021-10-01
208914,78020,2021-10-08,2317,21,780202021-10-08
208915,78020,2021-10-15,2317,21,780202021-10-15


In [38]:
nyt_data_grouped.dtypes

fips                 object
date         datetime64[ns]
cases                 int64
deaths                int64
fips_date            object
dtype: object

In [39]:
# Rename NYT columns 
nyt_data_grouped.rename({'date':'collection_week', 'cases':'cases_to_date', 'deaths':'deaths_to_date'}, axis=1, inplace=True)
nyt_data_grouped

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,fips,collection_week,cases_to_date,deaths_to_date,fips_date
0,1001,2020-07-31,6817,147,10012020-07-31
1,1001,2020-08-07,7503,150,10012020-08-07
2,1001,2020-08-14,8531,158,10012020-08-14
3,1001,2020-08-21,9009,161,10012020-08-21
4,1001,2020-08-28,9484,161,10012020-08-28
...,...,...,...,...,...
208912,78020,2021-09-24,2258,21,780202021-09-24
208913,78020,2021-10-01,2293,21,780202021-10-01
208914,78020,2021-10-08,2317,21,780202021-10-08
208915,78020,2021-10-15,2317,21,780202021-10-15


In [40]:
hospital_data_df.dtypes

fips_code                                                         object
collection_week                                                   object
total_beds_7_day_sum                                             float64
all_adult_hospital_beds_7_day_sum                                float64
all_adult_hospital_inpatient_beds_7_day_sum                      float64
inpatient_beds_used_7_day_sum                                    float64
all_adult_hospital_inpatient_bed_occupied_7_day_sum              float64
total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum    float64
total_adult_patients_hospitalized_con_covid_7_day_sum            float64
total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum      float64
total_ped_patients_hospitalized_con_covid_7_day_sum              float64
inpatient_beds_7_day_sum                                         float64
total_icu_beds_7_day_sum                                         float64
total_staffed_adult_icu_beds_7_day_sum             

In [41]:
# Convert float data types to integers in hospitals data set
columns = ['total_beds_7_day_sum', 'all_adult_hospital_beds_7_day_sum', 
           'all_adult_hospital_inpatient_beds_7_day_sum', 'inpatient_beds_used_7_day_sum', 
           'all_adult_hospital_inpatient_bed_occupied_7_day_sum', 
           'total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum', 
           'total_adult_patients_hospitalized_con_covid_7_day_sum', 
           'total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum', 
           'total_ped_patients_hospitalized_con_covid_7_day_sum', 'inpatient_beds_7_day_sum', 'total_icu_beds_7_day_sum', 
           'total_staffed_adult_icu_beds_7_day_sum', 'icu_beds_used_7_day_sum', 'staffed_adult_icu_bed_occupancy_7_day_sum',
           'staffed_icu_adult_patients_con_and_sus_covid_7_day_sum', 'staffed_icu_adult_patients_con_covid_7_day_sum']
hospital_data_df[columns] = hospital_data_df[columns].applymap(np.int64)
hospital_data_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum,total_adult_patients_hospitalized_con_covid_7_day_sum,total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum,total_ped_patients_hospitalized_con_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_con_and_sus_covid_7_day_sum,staffed_icu_adult_patients_con_covid_7_day_sum,fips_date
0,1001,2020-07-31,574,574,455,363,363,117,103,0,0,455,42,42,40,40,28,26,10012020-07-31
1,1001,2020-08-07,574,574,455,369,369,153,139,0,0,455,42,42,41,41,38,37,10012020-08-07
2,1001,2020-08-14,574,574,455,384,384,125,111,0,0,455,42,42,42,42,38,32,10012020-08-14
3,1001,2020-08-21,574,574,455,348,348,86,74,0,0,455,42,42,38,38,48,35,10012020-08-21
4,1001,2020-08-28,574,574,455,346,346,77,75,0,0,455,42,42,40,40,18,18,10012020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924,868,728,410,389,20,20,0,0,784,56,56,29,29,7,7,780202021-09-24
159867,78020,2021-10-01,924,868,728,362,340,12,12,0,0,784,56,56,43,43,7,7,780202021-10-01
159868,78020,2021-10-08,924,868,728,412,381,12,12,0,0,784,56,56,48,48,7,7,780202021-10-08
159869,78020,2021-10-15,924,868,728,402,365,7,7,0,0,784,56,56,40,40,7,7,780202021-10-15


In [42]:
# Convert NYT date column to datetime data type
hospital_data_df['collection_week'] = pd.to_datetime(hospital_data_df['collection_week'])
hospital_data_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum,total_adult_patients_hospitalized_con_covid_7_day_sum,total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum,total_ped_patients_hospitalized_con_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_con_and_sus_covid_7_day_sum,staffed_icu_adult_patients_con_covid_7_day_sum,fips_date
0,1001,2020-07-31,574,574,455,363,363,117,103,0,0,455,42,42,40,40,28,26,10012020-07-31
1,1001,2020-08-07,574,574,455,369,369,153,139,0,0,455,42,42,41,41,38,37,10012020-08-07
2,1001,2020-08-14,574,574,455,384,384,125,111,0,0,455,42,42,42,42,38,32,10012020-08-14
3,1001,2020-08-21,574,574,455,348,348,86,74,0,0,455,42,42,38,38,48,35,10012020-08-21
4,1001,2020-08-28,574,574,455,346,346,77,75,0,0,455,42,42,40,40,18,18,10012020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924,868,728,410,389,20,20,0,0,784,56,56,29,29,7,7,780202021-09-24
159867,78020,2021-10-01,924,868,728,362,340,12,12,0,0,784,56,56,43,43,7,7,780202021-10-01
159868,78020,2021-10-08,924,868,728,412,381,12,12,0,0,784,56,56,48,48,7,7,780202021-10-08
159869,78020,2021-10-15,924,868,728,402,365,7,7,0,0,784,56,56,40,40,7,7,780202021-10-15


In [43]:
hospital_data_df.dtypes

fips_code                                                                object
collection_week                                                  datetime64[ns]
total_beds_7_day_sum                                                      int64
all_adult_hospital_beds_7_day_sum                                         int64
all_adult_hospital_inpatient_beds_7_day_sum                               int64
inpatient_beds_used_7_day_sum                                             int64
all_adult_hospital_inpatient_bed_occupied_7_day_sum                       int64
total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum             int64
total_adult_patients_hospitalized_con_covid_7_day_sum                     int64
total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum               int64
total_ped_patients_hospitalized_con_covid_7_day_sum                       int64
inpatient_beds_7_day_sum                                                  int64
total_icu_beds_7_day_sum                

In [44]:
# Eliminate rows with fips_date values that do not appear in counties dataset
f = vax_data_df['fips_date'].unique()

In [45]:
x = nyt_data_grouped['fips_date'].unique()

In [46]:
remove_list_dates = list(set(x).symmetric_difference(set(f)))
remove_list_dates

['490252021-01-21',
 '311172021-03-24',
 '300452021-03-10',
 '500032021-05-02',
 '450632021-09-12',
 '211152021-07-31',
 '211652021-07-12',
 '460172021-06-19',
 '132692021-02-19',
 '280332021-08-14',
 '401432021-02-28',
 '170552020-08-07',
 '484272021-01-13',
 '480292021-03-18',
 '510012021-06-16',
 '450572021-05-02',
 '390212021-05-20',
 '410672020-08-07',
 '191872021-09-15',
 '261212021-10-11',
 '460412021-08-15',
 '370292021-04-02',
 '480832020-12-24',
 '10472021-04-22',
 '560152021-01-04',
 '400832020-12-26',
 '210192021-02-06',
 '310332021-03-21',
 '450012021-04-28',
 '360752021-09-11',
 '291792021-03-22',
 '720972021-10-13',
 '10172021-06-29',
 '220192021-05-19',
 '380252021-04-11',
 '311392021-02-17',
 '484872021-02-18',
 '200952021-07-19',
 '131272021-03-10',
 '410672021-02-20',
 '132572021-06-29',
 '530512020-10-23',
 '390032020-12-14',
 '260552021-05-23',
 '500052021-04-04',
 '400452021-01-28',
 '80492021-09-27',
 '190412021-10-05',
 '560212021-05-17',
 '482672021-10-06',
 '1

In [47]:
vax_data_df = vax_data_df[~vax_data_df['fips_date'].isin(remove_list_dates)]
vax_data_df

Unnamed: 0,Date,FIPS,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,fips_date
49245,2021-10-22,16011,38.0,17795,17728,54.4,5292,78.0,97.6,42.6,60.8,83.3,160112021-10-22
49246,2021-10-22,48451,46.2,55067,52271,57.5,15433,82.9,99.1,52.1,64.7,89.3,484512021-10-22
49247,2021-10-22,48077,39.7,4162,4059,48.3,1756,70.7,99.1,43.4,52.6,74.0,480772021-10-22
49248,2021-10-22,51173,43.5,13103,12477,51.2,4617,67.9,83.0,50.1,58.9,76.2,511732021-10-22
49251,2021-10-22,47179,52.8,68276,65043,62.0,21163,87.2,97.9,59.1,69.1,94.3,471792021-10-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1063561,2020-12-18,42069,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,420692020-12-18
1063564,2020-12-18,48099,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,480992020-12-18
1063565,2020-12-18,13005,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,130052020-12-18
1063566,2020-12-18,18035,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,180352020-12-18


In [48]:
vax_data_df_final = vax_data_df.drop(['Date', 'FIPS'], axis=1)
vax_data_df_final

Unnamed: 0,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,fips_date
49245,38.0,17795,17728,54.4,5292,78.0,97.6,42.6,60.8,83.3,160112021-10-22
49246,46.2,55067,52271,57.5,15433,82.9,99.1,52.1,64.7,89.3,484512021-10-22
49247,39.7,4162,4059,48.3,1756,70.7,99.1,43.4,52.6,74.0,480772021-10-22
49248,43.5,13103,12477,51.2,4617,67.9,83.0,50.1,58.9,76.2,511732021-10-22
49251,52.8,68276,65043,62.0,21163,87.2,97.9,59.1,69.1,94.3,471792021-10-22
...,...,...,...,...,...,...,...,...,...,...,...
1063561,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,420692020-12-18
1063564,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,480992020-12-18
1063565,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,130052020-12-18
1063566,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,180352020-12-18


In [49]:
vax_data_df_final.fillna(0)

Unnamed: 0,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,fips_date
49245,38.0,17795,17728,54.4,5292,78.0,97.6,42.6,60.8,83.3,160112021-10-22
49246,46.2,55067,52271,57.5,15433,82.9,99.1,52.1,64.7,89.3,484512021-10-22
49247,39.7,4162,4059,48.3,1756,70.7,99.1,43.4,52.6,74.0,480772021-10-22
49248,43.5,13103,12477,51.2,4617,67.9,83.0,50.1,58.9,76.2,511732021-10-22
49251,52.8,68276,65043,62.0,21163,87.2,97.9,59.1,69.1,94.3,471792021-10-22
...,...,...,...,...,...,...,...,...,...,...,...
1063561,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,420692020-12-18
1063564,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,480992020-12-18
1063565,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,130052020-12-18
1063566,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,180352020-12-18


In [50]:
nyt_data_grouped_final = nyt_data_grouped[~nyt_data_grouped['fips_date'].isin(remove_list_dates)]
nyt_data_grouped_final

Unnamed: 0,fips,collection_week,cases_to_date,deaths_to_date,fips_date
20,1001,2020-12-18,24040,296,10012020-12-18
21,1001,2020-12-25,26881,314,10012020-12-25
22,1001,2021-01-01,28791,333,10012021-01-01
23,1001,2021-01-08,31575,350,10012021-01-08
24,1001,2021-01-15,34774,382,10012021-01-15
...,...,...,...,...,...
208912,78020,2021-09-24,2258,21,780202021-09-24
208913,78020,2021-10-01,2293,21,780202021-10-01
208914,78020,2021-10-08,2317,21,780202021-10-08
208915,78020,2021-10-15,2317,21,780202021-10-15


In [51]:
nyt_data_grouped_final = nyt_data_grouped_final.drop(['collection_week', 'fips'], axis=1)
nyt_data_grouped_final

Unnamed: 0,cases_to_date,deaths_to_date,fips_date
20,24040,296,10012020-12-18
21,26881,314,10012020-12-25
22,28791,333,10012021-01-01
23,31575,350,10012021-01-08
24,34774,382,10012021-01-15
...,...,...,...
208912,2258,21,780202021-09-24
208913,2293,21,780202021-10-01
208914,2317,21,780202021-10-08
208915,2317,21,780202021-10-15


In [52]:
hospital_data_df = hospital_data_df[~hospital_data_df['fips_date'].isin(remove_list_dates)]
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum,total_adult_patients_hospitalized_con_covid_7_day_sum,total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum,total_ped_patients_hospitalized_con_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_con_and_sus_covid_7_day_sum,staffed_icu_adult_patients_con_covid_7_day_sum,fips_date
20,1001,2020-12-18,581,581,462,425,425,220,209,0,0,462,42,42,39,39,27,27,10012020-12-18
21,1001,2020-12-25,581,581,462,445,445,286,281,0,0,462,42,42,42,42,27,27,10012020-12-25
22,1001,2021-01-01,581,581,462,454,454,282,272,0,0,462,42,42,42,42,29,29,10012021-01-01
23,1001,2021-01-08,602,602,462,431,431,291,283,0,0,462,42,42,42,42,38,35,10012021-01-08
24,1001,2021-01-15,602,602,462,423,423,250,247,0,0,462,42,42,42,42,39,39,10012021-01-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020,2021-09-24,924,868,728,410,389,20,20,0,0,784,56,56,29,29,7,7,780202021-09-24
159867,78020,2021-10-01,924,868,728,362,340,12,12,0,0,784,56,56,43,43,7,7,780202021-10-01
159868,78020,2021-10-08,924,868,728,412,381,12,12,0,0,784,56,56,48,48,7,7,780202021-10-08
159869,78020,2021-10-15,924,868,728,402,365,7,7,0,0,784,56,56,40,40,7,7,780202021-10-15


Connection String

In [None]:
db_string = f"postgresql://postgres:YOURPASSWORDHERE@127.0.0.1:5432/COVID_Risk_Analysis"

In [None]:
engine = create_engine(db_string)

In [None]:
nyt_data_grouped_final.to_sql(name='counties', con=engine)

In [None]:
hospital_data_df.to_sql(name='hospitals', con=engine)

In [None]:
vax_data_df_final.to_sql(name='vaccinations', con=engine)

Additional Tables for Generating Static Charts

In [57]:
counties_charts_df = nyt_data_grouped[['collection_week', 'fips', 'cases_to_date', 'deaths_to_date']]
counties_charts_df

Unnamed: 0,collection_week,fips,cases_to_date,deaths_to_date
0,2020-07-31,1001,6817,147
1,2020-08-07,1001,7503,150
2,2020-08-14,1001,8531,158
3,2020-08-21,1001,9009,161
4,2020-08-28,1001,9484,161
...,...,...,...,...
208912,2021-09-24,78020,2258,21
208913,2021-10-01,78020,2293,21
208914,2021-10-08,78020,2317,21
208915,2021-10-15,78020,2317,21


In [56]:
vax_charts_df = vax_data_df[['Date', 'FIPS', 'Series_Complete_Pop_Pct']]
vax_charts_df

Unnamed: 0,Date,FIPS,Series_Complete_Pop_Pct
49245,2021-10-22,16011,38.0
49246,2021-10-22,48451,46.2
49247,2021-10-22,48077,39.7
49248,2021-10-22,51173,43.5
49251,2021-10-22,47179,52.8
...,...,...,...
1063561,2020-12-18,42069,0.0
1063564,2020-12-18,48099,0.0
1063565,2020-12-18,13005,0.0
1063566,2020-12-18,18035,0.0


In [55]:
hospital_charts_df = hospital_data_df[['fips_code', 'collection_week', 'inpatient_beds_used_7_day_sum', 'inpatient_beds_7_day_sum']]
hospital_charts_df

Unnamed: 0,fips_code,collection_week,inpatient_beds_used_7_day_sum,inpatient_beds_7_day_sum
20,1001,2020-12-18,425,462
21,1001,2020-12-25,445,462
22,1001,2021-01-01,454,462
23,1001,2021-01-08,431,462
24,1001,2021-01-15,423,462
...,...,...,...,...
159866,78020,2021-09-24,410,784
159867,78020,2021-10-01,362,784
159868,78020,2021-10-08,412,784
159869,78020,2021-10-15,402,784


In [58]:
hospital_charts_df["Percentage_inpatient_beds_used"] = hospital_charts_df["inpatient_beds_used_7_day_sum"]/hospital_charts_df["inpatient_beds_7_day_sum"]
hospital_charts_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,fips_code,collection_week,inpatient_beds_used_7_day_sum,inpatient_beds_7_day_sum,Percentage_inpatient_beds_used
20,1001,2020-12-18,425,462,0.919913
21,1001,2020-12-25,445,462,0.963203
22,1001,2021-01-01,454,462,0.982684
23,1001,2021-01-08,431,462,0.932900
24,1001,2021-01-15,423,462,0.915584
...,...,...,...,...,...
159866,78020,2021-09-24,410,784,0.522959
159867,78020,2021-10-01,362,784,0.461735
159868,78020,2021-10-08,412,784,0.525510
159869,78020,2021-10-15,402,784,0.512755


In [59]:
hospital_charts_df = hospital_charts_df.drop(columns=['inpatient_beds_used_7_day_sum', 'inpatient_beds_7_day_sum'])
hospital_charts_df

Unnamed: 0,fips_code,collection_week,Percentage_inpatient_beds_used
20,1001,2020-12-18,0.919913
21,1001,2020-12-25,0.963203
22,1001,2021-01-01,0.982684
23,1001,2021-01-08,0.932900
24,1001,2021-01-15,0.915584
...,...,...,...
159866,78020,2021-09-24,0.522959
159867,78020,2021-10-01,0.461735
159868,78020,2021-10-08,0.525510
159869,78020,2021-10-15,0.512755


In [None]:
counties_charts_df.to_sql(name='counties_charts', con=engine)

In [None]:
vax_charts_df.to_sql(name='vaccinations_chart', con=engine)

In [None]:
hospital_charts_df.to_sql(name='hospital_chart', con=engine)