In [149]:
# Import the requests library.
import requests
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
from sqlalchemy import create_engine
from config import db_password
import numpy as np

In [150]:
# Read NYT data into dataframe
nyt_data = pd.DataFrame(pd.read_csv('us-counties.csv'))
nyt_data

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0
...,...,...,...,...,...,...
1891340,2021-11-06,Sweetwater,Wyoming,56037.0,7695,91.0
1891341,2021-11-06,Teton,Wyoming,56039.0,5243,14.0
1891342,2021-11-06,Uinta,Wyoming,56041.0,3901,27.0
1891343,2021-11-06,Washakie,Wyoming,56043.0,1780,35.0


In [151]:
# Convert the date column in the NYT data to a datetime object using pd.to_datetime
nyt_data['date'] = pd.to_datetime(nyt_data['date'])
nyt_data.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [152]:
# 1. Remove the data for dates before 7-31-20 and after 10-22-21
# 2. Bin the date column by weeks ending every friday
# 3. Sum the cases and deaths columns for each week binned

# Step 1:
nyt_data_filtered = nyt_data[nyt_data['date'] >= '2020-07-25']
nyt_data_filtered = nyt_data_filtered[nyt_data_filtered['date'] <= '2021-10-22']
nyt_data_filtered

Unnamed: 0,date,county,state,fips,cases,deaths
366652,2020-07-25,Autauga,Alabama,1001.0,932,21.0
366653,2020-07-25,Baldwin,Alabama,1003.0,2662,18.0
366654,2020-07-25,Barbour,Alabama,1005.0,552,4.0
366655,2020-07-25,Bibb,Alabama,1007.0,318,2.0
366656,2020-07-25,Blount,Alabama,1009.0,637,1.0
...,...,...,...,...,...,...
1842600,2021-10-22,Sweetwater,Wyoming,56037.0,7414,79.0
1842601,2021-10-22,Teton,Wyoming,56039.0,5119,14.0
1842602,2021-10-22,Uinta,Wyoming,56041.0,3781,26.0
1842603,2021-10-22,Washakie,Wyoming,56043.0,1676,33.0


In [153]:
# Step 2:
# Step 3:

nyt_data_grouped = nyt_data_filtered.groupby(['fips', pd.Grouper(key='date', freq='W-FRI')]).agg({'cases':'sum','deaths':'sum'}).reset_index()
nyt_data_grouped

Unnamed: 0,fips,date,cases,deaths
0,1001.0,2020-07-31,6817,147.0
1,1001.0,2020-08-07,7503,150.0
2,1001.0,2020-08-14,8531,158.0
3,1001.0,2020-08-21,9009,161.0
4,1001.0,2020-08-28,9484,161.0
...,...,...,...,...
208977,78030.0,2021-09-24,24720,297.0
208978,78030.0,2021-10-01,24982,304.0
208979,78030.0,2021-10-08,25204,308.0
208980,78030.0,2021-10-15,25290,311.0


In [154]:
# Set each column to a string value so we can concatonate them together 
nyt_data_grouped['fips'] = nyt_data_grouped['fips'].astype('str')
nyt_data_grouped['date'] = nyt_data_grouped['date'].astype('str')
nyt_data_grouped['fips'].dtype

dtype('O')

In [155]:
# Concatonate the date column and the fips column to create fips_date
nyt_data_grouped['fips_date'] = nyt_data_grouped['fips'] + nyt_data_grouped['date']
nyt_data_grouped

Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001.0,2020-07-31,6817,147.0,1001.02020-07-31
1,1001.0,2020-08-07,7503,150.0,1001.02020-08-07
2,1001.0,2020-08-14,8531,158.0,1001.02020-08-14
3,1001.0,2020-08-21,9009,161.0,1001.02020-08-21
4,1001.0,2020-08-28,9484,161.0,1001.02020-08-28
...,...,...,...,...,...
208977,78030.0,2021-09-24,24720,297.0,78030.02021-09-24
208978,78030.0,2021-10-01,24982,304.0,78030.02021-10-01
208979,78030.0,2021-10-08,25204,308.0,78030.02021-10-08
208980,78030.0,2021-10-15,25290,311.0,78030.02021-10-15


In [156]:
# Read Hospital data (healthdata.gov) into dataframe
hospital_data_df = pd.DataFrame(pd.read_csv('COVID-19_Reported_Patient_Impact_and_Hospital_Capacity_by_Facility.csv'))

In [157]:
hospital_data_df

Unnamed: 0,hospital_pk,collection_week,state,ccn,hospital_name,address,city,zip,hospital_subtype,fips_code,...,previous_day_admission_pediatric_covid_confirmed_7_day_coverage,previous_day_admission_adult_covid_suspected_7_day_coverage,previous_day_admission_pediatric_covid_suspected_7_day_coverage,previous_week_personnel_covid_vaccinated_doses_administered_7_day,total_personnel_covid_vaccinated_doses_none_7_day,total_personnel_covid_vaccinated_doses_one_7_day,total_personnel_covid_vaccinated_doses_all_7_day,previous_week_patients_covid_vaccinated_doses_one_7_day,previous_week_patients_covid_vaccinated_doses_all_7_day,is_corrected
0,140158,2021/10/15,IL,140158,INSIGHT HOSPITAL AND MEDICAL CENTER CHICAGO,2525 S MICHIGAN AVE,CHICAGO,60616.0,Short Term,17031.0,...,7,7,7,0.0,,,,0.0,,False
1,450162,2021/10/15,TX,450162,GRACE SURGICAL HOSPITAL,2412 50TH ST,LUBBOCK,79412.0,Short Term,48303.0,...,7,7,7,,,,,,,False
2,451318,2021/10/15,TX,451318,STONEWALL MEMORIAL HOSPITAL DISTRICT,821 NORTH BROADWAY,ASPERMONT,79502.0,Critical Access Hospitals,48433.0,...,7,7,7,,,,,,,False
3,050769,2021/10/08,CA,050769,HOAG ORTHOPEDIC INSTITUTE,16250 SAND CANYON AVENUE,IRVINE,92618.0,Short Term,6059.0,...,7,7,7,,,,,,,False
4,291500,2021/10/08,NV,291500,NATHAN ADELSON HOSPICE,4141 UNIVERSITY CENTER DR,LAS VEGAS,89119.0,Short Term,32003.0,...,7,7,7,,,,,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324743,100291,2020/08/07,FL,100291,STEWARD REGIONAL MEDICAL CENTER,250 NORTH WICKHAM ROAD,MELBOURNE,32935.0,Short Term,12009.0,...,7,7,7,,,,,,,False
324744,250096,2020/07/31,MS,250096,MERIT HEALTH RANKIN,350 CROSSGATES BLVD,BRANDON,39042.0,Short Term,28121.0,...,0,0,0,,,,,,,False
324745,241345,2020/07/31,MN,241345,MAYO CLINIC HEALTH SYSTEM-WASECA,501 NORTH STATE STREET,WASECA,56093.0,Critical Access Hospitals,27161.0,...,7,7,7,,,,,,,False
324746,271311,2020/08/07,MT,271311,BIG SANDY MEDICAL CENTER,166 MONTANA AVE E,BIG SANDY,59520.0,Critical Access Hospitals,30015.0,...,1,1,1,,,,,,,False


In [158]:
# Create list of columns to drop
drop_list_hospitals = ['hospital_pk', 'state', 'ccn', 'hospital_name', 'address', 'city', 'zip', 'hospital_subtype', 
                       'is_metro_micro', 'total_beds_7_day_avg', 'all_adult_hospital_beds_7_day_avg', 
                       'all_adult_hospital_inpatient_beds_7_day_avg', 'inpatient_beds_used_7_day_avg', 
                       'all_adult_hospital_inpatient_bed_occupied_7_day_avg', 
                       'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', 
                       'total_adult_patients_hospitalized_confirmed_covid_7_day_avg', 
                       'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', 
                       'total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', 'inpatient_beds_7_day_avg', 
                       'total_icu_beds_7_day_avg', 'total_staffed_adult_icu_beds_7_day_avg', 'icu_beds_used_7_day_avg', 
                       'staffed_adult_icu_bed_occupancy_7_day_avg', 
                       'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg', 
                       'staffed_icu_adult_patients_confirmed_covid_7_day_avg', 
                       'total_patients_hospitalized_confirmed_influenza_7_day_avg', 
                       'icu_patients_confirmed_influenza_7_day_avg', 
                       'total_patients_hospitalized_confirmed_influenza_and_covid_7_day_avg', 
                       'all_adult_hospital_inpatient_beds_7_day_coverage', 
                       'inpatient_beds_used_7_day_coverage', 'all_adult_hospital_inpatient_bed_occupied_7_day_coverage', 
                       'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', 
                       'total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', 
                       'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', 
                       'total_pediatric_patients_hospitalized_confirmed_covid_7_day_coverage', 
                       'inpatient_beds_7_day_coverage', 'total_icu_beds_7_day_coverage', 
                       'total_staffed_adult_icu_beds_7_day_coverage', 'icu_beds_used_7_day_coverage', 
                       'staffed_adult_icu_bed_occupancy_7_day_coverage', 
                       'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage', 
                       'staffed_icu_adult_patients_confirmed_covid_7_day_coverage', 
                       'total_patients_hospitalized_confirmed_influenza_7_day_coverage', 
                       'icu_patients_confirmed_influenza_7_day_coverage', 
                       'total_patients_hospitalized_confirmed_influenza_and_covid_7_day_coverage', 
                       'previous_day_admission_adult_covid_confirmed_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_18-19_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_20-29_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_30-39_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_40-49_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_50-59_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_60-69_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_70-79_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_80+_7_day_sum', 
                       'previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', 
                       'previous_day_admission_pediatric_covid_confirmed_7_day_sum', 
                       'previous_day_covid_ED_visits_7_day_sum', 'previous_day_admission_adult_covid_suspected_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_18-19_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_20-29_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_30-39_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_40-49_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_50-59_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_60-69_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_70-79_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_80+_7_day_sum', 
                       'previous_day_admission_adult_covid_suspected_unknown_7_day_sum', 
                       'previous_day_admission_pediatric_covid_suspected_7_day_sum', 
                       'previous_day_total_ED_visits_7_day_sum', 'previous_day_admission_influenza_confirmed_7_day_sum', 
                       'geocoded_hospital_address', 'hhs_ids', 
                       'previous_day_admission_adult_covid_confirmed_7_day_coverage', 
                       'previous_day_admission_pediatric_covid_confirmed_7_day_coverage', 
                       'previous_day_admission_adult_covid_suspected_7_day_coverage', 
                       'previous_day_admission_pediatric_covid_suspected_7_day_coverage', 
                       'previous_week_personnel_covid_vaccinated_doses_administered_7_day', 
                       'total_personnel_covid_vaccinated_doses_none_7_day', 
                       'total_personnel_covid_vaccinated_doses_one_7_day', 
                       'total_personnel_covid_vaccinated_doses_all_7_day', 
                       'previous_week_patients_covid_vaccinated_doses_one_7_day', 
                       'previous_week_patients_covid_vaccinated_doses_all_7_day', 'is_corrected', 
                       'total_beds_7_day_coverage', 'all_adult_hospital_beds_7_day_coverage',
                       'total_patients_hospitalized_confirmed_influenza_7_day_sum', 
                       'icu_patients_confirmed_influenza_7_day_sum', 
                       'total_patients_hospitalized_confirmed_influenza_and_covid_7_day_sum'
]

In [159]:
# Remove drop_list columns from df
hospital_data_df.drop(drop_list_hospitals, axis=1, inplace=True)
hospital_data_df.head()

Unnamed: 0,collection_week,fips_code,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum
0,2021/10/15,17031.0,350.0,350.0,252.0,160.0,160.0,7.0,7.0,0.0,0.0,252.0,28.0,28.0,16.0,16.0,7.0,7.0
1,2021/10/15,48303.0,268.0,268.0,261.0,34.0,34.0,0.0,0.0,0.0,0.0,261.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2021/10/15,48433.0,77.0,77.0,63.0,0.0,0.0,0.0,0.0,0.0,0.0,63.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2021/10/08,6059.0,245.0,245.0,133.0,126.0,126.0,0.0,0.0,0.0,0.0,133.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2021/10/08,32003.0,266.0,266.0,266.0,191.0,191.0,11.0,11.0,0.0,0.0,266.0,0.0,0.0,0.0,0.0,0.0,0.0


In [160]:
# Convert the date column in the hospital data to a datetime object using pd.to_datetime
hospital_data_df['collection_week'] = pd.to_datetime(hospital_data_df['collection_week'])
# Check to verify the date is now a datetime object
hospital_data_df.dtypes

collection_week                                                                  datetime64[ns]
fips_code                                                                               float64
total_beds_7_day_sum                                                                    float64
all_adult_hospital_beds_7_day_sum                                                       float64
all_adult_hospital_inpatient_beds_7_day_sum                                             float64
inpatient_beds_used_7_day_sum                                                           float64
all_adult_hospital_inpatient_bed_occupied_7_day_sum                                     float64
total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum               float64
total_adult_patients_hospitalized_confirmed_covid_7_day_sum                             float64
total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum           float64
total_pediatric_patients_hospitalized_co

In [161]:
# Sort the hospital data by date
hospital_data_df = hospital_data_df.sort_values(by='collection_week')
hospital_data_df

# The hospital data is binned by collection week ending on every friday

Unnamed: 0,collection_week,fips_code,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum
324747,2020-07-31,13013.0,266.0,,,199.0,,0.0,,-999999.0,,266.0,0.0,,0.0,,,
319353,2020-07-31,12099.0,3332.0,2710.0,2710.0,1873.0,1859.0,266.0,266.0,0.0,0.0,3206.0,287.0,287.0,178.0,178.0,41.0,41.0
319356,2020-07-31,19049.0,102.0,126.0,84.0,-999999.0,9.0,4.0,-999999.0,0.0,0.0,84.0,0.0,0.0,0.0,0.0,0.0,0.0
319357,2020-07-31,6037.0,824.0,404.0,341.0,296.0,296.0,5.0,0.0,0.0,0.0,341.0,20.0,20.0,18.0,18.0,0.0,0.0
319358,2020-07-31,39095.0,1476.0,,,716.0,,52.0,52.0,0.0,0.0,1446.0,132.0,,94.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4644,2021-10-22,21231.0,175.0,79.0,100.0,79.0,79.0,-999999.0,-999999.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0
4645,2021-10-22,21111.0,12047.0,7935.0,6736.0,7319.0,6245.0,1071.0,422.0,257.0,42.0,7897.0,1091.0,804.0,695.0,695.0,185.0,107.0
9753,2021-10-22,48303.0,965.0,358.0,274.0,714.0,241.0,7.0,7.0,14.0,14.0,881.0,164.0,26.0,79.0,7.0,-999999.0,-999999.0
9755,2021-10-22,5009.0,623.0,560.0,364.0,240.0,227.0,6.0,6.0,0.0,0.0,448.0,84.0,84.0,26.0,26.0,5.0,5.0


In [162]:
# Group and sort hospital_data_df by fips_code
hospital_data_df = hospital_data_df.groupby(['fips_code', pd.Grouper(key='collection_week', freq='W-FRI')]).agg({'total_beds_7_day_sum':'sum','all_adult_hospital_beds_7_day_sum':'sum', 'all_adult_hospital_inpatient_beds_7_day_sum':'sum', 'inpatient_beds_used_7_day_sum':'sum', 'all_adult_hospital_inpatient_bed_occupied_7_day_sum':'sum', 
                                                                                                                       'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'sum', 'total_adult_patients_hospitalized_confirmed_covid_7_day_sum': 'sum', 'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'sum', 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum':'sum', 'inpatient_beds_7_day_sum':'sum', 'total_icu_beds_7_day_sum':'sum', 
                                                                                                                       'total_staffed_adult_icu_beds_7_day_sum':'sum', 'icu_beds_used_7_day_sum':'sum', 'staffed_adult_icu_bed_occupancy_7_day_sum':'sum', 'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum':'sum', 'staffed_icu_adult_patients_confirmed_covid_7_day_sum':'sum'}).reset_index()
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum
0,1001.0,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,-999999.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0
1,1001.0,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,-999999.0,-999999.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0
2,1001.0,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,-999999.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0
3,1001.0,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0
4,1001.0,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0
159867,78020.0,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0
159868,78020.0,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0
159869,78020.0,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0


In [163]:
# Convert the hospital date and fips_code column to strings
hospital_data_df['collection_week'] = hospital_data_df['collection_week'].astype('str')
hospital_data_df['fips_code'] = hospital_data_df['fips_code'].astype('str')

In [164]:
# Concat the date and fips_code for new columns fips_date
hospital_data_df['fips_date'] = hospital_data_df['fips_code'] + hospital_data_df['collection_week']
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum,fips_date
0,1001.0,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,-999999.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,1001.02020-07-31
1,1001.0,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,-999999.0,-999999.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,1001.02020-08-07
2,1001.0,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,-999999.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,1001.02020-08-14
3,1001.0,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,1001.02020-08-21
4,1001.0,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,1001.02020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,78020.02021-09-24
159867,78020.0,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,78020.02021-10-01
159868,78020.0,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,78020.02021-10-08
159869,78020.0,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,78020.02021-10-15


In [165]:
# Convert all negative values to 0
num = hospital_data_df._get_numeric_data()
num[num < 0] = 0

In [166]:
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum,fips_date
0,1001.0,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,1001.02020-07-31
1,1001.0,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,0.0,0.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,1001.02020-08-07
2,1001.0,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,0.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,1001.02020-08-14
3,1001.0,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,1001.02020-08-21
4,1001.0,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,1001.02020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,78020.02021-09-24
159867,78020.0,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,78020.02021-10-01
159868,78020.0,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,78020.02021-10-08
159869,78020.0,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,78020.02021-10-15


In [167]:
f = hospital_data_df['fips_date'].unique()

In [168]:
x = nyt_data_grouped['fips_date'].unique()

In [169]:
# Create list of fips_date values that only appear in one data set
remove_list = list(set(x).symmetric_difference(set(f)))
remove_list

['24019.02020-08-07',
 '5091.02021-08-20',
 '46003.02021-03-26',
 '48327.02020-08-28',
 '48261.02020-10-16',
 '31087.02021-08-13',
 '72089.02021-03-19',
 '18155.02020-08-14',
 '29063.02020-12-11',
 '8061.02020-07-31',
 '16075.02021-06-18',
 '38091.02020-08-28',
 '31115.02020-11-27',
 '48261.02021-10-01',
 '29209.02020-08-28',
 '22107.02021-09-03',
 '19195.02021-10-22',
 '31113.02020-12-18',
 '31007.02020-10-02',
 '38077.02021-07-09',
 '27039.02021-08-13',
 '12041.02021-09-03',
 '29179.02021-05-14',
 '48387.02020-09-04',
 '41009.02020-09-04',
 '13079.02021-03-19',
 '8047.02021-02-05',
 '54063.02021-04-23',
 '48311.02021-06-25',
 '48385.02021-03-12',
 '46061.02021-10-08',
 '32029.02021-01-22',
 '21237.02020-10-16',
 '48361.02020-11-20',
 '51097.02021-02-19',
 '46021.02021-02-26',
 '31113.02021-09-24',
 '51135.02021-10-01',
 '13239.02021-07-02',
 '13053.02021-01-15',
 '35023.02020-12-18',
 '39015.02021-05-28',
 '18045.02021-03-26',
 '72019.02020-11-27',
 '47027.02021-01-08',
 '8027.02021-

In [170]:
# Eliminate rows with fips_date values that do not appear in counties dataset
hospital_data_df = hospital_data_df[~hospital_data_df['fips_date'].isin(remove_list)]
hospital_data_df

Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_adult_patients_hospitalized_confirmed_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum,total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum,staffed_icu_adult_patients_confirmed_covid_7_day_sum,fips_date
0,1001.0,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,1001.02020-07-31
1,1001.0,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,0.0,0.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,1001.02020-08-07
2,1001.0,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,0.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,1001.02020-08-14
3,1001.0,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,1001.02020-08-21
4,1001.0,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,1001.02020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,78020.02021-09-24
159867,78020.0,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,78020.02021-10-01
159868,78020.0,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,78020.02021-10-08
159869,78020.0,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,78020.02021-10-15


In [171]:
# Shorten column names to avoid truncating in postgres (confirmed=con, suspected=sus, pediatric=ped)
hospital_data_df.rename({'total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum', 
                         'total_adult_patients_hospitalized_confirmed_covid_7_day_sum':'total_adult_patients_hospitalized_con_covid_7_day_sum', 
                         'total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum':'total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum', 
                         'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum':'total_ped_patients_hospitalized_con_covid_7_day_sum', 
                         'staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum':'staffed_icu_adult_patients_con_and_sus_covid_7_day_sum', 
                         'staffed_icu_adult_patients_confirmed_covid_7_day_sum':'staffed_icu_adult_patients_con_covid_7_day_sum'}, axis=1, inplace=True)
hospital_data_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum,total_adult_patients_hospitalized_con_covid_7_day_sum,total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum,total_ped_patients_hospitalized_con_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_con_and_sus_covid_7_day_sum,staffed_icu_adult_patients_con_covid_7_day_sum,fips_date
0,1001.0,2020-07-31,574.0,574.0,455.0,363.0,363.0,117.0,103.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,28.0,26.0,1001.02020-07-31
1,1001.0,2020-08-07,574.0,574.0,455.0,369.0,369.0,153.0,139.0,0.0,0.0,455.0,42.0,42.0,41.0,41.0,38.0,37.0,1001.02020-08-07
2,1001.0,2020-08-14,574.0,574.0,455.0,384.0,384.0,125.0,111.0,0.0,0.0,455.0,42.0,42.0,42.0,42.0,38.0,32.0,1001.02020-08-14
3,1001.0,2020-08-21,574.0,574.0,455.0,348.0,348.0,86.0,74.0,0.0,0.0,455.0,42.0,42.0,38.0,38.0,48.0,35.0,1001.02020-08-21
4,1001.0,2020-08-28,574.0,574.0,455.0,346.0,346.0,77.0,75.0,0.0,0.0,455.0,42.0,42.0,40.0,40.0,18.0,18.0,1001.02020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924.0,868.0,728.0,410.0,389.0,20.0,20.0,0.0,0.0,784.0,56.0,56.0,29.0,29.0,7.0,7.0,78020.02021-09-24
159867,78020.0,2021-10-01,924.0,868.0,728.0,362.0,340.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,43.0,43.0,7.0,7.0,78020.02021-10-01
159868,78020.0,2021-10-08,924.0,868.0,728.0,412.0,381.0,12.0,12.0,0.0,0.0,784.0,56.0,56.0,48.0,48.0,7.0,7.0,78020.02021-10-08
159869,78020.0,2021-10-15,924.0,868.0,728.0,402.0,365.0,7.0,7.0,0.0,0.0,784.0,56.0,56.0,40.0,40.0,7.0,7.0,78020.02021-10-15


In [172]:
# Eliminate rows with fips_date values that do not appear in hospitals dataset
nyt_data_grouped = nyt_data_grouped[~nyt_data_grouped['fips_date'].isin(remove_list)]
nyt_data_grouped

Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001.0,2020-07-31,6817,147.0,1001.02020-07-31
1,1001.0,2020-08-07,7503,150.0,1001.02020-08-07
2,1001.0,2020-08-14,8531,158.0,1001.02020-08-14
3,1001.0,2020-08-21,9009,161.0,1001.02020-08-21
4,1001.0,2020-08-28,9484,161.0,1001.02020-08-28
...,...,...,...,...,...
208912,78020.0,2021-09-24,2258,21.0,78020.02021-09-24
208913,78020.0,2021-10-01,2293,21.0,78020.02021-10-01
208914,78020.0,2021-10-08,2317,21.0,78020.02021-10-08
208915,78020.0,2021-10-15,2317,21.0,78020.02021-10-15


In [173]:
nyt_data_grouped.dtypes

fips          object
date          object
cases          int64
deaths       float64
fips_date     object
dtype: object

In [174]:
# Convert float data types to integers in NYT data set
columns = ['deaths']
nyt_data_grouped[columns] = nyt_data_grouped[columns].applymap(np.int64)
nyt_data_grouped

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001.0,2020-07-31,6817,147,1001.02020-07-31
1,1001.0,2020-08-07,7503,150,1001.02020-08-07
2,1001.0,2020-08-14,8531,158,1001.02020-08-14
3,1001.0,2020-08-21,9009,161,1001.02020-08-21
4,1001.0,2020-08-28,9484,161,1001.02020-08-28
...,...,...,...,...,...
208912,78020.0,2021-09-24,2258,21,78020.02021-09-24
208913,78020.0,2021-10-01,2293,21,78020.02021-10-01
208914,78020.0,2021-10-08,2317,21,78020.02021-10-08
208915,78020.0,2021-10-15,2317,21,78020.02021-10-15


In [175]:
# Convert NYT date column to datetime data type
nyt_data_grouped['date'] = pd.to_datetime(nyt_data_grouped['date'])
nyt_data_grouped

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,fips,date,cases,deaths,fips_date
0,1001.0,2020-07-31,6817,147,1001.02020-07-31
1,1001.0,2020-08-07,7503,150,1001.02020-08-07
2,1001.0,2020-08-14,8531,158,1001.02020-08-14
3,1001.0,2020-08-21,9009,161,1001.02020-08-21
4,1001.0,2020-08-28,9484,161,1001.02020-08-28
...,...,...,...,...,...
208912,78020.0,2021-09-24,2258,21,78020.02021-09-24
208913,78020.0,2021-10-01,2293,21,78020.02021-10-01
208914,78020.0,2021-10-08,2317,21,78020.02021-10-08
208915,78020.0,2021-10-15,2317,21,78020.02021-10-15


In [176]:
nyt_data_grouped.dtypes

fips                 object
date         datetime64[ns]
cases                 int64
deaths                int64
fips_date            object
dtype: object

In [191]:
# Rename NYT columns 
nyt_data_grouped.rename({'date':'collection_week', 'cases':'cases_to_date', 'deaths':'deaths_to_date'}, axis=1, inplace=True)
nyt_data_grouped

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,fips,collection_week,cases_to_date,deaths_to_date,fips_date
0,1001.0,2020-07-31,6817,147,1001.02020-07-31
1,1001.0,2020-08-07,7503,150,1001.02020-08-07
2,1001.0,2020-08-14,8531,158,1001.02020-08-14
3,1001.0,2020-08-21,9009,161,1001.02020-08-21
4,1001.0,2020-08-28,9484,161,1001.02020-08-28
...,...,...,...,...,...
208912,78020.0,2021-09-24,2258,21,78020.02021-09-24
208913,78020.0,2021-10-01,2293,21,78020.02021-10-01
208914,78020.0,2021-10-08,2317,21,78020.02021-10-08
208915,78020.0,2021-10-15,2317,21,78020.02021-10-15


In [192]:
hospital_data_df.dtypes

fips_code                                                                object
collection_week                                                  datetime64[ns]
total_beds_7_day_sum                                                      int64
all_adult_hospital_beds_7_day_sum                                         int64
all_adult_hospital_inpatient_beds_7_day_sum                               int64
inpatient_beds_used_7_day_sum                                             int64
all_adult_hospital_inpatient_bed_occupied_7_day_sum                       int64
total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum             int64
total_adult_patients_hospitalized_con_covid_7_day_sum                     int64
total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum               int64
total_ped_patients_hospitalized_con_covid_7_day_sum                       int64
inpatient_beds_7_day_sum                                                  int64
total_icu_beds_7_day_sum                

In [193]:
# Convert float data types to integers in hospitals data set
columns = ['total_beds_7_day_sum', 'all_adult_hospital_beds_7_day_sum', 
           'all_adult_hospital_inpatient_beds_7_day_sum', 'inpatient_beds_used_7_day_sum', 
           'all_adult_hospital_inpatient_bed_occupied_7_day_sum', 
           'total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum', 
           'total_adult_patients_hospitalized_con_covid_7_day_sum', 
           'total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum', 
           'total_ped_patients_hospitalized_con_covid_7_day_sum', 'inpatient_beds_7_day_sum', 'total_icu_beds_7_day_sum', 
           'total_staffed_adult_icu_beds_7_day_sum', 'icu_beds_used_7_day_sum', 'staffed_adult_icu_bed_occupancy_7_day_sum',
           'staffed_icu_adult_patients_con_and_sus_covid_7_day_sum', 'staffed_icu_adult_patients_con_covid_7_day_sum']
hospital_data_df[columns] = hospital_data_df[columns].applymap(np.int64)
hospital_data_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum,total_adult_patients_hospitalized_con_covid_7_day_sum,total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum,total_ped_patients_hospitalized_con_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_con_and_sus_covid_7_day_sum,staffed_icu_adult_patients_con_covid_7_day_sum,fips_date
0,1001.0,2020-07-31,574,574,455,363,363,117,103,0,0,455,42,42,40,40,28,26,1001.02020-07-31
1,1001.0,2020-08-07,574,574,455,369,369,153,139,0,0,455,42,42,41,41,38,37,1001.02020-08-07
2,1001.0,2020-08-14,574,574,455,384,384,125,111,0,0,455,42,42,42,42,38,32,1001.02020-08-14
3,1001.0,2020-08-21,574,574,455,348,348,86,74,0,0,455,42,42,38,38,48,35,1001.02020-08-21
4,1001.0,2020-08-28,574,574,455,346,346,77,75,0,0,455,42,42,40,40,18,18,1001.02020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924,868,728,410,389,20,20,0,0,784,56,56,29,29,7,7,78020.02021-09-24
159867,78020.0,2021-10-01,924,868,728,362,340,12,12,0,0,784,56,56,43,43,7,7,78020.02021-10-01
159868,78020.0,2021-10-08,924,868,728,412,381,12,12,0,0,784,56,56,48,48,7,7,78020.02021-10-08
159869,78020.0,2021-10-15,924,868,728,402,365,7,7,0,0,784,56,56,40,40,7,7,78020.02021-10-15


In [194]:
# Convert NYT date column to datetime data type
hospital_data_df['collection_week'] = pd.to_datetime(hospital_data_df['collection_week'])
hospital_data_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,fips_code,collection_week,total_beds_7_day_sum,all_adult_hospital_beds_7_day_sum,all_adult_hospital_inpatient_beds_7_day_sum,inpatient_beds_used_7_day_sum,all_adult_hospital_inpatient_bed_occupied_7_day_sum,total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum,total_adult_patients_hospitalized_con_covid_7_day_sum,total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum,total_ped_patients_hospitalized_con_covid_7_day_sum,inpatient_beds_7_day_sum,total_icu_beds_7_day_sum,total_staffed_adult_icu_beds_7_day_sum,icu_beds_used_7_day_sum,staffed_adult_icu_bed_occupancy_7_day_sum,staffed_icu_adult_patients_con_and_sus_covid_7_day_sum,staffed_icu_adult_patients_con_covid_7_day_sum,fips_date
0,1001.0,2020-07-31,574,574,455,363,363,117,103,0,0,455,42,42,40,40,28,26,1001.02020-07-31
1,1001.0,2020-08-07,574,574,455,369,369,153,139,0,0,455,42,42,41,41,38,37,1001.02020-08-07
2,1001.0,2020-08-14,574,574,455,384,384,125,111,0,0,455,42,42,42,42,38,32,1001.02020-08-14
3,1001.0,2020-08-21,574,574,455,348,348,86,74,0,0,455,42,42,38,38,48,35,1001.02020-08-21
4,1001.0,2020-08-28,574,574,455,346,346,77,75,0,0,455,42,42,40,40,18,18,1001.02020-08-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159866,78020.0,2021-09-24,924,868,728,410,389,20,20,0,0,784,56,56,29,29,7,7,78020.02021-09-24
159867,78020.0,2021-10-01,924,868,728,362,340,12,12,0,0,784,56,56,43,43,7,7,78020.02021-10-01
159868,78020.0,2021-10-08,924,868,728,412,381,12,12,0,0,784,56,56,48,48,7,7,78020.02021-10-08
159869,78020.0,2021-10-15,924,868,728,402,365,7,7,0,0,784,56,56,40,40,7,7,78020.02021-10-15


In [195]:
hospital_data_df.dtypes

fips_code                                                                object
collection_week                                                  datetime64[ns]
total_beds_7_day_sum                                                      int64
all_adult_hospital_beds_7_day_sum                                         int64
all_adult_hospital_inpatient_beds_7_day_sum                               int64
inpatient_beds_used_7_day_sum                                             int64
all_adult_hospital_inpatient_bed_occupied_7_day_sum                       int64
total_adult_patients_hospitalized_con_and_sus_covid_7_day_sum             int64
total_adult_patients_hospitalized_con_covid_7_day_sum                     int64
total_ped_patients_hospitalized_con_and_sus_covid_7_day_sum               int64
total_ped_patients_hospitalized_con_covid_7_day_sum                       int64
inpatient_beds_7_day_sum                                                  int64
total_icu_beds_7_day_sum                

In [196]:
# Export grouped hospital data to CSV as hospitals_grouped.csv
hospital_data_df.to_csv(r'C:\Users\sophc\OneDrive\Desktop\Classwork\Group-3\hospitals_grouped.csv', index=False)

In [197]:
# Export grouped NYT data to CSV as counties_grouped.csv
nyt_data_grouped.to_csv(r'C:\Users\sophc\OneDrive\Desktop\Classwork\Group-3\counties_grouped.csv', index=False)

Connection String Attempt

In [198]:
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/COVID_Risk_Final_Project"

In [199]:
engine = create_engine(db_string)

In [201]:
nyt_data_grouped.to_sql(name='counties', con=engine)

In [202]:
hospital_data_df.to_sql(name='hospitals', con=engine)