In [25]:
import pandas as pd
import os
from datetime import date, datetime

In [26]:
# Load the two raw CSV exports from <current working directory>/data/raw.
# The paths are assembled with os.path.join instead of a hard-coded
# backslash string: on macOS/Linux '\' is an ordinary filename character,
# not a directory separator, so the original paths only resolved on Windows.
raw_data_dir = os.path.join(os.getcwd(), 'data', 'raw')
tests_and_deaths_raw = pd.read_csv(
    os.path.join(raw_data_dir, 'COVID-19_Cases__Tests__and_Deaths_by_ZIP_Code.csv')
)
vaccinations_raw = pd.read_csv(
    os.path.join(raw_data_dir, 'COVID-19_Vaccinations_by_ZIP_Code.csv')
)

In [27]:
# Build a "<year>-<week>" string key: the year is the calendar year of
# 'Week Start', the week is the dataset's own 'Week Number' column.
# NOTE(review): the vaccination key elsewhere in this notebook is derived from
# ISO calendar weeks -- confirm 'Week Number' here follows a compatible scheme.
week_start_year = pd.to_datetime(
    tests_and_deaths_raw['Week Start'], format='%m/%d/%Y'
).dt.year
week_number_text = tests_and_deaths_raw['Week Number'].astype("string")
tests_and_deaths_raw['Year-Week'] = week_start_year.astype("string") + '-' + week_number_text

# Align the ZIP column label with the vaccination data ('Zip Code') and
# index the frame by the (Zip Code, Year-Week) pair used for the later join.
tests_and_deaths_raw = (
    tests_and_deaths_raw
    .rename(columns={'ZIP Code': 'Zip Code'})
    .set_index(['Zip Code', 'Year-Week'])
)

In [28]:
# Derive the weekly key for the vaccination records.
# 'Date' is parsed once, and BOTH the week and the year are taken from the
# ISO calendar. Pairing the ISO week with the plain calendar year (.dt.year)
# mislabels the days around New Year: 2021-01-01 is ISO week 53 of ISO year
# 2020, so the calendar-year pairing produced a stray "2021-53" key, and
# 2022-01-01 (ISO week 52 of 2021) was keyed "2022-52", which merges it with
# late December 2022 when the rows are grouped by 'Year-Week'.
vaccination_iso_calendar = pd.to_datetime(
    vaccinations_raw['Date'], format='%m/%d/%Y'
).dt.isocalendar()
vaccinations_raw['Week Number'] = vaccination_iso_calendar.week
# ISO year (not calendar year) so that it always matches 'Week Number'.
vaccinations_raw['Year'] = vaccination_iso_calendar.year
vaccinations_raw['Year-Week'] = vaccinations_raw['Year'].astype("string") +'-'+ vaccinations_raw['Week Number'].astype("string")

In [29]:
# Columns holding the '... - Daily' dose counts (overall and per age band).
daily_dose_columns = [
    'Total Doses - Daily',
    '1st Dose - Daily',
    'Vaccine Series Completed - Daily',
    'Total Doses - Daily - Age 5+',
    'Total Doses - Daily - Age 12+',
    'Total Doses - Daily - Age 18+',
    'Total Doses - Daily - Age 65+',
    '1st Dose - Daily - Age 5+',
    '1st Dose - Daily - Age 12+',
    '1st Dose - Daily - Age 18+',
    '1st Dose - Daily - Age 65+',
    'Vaccine Series Completed - Daily - Age 5+',
    'Vaccine Series Completed - Daily - Age 12+',
    'Vaccine Series Completed - Daily - Age 18+',
    'Vaccine Series Completed - Daily - Age 65+',
]

# Collapse to one row per (Zip Code, Year-Week) by summing the daily counts.
vaccination_weekly_df = (
    vaccinations_raw[['Zip Code', 'Year-Week'] + daily_dose_columns]
    .groupby(['Zip Code', 'Year-Week'])
    .sum()
)

In [30]:
# Columns holding the '... - Cumulative' / '... - Percent Population' values.
# The double space in the last label is deliberate: it is the column name as it
# appears in the data (see the rendered .columns output in this notebook).
cumulative_columns = [
    'Total Doses - Cumulative',
    '1st Dose - Cumulative',
    '1st Dose - Percent Population',
    'Vaccine Series Completed - Cumulative',
    'Vaccine Series Completed  - Percent Population',
]

# Collapse to one row per (Zip Code, Year-Week), keeping the largest value
# observed within each group.
vaccination_weekly_cumilative_df = (
    vaccinations_raw[['Zip Code', 'Year-Week'] + cumulative_columns]
    .groupby(['Zip Code', 'Year-Week'])
    .max()
)

In [31]:
# Put the weekly sums and the weekly cumulative maxima side by side.
# Both frames are indexed by (Zip Code, Year-Week); the inner join keeps only
# index pairs present in both.
vaccination_weekly_merged_df = vaccination_weekly_df.join(
    vaccination_weekly_cumilative_df,
    how='inner',
)

In [39]:
# Inspect the column labels of the merged weekly vaccination frame
# (the summed daily columns followed by the cumulative columns).
vaccination_weekly_merged_df.columns

Index(['Total Doses - Daily', '1st Dose - Daily',
       'Vaccine Series Completed - Daily', 'Total Doses - Daily - Age 5+',
       'Total Doses - Daily - Age 12+', 'Total Doses - Daily - Age 18+',
       'Total Doses - Daily - Age 65+', '1st Dose - Daily - Age 5+',
       '1st Dose - Daily - Age 12+', '1st Dose - Daily - Age 18+',
       '1st Dose - Daily - Age 65+',
       'Vaccine Series Completed - Daily - Age 5+',
       'Vaccine Series Completed - Daily - Age 12+',
       'Vaccine Series Completed - Daily - Age 18+',
       'Vaccine Series Completed - Daily - Age 65+',
       'Total Doses - Cumulative', '1st Dose - Cumulative',
       '1st Dose - Percent Population',
       'Vaccine Series Completed - Cumulative',
       'Vaccine Series Completed  - Percent Population'],
      dtype='object')

In [33]:
# Attach the weekly vaccination figures to every tests/deaths row.
# A left join keeps all tests/deaths rows; rows without a matching
# (Zip Code, Year-Week) vaccination record get NaN, which is then replaced by 0.
# NOTE(review): fillna(0) is applied to the whole joined frame, so any missing
# values already present in the cases/tests/deaths columns are zero-filled too
# -- confirm that is intended.
combined_dataset = (
    tests_and_deaths_raw
    .join(vaccination_weekly_merged_df, on=['Zip Code', 'Year-Week'], how='left')
    .fillna(0)
)

In [34]:
# Turn the index levels back into ordinary columns (drop=False keeps them
# as data instead of discarding them) and fall back to a default integer index.
combined_dataset = combined_dataset.reset_index(drop=False)

In [35]:
# Keep only rows whose 'Zip Code' is not the literal placeholder 'Unknown'.
has_known_zip = combined_dataset['Zip Code'] != 'Unknown'
combined_dataset = combined_dataset[has_known_zip]

In [37]:
# Persist the combined table as CSV.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (no-op when it is already there).
combined_dataset_path = 'data/processed/combined_dataset.csv'
os.makedirs(os.path.dirname(combined_dataset_path), exist_ok=True)
# NOTE(review): the DataFrame index is written as an unnamed first column;
# pass index=False if downstream readers do not rely on it -- confirm.
combined_dataset.to_csv(combined_dataset_path)

In [38]:
# Display just the 'Year-Week' key column of the combined dataset
# (double brackets select it as a one-column DataFrame).
combined_dataset[['Year-Week']]

Index(['Zip Code', 'Year-Week', 'Week Number', 'Week Start', 'Week End',
       'Cases - Weekly', 'Cases - Cumulative', 'Case Rate - Weekly',
       'Case Rate - Cumulative', 'Tests - Weekly', 'Tests - Cumulative',
       'Test Rate - Weekly', 'Test Rate - Cumulative',
       'Percent Tested Positive - Weekly',
       'Percent Tested Positive - Cumulative', 'Deaths - Weekly',
       'Deaths - Cumulative', 'Death Rate - Weekly', 'Death Rate - Cumulative',
       'Population', 'Row ID', 'ZIP Code Location', 'Total Doses - Daily',
       '1st Dose - Daily', 'Vaccine Series Completed - Daily',
       'Total Doses - Daily - Age 5+', 'Total Doses - Daily - Age 12+',
       'Total Doses - Daily - Age 18+', 'Total Doses - Daily - Age 65+',
       '1st Dose - Daily - Age 5+', '1st Dose - Daily - Age 12+',
       '1st Dose - Daily - Age 18+', '1st Dose - Daily - Age 65+',
       'Vaccine Series Completed - Daily - Age 5+',
       'Vaccine Series Completed - Daily - Age 12+',
       'Vaccine Seri