In [None]:
import os
from pathlib import Path
from urllib.parse import quote

import matplotlib.pyplot as plt
import pandas as pd
from sqlalchemy import create_engine

In [None]:
# Connection settings for the ElephantSQL-hosted PostgreSQL database.
# By default username and database_name are the same.
username = 'twmzglvg'
database_name = 'twmzglvg'

# The password is never stored in the notebook. Save your database password
# (found on the ElephantSQL site) in your home directory, in the file
# ~/.secrets/elephantsql; it is read from there at run time.
secret_path = Path.home() / '.secrets' / 'elephantsql'
with open(secret_path, 'r') as secret:
    # Remove every newline plus any surrounding whitespace from the file text.
    password = secret.read().replace('\n', '').strip()

# Percent-encode the password before embedding it in the URL; otherwise
# characters such as '@', ':', '/' or '%' would be read as URL syntax.
# `password` itself stays unencoded for any other use.
url = f"postgresql://{username}:{quote(password, safe='')}@bubble.db.elephantsql.com/{database_name}"
engine = create_engine(url, echo=False)

In [None]:
# Load Covid test data by zip code in New York City from April 1st 2020 through May 1st 2020.
# Rows containing any missing value are dropped, then MODZCTA is exposed as an
# integer 'ZipCode' column.
with engine.connect() as conn:
    tests_by_zip = pd.read_sql('select * from ny_tests_by_zcta', conn)
tests_by_zip = (
    tests_by_zip
    .dropna()
    .assign(ZipCode=lambda rows: rows['MODZCTA'].astype('int'))
)

In [None]:
# Load the census table and derive an integer 'ZipCode' column from NAME.
with engine.connect() as conn:
    ny_zips = pd.read_sql('select * from ny_census_zip', conn)

# Values shaped like "ZCTA5 <digits>, New York" are reduced to just the digits;
# anything else is left unchanged and filtered out below.
zip_text = ny_zips['NAME'].replace(to_replace=r'ZCTA5 (\d+), New York', value=r'\1', regex=True)

# Keep only rows whose value is made up entirely of decimal digits, i.e. what
# the integer conversion can parse. Unlike str.isnumeric(), this rejects
# characters such as '½' or '²', and na=False treats a missing NAME as
# "not a zip" instead of raising.
is_zip = zip_text.str.fullmatch(r'\d+', na=False)

# Build the filtered frame and the new column in one step so the column is
# never assigned into a slice of the original frame.
ny_zips = ny_zips.loc[is_zip].assign(ZipCode=zip_text.loc[is_zip].astype('int'))

In [None]:
# Length, in days, of the rolling window used for the mean incidence rate.
days = 5

# Join the test counts to the census rows on ZipCode, keep the columns of
# interest under readable names, and compute the incidence rate as
# positives per 100 residents.
df = (
    tests_by_zip.merge(ny_zips, on='ZipCode')
    .loc[:, ['GEO_ID', 'ZipCode', 'P001001', 'Total', 'Positive', 'Timestamp']]
    .rename(columns={
        'ZipCode': 'Zip Code',
        'P001001': 'Population',
        'Total': 'Total Tests',
        'Positive': 'Positive CVD',
    })
    .assign(**{
        'Timestamp': lambda merged: pd.to_datetime(merged['Timestamp'], format='mixed'),
        'Population': lambda merged: merged['Population'].astype('int'),
        # Evaluated after 'Population' above, so it sees the integer column.
        'Incidence Rate': lambda merged: merged['Positive CVD'] / merged['Population'] * 100,
    })
)

In [None]:
# Add a time-based rolling mean of 'Incidence Rate' for each zip code.
rolling_column = f'{days} Day Mean Incidence Rate'
rolling_window = f'{days}d'

# Collect the per-zip frames and concatenate once at the end. Growing a frame
# with pd.concat inside the loop re-copies all earlier rows on every iteration.
per_zip_frames = []
for _zip_code, group in df.groupby('Zip Code'):
    # verify_integrity=True raises if a zip code has duplicate timestamps.
    group = group.set_index('Timestamp', verify_integrity=True)
    # The time-based rolling window needs the timestamps in ascending order.
    group = group.sort_index()
    group[rolling_column] = group['Incidence Rate'].rolling(rolling_window).mean()
    per_zip_frames.append(group)

# pd.concat rejects an empty list, so fall back to an empty frame when df has
# no rows (same result as before for that case).
new_df = pd.concat(per_zip_frames) if per_zip_frames else pd.DataFrame()
df = new_df.reset_index()

In [None]:
# Write the result without the row index. The file name is derived from `days`
# so it always matches the rolling window actually used; with days = 5 it is
# '5_Day_Mean_Incidence_Rate_Per_NY_Zip_Code.csv'.
df.to_csv(f'{days}_Day_Mean_Incidence_Rate_Per_NY_Zip_Code.csv', index=False)