In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

In [None]:
# New York Times state/county data. The '%s' slot in the template selects the
# file variant; 'states' resolves to data/covid-19-data/us-states.csv.
nyt_csv = 'data/covid-19-data/us-%s.csv'

data = pd.read_csv(nyt_csv % 'states')

# NOTE(review): plot_new_cases_vs_total_cases is defined in a later cell of this
# notebook, so on a fresh kernel that cell has to run before this one.
plot_new_cases_vs_total_cases(
    data,
    fig_size=(14, 10),
    rolling_days=5,
    groupby_filter=['Washington', 'New York', 'California', 'Florida'],
)

In [None]:
# Johns Hopkins University (CSSE) country-level time-series data.
# The '%s' in the file name is a placeholder for the series to load
# (presumably 'confirmed', 'deaths' or 'recovered' -- verify against the data checkout).
jhu_csv = 'data/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_%s_global.csv'

In [None]:
import pandas as pd
import requests


def request_json_as_df(url, timeout=30):
    """Download a JSON document over HTTP and parse it into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the JSON resource; passed straight to ``requests.get``.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 30 so an unresponsive host cannot stall the notebook
        indefinitely.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pandas.read_json`` builds from the response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    """
    # Local import keeps this cell self-contained (no new notebook-level import).
    from io import StringIO

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Passing literal JSON text to read_json is deprecated in recent pandas
    # (a bare string is meant to be a path/URL), so wrap it in a buffer.
    return pd.read_json(StringIO(response.text))


def apply_settings(title, ax, font_size=20, disable_legend=True):
    """Apply a title, a uniform font size and a legend policy to one Axes.

    Everything is done through ``ax`` itself rather than the pyplot
    "current axes", so the function also works when ``ax`` is not the most
    recently created Axes.

    Parameters
    ----------
    title : str
        Text for the Axes title.
    ax : matplotlib.axes.Axes
        The Axes to style.
    font_size : int, optional
        Size applied to the title, both axis labels, all tick labels and
        (when shown) the legend entries.
    disable_legend : bool, optional
        If True (default) any legend already attached to ``ax`` is removed.
        If False a legend is (re)built with ``font_size``.
    """
    ax.set_title(title, fontsize=font_size)

    text_items = [ax.title, ax.xaxis.label, ax.yaxis.label]
    text_items.extend(ax.get_xticklabels())
    text_items.extend(ax.get_yticklabels())
    for item in text_items:
        item.set_fontsize(font_size)

    if disable_legend:
        # Plotting helpers may or may not have attached a legend; only remove
        # one if it exists instead of creating a legend just to delete it.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()
    else:
        ax.legend(prop={'size': font_size})


def plot_new_cases_vs_total_cases(data,
                                  groupby_column='state',
                                  cases_label='cases',
                                  groupby_filter=None,
                                  fig_size=(10, 8),
                                  logx=True,
                                  logy=True,
                                  rolling_days=3):
    """Plot rolling new cases against rolling total cases, one line per group.

    A new figure is created; for every group in ``data`` a line is drawn with
    the rolling sum of the case count on the x-axis and the rolling sum of its
    day-over-day difference on the y-axis, and the group label is written at
    the (max x, max y) position of that line.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``groupby_column`` and ``cases_label``. Rows within each
        group are differenced in their existing order (presumably
        chronological -- confirm for new data sources).
    groupby_column : str
        Column whose distinct values define the plotted lines.
    cases_label : str
        Column holding the case count that is differenced and summed.
    groupby_filter : collection, optional
        Group labels to draw. ``None`` or an empty collection draws all groups.
    fig_size : tuple
        Figure size forwarded to ``plt.subplots``.
    logx, logy : bool
        Use a logarithmic scale on the respective axis.
    rolling_days : int
        Window length of the rolling sums.

    Returns
    -------
    None
    """
    # Group by the bare column name, not a one-element list: with a list,
    # pandas >= 2.0 yields 1-tuples such as ('Florida',) as labels, which never
    # match the plain strings in groupby_filter.
    grouped = data.groupby(groupby_column)

    fig, ax = plt.subplots(figsize=fig_size)

    for label, item in grouped:
        if groupby_filter and label not in groupby_filter:
            continue
        df = item.copy()
        df['cases_delta'] = df[cases_label].diff(1)
        # NOTE(review): this is a rolling *sum* of the case column itself; if
        # that column is already cumulative the x value is roughly
        # rolling_days times the running total. Left unchanged.
        df['rolling_cases'] = df[cases_label].rolling(rolling_days).sum()
        df['rolling_cases_delta'] = df['cases_delta'].rolling(rolling_days).sum()
        df.plot(ax=ax, label=label, x='rolling_cases', y='rolling_cases_delta', logx=logx, logy=logy)

        # Groups shorter than the rolling window produce only NaN; skip the
        # annotation then instead of placing text at a non-finite position.
        x_peak = df['rolling_cases'].max()
        y_peak = df['rolling_cases_delta'].max()
        if pd.notna(x_peak) and pd.notna(y_peak):
            ax.text(x_peak, y_peak, label)

    ax.set_xlabel(('Log of ' if logx else '') + 'Total Cases')
    # The y-axis shows the rolling day-over-day increase, i.e. new cases.
    ax.set_ylabel(('Log of ' if logy else '') + 'New Cases')
    apply_settings('New Cases vs Total Cases', ax)


In [None]:
# Two raw inputs for the ZIP-level analysis: the per-ZCTA testing table from the
# nychealth coronavirus-data checkout and a Census table of New York ZIP areas.
tests_by_zip = pd.read_csv('data/nychealth/coronavirus-data/tests-by-zcta.csv')
ny_zips = pd.read_csv('data/NY_Census_Zip.csv')


In [None]:
# Drop every row that has a missing value (the integer cast below cannot
# represent NaN) and derive an integer ZipCode column from MODZCTA.
# Doing both in one chain builds a fresh frame, which avoids pandas'
# SettingWithCopyWarning from assigning a column onto the result of dropna().
tests_by_zip = (
    tests_by_zip
    .dropna()
    .assign(ZipCode=lambda frame: frame['MODZCTA'].astype('int'))
)
tests_by_zip

In [None]:
# Derive ZipCode from NAME: text shaped like 'ZCTA5 <digits>, New York' is
# replaced by just the captured digits. Values without that pattern are left
# as they are, and the resulting column holds strings, not integers.
ny_zips['ZipCode'] = ny_zips['NAME'].replace(regex=r'ZCTA5 (\d+), New York', value=r'\1')

In [None]:
# Preview ny_zips ordered by ZipCode. The sorted copy is only displayed;
# ny_zips itself is not modified.
ny_zips.sort_values('ZipCode')

In [None]:
# DataFrame.join(other, on=...) matches the caller's column against the *index*
# of `other`, not against a same-named column. ny_zips is read with the default
# positional index, so joining it directly never lines up ZIP codes and every
# right-hand column comes back empty. Key the lookup table by a numeric ZIP
# first; tests_by_zip['ZipCode'] is an integer, ny_zips['ZipCode'] holds text.
ny_zips_by_zip = (
    ny_zips
    .assign(_zip_key=pd.to_numeric(ny_zips['ZipCode'], errors='coerce'))
    .dropna(subset=['_zip_key'])  # rows whose ZipCode is not purely numeric
    .astype({'_zip_key': 'int64'})
    .set_index('_zip_key')
)

# Same call shape as before, so overlapping columns (including ZipCode) still
# receive the '_left' / '_right' suffixes and the left index is preserved.
df = tests_by_zip.join(ny_zips_by_zip, on='ZipCode', how='left', lsuffix='_left', rsuffix='_right')
df
