# COVID-19 data in Poland

Plots of COVID-19 data for Poland, based on the CSSE time series published at https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

## Preparation of data
Run this cell once a day to download the latest time series.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
#import timeit
#pd.show_versions(as_json=False)

def prepare_data(name, df, region):
    """Extract one region's time series as a single-column, date-indexed frame.

    Args:
        name: Label for the resulting column (e.g. "confirmed").
        df: Wide table with one row per Region / Province and one column per
            day (headers formatted as '%m/%d/%y'), plus the 'Region',
            'Province/State', 'Lat' and 'Long' columns.
        region: Value of the 'Region' column to select.

    Returns:
        DataFrame indexed by date with exactly one column called ``name``.
        When the region is split across several 'Province/State' rows, the
        rows are summed into one total.

    Raises:
        ValueError: If no row of ``df`` matches ``region``.
    """
    rows = df[df.Region == region]
    if rows.empty:
        raise ValueError("no data found for region %r" % (region,))
    counts = rows.drop(['Region', 'Province/State', 'Lat', 'Long'], axis=1)
    # Summing collapses multi-province regions into a single series; without
    # it every province would become its own column, all sharing one name.
    col = counts.sum(axis=0).to_frame(name)
    # Column headers were date strings; turn them into real datetimes.
    return col.rename(index=lambda day: datetime.datetime.strptime(day, '%m/%d/%y'))

def calculate_mean(df, mean_window_size):
    """Return the centered rolling mean of every column of ``df``.

    Args:
        df: Frame whose columns are averaged independently.
        mean_window_size: Number of rows in each rolling window.

    Returns:
        Frame of the same shape; each column is renamed to
        "centered mean of <column> over <mean_window_size> days".
    """
    def describe(column):
        return "centered mean of %s over %i days" % (column, mean_window_size)

    windows = df.rolling(window=mean_window_size, center=True)
    smoothed = windows.mean()
    return smoothed.rename(columns=describe)

def calulate_percentage(df, mean_window_size):
    """Return, per rolling window, the last value divided by the mean of the rest.

    Args:
        df: Frame whose columns are processed independently.
        mean_window_size: Number of rows in each rolling window; the last row
            of the window is compared against the mean of the preceding
            ``mean_window_size - 1`` rows.

    Returns:
        Frame of the same shape.  Every column is renamed to
        "new confirmed ratio from today against last day" (window <= 2) or
        "... against mean of last <mean_window_size - 1> days" otherwise.
    """
    if mean_window_size <= 2:
        baseline = "last day"
    else:
        baseline = "mean of last %i days" % (mean_window_size - 1)
    label = "new confirmed ratio from today against %s" % baseline

    def last_over_mean_of_rest(window):
        return window[-1] / window[:-1].mean()

    # raw=True hands each window over as a NumPy array, so [-1] and [:-1] are
    # plain positional indexing.  With the default (a Series carrying the
    # frame's index) an integer key relies on a deprecated positional fallback.
    # NOTE(review): center=True labels each result at the middle of its
    # window, so for windows larger than 2 the "today" value belongs to a
    # later row than the one the ratio is stored on - confirm this is intended.
    ratios = df.rolling(window=mean_window_size, center=True).apply(
        last_over_mean_of_rest, raw=True)
    return ratios.rename(columns=lambda _: label)

# Download the global cumulative "confirmed" and "deaths" tables.  The
# 'Country/Region' column is renamed to 'Region' in both.
c_full = pd.read_csv(
    "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    parse_dates=True,
).rename(columns={'Country/Region': 'Region'})
d_full = pd.read_csv(
    "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    parse_dates=True,
).rename(columns={'Country/Region': 'Region'})

In [None]:
#        0         1          2     3        4        5         6          7        8         9
regions=["Poland", "Czechia", "US", "Spain", "Italy", "France", "Germany", "China", "Russia", "India"]
region=regions[0]

#https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
c = prepare_data("confirmed", c_full, region)
d = prepare_data("deaths", d_full, region)

# idxmax() on the boolean frame gives, per column, the first date on which the
# threshold is met; .iloc[0] takes the first column explicitly by position
# (a bare [0] on that label-indexed Series relies on a deprecated fallback).
# NOTE: if the threshold is never reached, idxmax() returns the first date.
day_zero = c.ge(1).idxmax().iloc[0]  # datetime.datetime(2020, 3, 3)
day_hundred = c.ge(100).idxmax().iloc[0]  # datetime.datetime(2020, 3, 13)
# Drop everything before the first confirmed case.
c = c[c.index >= day_zero]
d = d[d.index >= day_zero]

# Day-over-day increase of the cumulative counts (first row is NaN).
nc = c.diff().rename(columns=lambda x: "new "+x)
nd = d.diff().rename(columns=lambda x: "new "+x)

## Playing with data

In [None]:
# Smooth the daily new cases over windows of 2, 4, 7 and 14 days.
# NOTE: the numeric suffixes of ncm3/ncm4/ncm7 (and ncp3/ncp4/ncp7) do not
# match their window sizes (4, 7 and 14).
ncm2, ncm3, ncm4, ncm7 = (calculate_mean(nc, days) for days in (2, 4, 7, 14))
# Ratio of the last day of each window to the mean of the days before it.
ncp2, ncp3, ncp4, ncp7 = (calulate_percentage(nc, days) for days in (2, 4, 7, 14))

## Presentation of data

In [None]:
# Join all series on their date index, building the table from the innermost
# (right-most) frame outwards, and show the last 14 rows.
merged = ncm2
for frame in (nd, ncp2, nc, d, c):
    merged = frame.merge(merged, left_index=True, right_index=True)
merged.tail(14)

In [None]:
#TODO: Show region, start date, date where C exceeded 100

# One bar-chart panel per series, stacked vertically on a shared date axis.
fig, axes = plt.subplots(nrows=11, ncols=1, sharex=True, constrained_layout=True)
fig.set_size_inches(10,40)
ncp_ylim=(0,3)

# (frame, extra bar-plot options) in top-to-bottom panel order.
panels = [
    (d, dict(color='black')),
    (c, dict()),
    (nc, dict()),
    (ncm2, dict(color='g')),
    (ncm3, dict(color='g')),
    (ncm4, dict(color='g')),
    (ncm7, dict(color='g')),
    (ncp2, dict(color='g', ylim=ncp_ylim)),
    (ncp3, dict(color='g', ylim=ncp_ylim)),
    (ncp4, dict(color='g', ylim=ncp_ylim)),
    (ncp7, dict(color='g', ylim=ncp_ylim)),
]
for ax, (frame, options) in zip(axes, panels):
    frame.plot.bar(ax=ax, width=0.8, **options)

# The first seven panels (counts and their means) use a logarithmic y axis.
for log_ax in axes[:7]:
    log_ax.semilogy()
# The ratio panels get a dotted reference line at 1.
for ratio_ax in axes[7:]:
    ratio_ax.axhline(y = 1, linestyle=':')

# Assigning the return value keeps the notebook from echoing it.
a = fig.suptitle("Data for %s, first day with 100+ cases was %s" %(region, day_hundred))
# Month starts within the plotted range.
# TODO: use these as x ticks / tick labels on the bottom panel (not applied yet).
months = pd.date_range(d.index[0], d.index[-1], freq='MS')
