In [1]:
import requests
import pandas as pd
import numpy as np

from src import *

from bokeh.io import output_notebook

# Configure Bokeh (bokeh.io.output_notebook) so that plots are rendered
# inline in the notebook's output cells rather than to a separate file.
output_notebook()

In [12]:
# Wikipedia template page that holds the Italy case table.
url = 'https://en.wikipedia.org/wiki/Template:COVID-19_pandemic_data/Italy_medical_cases'

# Column codes of the case table, in table order. This is the same set of
# codes that clean_data() selects alongside 'Date'.
provinces = ['VDA', 'LIG', 'PIE', 'LOM', 'VEN', 'TN', 'BZ', 'FVG', 'EMR',
       'MAR', 'TOS', 'UMB', 'LAZ', 'ABR', 'MOL', 'CAM', 'BAS', 'PUG', 'CAL',
       'SIC', 'SAR']

# Resident population per code; used as the denominator of the
# per-100,000 rate computed in add_provincial_statistics(). Only the codes
# that are actually plotted need an entry.
PROV_POPS = {
    "VEN": 4905854,
    "LOM": 10060574,
    "LAZ": 5879082,
    # Was 947131: the leading digit had been dropped (Calabria has roughly
    # 1.95 million residents), which doubled its per-capita rate.
    "CAL": 1947131,
    "CAM": 5802000,
}

In [3]:
def clean_data(df, footer_rows=7):
    """Tidy the scraped Italy case table into one row per date.

    The ``df`` argument is now actually used: the body previously read the
    notebook-global ``df_raw`` and ignored whatever was passed in, so the
    function only worked when that global happened to exist in the kernel.

    Args:
        df: The scraped input. Either a sequence whose first element is the
            case table (e.g. what ``pd.read_html`` returns -- assumed,
            confirm against the loading cell) or that table itself as a
            DataFrame with multi-level column headers. It is not modified.
        footer_rows: Number of trailing rows to discard before parsing.
            Defaults to the 7 rows this function has always dropped.

    Returns:
        A new DataFrame with a datetime64 ``Date`` column followed by the 21
        region-code columns. Region cells have parenthesised text and commas
        stripped but are not converted to numbers here; missing cells are
        filled with 0.
    """
    wanted_columns = ['Date', 'VDA', 'LIG', 'PIE', 'LOM', 'VEN', 'TN', 'BZ',
                      'FVG', 'EMR', 'MAR', 'TOS', 'UMB', 'LAZ', 'ABR', 'MOL',
                      'CAM', 'BAS', 'PUG', 'CAL', 'SIC', 'SAR']

    # Accept both the list of scraped tables and the table itself.
    table = df if isinstance(df, pd.DataFrame) else df[0]
    cleaned = table.copy()

    # Drop the outermost header level so columns can be selected by code.
    cleaned.columns = cleaned.columns.droplevel(0)
    cleaned = cleaned[wanted_columns]

    # Discard the trailing rows. Slice by an explicit end position so that
    # footer_rows=0 keeps every row (a plain [:-0] would return nothing).
    keep = max(len(cleaned) - footer_rows, 0)
    cleaned = cleaned.iloc[:keep]

    # Strip parenthesised text, then thousands separators, from every cell.
    cleaned = cleaned.replace(r"\([^][]*\)", "", regex=True)
    cleaned = cleaned.replace(r",", "", regex=True)

    # regex=False: "." must be a literal dot, independent of the pandas
    # version's default for `regex`.
    cleaned['Date'] = cleaned['Date'].str.replace(".", "-", regex=False)
    cleaned['Date'] = cleaned['Date'].astype('datetime64[ns]')

    return cleaned.fillna(0)

In [5]:
def add_provincial_statistics(df, province, populations=None, window=7):
    """Add case statistics for one province code to ``df``.

    ``df`` is modified in place and also returned. Three columns are added:
    ``"Daily Cases"`` (a copy of the numeric province column), ``"DCRA"``
    (rolling mean of ``"Daily Cases"``) and ``"DCRA Per Capita"`` (``DCRA``
    per 100,000 residents). The province column itself is converted to a
    numeric dtype; values that cannot be parsed become NaN.

    Args:
        df: DataFrame containing a column named ``province``.
        province: Column name / key into ``populations``.
        populations: Mapping of province code to resident population.
            Defaults to the module-level ``PROV_POPS``.
        window: Number of rows in the rolling mean (default 7).

    Returns:
        The same ``df`` object with the extra columns.

    Raises:
        KeyError: If ``province`` has no entry in ``populations``. Raised
            before ``df`` is touched, so a failed call leaves it unchanged.
    """
    if populations is None:
        populations = PROV_POPS

    # Check up front so that a missing population does not leave `df`
    # half-updated.
    if province not in populations:
        raise KeyError(
            f"no population figure for {province!r}; "
            "add it to PROV_POPS or pass `populations`"
        )

    df[province] = pd.to_numeric(df[province], errors='coerce', downcast='integer')

    df["Daily Cases"] = df[province]

    # Daily Cases Rolling Average; the first window-1 rows are NaN.
    df['DCRA'] = df["Daily Cases"].rolling(window).mean()

    df['DCRA Per Capita'] = df['DCRA'] / populations[province] * 100000

    return df

In [6]:
from dataclasses import dataclass, field
from bokeh.models import ColumnDataSource

@dataclass
class InputConfig:
    """Per-province plotting input: a trimmed DataFrame plus its Bokeh source.

    On construction the supplied ``df`` is reduced to the ``"Date"`` column
    and the column named by ``province``, the statistics columns are added
    via ``add_provincial_statistics``, and the result is wrapped in a
    ``ColumnDataSource``. The DataFrame passed in by the caller is copied
    first and is not modified.
    """
    # Country label; stored only, not used by this class.
    country: str
    # Province code; must be a column of `df`.
    province: str
    # On input: the full cleaned table. After __post_init__: the per-province
    # copy with the added statistics columns.
    df: pd.DataFrame
    # Built in __post_init__ from the per-province frame; not an __init__ arg.
    data_src: ColumnDataSource = field(init=False)

    def __post_init__(self):
        # Use the frame given to this instance. This previously read the
        # notebook-global `df`, silently ignoring the constructor argument.
        self.df = self.df[["Date", self.province]].copy()
        self.df = add_provincial_statistics(self.df, self.province)
        self.data_src = ColumnDataSource(self.df)

In [13]:
# One InputConfig per province code to plot, all built from the shared `df`.
# NOTE(review): `df` is not assigned in any visible cell -- presumably it is
# the output of clean_data(...) from a cell that was removed; confirm the
# notebook survives Restart & Run All.
italy_inputconfigs = [
    InputConfig("Italy", province_code, df)
    for province_code in ("VEN", "LOM", "LAZ", "CAL", "CAM")
]

In [14]:
# Hand the per-province configs to graph_data.
# NOTE(review): graph_data is not defined in this notebook -- presumably it
# comes from `from src import *`; confirm and prefer an explicit import.
graph_data(italy_inputconfigs)