In [2]:
import pandas as pd
import os

import matplotlib
from matplotlib import pyplot as plt

# Select matplotlib's interactive "notebook" backend for figures in this notebook.
%matplotlib notebook
# Apply the built-in 'ggplot' style sheet to every figure created afterwards.
plt.style.use('ggplot')

#### Load Data (from git@github.com:pcm-dpc/COVID-19.git)

In [3]:
# Directory (inside the local pcm-dpc/COVID-19 checkout) with the per-province CSVs.
workdir = "COVID-19/dati-province"

# os.listdir returns entries in arbitrary order and may include non-CSV files:
# sort for a reproducible concatenation order and keep only the CSVs.
csv_paths = [
    os.path.join(workdir, filename)
    for filename in sorted(os.listdir(workdir))
    if filename.lower().endswith('.csv')
]
if not csv_paths:
    # Fail with an explicit message instead of pd.concat's
    # "No objects to concatenate".
    raise FileNotFoundError("No CSV files found in {!r}".format(workdir))

# pandas treats the literal string "NA" as a missing value by default. The
# province codes are two-letter strings (e.g. 'BG', 'MI'), and the code "NA"
# (Napoli) would silently become NaN and be discarded by the dropna step.
# Only empty fields are therefore treated as missing.
# pd.read_csv opens and closes each file itself, so no explicit open() is needed.
df_list = [
    pd.read_csv(csv_path, keep_default_na=False, na_values=[''])
    for csv_path in csv_paths
]

# One frame holding the rows of every file; the per-file row numbers carry no
# meaning, so a fresh index is generated.
df = pd.concat(df_list, ignore_index=True)

#### Data Cleanup and Basic Improvements

In [4]:
# Drop only the rows that lack a value this analysis actually uses. A blanket
# dropna() would also discard rows because of gaps in unrelated columns.
df = df.dropna(subset=['sigla_provincia', 'data', 'totale_casi'])

# Parse the timestamp column so it can act as a proper datetime index level.
df = df.assign(data=pd.to_datetime(df['data']))

# The same (province, timestamp) pair can occur more than once after
# concatenating several files; keep the highest reported total for each pair.
# groupby sorts its keys, and sort_index() makes the ordering explicit.
df = (
    df.groupby(['sigla_provincia', 'data'])['totale_casi']
    .max()
    .to_frame()
    .sort_index()
)

# New cases per reporting step: difference between consecutive totals within
# each province. The first row of every province has no predecessor, hence NaN.
df['casi_day'] = df.groupby(level=0)['totale_casi'].diff()

In [5]:
# Select the provinces to show.
#
# Fill `custom_provinces` with province codes to plot exactly those; leave it
# empty to fall back to the TOP_N provinces ranked by their highest total.
TOP_N = 50
custom_provinces = ['BG', 'BS', 'PD', 'CO', 'MI']

if custom_provinces:
    # ... as a custom list
    top_list = list(custom_provinces)
else:
    # ... as a TOP#N (only computed when no custom list is given)
    top_list = (
        df.groupby(level=0)['totale_casi']
        .max()
        .sort_values(ascending=False)
        .head(TOP_N)
        .index
        .tolist()
    )

In [6]:
# Reuse figure 1 but clear it first, so re-running this cell redraws the chart
# instead of stacking another Axes and duplicate lines on the existing figure.
fig, ax = plt.subplots(num=1, clear=True)

# One line per selected province; df.loc[provincia] selects that province's
# rows, leaving the timestamp level as the x axis.
for provincia in top_list:
    df.loc[provincia].plot(ax=ax, y='totale_casi', label=provincia)

# Logarithmic y axis for the case totals.
ax.set_yscale('log')
ax.set_xlabel('Date')
ax.set_ylabel('Total cases (log scale)')
ax.set_title('Cumulative COVID-19 cases by province')

<IPython.core.display.Javascript object>