
This notebook has three sections:

- visualises the latest available COVID-19 data for Italy on a province map,
- animates the spread on a region map,
- plots absolute and per capita death graphs by region.

Italian *regions* are composed of *provinces*.

The COVID-19 data comes from the Italian government GitHub repository at https://github.com/pcm-dpc/COVID-19 (updated daily as of Apr 8, 2020). The map data comes from UC Davis; it has outdated Sardegna province information and treats Trentino-Alto Adige as a single region. `folium` is used for the province map, `matplotlib`'s `FuncAnimation` for the animation, and `plotly` for the death graphs.

# Latest COVID-19 map visualisation by province

This part draws the latest total-cases data for COVID-19 in Italy by province, using `folium`.

In [None]:
import numpy as np, pandas as pd, json, folium, geopandas as gpd, branca
import matplotlib.pyplot as plt
from matplotlib import animation, colors
from IPython.core.display import HTML, display

# Start from a clean slate: remove any previous clone, map files and license files.
!rm -rf COVID-19 ITA* license*
# Clone the Italian government data repository; stderr is merged into stdout and piped through cat.
!git clone https://github.com/pcm-dpc/COVID-19 2>&1 | cat
# Download the UC Davis administrative-boundary archive for Italy (silent mode).
!curl https://data.biogeo.ucdavis.edu/data/diva/adm/ITA_adm.zip --output ITA_adm.zip -s
# Unpack quietly into the ITA_adm directory.
!unzip -q ITA_adm.zip -d ITA_adm
# Drop the level-0 and level-3 files and the archive; only levels 1 and 2 are read below.
!rm ITA_adm/ITA_adm[03].* ITA_adm.zip

## Load and reproject geodata, normalize province naming

In [None]:
# Read the ITA_adm2 shapefile (its NAME_2 column holds province names) and reproject it to EPSG:4326.
prov_map = gpd.read_file('./ITA_adm/ITA_adm2.shp').to_crs(epsg='4326')

def map_rename(mapdf, old, new):
    """Rename values of the ``NAME_2`` column of ``mapdf`` in place.

    Parameters
    ----------
    mapdf : DataFrame-like with a ``NAME_2`` column; modified in place.
    old : sequence of names currently present in ``NAME_2``.
    new : sequence of replacement names, positionally matched with ``old``.

    Names in ``old`` that do not occur in ``NAME_2`` are silently ignored.
    """
    for i, oldname in enumerate(old):
        # Write to the frame that was passed in, not to a module-level global.
        mapdf.loc[mapdf['NAME_2'] == oldname, 'NAME_2'] = new[i]

# Rewrite map province names so they match the province names used in the COVID data
# (the two are merged on NAME_2 / denominazione_provincia further down).
# Sardegna cannot be matched properly because the geodata is outdated.
# NOTE(review): 'Cagliari' is renamed to 'Sud Sardegna' — presumably a best-effort stand-in; confirm.
map_rename(prov_map,
    ['Reggio Di Calabria', "Forli' - Cesena", "Reggio Nell'Emilia", 'Mantua',  
     'Monza and Brianza',  'Pesaro E Urbino', 'Syracuse', 'Florence', 'Padua',  
     'Cagliari'], 
    ['Reggio di Calabria', 'Forlì-Cesena',    "Reggio nell'Emilia", 'Mantova', 
     'Monza e della Brianza', 'Pesaro e Urbino', 'Siracusa', 'Firenze', 'Padova', 
     'Sud Sardegna'])


## Load provincial cases data, show last date

In [None]:
# Load the per-province time series from the cloned repository.
covid_provincial = pd.read_csv('./COVID-19/dati-province/dpc-covid19-ita-province.csv')
# Largest value of the 'data' column; used below to select the latest snapshot.
# NOTE(review): this is a max over the raw column values — assumes they sort chronologically; confirm.
MAX_DATE = covid_provincial['data'].max()
# Display the selected date as the cell output.
MAX_DATE

## Merge geodata and COVID data

In [None]:
# Province name and total cases for the rows stamped with MAX_DATE; incomplete rows are dropped.
is_latest_row = covid_provincial['data'] == MAX_DATE
data = covid_provincial[is_latest_row][['denominazione_provincia', 'totale_casi']].dropna()

# Left join keeps every map polygon, including provinces without a matching data row.
mapdata = prov_map.merge(
    data,
    left_on="NAME_2",
    right_on="denominazione_provincia",
    how='left',
)[['NAME_2', 'geometry', 'totale_casi']]

# Polygons with no matching data row get 0 cases instead of NaN.
mapdata.fillna(0, inplace=True)
mapdata.head()

In [None]:
# Base map centred on the given lat/long with an initial zoom of 6.
m = folium.Map(location=[41.89,12.48], zoom_start=6) 

# Natural log of the largest 'totale_casi' value in the whole provincial dataset;
# the colour breakpoints below are spaced evenly in log space from exp(0)=1 up to this maximum.
LOGVMAX = np.log(covid_provincial['totale_casi'].max())
# NOTE(review): vmax is hard-coded to exp(9) while the index runs up to exp(LOGVMAX) —
# confirm which upper bound is intended.
colormap = branca.colormap.LinearColormap(
    colors=['blue', 'green', 'yellow', 'orange', 'red'],
    index=np.round(np.exp(np.linspace(0, LOGVMAX, 5))),
    vmin=0, vmax=np.exp(9),
    ).to_step(n=12, 
              index=np.round(np.exp(np.linspace(0, LOGVMAX, 12))))

# Fill each province with the colour the colormap assigns to its 'totale_casi' property.
style_function = lambda x: {'fillColor': colormap(x['properties']['totale_casi']), 
                            'color':'#000000', 'fillOpacity': 0.5, 'weight': 0.1}
# Style passed as highlight_function: black fill at the same opacity.
highlight_function = lambda x: {'fillColor': '#000000', 'color':'#000000', 
                                'fillOpacity': 0.50, 'weight': 0.1}

# GeoJson layer built from the merged frame, with a tooltip showing province name and total cases.
provinces = folium.features.GeoJson(
    mapdata,
    style_function=style_function, 
    highlight_function=highlight_function, 
    control=False,
    tooltip=folium.features.GeoJsonTooltip(
        fields=['NAME_2', 'totale_casi'],
        aliases=['province: ', 'total cases: '],
        style="background-color: white; color: #333333;" +
               "font-family: arial; font-size: 12px; padding: 10px;"
    )
)
m.add_child(provinces)
m.keep_in_front(provinces)
# Attach the colour scale to the map.
colormap.add_to(m)
# Display the map as the cell output.
m

In [None]:
# Export the map to an HTML file whose name embeds MAX_DATE.
# NOTE(review): MAX_DATE is the raw value from the 'data' column; if it contains characters
# such as ':' the filename may not be portable across operating systems — confirm.
m.save(f'italy_covid_provinces_on_{MAX_DATE}.html')

# COVID-19 animation by region


In [None]:
# read map data and rename regions
# ITA_adm1 is the shapefile whose NAME_1 column holds region names; no reprojection is applied here.
map_ita = gpd.read_file('./ITA_adm/ITA_adm1.shp')#.to_crs('World_Robinson')

def map_rename(gpdmap, old, new):
    """Replace region names in the ``NAME_1`` column of ``gpdmap`` in place.

    ``old`` and ``new`` are matched by position: every row whose ``NAME_1``
    equals ``old[k]`` is set to ``new[k]``. Names that do not occur are skipped.
    """
    for position, previous in enumerate(old):
        replacement = new[position]
        matches = gpdmap['NAME_1'] == previous
        gpdmap.loc[matches, 'NAME_1'] = replacement

# Rewrite map region names so they match the 'region' values of the COVID data they are merged with.
# 'Trentino-Alto Adige' becomes 'Trento+Bolzano', the label given below to the summed P.A. rows.
# NOTE(review): the first ten pairs are the same list used for the province map (NAME_2) and
# presumably match nothing in NAME_1, making them harmless no-ops — confirm.
map_rename(map_ita,
    ['Reggio Di Calabria', "Forli' - Cesena", "Reggio Nell'Emilia", 'Mantua',  
     'Monza and Brianza', 'Pesaro E Urbino', 'Syracuse', 'Florence', 'Padua',  
     'Cagliari', 'Apulia', 'Sicily', 'Friuli-Venezia Giulia', 'Trentino-Alto Adige'], 
    ['Reggio di Calabria', 'Forlì-Cesena', "Reggio nell'Emilia", 'Mantova', 
     'Monza e della Brianza', 'Pesaro e Urbino', 'Siracusa', 'Firenze', 'Padova', 
     'Sud Sardegna','Puglia', 'Sicilia', 'Friuli Venezia Giulia', 'Trento+Bolzano'])

# Columns drawn in the animation and the matplotlib colormap used for both panels.
VARIABLES = ['Daily new cases', 'Total deaths']
CMAP = 'jet'

# Regional time series, with the columns used below renamed to English labels.
covid = pd.read_csv('./COVID-19/dati-regioni/dpc-covid19-ita-regioni.csv').rename(
    columns={
        'denominazione_regione': 'region',
        'data': 'date',
        'nuovi_positivi': 'Daily new cases',
        'deceduti': 'Total deaths',
    }
)

# Distinct date values in ascending order; the last one is the most recent.
dates = sorted(covid['date'].unique())
MAX_DATE = dates[-1]

# sum data for two P.A. regions because we only have 'Trentino-Alto Adige' on map
data = covid[ ['date', 'region', 'lat', 'long'] + VARIABLES ].fillna(0)
x = data[data['region']=='P.A. Bolzano']
y = data[data['region']=='P.A. Trento']
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
z = pd.concat([x, y])
# Per-date aggregation: sum the variables, relabel the region, and keep the smallest original
# row label ('id') so the combined row can be written back over an existing row.
aggf = {'id':'min','region':(lambda x:'Trento+Bolzano'), 'lat':'sum', 'long':'sum'}
aggf.update({v: 'sum' for v in VARIABLES})
z['id'] = z.index.to_series()
z = z.groupby('date', as_index=False).agg(aggf)
z = z.set_index(['id'])
# lat/long were summed over the two rows; halve them to get the midpoint.
z[['lat', 'long']]/=2
# Overwrite the Bolzano rows with the combined rows, then drop the Trento rows.
# NOTE(review): assumes the Bolzano row has the smaller label on every date, so that z's
# index equals x.index — confirm against the CSV ordering.
data.loc[x.index] = z
data.drop(y.index, inplace=True)

# join map and COVID data on region name
# Left join: every map region is kept; regions without matching data rows get NaN.
mapdata2 = map_ita.merge(data, how='left', 
                         left_on="NAME_1", right_on="region")
mapdata2 = mapdata2[['NAME_1','geometry'] + list(data.columns)]
# From here on mapdata2 is a list with one frame per entry of `dates`, in the same order;
# the animation indexes it by frame number.
mapdata2 = [mapdata2[mapdata2['date']==d] for d in dates]

# log scale up to all-time max
# so whole country will be green sometime
# One (vmin, vmax) pair per variable: lower bound 1, upper bound the variable's maximum in `covid`.
vmin, vmax = [1,1], [covid[VARIABLE].max() for VARIABLE in VARIABLES]

fig, axes = plt.subplots(1,2, figsize=(15, 10)) # create figure and axes for Matplotlib
# Bundle each variable with its axes and colour limits; the loops below iterate over this list.
subplots = list(zip(VARIABLES, axes, vmin, vmax))
# Data-source credit placed in figure coordinates.
plt.figtext(.2, .16, 'Source: Italian government data, https://github.com/pcm-dpc/COVID-19', 
             fontsize=14, color='#333333')

def update(frame):
    """Redraw both map panels for animation frame ``frame``.

    ``frame`` is an integer index into ``mapdata2`` (one frame per date). For
    every (variable, axes, vmin, vmax) entry of ``subplots`` the axes are
    cleared, the regions are drawn coloured by that variable on a log scale,
    each region is labelled with its value, and the date and title are added.
    """
    frame_data = mapdata2[frame]
    for variable, ax, lower, upper in subplots:
        # clear() resets the axes, so it has to run before any per-frame
        # settings; hiding the ticks first would be undone by it.
        ax.clear()

        frame_data.plot(column=variable, cmap=CMAP, linewidth=0.7,
                        ax=ax, edgecolor='0.8',
                        norm=colors.LogNorm(vmin=lower, vmax=upper))
        ax.set_xticks([])  # remove axes ticks
        ax.set_yticks([])

        for _, row in frame_data.iterrows():
            # Only label rows whose coordinates lie within 6 degrees of (12, 42).
            # NOTE(review): presumably this skips rows with missing or zeroed coordinates — confirm.
            if abs(row['long'] - 12) < 6 and abs(row['lat'] - 42) < 6:
                ax.annotate(f'{row[variable]:.0f}', (row['long']-.3, row['lat']-.1))

        # First 10 characters of the frame's 'date' value, drawn at data coordinates (7, 36).
        ax.annotate(frame_data.iloc[0]['date'][:10], (7,36), fontsize=18)
        ax.set_title(variable,
                     fontdict={'fontsize': '25', 'fontweight' : '4'},
                     color='black')
        

# One ScalarMappable per panel, kept in a list, to drive that panel's colorbar.
# Note: this module-level loop rebinds the names vmin and vmax (lists above) to per-panel scalars.
sm = []
for VARIABLE, ax, vmin, vmax in subplots:

    # Create colorbar legend
    cm = plt.cm.ScalarMappable(cmap=CMAP, norm=colors.LogNorm(vmin=vmin, vmax=vmax))
    sm.append(cm)
    sm[-1].set_array([])# empty array for the data range, kaggle won't draw without
    fig.colorbar(sm[-1], ax=ax, shrink=0.55)       # add the colorbar to the axes

plt.tight_layout()

# One frame per date, 1000 ms between frames, full redraw each frame (no blitting).
anim = animation.FuncAnimation(fig, update, frames=len(dates), interval=1000, blit=False)

# Render the animation as JavaScript/HTML and show it as the cell output.
HTML(anim.to_jshtml())

In [None]:
# save video
!apt install -qq -y ffmpeg > /dev/null
from matplotlib.animation import FFMpegWriter
MAX_DATE = dates[-1]
anim.save(f'italy_covid_regions_upto_{MAX_DATE}.mp4', dpi=96, writer=FFMpegWriter())

# Deaths growth by region, incl per capita

In [None]:
from io import StringIO
# Population estimates for 2019 and area data, source: https://www.citypopulation.de/en/italy/cities/
# The table is tab-separated; 'Trento+Bolzano' is the combined entry used for the region map.
pop_stats = pd.read_table(StringIO("""region	capital	area	population
Abruzzo	Aquila	10795	1311580
Basilicata	Potenza	9992	562869
Calabria	Catanzaro	15080	1947131
Campania	Napoli	13595	5801692
Emilia-Romagna	Bologna	22451	4459477
Friuli Venezia Giulia	Trieste	7907	1215220
Lazio	Roma	17207	5879082
Liguria	Genova	5421	1550640
Lombardia	Milano	23861	10060574
Marche	Ancona	9366	1525271
Molise	Campobasso	4438	305617
Piemonte	Torino	25399	4356406
Puglia	Bari	19363	4029053
Sardegna	Cagliari	24090	1639591
Sicilia	Palermo	25707	4999891
Toscana	Firenze	22993	3729641
Trento+Bolzano	Trento	13607	1072276
Umbria	Perugia	8456	882015
Valle d'Aosta	Aosta	3263	125666
Veneto	Venezia	18316	4905854
"""))


# add data from Italian wikipedia for P.A.'s:
#    https://it.wikipedia.org/wiki/Provincia_autonoma_di_Trento
#    https://it.wikipedia.org/wiki/Provincia_autonoma_di_Bolzano
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pop_stats = pd.concat(
    [
        pop_stats,
        pd.DataFrame({
            'region': ['P.A. Bolzano', 'P.A. Trento'],
            'capital': ['Bolzano', 'Trento'],
            'area': [7398, 6702],
            'population': [533050, 541380]
        }),
    ],
    ignore_index=True
)

# Population density (population divided by area), then index the table by region name.
pop_stats['density'] = pop_stats['population'] / pop_stats['area']
pop_stats = pop_stats.set_index('region')
# Show the five densest entries.
pop_stats.sort_values('density', ascending=False).head()

In [None]:
# Reload the regional series with English column labels for the columns used below.
covid = (
    pd.read_csv('./COVID-19/dati-regioni/dpc-covid19-ita-regioni.csv', parse_dates=True)
    .rename(columns={
        'denominazione_regione': 'region',
        'data': 'date',
        'nuovi_positivi': 'Daily new cases',
        'deceduti': 'Total deaths',
    })
)
# Convert 'date' to datetimes, then index the frame by (region, date).
covid['date'] = pd.to_datetime(covid['date'])
covid = covid.set_index(['region', 'date'])
covid.head()

In [None]:
# get series in dataframe rows, show the number of days=columns
deaths = covid['Total deaths'].unstack()
STARTING_LEVEL = 10
y = deaths[deaths > STARTING_LEVEL]
N_days = deaths.shape[1]
N_days 

In [None]:
# compute stats per 10000 population
# One population value per row of `deaths`, shaped as a column so it broadcasts across the days.
population = pop_stats['population'].loc[deaths.index].values.reshape(-1, 1)
# Built from a bare array, so the columns are 0..N-1 (day numbers) rather than dates.
deaths_per10k = pd.DataFrame(10000 * deaths.values / population, index=deaths.index)

# Shift every region left so column 0 is the first day on which its absolute death count
# reached STARTING_LEVEL, matching the "since {STARTING_LEVEL} deaths" plot title.
# (Previously rows were aligned on the first day with any deaths at all.)
reached_level = deaths.values >= STARTING_LEVEL
for pos, region in enumerate(deaths_per10k.index):
    hit_days = np.flatnonzero(reached_level[pos])
    # A region that never reached the threshold is shifted out entirely (all NaN).
    offset = int(hit_days[0]) if hit_days.size else deaths_per10k.shape[1]
    deaths_per10k.loc[region] = deaths_per10k.loc[region].shift(-offset)
deaths_per10k.head()

In [None]:
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode()

# One line-and-marker trace per region, plotted against the date columns of `deaths`.
traces = [
    go.Scatter(x=deaths.columns, y=row, mode="lines+markers", name=region)
    for region, row in deaths.iterrows()
]

# Shared layout: log-scaled y axis, legend placed to the right of the plot area.
layout = {
    'title': {
        'text': 'Deaths in Italian regions since ' +
                f'{str(covid.index.get_level_values(1).min())[:10]}',
        'font': {'size': 24},
    },
    'xaxis': {'title': 'days', 'ticklen': 5, 'zeroline': False},
    'yaxis_type': "log",
    'legend': {'x': 1.01, 'y': .5, 'font': {'size': 10}},
    'width': 1200,
    'height': 800,
    'margin': {'r': 180, 'l': 0},
}

fig1 = {'data': traces, 'layout': layout}
iplot(fig1)

In [None]:
# One line-and-marker trace per region, plotted against the columns of deaths_per10k.
traces = [go.Scatter(
                    x = deaths_per10k.columns,
                    y = row,
                    mode = "lines+markers",
                    name = region,
         ) for region, row in deaths_per10k.iterrows()]

# fig1 holds a reference to `layout`, so changing its title in place would also retitle fig1
# (and the PNG exported from it). Build a separate layout that differs only in the title text.
layout_per10k = {
    **layout,
    'title': {
        **layout['title'],
        'text': f'Deaths in Italian regions per 10k population since {STARTING_LEVEL} deaths',
    },
}
fig2 = dict(data=traces, layout=layout_per10k)
iplot(fig2)

In [None]:
import plotly, sys 
# Install plotly-orca from the plotly channel into the environment at sys.prefix.
# NOTE(review): presumably needed by write_image for static export — confirm for the plotly version in use.
!conda install --quiet --yes --prefix {sys.prefix} -c plotly plotly-orca 
# Export both death graphs as PNG files.
plotly.io.write_image(fig1, 'Deaths_by_region.png')
plotly.io.write_image(fig2, f'Deaths_per_10k_by_region_since_{STARTING_LEVEL}_deaths.png')

In [None]:
# Remove the cloned data repository, the unpacked map directory and any PNG files starting with 'None'.
# NOTE(review): the 'None*.png' pattern presumably targets stray files from an earlier run — confirm it is still needed.
!rm -rf COVID-19 ITA_adm None*.png