In [90]:
import pandas as pd
import carto
import geopandas as gpd
from shapely.geometry import Point


from cartoframes.viz import Map, Layer, Popup
from cartoframes.viz.helpers import size_continuous_layer


# Data Wrangling

In [2]:
df_it = pd.read_csv('input/covid19-ita-province.csv')
df_it

Unnamed: 0.1,Unnamed: 0,date,state,region_code,region,province_code,province,province_ISO,lat,long,total_cases,note_it,note_en
0,0,2020-02-24T18:00:00,ITA,13,Abruzzo,69,Chieti,CH,42.351032,14.167546,0,,
1,1,2020-02-24T18:00:00,ITA,13,Abruzzo,66,L'Aquila,AQ,42.351222,13.398438,0,,
2,2,2020-02-24T18:00:00,ITA,13,Abruzzo,68,Pescara,PE,42.464584,14.213648,0,,
3,3,2020-02-24T18:00:00,ITA,13,Abruzzo,67,Teramo,TE,42.658918,13.704400,0,,
4,4,2020-02-24T18:00:00,ITA,13,Abruzzo,979,In fase di definizione/aggiornamento,,0.000000,0.000000,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7419,7419,2020-04-21T17:00:00,ITA,5,Veneto,26,Treviso,TV,45.667546,12.245074,2351,,
7420,7420,2020-04-21T17:00:00,ITA,5,Veneto,27,Venezia,VE,45.434905,12.338452,2191,,
7421,7421,2020-04-21T17:00:00,ITA,5,Veneto,23,Verona,VR,45.438390,10.993527,4070,,
7422,7422,2020-04-21T17:00:00,ITA,5,Veneto,24,Vicenza,VI,45.547497,11.545971,2390,,


In [3]:
#How many rows and series are in 
df_it.shape

(7424, 13)

In [4]:
#How many null values are in each serie
df_it.isnull().sum()

Unnamed: 0          0
date                0
state               0
region_code         0
region              0
province_code       0
province            0
province_ISO     1276
lat                 0
long                0
total_cases         0
note_it          7419
note_en          7419
dtype: int64

In [5]:
#Reducing series
df_it = df_it[['Unnamed: 0', 'date', 'region_code', 'region', 'province_code',
       'province', 'lat', 'long', 'total_cases']]

In [6]:
#Which region got most cases
df_it['region'].value_counts()

Lombardia                754
Toscana                  638
Emilia-Romagna           580
Sicilia                  580
Piemonte                 522
Veneto                   464
Puglia                   406
Calabria                 348
Sardegna                 348
Campania                 348
Lazio                    348
Marche                   348
Friuli Venezia Giulia    290
Abruzzo                  290
Liguria                  290
Umbria                   174
Molise                   174
Basilicata               174
P.A. Trento              116
Valle d'Aosta            116
P.A. Bolzano             116
Name: region, dtype: int64

In [7]:
#Lombardia dataframe. A smaller DataFrame
df_lom = df_it[(df_it['region'] == 'Lombardia')]

In [67]:
df_it = df_it.groupby(['region', 'province', 'lat', 'long']).agg({'total_cases':'sum'}).reset_index()
df_it

Unnamed: 0,region,province,lat,long,total_cases
0,Abruzzo,Chieti,42.351032,14.167546,11460
1,Abruzzo,In fase di definizione/aggiornamento,0.000000,0.000000,3
2,Abruzzo,L'Aquila,42.351222,13.398438,5254
3,Abruzzo,Pescara,42.464584,14.213648,24622
4,Abruzzo,Teramo,42.658918,13.704400,14418
...,...,...,...,...,...
123,Veneto,Rovigo,45.071073,11.790070,6798
124,Veneto,Treviso,45.667546,12.245074,59933
125,Veneto,Venezia,45.434905,12.338452,51931
126,Veneto,Verona,45.438390,10.993527,89257


In [70]:
df_lom = df_lom.groupby(['province', 'lat', 'long']).agg({'total_cases':'sum'}).reset_index()
df_lom

Unnamed: 0,province,lat,long,total_cases
0,Bergamo,45.694414,9.668425,336319
1,Brescia,45.539931,10.219103,326891
2,Como,45.809991,9.08516,49589
3,Cremona,45.133367,10.024209,159693
4,In fase di definizione/aggiornamento,0.0,0.0,40322
5,Lecco,45.855758,9.393392,55321
6,Lodi,45.314407,9.503721,93835
7,Mantova,45.157268,10.792774,68537
8,Milano,45.466794,9.190347,375357
9,Monza e della Brianza,45.58439,9.273582,96310


# Map with CARTO

In [71]:
#Create a geodataframe with geopandas
dfg_it = gpd.GeoDataFrame(df_it, geometry=gpd.points_from_xy(df_it.long, df_it.lat))
dfg_it.head()

Unnamed: 0,region,province,lat,long,total_cases,geometry
0,Abruzzo,Chieti,42.351032,14.167546,11460,POINT (14.16755 42.35103)
1,Abruzzo,In fase di definizione/aggiornamento,0.0,0.0,3,POINT (0.00000 0.00000)
2,Abruzzo,L'Aquila,42.351222,13.398438,5254,POINT (13.39844 42.35122)
3,Abruzzo,Pescara,42.464584,14.213648,24622,POINT (14.21365 42.46458)
4,Abruzzo,Teramo,42.658918,13.7044,14418,POINT (13.70440 42.65892)


In [72]:
#Create a geodataframe with geopandas
dfg_lom = gpd.GeoDataFrame(df_lom, geometry=gpd.points_from_xy(df_lom.long, df_lom.lat), crs='EPSG:4326')
dfg_lom.head()

Unnamed: 0,province,lat,long,total_cases,geometry
0,Bergamo,45.694414,9.668425,336319,POINT (9.66842 45.69441)
1,Brescia,45.539931,10.219103,326891,POINT (10.21910 45.53993)
2,Como,45.809991,9.08516,49589,POINT (9.08516 45.80999)
3,Cremona,45.133367,10.024209,159693,POINT (10.02421 45.13337)
4,In fase di definizione/aggiornamento,0.0,0.0,40322,POINT (0.00000 0.00000)


In [73]:
#Create a map
Map(Layer(dfg_it))

In [74]:
Map(size_continuous_layer(dfg_lom, 'total_cases'))

In [75]:
Map(size_continuous_layer(
        dfg_lom, 'total_cases',
        title='Número de casos totales',
        color='red',
        size=[5, 30]
    ))

In [86]:
Map(Layer(
        dfg_it,popup=Popup({
            'hover': {
                'title': 'Name',
                'value': '$province'
            },
            'click': [{
                'title': 'Name',
                'value': '$province'
            },
                {
                'title': 'Total cases',
                'value': '$total_cases'
            }]
        })
    )
    )

In [88]:
Map(size_continuous_layer(
        dfg_lom, 'total_cases',
        title='Número de casos totales',
        color='red',
        size=[5, 30],
        popup=Popup({
            'hover': {
                'title': 'Name',
                'value': '$province'
            },
            'click': [{
                'title': 'Name',
                'value': '$province'
            },
                {
                'title': 'Total cases',
                'value': '$total_cases'
            }]
        })
    ))