# Coronavirus COVID-19
 
According to the World Health Organization (https://www.who.int/health-topics/coronavirus), coronaviruses (CoV) are a large family of viruses that cause illness ranging from the common cold to more severe diseases such as Middle East Respiratory Syndrome (MERS-CoV) and Severe Acute Respiratory Syndrome (SARS-CoV). A novel coronavirus (nCoV) is a new strain that has not been previously identified in humans.  

Coronaviruses are zoonotic, meaning they are transmitted between animals and people.  Detailed investigations found that SARS-CoV was transmitted from civet cats to humans and MERS-CoV from dromedary camels to humans. Several known coronaviruses are circulating in animals that have not yet infected humans. 

Common signs of infection include respiratory symptoms, fever, cough, shortness of breath and breathing difficulties. In more severe cases, infection can cause pneumonia, severe acute respiratory syndrome, kidney failure and even death. 

Standard recommendations to prevent infection spread include regular hand washing, covering mouth and nose when coughing and sneezing, thoroughly cooking meat and eggs. Avoid close contact with anyone showing symptoms of respiratory illness such as coughing and sneezing.

I just want to provide some simple and informative plots about how the infection is spreading around the world.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.integrate import odeint

from plotly.offline import iplot, init_notebook_mode
import math
import bokeh 
import matplotlib.pyplot as plt
import plotly.express as px
#from urllib.request import urlopen
import json
from dateutil import parser
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import row, column
from bokeh.resources import INLINE
from bokeh.io import output_notebook
from bokeh.models import Span
import warnings
# Suppress every warning for the rest of the session so that library notices
# do not clutter the cell outputs.
warnings.filterwarnings("ignore")
# Tell Bokeh to render plots in the notebook, using the INLINE resources.
output_notebook(resources=INLINE)

In [None]:
# Load the country-code lookup table that is later merged with the case data.
country_codes = pd.read_csv('/kaggle/input/countrycodes/countrycodes.csv')
# The GDP column is not used. Drop it with the explicit ``columns=`` keyword:
# the positional axis argument (``drop(label, 1)``) was removed in pandas 2.0.
country_codes = country_codes.drop(columns='GDP (BILLIONS)')
# Rename the columns so the table can be merged on 'Country' and exposes 'Code'.
country_codes = country_codes.rename(columns={'COUNTRY': 'Country', 'CODE': 'Code'})

In [None]:
# Load the daily observations (one row per date and province/country).
virus_data = pd.read_csv('/kaggle/input/novel-corona-virus-2019-dataset/covid_19_data.csv')

# Hubei correction: every Hubei record with SNo below HUBEI_SNO_CUTOFF receives
# a cumulative offset of HUBEI_DAILY_OFFSET cases per record (1x for the first
# matching record, 2x for the second, and so on).
#
# The previous row-by-row loop started with ``first_time = False``, so its
# "first record" branch was unreachable: the first Hubei record was instead
# built on top of the unrelated ``Confirmed`` value stored at row 0, and that
# error was carried into every later Hubei record. The vectorised form below
# applies the intended offsets directly and no longer shadows bokeh's ``row``
# with a loop variable.
HUBEI_SNO_CUTOFF = 1342
HUBEI_DAILY_OFFSET = 593

hubei_mask = (virus_data['SNo'] < HUBEI_SNO_CUTOFF) & (virus_data['Province/State'] == 'Hubei')
virus_data.loc[hubei_mask, 'Confirmed'] += HUBEI_DAILY_OFFSET * np.arange(1, hubei_mask.sum() + 1)

virus_data = virus_data.rename(columns={'Country/Region': 'Country', 'ObservationDate': 'Date'})
virus_data = virus_data.fillna('unknow')

# Align country names with the spelling used by the country-code table.
# Replacements are literal substring replacements applied in this order;
# ``regex=False`` makes that explicit regardless of the pandas default.
COUNTRY_RENAMES = [
    ('US', 'United States'),
    ('UK', 'United Kingdom'),
    ('Mainland China', 'China'),
    ('South Korea', 'Korea, South'),
    ('North Korea', 'Korea, North'),
    ('Macau', 'China'),
    ('Ivory Coast', 'Cote d\'Ivoire'),
]
for old_name, new_name in COUNTRY_RENAMES:
    virus_data['Country'] = virus_data['Country'].str.replace(old_name, new_name, regex=False)

# Inner merge: rows whose country has no entry in ``country_codes`` are dropped.
virus_data = pd.merge(virus_data, country_codes, on=['Country'])
virus_data.head()

The plot in this [link](https://aatishb.com/covidtrends/?location=Australia&location=Canada&location=China&location=France&location=Germany&location=Hong+Kong&location=India&location=Iran&location=Italy&location=Japan&location=South+Korea&location=Spain&location=Turkey&location=US&location=United+Kingdom) shows a comparison between the total number of cases and the number of active cases for some of the most affected countries. This is very useful for tracking whether a country is winning against the virus. The dotted line represents exponential growth, which means that countries that follow this line are not yet out of the emergency. 

The world map below shows the confirmed cases to date, according to the provided dataset.

In [None]:
import plotly.graph_objects as go

# Select the most recent observation date by parsing the dates and taking the
# maximum, instead of trusting that the last row of the merged frame happens
# to carry the latest date.
observation_dates = pd.to_datetime(virus_data['Date'].str[:10])
total_confirmed = virus_data.loc[observation_dates == observation_dates.max()]

# One row per country: sum the confirmed cases over its provinces.
confirmed_by_country = total_confirmed.groupby(['Code','Country'])['Confirmed'].sum().reset_index()

fig = go.Figure(data=go.Choropleth(
    locations = confirmed_by_country['Code'],
    z = confirmed_by_country['Confirmed'],
    text = confirmed_by_country['Country'],
    colorscale = 'Viridis',
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title = 'N° cases',
))
fig.update_layout(
    title_text='Total confirmed Coronavirus cases')

fig.show()

In [None]:
# Parse the dates BEFORE grouping so the groups (and therefore the animation
# frames) are in chronological order. Grouping on the raw date strings sorts
# them lexicographically, which misorders the frames once the data spans more
# than one calendar year (assuming month-first date strings — the format this
# cell itself emits below).
formated_gdf = virus_data.assign(Date=pd.to_datetime(virus_data['Date']))
formated_gdf = formated_gdf.groupby(['Date', 'Country'])['Confirmed'].max().reset_index()
formated_gdf['Date'] = formated_gdf['Date'].dt.strftime('%m/%d/%Y')
# Compress the dynamic range so that small outbreaks remain visible as markers.
formated_gdf['size'] = formated_gdf['Confirmed'].pow(0.3)

fig = px.scatter_geo(formated_gdf, locations="Country", locationmode='country names',
                     color="Confirmed", size='size', hover_name="Country",
                     range_color=[0, formated_gdf['Confirmed'].max() + 2],
                     animation_frame="Date",
                     title='Spread over time')
fig.update(layout_coloraxis_showscale=False)
fig.show()

The plot below shows the change in total (%) of new deaths day by day, i.e. the share of all deaths recorded so far that was added on each day. The formula used is:

$\frac{D_i - D_{i-1}}{D_i}$, where $D_i$ is the total number of deaths up to the $i$-th day.

In [None]:
# World-wide cumulative totals per observation date. The columns are selected
# with a list: tuple-style selection (``gb['A', 'B']``) on a GroupBy was
# removed in pandas 2.0.
deaths_inf = virus_data.groupby(['Date'])[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

# New deaths per day; the first entry is the cumulative value of the first date.
cumulative_deaths = deaths_inf['Deaths'].to_numpy()
daily_deaths = np.diff(cumulative_deaths, prepend=0)
deaths_day = list(daily_deaths)

# Share of the running total contributed by each day's new deaths.
deaths_growth = list(daily_deaths / cumulative_deaths)

# x axis: keep only the first 10 characters of each date string, then parse.
datetime = pd.to_datetime([date_text[0:10] for date_text in deaths_inf['Date']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Change in total Deaths of Novel Coronavirus (2019-nCoV)")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Change in total (%)'

p1.line(datetime, deaths_growth, color='#2874A6',
        legend_label='Growth factor', line_width=1.5)
p1.circle(datetime, deaths_growth, fill_color="black", size=5)

p1.legend.location = 'top_right'

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

According to scientists, it is very difficult to estimate the death rate of a novel virus while the infection is still spreading. Computing the death rate as the number of deaths divided by the number of cases is also not precise. The deaths counted today are not really current: they belong rather to the past. For this reason, if we divide the deaths by the cases, we are mixing two different groups of patients: those who died (infected in the past) and those infected (in the present, who do not yet know whether they will recover or die).


[This study](https://academic.oup.com/aje/article/162/5/479/82647) proposes a different way to estimate the death rate more precisely. Denoting the time with $t$, and the deaths and recovered at time $t$ with $D(t)$ and $R(t)$, the death rate $DR(t)$ is given by:

$ DR(t) = \frac{D(t)}{D(t)+R(t)} $,

>  which implicitly assumes that the case fatality ratio for those who remain in the hospital will be similar to that for those whose outcome is known. 

In [None]:
# World-wide cumulative totals per date (list-style column selection: the
# tuple form was removed from GroupBy in pandas 2.0).
dr_countries = virus_data.groupby(['Date'])[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

# Sampling step (in days) used when plotting the daily series below.
period = 7

# Materialise the columns once instead of rebuilding a list on every iteration.
recovered_total = dr_countries['Recovered'].tolist()
deaths_total = dr_countries['Deaths'].tolist()

# Daily death rate: new deaths / (new recovered + new deaths).
death_rate = []
for i in range(1, len(dr_countries)):
    recover = recovered_total[i] - recovered_total[i-1]
    death = deaths_total[i] - deaths_total[i-1]
    closed_cases = recover + death
    # Avoid dividing by zero on days without any closed case.
    death_rate.append(death / (closed_cases if closed_cases != 0 else 1))


p1 = figure(plot_width=600, plot_height=500, title="Death rate of Covid-19 with the novel formula")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Days'
p1.yaxis.axis_label = 'Death Rate (%)'

# Plot one sample every ``period`` days.
sample_days = np.arange(1, len(death_rate) + 1, period)
sampled_rate = death_rate[::period]

# The legend previously read 'Growth factor', copied from another plot.
p1.line(sample_days, sampled_rate, color='#2874A6',
        legend_label='Death rate', line_width=1.5)
p1.circle(sample_days, sampled_rate, fill_color="black", size=5)

p1.legend.location = 'top_right'

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

Keep in mind that many factors differ from country to country and drastically affect this rough measure.

The growth factor of infections is given by the number of new infections in a day divided by the number of new infections on the previous day. This ratio is useful for estimating how the growth of the virus is behaving. In particular, since pure exponential curves do not exist in real-world cases, we model the virus with the logistic curve. This is quite intuitive, as the population is limited. 

![](https://cdn.britannica.com/39/150639-050-C37A33AA/environment-populations-rate-growth-curve-competition-resources.jpg)

The inflection point of the logistic function is the point where the growth stops accelerating and starts slowing down. This point is intimately related to the growth factor: as long as the growth factor is greater than 1, we have yet to reach the inflection point. When the growth factor is equal to 1 the inflection point has been reached, and from then on the number of new cases will start decreasing. 

If we look at the new cases day by day, it is possible to see that there is an outlier on 12 February. According to [Worldometers](https://www.worldometers.info/coronavirus/how-to-interpret-feb-12-case-surge/): 
> China reported 15,152 new cases for February 12, in an apparent 600% surge over the preceding day. However, this spike does not represent a change in the trend. In fact, 13,332 of these new cases are clinically (rather than laboratory) confirmed cases, reported for the first time as an effect of a change in how cases are diagnosed and reported in Hubei province starting on February 12. Previously, these cases were counted as “probable” or “suspected” cases. Of the 15,152 new cases reported, only 1,820 are new laboratory confirmed cases (1,508 in Hubei province and 312 elsewhere in China).

This means that we need to change the data up to that day, as the time series of this dataset is wrong. The outlier is split equally among the previous Hubei records.
A growth factor which is constantly above 1 implies an exponential growth.

In [None]:
# Daily new confirmed cases, from the cumulative world-wide series computed in
# an earlier cell (``deaths_inf``). The column is materialised once rather
# than being rebuilt as a list on every loop iteration.
confirmed_total = deaths_inf['Confirmed'].tolist()
new_cases = [today - yesterday
             for yesterday, today in zip(confirmed_total, confirmed_total[1:])]

# Growth factor: new cases of a day divided by the new cases of the day before.
growth_factor = [current / previous
                 for previous, current in zip(new_cases, new_cases[1:])]

# growth_factor[k] describes day k + 2 (two differencing steps), so the first
# two dates have no value. Slice the x axis to match: previously the full date
# index was passed, giving x and y columns of different lengths.
growth_dates = datetime[2:]

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime",
            title="Infection growth factor of COVID-19",
            y_range=(0, 4))
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Factor'

p1.line(growth_dates, growth_factor, color='#8B4513',
        legend_label='Growth Factor', line_width=1.5)
p1.circle(growth_dates, growth_factor, fill_color="black", size=5)

# Reference line at 1.
hline = Span(location=1, dimension='width', line_color='red', line_width=1)

p1.legend.location = 'top_right'

output_file("coronavirus.html", title="coronavirus.py")

# Annotations such as Span are attached with add_layout().
p1.add_layout(hline)
show(p1)

It is important to notice that the growth factor does not decrease only because the number of infected people is saturating: it can also decrease if everyone tries to avoid contact with other people, and it depends on the probability of infecting someone. 
When there is a new virus, only the people who have contracted it can infect other people, hence it is possible to model the spread of the virus with a rough and simple model.

Given the average number of contacts **x** and the probability of infecting someone **y**, it is possible to have a rough measure of the number of confirmed cases after **n** days as:

$ \Delta N_d = (x * y)^n * N_d  $

If the growth is exponential, we expect that each new day the number of new cases is multiplied by a constant.

The plot below, shows the total number of confirmed cases, recovered and deaths all over the world.

In [None]:
# World-wide totals per observation date, one frame per metric.
by_date = virus_data.groupby('Date')
time_confirmed = by_date['Confirmed'].sum().reset_index()
time_deaths = by_date['Deaths'].sum().reset_index()
time_recovered = by_date['Recovered'].sum().reset_index()

# x axis: first 10 characters of each date string, parsed to timestamps.
datetime = pd.to_datetime([stamp[0:10] for stamp in time_confirmed['Date']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Coronavirus infection")
p1.grid.grid_line_alpha = 0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

# (values, line colour, legend entry) for every curve, in drawing order.
world_curves = (
    (time_confirmed['Confirmed'], '#D1BB33', 'Confirmed cases'),
    (time_deaths['Deaths'], '#D1472A', 'Deaths'),
    (time_recovered['Recovered'], '#33A02C', 'Recovered'),
)
for values, line_color, label in world_curves:
    p1.line(datetime, values, color=line_color, legend_label=label, line_width=1.5)
    p1.circle(datetime, values, fill_color="white", size=1.5)

p1.legend.location = "top_left"

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

The plot below, shows the total number of confirmed cases, recovered and deaths in China and Italy, which is my country.

In [None]:
# Totals per (date, country code), one frame per metric.
by_date_and_code = virus_data.groupby(['Date','Code'])
confirmed_state = by_date_and_code['Confirmed'].sum().reset_index()
deaths_state = by_date_and_code['Deaths'].sum().reset_index()
recovered_state = by_date_and_code['Recovered'].sum().reset_index()

# Keep only the rows for China (code 'CHN').
confirmed_china = confirmed_state[confirmed_state['Code'] == 'CHN']
deaths_china = deaths_state[deaths_state['Code'] == 'CHN']
recovered_china = recovered_state[recovered_state['Code'] == 'CHN']

# x axis: first 10 characters of each date string, parsed to timestamps.
datetime = pd.to_datetime([stamp[0:10] for stamp in confirmed_china['Date']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="China Coronavirus infection")
p1.grid.grid_line_alpha = 0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

# (values, line colour, legend entry) for every curve, in drawing order.
china_curves = (
    (confirmed_china['Confirmed'], '#D1BB33', 'Confirmed cases'),
    (deaths_china['Deaths'], '#D1472A', 'Deaths'),
    (recovered_china['Recovered'], '#33A02C', 'Recovered'),
)
for values, line_color, label in china_curves:
    p1.line(datetime, values, color=line_color, legend_label=label, line_width=1.5)
    p1.circle(datetime, values, fill_color="white", size=1.5)

p1.legend.location = "top_left"

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

In [None]:
# Confirmed cases everywhere except China, summed per date.
outside_china = confirmed_state[confirmed_state['Code'] != 'CHN']
confirmed_no_china = outside_china.groupby('Date')['Confirmed'].sum().reset_index()

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Coronavirus infection cases")
p1.grid.grid_line_alpha = 0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

# Both curves share the ``datetime`` axis left behind by the previous cell.
comparison_curves = (
    (confirmed_china['Confirmed'], '#D1BB33', 'China'),
    (confirmed_no_china['Confirmed'], '#D1472A', 'Rest of the World'),
)
for values, line_color, label in comparison_curves:
    p1.line(datetime, values, color=line_color, legend_label=label, line_width=1.5)
    p1.circle(datetime, values, fill_color="white", size=1.5)

p1.legend.location = "top_left"

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

## Coronavirus in Italy 

Let's look in more detail at the situation in Italy, region by region ("Regione") from North to South. The histogram below shows the total number of cases for each region.

In [None]:
from bokeh.io import show, output_file
from bokeh.plotting import figure

# Per-province data published by the Italian Civil Protection department repo.
regioni_ita = pd.read_csv('https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv')

# Rows sharing the timestamp of the last record, summed per region and sorted
# from the most to the least affected region.
latest_stamp = regioni_ita['data'].iloc[-1]
last_regioni_ita = regioni_ita[regioni_ita['data'] == latest_stamp]
tot_regioni_ita = (
    last_regioni_ita.groupby(['denominazione_regione'])['totale_casi']
    .sum()
    .reset_index()
)
tot_regioni_ita = tot_regioni_ita.sort_values('totale_casi', ascending=False)

output_file("bar_stacked.html")

# Inputs of the (single-series) stacked bar chart.
regioni = tot_regioni_ita['denominazione_regione']
infection = ["N° Confirmed"]
colors = ["#CD6155"]
data = {
    'countries': regioni,
    'N° Confirmed': tot_regioni_ita['totale_casi'],
}

p = figure(x_range=regioni, plot_height=500, plot_width=700,
           title="COVID-19 infection for Regioni Italiane",
           toolbar_location=None, tools="hover", tooltips="$name @countries: @$name")

p.vbar_stack(infection, x='countries', width=0.9, color=colors, source=data,
             legend_label=infection)

# Axes and frame cosmetics.
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = math.pi/2
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None

# Legend placement.
p.legend.location = "top_right"
p.legend.orientation = "horizontal"

output_file("coronavirus.html", title="coronavirus.py")

show(p)

And let's plot the Covid-19 spread day by day from North to South

In [None]:
# Total cases per (timestamp, region).
day_regioni_ita = regioni_ita.groupby(['data','denominazione_regione'])['totale_casi'].sum().reset_index()


def _daily_cases_for(region_name):
    """Return the rows of ``day_regioni_ita`` that belong to ``region_name``."""
    return day_regioni_ita.loc[day_regioni_ita['denominazione_regione'] == region_name]


# Northern regions.
day_veneto = _daily_cases_for('Veneto')
day_friuli = _daily_cases_for('Friuli Venezia Giulia')
day_piemonte = _daily_cases_for('Piemonte')
day_lombardia = _daily_cases_for('Lombardia')
day_emilia = _daily_cases_for('Emilia-Romagna')
day_liguria = _daily_cases_for('Liguria')
day_aosta = _daily_cases_for('Valle d\'Aosta')

# Central regions.
day_toscana = _daily_cases_for('Toscana')
day_abruzzo = _daily_cases_for('Abruzzo')
day_marche = _daily_cases_for('Marche')
day_lazio = _daily_cases_for('Lazio')
day_umbria = _daily_cases_for('Umbria')

# Southern regions and islands.
day_basilicata = _daily_cases_for('Basilicata')
day_calabria = _daily_cases_for('Calabria')
day_molise = _daily_cases_for('Molise')
day_puglia = _daily_cases_for('Puglia')
day_campania = _daily_cases_for('Campania')
day_sicilia = _daily_cases_for('Sicilia')
day_sardegna = _daily_cases_for('Sardegna')

# Legend entries, listed in the same order in which the curves are drawn.
north_legend = ['Veneto','Friuli Venezia Giulia','Piemonte','Lombardia','Emilia Romagna',
               'Liguria','Valle d\'Aosta']

center_legend = ['Toscana','Abruzzo','Marche','Lazio','Umbria']

south_legend = ['Basilicata','Calabria','Molise','Puglia','Campania','Sicilia','Sardegna']

plt.figure(figsize=(15,10))

# The x axis is shared by every region: the dates of the Veneto series,
# truncated to their first 10 characters.
data = list(day_veneto['data'])
days = [stamp[:10] for stamp in data]

sns.set()

curves_in_legend_order = [
    day_veneto, day_friuli, day_piemonte, day_lombardia, day_emilia,
    day_liguria, day_aosta,
    day_toscana, day_abruzzo, day_marche, day_lazio, day_umbria,
    day_basilicata, day_calabria, day_molise, day_puglia, day_campania,
    day_sicilia, day_sardegna,
]
for region_frame in curves_in_legend_order:
    plt.plot(days, region_frame['totale_casi'], marker='o', ms=3)

plt.ylabel('Number of cases')
plt.xlabel('Date')
plt.xticks(rotation=70)
plt.legend(north_legend + center_legend + south_legend)
plt.show()

In [None]:
# Load the national daily series, skipping malformed lines instead of failing.
# ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0 in favour
# of ``on_bad_lines``; older releases reject the new keyword with a TypeError,
# so fall back to the legacy spelling there.
try:
    nazionale_ita = pd.read_csv('/kaggle/input/nazionale/dpc-covid19-ita-andamento-nazionale.csv', on_bad_lines='skip')
except TypeError:
    nazionale_ita = pd.read_csv('/kaggle/input/nazionale/dpc-covid19-ita-andamento-nazionale.csv', error_bad_lines=False)
nazionale_ita.head()

In [None]:
# Daily new cases in Italy: difference between consecutive cumulative totals.
# The column is converted to a list once; the previous version rebuilt the
# whole list twice per iteration (quadratic in the number of days).
total_cases_ita = nazionale_ita['totale_casi'].tolist()
daybyday_cases = [today - yesterday
                  for yesterday, today in zip(total_cases_ita, total_cases_ita[1:])]

In general the situation day by day in Italy is the following:

In [None]:
# National totals per timestamp, one frame per metric.
by_day_ita = nazionale_ita.groupby(['data'])
confirmed_ita = by_day_ita['totale_casi'].sum().reset_index()
deaths_ita = by_day_ita['deceduti'].sum().reset_index()
recovered_ita = by_day_ita['dimessi_guariti'].sum().reset_index()

# x axis: first 10 characters of each timestamp string, parsed to timestamps.
datetime = pd.to_datetime([stamp[0:10] for stamp in nazionale_ita['data']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Coronavirus infection in Italy")
p1.grid.grid_line_alpha = 0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

# (values, line colour, legend entry) for every curve, in drawing order.
italy_curves = (
    (confirmed_ita['totale_casi'], '#C0392B', 'Confirmed cases'),
    (deaths_ita['deceduti'], '#5DADE2', 'Deaths'),
    (recovered_ita['dimessi_guariti'], '#E67E22', 'Recovered'),
)
for values, line_color, label in italy_curves:
    p1.line(datetime, values, color=line_color, legend_label=label, line_width=1.5)
    p1.circle(datetime, values, fill_color="white", size=2)

p1.legend.location = "top_left"

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

In [None]:
import scipy.stats


def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and its Student-t confidence interval.

    Args:
        data: Sequence of numeric observations.
        confidence: Confidence level of the interval (default 0.95).

    Returns:
        A tuple ``(mean, lower, upper)``. With fewer than two observations
        the standard error is undefined, so both bounds are NaN; with no
        observations at all the mean is NaN as well.
    """
    a = 1.0 * np.array(data)
    n = len(a)
    # Handle tiny samples explicitly instead of relying on NaNs (and runtime
    # warnings) propagating out of np.mean / scipy.stats.sem.
    if n == 0:
        return np.nan, np.nan, np.nan
    m = np.mean(a)
    if n == 1:
        return m, np.nan, np.nan
    se = scipy.stats.sem(a)
    # Half-width of the interval: standard error times the t quantile with
    # n - 1 degrees of freedom.
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h


# Running mean of the growth factor with its 95% confidence band.
mean_hm_list = []
mean_hp_list = []
mean_list = []

# Skip the first ``start`` growth-factor values.
start = 7
growth_factor_s = growth_factor[start:]

for i in range(len(growth_factor_s)):
    # Use the first i + 1 samples. The previous slice ``[:i]`` was off by one:
    # it started from an empty sample and never included the latest value.
    mean, mean_hm, mean_hp = mean_confidence_interval(growth_factor_s[:i + 1], 0.95)
    mean_hm_list.append(mean_hm)
    mean_hp_list.append(mean_hp)
    mean_list.append(mean)

mean_hm_list = np.asarray(mean_hm_list)
mean_hp_list = np.asarray(mean_hp_list)
mean_list = np.asarray(mean_list)

mean_range = list(range(len(mean_list)))

plt.figure(figsize=(10,8))
plt.plot(mean_list)
plt.fill_between(mean_range, mean_hm_list, mean_hp_list,
                 facecolor='b', alpha=0.4, edgecolor='#8F94CC',
                 linewidth=2, linestyle='dashed')

# NOTE(review): the series plotted here is ``growth_factor`` as left behind by
# an earlier cell; confirm that it matches the "R0 in Italy" wording below.
plt.title("95% - Confidence intervals for the mean R0 in Italy")
plt.legend(["Average R0","CI (95%)"])
plt.ylim(0,3)
plt.show()


The graph below shows how the situation of people with Covid-19 varies over time. In particular, we analyze the number of people in intensive care, the number of people hospitalized with symptoms and the number of people who have contracted the virus but are in home isolation. The blue line represents the maximum number of intensive care places in Italy. However, most of the cases are clustered in the North, hence this blue line does not help to understand how critical the situation is in some specific hospitals.

In [None]:
# National totals per timestamp for each patient condition.
by_day_ita = nazionale_ita.groupby(['data'])
hospital_ita = by_day_ita['ricoverati_con_sintomi'].sum().reset_index()
intensive_ita = by_day_ita['terapia_intensiva'].sum().reset_index()
home_ita = by_day_ita['isolamento_domiciliare'].sum().reset_index()

# x axis: first 10 characters of each timestamp string, parsed to timestamps.
datetime = pd.to_datetime([stamp[0:10] for stamp in nazionale_ita['data']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Conditions of the infected")
p1.grid.grid_line_alpha = 0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

# (values, line colour, legend entry) for every curve, in drawing order.
condition_curves = (
    (hospital_ita['ricoverati_con_sintomi'], '#C0392B', 'Mild conditions'),
    (intensive_ita['terapia_intensiva'], '#5DADE2', 'Serious or Critical'),
    (home_ita['isolamento_domiciliare'], '#E67E22', 'Home isolation'),
)
for values, line_color, label in condition_curves:
    p1.line(datetime, values, color=line_color, legend_label=label, line_width=1.5)
    p1.circle(datetime, values, fill_color="white", size=2)

p1.legend.location = "top_left"

output_file("coronavirus.html", title="coronavirus.py")


show(p1)

In [None]:
# Cumulative deaths in Italy.
deaths = nazionale_ita['deceduti']

# Relative day-over-day growth: (D[i] - D[i-1]) / D[i-1]. Computed with NumPy
# arrays so that a zero denominator yields inf/nan rather than an exception.
cumulative_deaths_ita = deaths.to_numpy()
death_rate = list(np.diff(cumulative_deaths_ita) / cumulative_deaths_ita[:-1])

# Dates taken from the Veneto regional series. They are not used by the plot
# below but are kept because ``datetime`` is a notebook-level name.
data = list(day_veneto['data'])
datetime = pd.to_datetime([stamp[:10] for stamp in data])

# A first figure used to be created and configured here and was then
# immediately replaced by the one below; that dead code has been removed.
p1 = figure(plot_width=600, plot_height=500, title="Death growth % day by day")
day_index = np.arange(0, len(death_rate), 1)
p1.line(day_index, death_rate, color='#C0392B',
        legend_label='Death Rate', line_width=1.5)
p1.circle(day_index, death_rate, fill_color="white", size=5)
p1.xaxis.axis_label = 'Number of days'
p1.yaxis.axis_label = 'Rate'

p1.legend.location = "top_left"

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

It is worth mentioning that you may think that the death rate of Covid-19 in Italy is very high. However, you may be misled by a statistical problem known as Simpson's paradox. If we look at the trends of the number of deaths for sets of the population grouped by age and we compare these sets with the same sets from China, it is possible to notice that Italy has a lower death rate than China for each set; however, if you just look at the trend of the death rate without grouping people by age, the trend is reversed. To get precise percentages for each group you need to manually check the data every day and modify the plots accordingly.

Let's look at the number of new cases in Italy compared with the number of medical swabs done every day, just to see if there are some interesting results:

In [None]:
# Swabs performed per day: difference between consecutive cumulative totals.
# The column is converted to a list once; the previous version rebuilt the
# whole list twice per iteration (quadratic in the number of days).
total_swabs = nazionale_ita['tamponi'].tolist()
day_swabs = [today - yesterday
             for yesterday, today in zip(total_swabs, total_swabs[1:])]

def running_mean(x, N):
    """Return the moving average of ``x`` over windows of ``N`` samples.

    The result has ``len(x) - N + 1`` entries, one per full window.
    """
    # Prefix sums with a leading zero: the sum of a window is the difference
    # between the prefix sums at its two ends.
    prefix_sums = np.insert(x, 0, 0).cumsum()
    window_totals = prefix_sums[N:] - prefix_sums[:-N]
    return window_totals / float(N)
# Draw daily swabs and daily new cases as filled areas down to y = 0.
plt.figure(figsize=(10,8))

swab_days = np.arange(0, len(day_swabs))
case_days = np.arange(0, len(daybyday_cases))

# Keyword order is kept as it was, in case matplotlib applies the colour
# properties in the order given.
plt.fill_between(swab_days, day_swabs, 0,
                 facecolor="orange", color='blue', alpha=0.5)
plt.fill_between(case_days, daybyday_cases, 0,
                 facecolor="orange", color='orange', alpha=0.5)

plt.legend(['Swabs','New cases'],loc=2)
# One tick every second day.
plt.xticks(np.arange(0, len(daybyday_cases), 2))
plt.xlabel('Days')
plt.ylabel('Number')
plt.show()

## Further analysis for Covid-19 in the world

In [None]:
# World-wide cumulative totals per date. The columns are selected with a
# list: tuple-style selection on a GroupBy was removed in pandas 2.0.
active_cases = virus_data.groupby(['Date'])[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
active_cases['Co-Deaths'] = active_cases['Confirmed'] - active_cases['Deaths']
active_cases['Co-Recov'] = active_cases['Confirmed'] - active_cases['Recovered']
# Still-open cases: confirmed minus both closed outcomes.
active_cases['Active'] = active_cases['Confirmed'] - active_cases['Deaths'] - active_cases['Recovered']

# x axis: first 10 characters of each date string, parsed to timestamps.
datetime = pd.to_datetime([stamp[0:10] for stamp in active_cases['Date']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Active cases of COVID-19 (Confirmed - Recovered - Deaths)")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Active cases'

p1.line(datetime, active_cases['Active'], color='#8B4513',
        legend_label='Sick people without counting deaths & recovered', line_width=1.5)
p1.circle(datetime, active_cases['Active'], fill_color="black", size=1.5)

p1.legend.location = 'bottom_right'

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

Death rate over time. The normalized histogram below the plot shows the number of infections, which is useful for tracking the relation between the number of cases and the number of deaths.

In [None]:
# World-wide totals per report date.
# The columns are selected with a list: selecting several GroupBy columns with
# a bare tuple (`[...]['A', 'B']`) is rejected by current pandas releases.
mortality_rate = virus_data.groupby(['Date'])[['Confirmed', 'Deaths']].sum().reset_index()
# 'Rate' is stored as a fraction (deaths / confirmed), not as a percentage.
mortality_rate['Rate'] = mortality_rate['Deaths'] / mortality_rate['Confirmed']
# Confirmed cases rescaled so that the largest value equals 1.
mortality_rate['Infection'] = mortality_rate['Confirmed'] / mortality_rate['Confirmed'].max()

# Keep the first 10 characters of every date string and parse them for the
# datetime x-axis.
datetime = pd.to_datetime([elm[0:10] for elm in mortality_rate['Date']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Mortality rate of COVID-19")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
# These are the labels the original cell ended up with; its earlier
# 'Date' / 'Mortality (%)' assignments were overwritten before rendering.
p1.xaxis.axis_label = 'Infections over time'
p1.yaxis.axis_label = 'Deaths (%)'

# The y-axis is labelled in percent, so the fraction is scaled at plot time.
# The stored 'Rate' column is left untouched for the other cells that read it.
mortality_percent = mortality_rate['Rate'] * 100
p1.line(datetime, mortality_percent, color='#900C3F', 
        legend_label='Mortality Rate', line_width=1.5)
p1.circle(datetime, mortality_percent, fill_color="black", size=1.5)

# Companion bar chart of the normalized confirmed cases, one bar per date.
p2 = figure(plot_width=600, plot_height=200, title="Normalized histogram of infections")
p2.vbar(x=np.arange(0,len(datetime),1), top=mortality_rate['Infection'], 
        width=0.7, bottom=0, color="firebrick")
p2.xaxis.visible = False

p1.legend.location = 'bottom_right'

output_file("coronavirus.html", title="coronavirus.py")

show(column(p1,p2))

Recovery rate over time. The normalized histogram below the plot shows the number of infections, which is useful for tracking the relation between the number of cases and the number of recovered people.

In [None]:
# World-wide totals per report date.
# The columns are selected with a list: selecting several GroupBy columns with
# a bare tuple (`[...]['A', 'B']`) is rejected by current pandas releases.
healed_rate = virus_data.groupby(['Date'])[['Confirmed', 'Recovered']].sum().reset_index()
# 'Rate' is stored as a fraction (recovered / confirmed), not as a percentage.
healed_rate['Rate'] = healed_rate['Recovered'] / healed_rate['Confirmed']
# Confirmed cases rescaled so that the largest value equals 1.
healed_rate['Infection'] = healed_rate['Confirmed'] / healed_rate['Confirmed'].max()

# Dates are taken from this cell's own frame; the original read them from
# `mortality_rate`, which made the cell depend on another cell having run.
datetime = pd.to_datetime([elm[0:10] for elm in healed_rate['Date']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", 
            title="Recovered people rate of COVID-19")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
# These are the labels the original cell ended up with; its earlier 'Date'
# x-label was overwritten before rendering.
p1.xaxis.axis_label = 'Infections over time'
p1.yaxis.axis_label = 'Recovered (%)'

# The y-axis is labelled in percent, so the fraction is scaled at plot time.
# The stored 'Rate' column is left untouched for the other cells that read it.
healed_percent = healed_rate['Rate'] * 100
p1.line(datetime, healed_percent, color='#498748', 
        legend_label='Recovered people Rate', line_width=1.5)
p1.circle(datetime, healed_percent, fill_color="black", size=1.5)

# Companion bar chart of the normalized confirmed cases, one bar per date.
p2 = figure(plot_width=600, plot_height=200, title="Normalized histogram of infections")
p2.vbar(x=np.arange(0,len(datetime),1), top=healed_rate['Infection'], 
        width=0.7, bottom=0, color="firebrick")
p2.xaxis.visible = False

p1.legend.location = 'top_left'

output_file("coronavirus.html", title="coronavirus.py")

show(column(p1,p2))

A comparison between the mortality rate and the recovery rate over time is provided below.

In [None]:
# Compare the mortality and recovery rates on one date axis.
# Relies on `mortality_rate` and `healed_rate` built by the previous cells,
# whose 'Rate' columns hold fractions.
datetime = pd.to_datetime([elm[0:10] for elm in mortality_rate['Date']])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", 
            title="Recovered / Deaths people rate of COVID-19")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Percentual'

# The y-axis is labelled as a percentage, so both fractions are scaled by 100
# at plot time.
for rate, color, label in (
    (mortality_rate['Rate'] * 100, '#900C3F', 'Mortality Rate'),
    (healed_rate['Rate'] * 100, '#498748', 'Recovered people Rate'),
):
    p1.line(datetime, rate, color=color, legend_label=label, line_width=1.5)
    p1.circle(datetime, rate, fill_color="black", size=1.5)

p1.legend.location = 'top_left'

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

Logarithmic scale of the Coronavirus spreading among the top 50 infected countries sorted for confirmed cases in descending order

## DISCLAIMER
### This part has not been done following detailed statistical analyses for now and therefore has no scientific value.
Analysis with the prophet library of Facebook for Time-Series data

In [None]:
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot
# `go.Figure` is used below; imported here so the cell does not depend on an
# import made elsewhere in the notebook.
import plotly.graph_objects as go

# Number of days forecast beyond the last observed date.
forecast_days = 7

# World-wide confirmed cases per report date.
fb_virus_data = virus_data.groupby('Date')['Confirmed'].sum().reset_index()
# Prophet requires columns ds (Date) and y (value)
fb_confirmed = fb_virus_data[["Date","Confirmed"]].rename(columns={'Date': 'ds', 'Confirmed': 'y'})
# Candidate values for Prophet's changepoint_prior_scale; the model below uses 0.15.
changepoint_prior_scale = [0.05,0.1,0.15,0.2,0.25]

# Make the prophet model and fit on the data
model = Prophet(seasonality_mode = 'additive', changepoint_prior_scale=0.15)
model.fit(fb_confirmed)
future = model.make_future_dataframe(periods=forecast_days)
forecast = model.predict(future)

# Rearrange each date string into "<chars 6..>-<chars 0..1>-<chars 3..4>"
# (presumably MM/DD/YYYY -> YYYY-MM-DD; verify against the dataset).
dates = [elm[6:] + '-' + elm[:2] + '-' + elm[3:5] for elm in fb_confirmed.ds]


# Observed values.
trace1 = {
  "fill": None, 
  "mode": "markers",
  "marker_size": 10,
  "name": "n° of Confirmed", 
  "type": "scatter", 
  "x": dates, 
  "y": fb_confirmed.y
}
# Upper bound of the forecast interval.
trace2 = {
  "fill": "tonexty", 
  "line": {"color": "#57b8ff"}, 
  "mode": "lines", 
  "name": "upper_band", 
  "type": "scatter", 
  "x": forecast.ds, 
  "y": forecast.yhat_upper
}
# Lower bound of the forecast interval.
trace3 = {
  "fill": "tonexty", 
  "line": {"color": "#57b8ff"}, 
  "mode": "lines", 
  "name": "lower_band", 
  "type": "scatter", 
  "x": forecast.ds, 
  "y": forecast.yhat_lower
}
# Point forecast.
trace4 = {
  "line": {"color": "#eb0e0e"}, 
  "mode": "lines+markers",
  "marker_size": 4,
  "name": "prediction", 
  "type": "scatter", 
  "x": forecast.ds, 
  "y": forecast.yhat
}
data = [trace1, trace2, trace3, trace4]
layout = {
  "title": "Confirmed cases - Time Series Forecast", 
  "xaxis": {
    "title": "Daily Dates", 
    "ticklen": 5, 
    "gridcolor": "rgb(255, 255, 255)", 
    "gridwidth": 2, 
    "zerolinewidth": 1
  }, 
  "yaxis": {
    "title": "Confirmed cases", 
    "ticklen": 5, 
    "gridcolor": "rgb(255, 255, 255)", 
    "gridwidth": 2, 
    "zerolinewidth": 1
  }, 
}
fig = go.Figure(data=data, layout=layout)
iplot(fig)

# In-sample predictions only: the first len(fb_confirmed) rows of the forecast
# are the historical dates, the remaining `forecast_days` rows are the future.
yhat = list(forecast['yhat'][:len(fb_confirmed)])
y = list(fb_virus_data['Confirmed'])

def mape(y_true, y_pred):
    """Return the mean absolute percentage error between two sequences.

    Both arguments are converted to NumPy arrays; the result is
    mean(|(y_true - y_pred) / y_true|) * 100. Entries of `y_true` equal to
    zero are not special-cased.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

print('Mean absolute percentage error: ', mape(y,yhat))

## **Susceptible - Infected - Recovered Model (SIR) for Italy**

The SIR model is an epidemiological model that computes the theoretical number of people infected with a contagious illness in a closed population over time. The model's assumptions for this notebook are the following:

* Constant (closed) population over time
* Births and deaths are not counted
* The population is mixed (However if you consider some cluster, you can simplify the view by a fractal point of view where small subsets of the cluster moves from one cluster to another, even if in reality there are some restricted zones where no one can exit)


According to [a study from Stanford University](https://web.stanford.edu/~jhj1/teachingdocs/Jones-on-R0.pdf), the basic reproduction number, R0, is defined as the expected number of secondary cases
produced by a single (typical) infection in a completely susceptible population. It is important
to note that R0 is a dimensionless number and not a rate, which would have units of time.
Some authors incorrectly call R0 the “basic reproductive rate.” R0 is defined as:

$ R_0 = \tau * \hat c * d $, 

where $\tau$ is the transmissibility (i.e., probability of infection given contact between a susceptible and infected individual), $\hat c$ is the average rate of contact between susceptible and infected
individuals, and d is the duration of infectiousness. d can be obtained as $\frac{1}{v}$ where $v$ is the recovering rate of a specific population, in our case it is equal to:

In [None]:
# Day-over-day relative increase of the cumulative 'dimessi_guariti' counter.
# The column is turned into a list once, instead of twice per loop iteration.
cumulative_recovered = list(recovered_ita['dimessi_guariti'])

recovered_rate = []
for previous, current in zip(cumulative_recovered, cumulative_recovered[1:]):
    if previous == 0 and current == 0:
        # Nothing recorded yet on either day.
        recovered_rate.append(0)
    elif previous == 0:
        # First non-zero day: the original computed x/x here, i.e. a rate of 1.
        recovered_rate.append(current / current)
    else:
        # Increase expressed relative to the current day's total.
        recovered_rate.append((current - previous) / current)

print('Recovered rate in Italy: ', np.mean(recovered_rate))

The effective contact rate $\beta$ is equal to $\tau * \hat c$. Since $\beta$ is not directly available, it is difficult to do some analysis; however, scientists have declared that the reproduction number of Covid-19 is in a range from 1.4 to 2.5. Hence it is possible to estimate $\beta$ as:

$ \beta = \frac{R_0}{d} = R_0 * v$ 


As reported on [Wikipedia](https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology) the dynamics of an epidemic, for example the flu, are often much faster than the dynamics of birth and death, therefore, birth and death are often omitted in simple compartmental models. The SIR system without so-called vital dynamics (birth and death, sometimes called demography) described above can be expressed by the following set of ordinary differential equations

* $\frac{\delta S}{\delta t} = - \frac{\beta I S}{N}$
* $\frac{\delta I}{\delta t} = \frac{\beta I S}{N} - vI$
* $\frac{\delta R}{\delta t} =  vI$

For now, we suppose that Covid-19 arrived in Italy at the start of the new year, which is reasonable if we consider that the first confirmed case was detected at the end of February and that many other undetected cases probably went unobserved. 

We also suppose a reproduction number of 1.7, which is in the range of 1.4-2.5. Gamma has been estimated assuming the virus lives inside the human body for 10 days on average, which is reasonable since the estimated period ranges from 2 to 14 days. Beta is obtained from Gamma and the reproduction number. 


A very important assumption of this model is that once a person has recovered, that person has acquired immunity, which has no scientific proof. Age, sex, social status, and race do not affect the probability of being infected. There is no inherited immunity. The members of the population mix homogeneously (they have the same interactions with one another to the same degree).

One other important assumption about SIR is that it does not take into account the containment measures of the infection.

The results obtained with the SIR model are the followings:

In [None]:
# Reproduction number used to derive the contact rate.
Ro = 2.5

# Reciprocal of the mean of `recovered_rate`; not used by the other
# assignments in this block.
d = 1 / np.mean(recovered_rate)

# Mean recovery rate (1/days); fixed at 0.1 rather than np.mean(recovered_rate).
gamma = 0.1
# Contact rate (1/days), obtained from the reproduction number.
beta = Ro * gamma

# Total population, N.
N = 60000000
# Initial number of infected (I0) and recovered (R0) individuals.
I0 = 2
R0 = 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - (I0 + R0)

# A grid of 180 time points (in days) between 0 and 180 inclusive.
t = np.linspace(0, 180, num=180)

def sir_model(y, t, N, beta, gamma):
    """Right-hand side of the SIR differential equations.

    y     -- sequence (S, I, R) with the current compartment sizes
    t     -- time point; part of the signature but not used in the formulas
    N     -- total population
    beta  -- contact rate
    gamma -- recovery rate

    Returns the tuple (dS/dt, dI/dt, dR/dt).
    """
    susceptible, infected, _recovered = y
    new_infections = beta * susceptible * infected / N
    recoveries = gamma * infected
    return -new_infections, new_infections - recoveries, recoveries


# Integrate the SIR equations over the time grid, t.
# Columns of `solution` follow the order of the initial state: S, I, R.
# (The original stored an array copy under the misspelled name `soultion`,
# which nothing in this cell read; that dead assignment is dropped.)
solution = odeint(sir_model, [S0, I0, R0], t, args=(N, beta, gamma))

p1 = figure(plot_width=600, plot_height=500, title="SIR Model for Coronavirus (2019-nCoV)")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Days from 27/01/20 (Estimate of the first cases in Italy)'
p1.yaxis.axis_label = 'Population'

# One line plus point markers per compartment, in column order.
for column_index, (color, label) in enumerate((
    ('#D35400', 'Susceptible'),
    ('#2E4053', 'Infected'),
    ('#28B463', 'Recovered'),
)):
    p1.line(t, solution[:, column_index], color=color,
            legend_label=label, line_width=1.5)
    p1.circle(t, solution[:, column_index], fill_color="black", size=1)

p1.legend.location = 'top_right'

show(p1)

As you can see this is a toy model, as the number of infected people is extremely huge and the previous assumptions are strong and not realistic. 

## **Susceptible - Exposed Infected - Recovered Model (SEIR) for Italy**

This is a more precise model for studying the spread of the virus. For many important infections there is a significant incubation period during which the individual has been infected but is not yet infectious themselves. During this period the individual is in compartment E (for exposed).

![](https://upload.wikimedia.org/wikipedia/commons/thumb/3/3d/SEIR.PNG/798px-SEIR.PNG)

According with [this source](https://www.health.harvard.edu/diseases-and-conditions/coronavirus-resource-center) the incubation period of the Covid-19 ranges from 3 to 14 days, in particular it has been estimated an average period of 5.1 days (at least in Italy). Given the death rate as $\mu$, the birth rate as $\Lambda$, assuming that the incubation period is a random variable with exponential distribution with parameter $\alpha$, and also assuming the presence of vital dynamics with birth rate equal to death rate, we have the model:

* $\frac{\delta S}{\delta t} = \Lambda - \mu S - \frac{\beta I S}{N} $
* $\frac{\delta E}{\delta t} = \frac{\beta I S}{N} - (\mu + \alpha)E $
* $\frac{\delta I}{\delta t} = \alpha E - (\lambda+\mu)I$
* $\frac{\delta R}{\delta t} = \lambda I - \mu R $

For now let's suppose that the birth and the death rate does not affect the virus spreading.
$R_0$ this time is equal to $R_0 = \frac{\alpha}{\mu+\alpha}\frac{\beta}{\mu+\lambda} $ where we can obtain $\alpha$, anyway according with the sources online we can find that $\alpha = \frac{1}{incubation}$


In [None]:
# Reproduction number used to derive the contact rate.
Ro = 2.8

# Rates used by the SEIR equations below.
gamma = 0.1     # rate of leaving the infected compartment (multiplies I)
mu = 0.         # loss term applied to every compartment
alpha = 0.2     # rate of leaving the exposed compartment (multiplies E)
Blambda = 0.    # constant inflow into the susceptible compartment
# Contact rate obtained by solving the R0 expression for beta.
beta = Ro*((mu + alpha)*(mu+gamma))/alpha

# Total population, N.
N = 60000000
# Initial number of infected (I0), recovered (R0) and exposed (E0) individuals.
I0 = 500
R0 = 0
E0 = 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - (I0 + R0 + E0)

# A grid of 200 time points (in days) between 0 and 200 inclusive.
t = np.linspace(0, 200, num=200)

def seir_model(y, t, N, beta, gamma, alpha, Blambda, mu):
    """Right-hand side of the SEIR differential equations.

    y       -- sequence (S, E, I, R) with the current compartment sizes
    t       -- time point; part of the signature but not used in the formulas
    N       -- total population
    beta    -- contact rate
    gamma   -- rate at which infected individuals move to R
    alpha   -- rate at which exposed individuals move to I
    Blambda -- constant inflow into S
    mu      -- loss rate applied to every compartment

    Returns the tuple (dS/dt, dE/dt, dI/dt, dR/dt).
    """
    susceptible, exposed, infected, recovered = y
    new_exposures = beta * susceptible * infected / N
    return (
        Blambda - mu * susceptible - new_exposures,
        new_exposures - (mu + alpha) * exposed,
        alpha * exposed - (gamma + mu) * infected,
        gamma * infected - mu * recovered,
    )

# Integrate the SEIR equations over the time grid, t.
# Columns of `solution2` follow the order of the initial state: S, E, I, R.
# (The original stored an array copy under the misspelled name `soultion2`,
# which nothing in this cell read; that dead assignment is dropped.)
solution2 = odeint(seir_model, [S0, E0, I0, R0], t, args=(N, beta, gamma, alpha, Blambda, mu))

p1 = figure(plot_width=600, plot_height=500, title="SEIR Model for Coronavirus (2019-nCoV)")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Days from 27/01/20 (Estimate of the first cases in Italy)'
p1.yaxis.axis_label = 'Population'

# One line plus point markers per compartment, in column order.
for column_index, (color, label) in enumerate((
    ('#D35400', 'Susceptible'),
    ('#2E4053', 'Exposed'),
    ('#28B463', 'Infected'),
    ('#821063', 'Recovered'),
)):
    p1.line(t, solution2[:, column_index], color=color,
            legend_label=label, line_width=1.5)
    p1.circle(t, solution2[:, column_index], fill_color="black", size=1)

p1.legend.location = 'top_right'

show(p1)

Compared with the SIR model this seems to be more realistic. As a further step, let's see, just in principle, how social distancing affects the number of contagions. Social distancing is defined by a parameter $\rho$ which ranges from 0 to 1. Zero means that people are totally locked inside their homes in separate rooms, while 1 means that people are totally free to go wherever they want. This parameter affects $\beta$ as follows:

* $\frac{\delta S}{\delta t} = \Lambda - \mu S - \frac{\rho \beta I S}{N} $
* $\frac{\delta E}{\delta t} = \frac{\rho \beta I S}{N} - (\mu + \alpha)E $
* $\frac{\delta I}{\delta t} = \alpha E - (\lambda+\mu)I$
* $\frac{\delta R}{\delta t} = \lambda I - \mu R $

In [None]:
# Reproduction number used to derive the contact rate.
Ro = 2.8

# Rates used by the SEIR equations below.
gamma = 0.1     # rate of leaving the infected compartment (multiplies I)
mu = 0.         # loss term applied to every compartment
alpha = 0.2     # rate of leaving the exposed compartment (multiplies E)
Blambda = 0.    # constant inflow into the susceptible compartment
# Contact rate obtained by solving the R0 expression for beta.
beta = Ro*((mu + alpha)*(mu+gamma))/alpha

# Total population, N.
N = 60000000
# Initial number of infected (I0), recovered (R0) and exposed (E0) individuals.
I0 = 500
R0 = 0
E0 = 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - (I0 + R0 + E0)

# A grid of 360 time points (in days) between 0 and 360 inclusive.
t = np.linspace(0, 360, num=360)

def seir_model_social(y, t, N, ro, beta, gamma, alpha, Blambda, mu):
    """Right-hand side of the SEIR equations with a social-distancing factor.

    Same as the plain SEIR system, except that the transmission term is
    multiplied by `ro`.

    y       -- sequence (S, E, I, R) with the current compartment sizes
    t       -- time point; part of the signature but not used in the formulas
    N       -- total population
    ro      -- multiplier applied to the contact rate
    beta    -- contact rate
    gamma   -- rate at which infected individuals move to R
    alpha   -- rate at which exposed individuals move to I
    Blambda -- constant inflow into S
    mu      -- loss rate applied to every compartment

    Returns the tuple (dS/dt, dE/dt, dI/dt, dR/dt).
    """
    susceptible, exposed, infected, recovered = y
    new_exposures = ro * beta * susceptible * infected / N
    return (
        Blambda - mu * susceptible - new_exposures,
        new_exposures - (mu + alpha) * exposed,
        alpha * exposed - (gamma + mu) * infected,
        gamma * infected - mu * recovered,
    )

# Integrate the SEIR equations over the time grid, t, once per distancing
# factor. Columns of each solution follow the initial state order: S, E, I, R.
# (The original stored array copies under the misspelled names `soultion1`,
# `soultion2` and `soultion3`, which nothing in this cell read; those dead
# assignments are dropped.)
ro1=1
solution1 = odeint(seir_model_social, [S0, E0, I0, R0], t, args=(N, ro1, beta, gamma, alpha, Blambda, mu))

ro2=0.8
solution2 = odeint(seir_model_social, [S0, E0, I0, R0], t, args=(N, ro2, beta, gamma, alpha, Blambda, mu))

ro3=0.6
solution3 = odeint(seir_model_social, [S0, E0, I0, R0], t, args=(N, ro3, beta, gamma, alpha, Blambda, mu))

p1 = figure(plot_width=600, plot_height=500, title="SEIR Model for Coronavirus (2019-nCoV) with social distance")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Days from 27/01/20 (Estimate of the first cases in Italy)'
p1.yaxis.axis_label = 'Population'

# For every scenario: solid line for the infected column (index 2) and a thin
# dashed line of the same colour for the exposed column (index 1).
for scenario, color, rho_label in (
    (solution1, '#D35400', 'ρ=1'),
    (solution2, '#2E4053', 'ρ=0.8'),
    (solution3, '#28B463', 'ρ=0.6'),
):
    p1.line(t, scenario[:,2], color=color,
            legend_label='Infected ' + rho_label, line_width=1.5)
    p1.circle(t, scenario[:,2], fill_color="black", size=1)
    p1.line(t, scenario[:,1], color=color,
            legend_label='Exposed ' + rho_label, line_dash="4 4", line_width=0.5)
    p1.circle(t, scenario[:,1], fill_color="black", line_dash="4 4", size=0.02)

p1.legend.location = 'top_right'

show(p1)

## Susceptible Infected Quarantined Recovered (SIQR) for Italy (Work in progress)

This model is another variant for modeling the spread of the virus by taking into account the quarantined people. $Q$ denotes the number of quarantined people, while $R$ models the positive and isolated individuals that recover or die from the disease. As in the previous models, the number of recovered or deceased non-identified patients is not modelled because it is impossible to observe. 

* $\frac{\delta S}{\delta t} = -\frac{\beta I S}{N} $
* $\frac{\delta I}{\delta t} = \frac{\beta I S}{N} - (\alpha + \eta)I $
* $\frac{\delta Q}{\delta t} = \eta I - \gamma Q$
* $\frac{\delta R}{\delta t} = \gamma Q $

In [None]:
# Reproduction number; kept for reference, beta is set directly below.
Ro = 2.78

# Rates used by the SIQR equations below.
gamma = 1/10    # rate of leaving the quarantined compartment (multiplies Q)
alpha = 0.067   # additional removal rate from the infected compartment
eta = alpha     # rate at which infected individuals move to quarantine
beta = 0.373    # contact rate

# Total population, N.
N = 60000000
# Initial number of infected (I0), quarantined (Q0) and recovered (R0) individuals.
I0, Q0, R0 = 500, 250, 250
# Everyone else, S0, is susceptible to infection initially. Q0 and R0 are
# subtracted as well so that S0 + I0 + Q0 + R0 equals N; the original
# subtracted only I0, which left the compartments summing to N + 500.
S0 = N - I0 - Q0 - R0
# A grid of 200 time points (in days) between 0 and 200 inclusive.
t = np.linspace(0, 200, 200)

def siqr_model(y, t, N, beta, gamma, alpha, eta):
    """Right-hand side of the SIQR differential equations.

    y     -- sequence (S, I, Q, R) with the current compartment sizes
    t     -- time point; part of the signature but not used in the formulas
    N     -- total population
    beta  -- contact rate
    gamma -- rate at which quarantined individuals move to R
    alpha -- removal rate from I that is not routed to another compartment
    eta   -- rate at which infected individuals move to Q

    Returns the tuple (dS/dt, dI/dt, dQ/dt, dR/dt).
    """
    susceptible, infected, quarantined, _removed = y
    new_infections = beta * susceptible * infected / N
    released = gamma * quarantined
    return (
        -new_infections,
        new_infections - (alpha + eta) * infected,
        eta * infected - released,
        released,
    )


# Integrate the SIQR equations over the time grid, t.
# Columns of `solution2` follow the order of the initial state: S, I, Q, R.
# (The original stored an array copy under the misspelled name `soultion2`,
# which nothing in this cell read; that dead assignment is dropped.)
solution2 = odeint(siqr_model, [S0, I0, Q0, R0], t, args=(N, beta, gamma, alpha, eta))

p1 = figure(plot_width=600, plot_height=500, title="SIQR Model for Coronavirus (2019-nCoV)")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Days from 27/01/20 (Estimate of the first cases in Italy)'
p1.yaxis.axis_label = 'Population'

# One line plus point markers per compartment, in column order.
for column_index, (color, label) in enumerate((
    ('#D35400', 'Susceptible'),
    ('#2E4053', 'Infected'),
    ('#28B463', 'Quarantined'),
    ('#821063', 'Recovered'),
)):
    p1.line(t, solution2[:, column_index], color=color,
            legend_label=label, line_width=1.5)
    p1.circle(t, solution2[:, column_index], fill_color="black", size=1)

p1.legend.location = 'top_right'

show(p1)

In [None]:
# Symmetric logistic curve with plateau L, steepness k and midpoint a.
x = np.linspace(0,100,1000)
L = 18800
k = 0.137
a = 35
y =  L / ( 1 + np.exp(-k*(x-a)) ) 

# Cumulative cases, materialised once instead of on every loop iteration.
totale_casi_veneto = list(day_veneto['totale_casi'])

# New cases per day: difference between consecutive cumulative totals.
daybyday_veneto = [today - yesterday
                   for yesterday, today in zip(totale_casi_veneto, totale_casi_veneto[1:])]

# Ratio between the new cases of consecutive days. Days whose predecessor had
# zero new cases are skipped. The original guard compared the whole list with
# 0, which is never true, so the division by zero was never avoided.
growth_veneto = []
for i in range(1,len(daybyday_veneto)):
    if daybyday_veneto[i-1] == 0:
        continue
    growth_veneto.append(daybyday_veneto[i] / daybyday_veneto[i-1])

# Asymmetric (generalised) logistic curve centred 5.5 days after the day with
# the most new cases.
xm = np.argmax(daybyday_veneto) + 5.5
T = 0.0001
L1 = 18000
k = 0.085
y1 = L1 / (1 + T*np.exp(-k*(x-xm)))**(1/T)


p1 = figure(plot_width=600, plot_height=500, title="Logistic curve of Veneto")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Days from first cases in reported by Protezione Civile)'
p1.yaxis.axis_label = 'Population'

p1.line(x, y, color='#D35400', 
        legend_label='Logistic curve', line_width=1.5)
# Observed cumulative cases, one marker per reported day.
p1.circle(np.arange(0,len(totale_casi_veneto),1),
          totale_casi_veneto, fill_color="black", size=3)

p1.line(x, y1, color='#B91422', 
        legend_label='Logistic curve asymmetrical', line_dash="4 4",line_width=0.8)

p1.legend.location = 'bottom_right'
show(p1)

In [None]:
import scipy.stats


def mean_confidence_interval(data, confidence=0.95):
    """Return (mean, lower, upper) of a Student-t confidence interval.

    data       -- sequence of numbers
    confidence -- two-sided confidence level, 0.95 by default

    The half-width is the standard error of the mean multiplied by the
    t quantile at (1 + confidence) / 2 with len(data) - 1 degrees of freedom.
    """
    samples = 1.0 * np.array(data)
    degrees_of_freedom = len(samples) - 1
    center = np.mean(samples)
    t_quantile = scipy.stats.t.ppf((1 + confidence) / 2., degrees_of_freedom)
    half_width = scipy.stats.sem(samples) * t_quantile
    return center, center - half_width, center + half_width


# Running mean of the Veneto growth ratio with its 95% confidence band.
mean_hm_list = []
mean_hp_list = []
mean_list = []

# Skip the first `start` growth values.
start = 7
growth_veneto_s = growth_veneto[start:]

# Evaluate every non-empty prefix: the first 1, 2, ..., len values. The
# original sliced with [:i] for i starting at 0, so its first prefix was empty
# and the most recent observation was never included.
for n_values in range(1, len(growth_veneto_s) + 1):
    mean, mean_hm, mean_hp = mean_confidence_interval(growth_veneto_s[:n_values], 0.95)
    mean_hm_list.append(mean_hm)
    mean_hp_list.append(mean_hp)
    mean_list.append(mean)
    
mean_hm_list = np.asarray(mean_hm_list)
mean_hp_list = np.asarray(mean_hp_list)
mean_list = np.asarray(mean_list)

mean_range = list(range(len(mean_list)))

plt.figure(figsize=(10,8))
plt.plot(mean_list)
plt.fill_between(mean_range, mean_hm_list, mean_hp_list, 
                 facecolor='b', alpha=0.4, edgecolor='#8F94CC', 
                 linewidth=2, linestyle='dashed')

plt.title("95% - Confidence intervals for the mean R0 in Veneto")
plt.legend(["Average R0","CI (95%)"])
plt.ylabel('Average R0')
plt.xlabel('Number of days')
plt.ylim(0,3)
plt.show()

In [None]:
# Regional data, restricted to the rows of the Veneto region.
veneto_det = pd.read_csv('/kaggle/input/regioni/dpc-covid19-ita-regioni.csv')
veneto_det = veneto_det.loc[veneto_det['denominazione_regione']=='Veneto']

# Day-over-day differences of three columns. np.diff replaces the index loops
# that rebuilt a list of the whole column twice per iteration.
variazione_morti = np.diff(veneto_det['deceduti'].to_numpy()).tolist()

tamponi_veneto = np.diff(veneto_det['tamponi'].to_numpy()).tolist()

guariti_veneto = np.diff(veneto_det['dimessi_guariti'].to_numpy()).tolist()

In [None]:
def _latest(values):
    """Return the last element of a column or sequence."""
    return list(values)[-1]


def _daily_change(values):
    """Return the difference between the last two elements."""
    as_list = list(values)
    return as_list[-1] - as_list[-2]


# --- National summary (figures come from frames loaded earlier) ---
print("DATI ELABORATI PER L'ITALIA (RIFERIMENTO PROTEZIONE CIVILE)")
print("\n")
print('Numero totale di contagi in Italia ad oggi: ', _latest(confirmed_ita['totale_casi']))
print('Variazione percentuale nuovi positivi: ', 
      np.round((_daily_change(confirmed_ita['totale_casi']) / _latest(confirmed_ita['totale_casi']))*100,2),'%')
print('Contagi in tutta Italia oggi: ', _daily_change(nazionale_ita['totale_casi']))
print("Variazione nuovi positivi: ", _latest(nazionale_ita['variazione_totale_positivi']))
print("Rapporto tra i contagi di oggi e quelli di ieri (R0): ", np.round(growth_factor[-1],3))
print("Totale dei morti in tutta Italia: ", _latest(deaths_ita['deceduti']))
print("Morti in tutta Italia oggi: ", _daily_change(deaths_ita['deceduti']))
print("Crescita dei morti rispetto a ieri in tutta Italia: ", np.round(death_rate[-1]*100,2),'%')
print("Totale dei guariti in tutta Italia ad oggi: ", _latest(recovered_ita['dimessi_guariti']))
print("Guariti in tutta Italia oggi: ", _daily_change(recovered_ita['dimessi_guariti']))
print("Numero di persone ricoverate con sintomi attualmente: ",
      _latest(hospital_ita['ricoverati_con_sintomi']))
print("Numero di persone ricoverate in terapia intensiva attualmente: ",
      _latest(intensive_ita['terapia_intensiva']))
print("Numero di persone in isolamento domiciliare attualmente: ",
      _latest(home_ita['isolamento_domiciliare']))
print("Numero dei tamponi effettutuati oggi: ",
      _latest(day_swabs))

# --- Veneto summary ---
print("\n") 
print("DATI ELABORATI PER IL VENETO (RIFERIMENTO PROTEZIONE CIVILE)")
print("\n")
print('Numero di contagi totali in Veneto: ', _latest(day_veneto['totale_casi']))
print('Contagi in Veneto oggi: ', _daily_change(day_veneto['totale_casi']))
print('Variazione percentuale nuovi positivi: ', 
      np.round((_daily_change(day_veneto['totale_casi']) / _latest(day_veneto['totale_casi']))*100,2),'%')
print("Morti in totale: ", np.sum(variazione_morti))
print("Morti di oggi: ", variazione_morti[-1])
print("Numero dei tamponi effettutuati oggi:", tamponi_veneto[-1])
print("Guariti in totale: ", np.sum(guariti_veneto))
print("Guariti di oggi: ", guariti_veneto[-1])
print("Rapporto tra i contagi di oggi e quelli di ieri (R0): ", np.round(growth_veneto[-1],3))

In [None]:
# Hospitalisation status of the Veneto cases over time.
# The x-axis is built from the Veneto frame itself. The original reused the
# global `datetime`, which the earlier cells fill from the world-wide dataset.
# NOTE(review): assumes `veneto_det` still has the dataset's `data`
# (report timestamp) column - confirm.
veneto_dates = pd.to_datetime(veneto_det['data'])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Condizioni degli infetti in Veneto")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

# One line plus white-filled markers per column.
for column_name, color, label in (
    ('ricoverati_con_sintomi', '#C0392B', 'Ricoverati con sintomi'),
    ('terapia_intensiva', '#5DADE2', 'Terapia intensiva'),
    ('isolamento_domiciliare', '#E67E22', 'Isolamento'),
):
    p1.line(veneto_dates, veneto_det[column_name], color=color,
            legend_label=label, line_width=1.5)
    p1.circle(veneto_dates, veneto_det[column_name], fill_color="white", size=5)
p1.legend.location = "top_left"


output_file("coronavirus.html", title="coronavirus.py")


show(p1)

In [None]:
# Currently positive, deceased and recovered counts for Veneto over time.
# The x-axis is built from the Veneto frame itself. The original reused the
# global `datetime`, which the earlier cells fill from the world-wide dataset.
# NOTE(review): assumes `veneto_det` still has the dataset's `data`
# (report timestamp) column - confirm.
veneto_dates = pd.to_datetime(veneto_det['data'])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Condizioni degli infetti in Veneto")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

# One line plus white-filled markers per column.
for column_name, color, label in (
    ('totale_positivi', '#4C31C8', 'Attualmente positivi'),
    ('deceduti', '#C0392B', 'Deceduti'),
    ('dimessi_guariti', '#2635A5', 'Dimessi/Guariti'),
):
    p1.line(veneto_dates, veneto_det[column_name], color=color,
            legend_label=label, line_width=1.5)
    p1.circle(veneto_dates, veneto_det[column_name], fill_color="white", size=4)

output_file("coronavirus.html", title="coronavirus.py")

p1.legend.location = "top_left"
show(p1)

In [None]:
# Daily variation of the currently positive cases for the whole country.
# The x-axis is built from the national frame itself. The original reused the
# global `datetime`, which the earlier cells fill from the world-wide dataset.
# NOTE(review): assumes `nazionale_ita` has the dataset's `data`
# (report timestamp) column - confirm against the cell that loads it.
italy_dates = pd.to_datetime(nazionale_ita['data'])

p1 = figure(plot_width=600, plot_height=500, x_axis_type="datetime", title="Variazione positivi in Italia")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Number of cases'

p1.line(italy_dates, nazionale_ita['variazione_totale_positivi'], color='#C0392B', 
        legend_label='Variazione nuovi positivi', line_width=1.5)
p1.circle(italy_dates, nazionale_ita['variazione_totale_positivi'], fill_color="white", size=5)

output_file("coronavirus.html", title="coronavirus.py")

p1.legend.location = "top_left"
show(p1)

In [None]:
# Sampling step: every `period`-th row of each regional series is kept.
period = 1

regioni = pd.read_csv('/kaggle/input/regioni/dpc-covid19-ita-regioni.csv')


def _region_series(region_name):
    """Return (rows, new positives, total cases) for one region.

    The rows are those whose 'denominazione_regione' equals `region_name`.
    Both lists are sampled every `period` rows, and negative values in the
    'nuovi_positivi' series are replaced by 0.
    """
    rows = regioni.loc[regioni['denominazione_regione']==region_name]
    new_positives = [0 if value<0 else value
                     for value in list(rows['nuovi_positivi'])[::period]]
    total_cases = list(rows['totale_casi'])[::period]
    return rows, new_positives, total_cases


veneto, veneto_positivi, veneto_casi = _region_series('Veneto')
emilia, emilia_positivi, emilia_casi = _region_series('Emilia-Romagna')
lomba, lomba_positivi, lomba_casi = _region_series('Lombardia')
piemonte, piemonte_positivi, piemonte_casi = _region_series('Piemonte')
toscana, toscana_positivi, toscana_casi = _region_series('Toscana')
marche, marche_positivi, marche_casi = _region_series('Marche')
friuli, friuli_positivi, friuli_casi = _region_series('Friuli Venezia Giulia')

# Points (0, 0) ... (9, 9) of the diagonal reference line.
exponential_line_x = list(range(10))
exponential_line_y = list(range(10))

p1 = figure(plot_width=800, plot_height=550, title="Trajectory of Covid-19")
p1.grid.grid_line_alpha=0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Total number of detected cases (Log scale)'
p1.yaxis.axis_label = 'New confirmed cases (Log scale)'

p1.line(exponential_line_x, exponential_line_y, line_dash="4 4", line_width=0.5)
p1.line(np.log(friuli_casi), np.log(friuli_positivi), color='#DBAE23', 
        legend_label='Friuli Venezia Giulia', line_width=1)
p1.circle(np.log(friuli_casi), np.log(friuli_positivi), fill_color="white", size=2)

p1.line(np.log(emilia_casi), np.log(emilia_positivi), color='#3EC358', 
        legend_label='Emilia Romagna', line_width=1)
p1.circle(np.log(emilia_casi), np.log(emilia_positivi), fill_color="white", size=2)

p1.line(np.log(veneto_casi), np.log(veneto_positivi), color='#3E4CC3', 
        legend_label='Veneto', line_width=1)
p1.circle(np.log(veneto_casi), np.log(veneto_positivi), fill_color="white", size=2)

p1.line(np.log(piemonte_casi), np.log(piemonte_positivi), color='#F54138', 
        legend_label='Piemonte', line_width=1)
p1.circle(np.log(piemonte_casi), np.log(piemonte_positivi), fill_color="white", size=2)

p1.line(np.log(marche_casi), np.log(marche_positivi), color='#23BCDB', 
        legend_label='Marche', line_width=1)
p1.circle(np.log(marche_casi), np.log(marche_positivi), fill_color="white", size=2)

p1.line(np.log(toscana_casi), np.log(toscana_positivi), color='#010A0C', 
        legend_label='Toscana', line_width=1)
p1.circle(np.log(toscana_casi), np.log(toscana_positivi), fill_color="white", size=2)

p1.line(np.log(lomba_casi), np.log(lomba_positivi), color='#017A0C', 
        legend_label='Lombardia', line_width=1)
p1.circle(np.log(lomba_casi), np.log(lomba_positivi), fill_color="white", size=2)

p1.legend.location = "top_left"

output_file("coronavirus.html", title="coronavirus.py")

show(p1)

In [None]:
# Linear-scale trajectory plot: new positives against total detected cases
# for a handful of Italian regions, keeping every `period`-th row.
period = 2

regioni = pd.read_csv('/kaggle/input/regioni/dpc-covid19-ita-regioni.csv')


def _extract_region(table, region_name, stride):
    """Select one region's rows and sample the two plotted columns.

    Returns ``(rows, new_positives, total_cases)``.  Both lists keep every
    ``stride``-th row of the region; negative 'nuovi_positivi' values are
    replaced by 0.
    """
    rows = table.loc[table['denominazione_regione'] == region_name]
    sampled_new = list(rows['nuovi_positivi'])[::stride]
    sampled_total = list(rows['totale_casi'])[::stride]
    clamped_new = [0 if value < 0 else value for value in sampled_new]
    return rows, clamped_new, sampled_total


def _draw_region(fig, total_cases, new_positives, colour, label):
    """Add one region to ``fig`` as a line with small white-filled markers."""
    fig.line(total_cases, new_positives, color=colour,
             legend_label=label, line_width=1)
    fig.circle(total_cases, new_positives, fill_color="white", size=2)


# Every per-region name of the original cell is still bound at module level.
veneto, veneto_positivi, veneto_casi = _extract_region(
    regioni, 'Veneto', period)
emilia, emilia_positivi, emilia_casi = _extract_region(
    regioni, 'Emilia-Romagna', period)
lomba, lomba_positivi, lomba_casi = _extract_region(
    regioni, 'Lombardia', period)
piemonte, piemonte_positivi, piemonte_casi = _extract_region(
    regioni, 'Piemonte', period)
toscana, toscana_positivi, toscana_casi = _extract_region(
    regioni, 'Toscana', period)
marche, marche_positivi, marche_casi = _extract_region(
    regioni, 'Marche', period)
friuli, friuli_positivi, friuli_casi = _extract_region(
    regioni, 'Friuli Venezia Giulia', period)

# Coordinates of a y = x reference line; built but not drawn in this cell.
exponential_line_x = list(range(10))
exponential_line_y = list(range(10))

p1 = figure(plot_width=800, plot_height=550, title="Trajectory of Covid-19")
p1.grid.grid_line_alpha = 0.3
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.xaxis.axis_label = 'Total number of detected cases'
p1.yaxis.axis_label = 'New confirmed cases'

# Glyphs are added in this fixed order; the legend keeps its default location.
_draw_region(p1, friuli_casi, friuli_positivi,
             '#DBAE23', 'Friuli Venezia Giulia')
_draw_region(p1, emilia_casi, emilia_positivi,
             '#3EC358', 'Emilia Romagna')
_draw_region(p1, veneto_casi, veneto_positivi,
             '#3E4CC3', 'Veneto')
_draw_region(p1, piemonte_casi, piemonte_positivi,
             '#F54138', 'Piemonte')
_draw_region(p1, marche_casi, marche_positivi,
             '#23BCDB', 'Marche')
_draw_region(p1, toscana_casi, toscana_positivi,
             '#010A0C', 'Toscana')
_draw_region(p1, lomba_casi, lomba_positivi,
             '#017A0C', 'Lombardia')

output_file("coronavirus.html", title="coronavirus.py")

show(p1)