# Objetivo

En este trabajo vamos a implementar el modelo de evaluación de riesgo de la epidemia desarrollado por el grupo [Computational Biology and Complex Systems (BIOCOM-SC), Universitat Politècnica de Catalunya - BarcelonaTech](https://biocomsc.upc.edu/en/shared/20200412_report_web_27.pdf). Los datos epidemiológicos proceden del grupo de trabajo [#escovid19data](https://github.com/montera34/escovid19data).


@author: Manuel H. Arias 

@Twitter: @walyt

@mail: mharias@me.com



## Importación de librerías estándar para DAE

In [1]:
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib import cm
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from matplotlib.dates import (YEARLY, MONTHLY, DateFormatter, MonthLocator,DayLocator,
                              rrulewrapper, RRuleLocator, drange)
import matplotlib.image as mpimg
from matplotlib.animation import FuncAnimation
from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox
import matplotlib.colors as colors

import numpy as np
from datetime import datetime,timedelta
import seaborn as sns
%matplotlib inline

import urllib.request

import matplotlib.image as mpimg
from matplotlib.animation import FuncAnimation
from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox

## Descargamos la información

Hacemos en primer lugar una actualización de parámetros y preparación de variables que necesitaremos durante el ejercicio

In [2]:
# Display options: allow long and wide DataFrames to be shown in full on screen
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', None)

In [3]:
# URL of this notebook on GitHub
# NOTE(review): the path names graficos_escovid19data.ipynb — confirm it is the URL of this notebook
github_url = 'https://github.com/mharias/covid/blob/master/graficos_escovid19data.ipynb'

In [4]:
hashtag_fuente='#escovid19data' # hashtag of the data source, for the credits

In [5]:
# URL of the data source: consolidated per-province CSV from the escovid19data repository
path_montera34='https://github.com/montera34/escovid19data/blob/master/data/output/covid19-provincias-spain_consolidated.csv?raw=true'

Leemos los datos en un `DataFrame` de `pandas`

In [6]:
# Read the CSV straight from the remote URL into a DataFrame (needs network access)
df = pd.read_csv(path_montera34)

veamos una rápida descripción de la información:

In [7]:
df.describe()

Unnamed: 0,ine_code,new_cases,PCR,TestAc,activos,hospitalized,intensive_care,deceased,cases_accumulated,cases_accumulated_PCR,recovered,num_casos,num_casos_prueba_pcr,num_casos_prueba_test_ac,num_casos_prueba_otras,num_casos_prueba_desconocida,poblacion,cases_per_cienmil,intensive_care_per_1000000,deceassed_per_100000,hospitalized_per_100000,cases_14days,cases_7days,cases_PCR_14days,cases_PCR_7days,daily_cases,daily_cases_avg7,daily_cases_PCR,daily_cases_PCR_avg7,daily_deaths,daily_deaths_inc,daily_deaths_avg3,daily_deaths_avg7,deaths_last_week,num_casos_prueba_pcr_avg7
count,13186.0,4136.0,3660.0,927.0,1496.0,9319.0,9182.0,10479.0,6797.0,7900.0,6782.0,13104.0,13104.0,13104.0,13104.0,13104.0,13186.0,6797.0,9182.0,10479.0,9319.0,5966.0,6348.0,7182.0,7507.0,6641.0,5958.0,8415.0,8051.0,10417.0,9755.0,9783.0,10115.0,8794.0,12792.0
mean,26.512968,54.827611,96.979508,5546.153182,1073.078209,178.657581,24.364735,574.499857,5207.203913,7945.146456,1657.858133,65.950168,61.139042,1.577839,0.692002,2.541285,903299.5,470.908573,2.60636,52.257393,20.065567,1136.213208,557.966289,1333.186995,667.692554,78.234302,79.785532,92.437908,91.173879,3.865988,inf,3.969069,3.854533,29.49511,61.970059
std,15.009258,128.617717,188.623814,17733.436939,1559.631645,797.363086,91.191897,1776.417377,13159.561091,21726.331373,2285.471634,252.282294,247.523445,6.678773,17.831666,17.163213,1175389.0,509.352419,3.926624,50.641406,42.912002,2560.538805,1290.289631,4154.604659,2086.567115,198.096174,188.433457,316.581071,288.648254,20.661165,,20.966105,20.421711,152.560493,237.900927
min,1.0,-67.0,-67.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,84777.0,0.0,0.0,0.0,0.0,-54.0,-82.0,-62.0,-99.0,-106.0,-11.7,-84.0,-12.7,-22.0,-30.8,-1.3,-3.3,-4.0,0.0
25%,14.0,3.0,5.0,3.0,162.0,10.0,2.0,88.0,689.0,1432.0,405.0,1.0,1.0,0.0,0.0,0.0,316798.0,122.84,0.28,12.24,2.235,118.0,50.0,42.0,20.0,5.0,7.0,2.0,2.9,0.0,0.0,0.0,0.0,0.0,1.4
50%,27.0,19.0,32.0,22.0,498.0,40.0,6.0,200.0,2001.0,2857.5,1114.5,11.0,8.0,0.0,0.0,0.0,581078.0,309.05,1.21,33.34,7.88,477.5,225.0,239.0,113.0,26.0,31.7,14.0,15.7,0.0,0.0,0.3,0.4,4.0,8.6
75%,40.0,58.0,99.0,430.0,1239.5,117.0,18.0,368.0,4208.0,5304.5,2064.0,56.0,49.0,0.0,0.0,0.0,1022800.0,658.65,3.32,81.47,22.13,1074.0,529.0,1047.75,528.0,73.0,75.1,71.0,72.95,2.0,1.0,2.0,2.0,16.0,51.0
max,52.0,1992.0,2029.0,70280.0,10275.0,15227.0,1528.0,11704.0,131010.0,267947.0,26374.0,6738.0,6725.0,128.0,1159.0,331.0,6663394.0,3148.32,28.72,242.86,474.75,29954.0,17440.0,51918.0,27882.0,3374.0,2491.4,6728.0,3983.1,377.0,inf,350.7,339.0,2373.0,4150.9


y un muestreo de valores y de algunas columnas de interés:

In [8]:
df.head()

Unnamed: 0,date,province,ine_code,ccaa,new_cases,PCR,TestAc,activos,hospitalized,intensive_care,deceased,cases_accumulated,cases_accumulated_PCR,recovered,num_casos,num_casos_prueba_pcr,num_casos_prueba_test_ac,num_casos_prueba_otras,num_casos_prueba_desconocida,poblacion,cases_per_cienmil,intensive_care_per_1000000,deceassed_per_100000,hospitalized_per_100000,cases_14days,cases_7days,cases_PCR_14days,cases_PCR_7days,daily_cases,daily_cases_avg7,daily_cases_PCR,daily_cases_PCR_avg7,daily_deaths,daily_deaths_inc,daily_deaths_avg3,daily_deaths_avg7,deaths_last_week,num_casos_prueba_pcr_avg7,source_name,source,comments
0,2020-01-31,Albacete,2,Castilla - La Mancha,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,388167,,,,,,,,,,,,,,,,,,,ISCIII RENAVE,https://cnecovid.isciii.es/covid19/resources/d...,
1,2020-01-31,Alicante/Alacant,3,Comunitat Valenciana,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,1858683,,,,,,,,,,,,,,,,,,,ISCIII RENAVE,https://cnecovid.isciii.es/covid19/resources/d...,
2,2020-01-31,Almería,4,Andalucía,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,716820,,,,,,,,,,,,,,,,,,,ISCIII RENAVE,https://cnecovid.isciii.es/covid19/resources/d...,
3,2020-01-31,Araba/Álava,1,País Vasco,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,331549,,,,,,,,,,,,,,,,,,,ISCIII RENAVE,https://cnecovid.isciii.es/covid19/resources/d...,
4,2020-01-31,Asturias,33,"Asturias, Principado de",,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,1022800,,,,,,,,,,,,,,,,,,,ISCIII RENAVE,https://cnecovid.isciii.es/covid19/resources/d...,


In [9]:
df.tail()

Unnamed: 0,date,province,ine_code,ccaa,new_cases,PCR,TestAc,activos,hospitalized,intensive_care,deceased,cases_accumulated,cases_accumulated_PCR,recovered,num_casos,num_casos_prueba_pcr,num_casos_prueba_test_ac,num_casos_prueba_otras,num_casos_prueba_desconocida,poblacion,cases_per_cienmil,intensive_care_per_1000000,deceassed_per_100000,hospitalized_per_100000,cases_14days,cases_7days,cases_PCR_14days,cases_PCR_7days,daily_cases,daily_cases_avg7,daily_cases_PCR,daily_cases_PCR_avg7,daily_deaths,daily_deaths_inc,daily_deaths_avg3,daily_deaths_avg7,deaths_last_week,num_casos_prueba_pcr_avg7,source_name,source,comments
13181,2020-10-11,Salamanca,37,Castilla y León,187.0,,,,107.0,19.0,443.0,,9988.0,1796.0,,,,,,330119,,5.76,134.19,32.41,,,1454.0,882.0,,,187.0,126.0,4.0,0.9,2.0,2.0,14.0,,Junta de Castilla y León,https://analisis.datosabiertos.jcyl.es/explore...,
13182,2020-10-11,Segovia,40,Castilla y León,41.0,,,,22.0,4.0,224.0,,5814.0,1028.0,,,,,,153129,,2.61,146.28,14.37,,,512.0,257.0,,,41.0,36.7,0.0,0.0,0.0,0.3,2.0,,Junta de Castilla y León,https://analisis.datosabiertos.jcyl.es/explore...,
13183,2020-10-11,Soria,42,Castilla y León,36.0,,,,16.0,6.0,131.0,,3783.0,526.0,,,,,,88636,,6.77,147.8,18.05,,,191.0,97.0,,,36.0,13.9,1.0,0.8,0.3,0.1,1.0,,Junta de Castilla y León,https://analisis.datosabiertos.jcyl.es/explore...,
13184,2020-10-11,Valladolid,47,Castilla y León,56.0,,,,171.0,42.0,492.0,,14345.0,2617.0,,,,,,519546,,8.08,94.7,32.91,,,2981.0,1375.0,,,56.0,196.4,4.0,0.8,4.7,3.4,24.0,,Junta de Castilla y León,https://analisis.datosabiertos.jcyl.es/explore...,
13185,2020-10-11,Zamora,49,Castilla y León,66.0,,,,76.0,2.0,142.0,,3315.0,574.0,,,,,,172539,,1.16,82.3,44.05,,,557.0,245.0,,,66.0,35.0,1.0,0.7,1.3,1.4,10.0,,Junta de Castilla y León,https://analisis.datosabiertos.jcyl.es/explore...,


In [10]:
df['province'].unique()

array(['Albacete', 'Alicante/Alacant', 'Almería', 'Araba/Álava',
       'Asturias', 'Ávila', 'Badajoz', 'Balears, Illes', 'Barcelona',
       'Bizkaia', 'Burgos', 'Cáceres', 'Cádiz', 'Cantabria',
       'Castellón/Castelló', 'Ceuta', 'Ciudad Real', 'Córdoba',
       'Coruña, A', 'Cuenca', 'Gipuzkoa', 'Girona', 'Granada',
       'Guadalajara', 'Huelva', 'Huesca', 'Jaén', 'León', 'Lleida',
       'Lugo', 'Madrid', 'Málaga', 'Melilla', 'Murcia', 'Navarra',
       'Ourense', 'Palencia', 'Palmas, Las', 'Pontevedra', 'Rioja, La',
       'Salamanca', 'Santa Cruz de Tenerife', 'Segovia', 'Sevilla',
       'Soria', 'Tarragona', 'Teruel', 'Toledo', 'Valencia/València',
       'Valladolid', 'Zamora', 'Zaragoza'], dtype=object)

Vamos a convertir la columna `date` al tipo fecha (`datetime`), para poder posteriormente filtrar y ordenar el `DataFrame` por fecha:

In [11]:
# Convert the 'date' column to datetimes so it can be compared against datetime objects later on
df['date']=pd.to_datetime(df['date'])

In [12]:
df.columns

Index(['date', 'province', 'ine_code', 'ccaa', 'new_cases', 'PCR', 'TestAc',
       'activos', 'hospitalized', 'intensive_care', 'deceased',
       'cases_accumulated', 'cases_accumulated_PCR', 'recovered', 'num_casos',
       'num_casos_prueba_pcr', 'num_casos_prueba_test_ac',
       'num_casos_prueba_otras', 'num_casos_prueba_desconocida', 'poblacion',
       'cases_per_cienmil', 'intensive_care_per_1000000',
       'deceassed_per_100000', 'hospitalized_per_100000', 'cases_14days',
       'cases_7days', 'cases_PCR_14days', 'cases_PCR_7days', 'daily_cases',
       'daily_cases_avg7', 'daily_cases_PCR', 'daily_cases_PCR_avg7',
       'daily_deaths', 'daily_deaths_inc', 'daily_deaths_avg3',
       'daily_deaths_avg7', 'deaths_last_week', 'num_casos_prueba_pcr_avg7',
       'source_name', 'source', 'comments'],
      dtype='object')

# Gráfico de BiocomSC

## Preparamos el DataFrame

In [13]:
# Data preparation for the BiocomSC risk diagram

provincias = ['Madrid']

# Names of the input columns that are read and of the derived columns that are created
clave_avg = 'daily_cases_PCR_avg7'
clave_ratio_avg = 'ratio_daily_cases_PCR_avg7'
clave_ratio = 'tasa_ia_7_dias'             # derived: AI(day 0) / AI(day -7)
AI = 'incidencia_acumulada_14'             # derived: 14-day PCR+ cases per 100,000 inhabitants
casos_PCR_14 = 'cases_PCR_14days'
casos_nuevos = 'daily_cases_PCR'
clave_casos_popu = 'casos_per_population'  # derived: daily PCR+ cases per 1,000,000 inhabitants
ventana_IA = 14

# Colours and texts
color_avg = 'darkred'
color_titulos = 'navy'
color_diarios = 'royalblue'
titulo = 'Incidencia Acumulada de 14 días de PCR+ por 100.000 hab \n Ratio de la IA día(0)/día(-7)'
texto_y1 = 'IA a 14 días'
texto_y2 = 'Ratio a 7 días'

# Day offsets used by calculo_p7
un_dia = timedelta(days=1)
dos_dias = timedelta(days=2)
cinco_dias = timedelta(days=5)
seis_dias = timedelta(days=6)
siete_dias = timedelta(days=7)

# Missing values are treated as zeros. The axis argument is dropped because a scalar
# fill value reaches every cell regardless of axis.
datos = df.fillna(0)
desde_marzo = datos['date'] >= datetime(year=2020, month=3, day=1)
en_provincias = datos['province'].isin(provincias)
# - The stable sort guarantees the chronological order that pct_change(periods=7) and the
#   later rolling mean rely on, without reordering rows that share a date.
# - .copy() makes the filtered frame independent, so the .loc assignments below do not
#   trigger SettingWithCopyWarning.
datos = datos[desde_marzo & en_provincias].sort_values('date', kind='mergesort').copy()

for provincia in provincias:
    es_provincia = datos['province'] == provincia  # computed once per province

    # 14-day cumulative incidence per 100,000 inhabitants
    datos.loc[es_provincia, AI] = (
        datos.loc[es_provincia, casos_PCR_14] / datos.loc[es_provincia, 'poblacion'] * 100000
    )

    # Ratio of the incidence to its value 7 rows (days) earlier
    datos.loc[es_provincia, clave_ratio] = (
        datos.loc[es_provincia, AI].pct_change(periods=7).add(1)
    )

    # Daily PCR+ cases per 1,000,000 inhabitants
    datos.loc[es_provincia, clave_casos_popu] = (
        datos.loc[es_provincia, casos_nuevos] / datos.loc[es_provincia, 'poblacion'] * 1000000
    )
  
def calculo_p7(x, tabla=None):
    '''
    Compute the p7 growth ratio for one row: the sum of 'daily_cases_PCR' on
    days 0, -1 and -2 divided by the sum on days -5, -6 and -7, all relative to
    the row's date and restricted to the row's own province.

    Parameters
    ----------
    x : row (Series-like) providing 'date' and 'province'.
    tabla : DataFrame with 'date', 'province' and 'daily_cases_PCR' columns in
        which the neighbouring days are looked up. Defaults to the notebook-level
        ``datos`` frame, so ``datos.apply(calculo_p7, axis=1)`` keeps working.

    Returns
    -------
    0 for dates up to and including 2020-03-08; NaN when any required day is
    missing for the province or when the denominator is zero; the ratio otherwise.
    '''
    if tabla is None:
        tabla = datos

    fecha_actual = x['date']
    if not fecha_actual > datetime(year=2020, month=3, day=8):
        return 0

    # Look only at this row's province: matching on date alone would pick the
    # first province in the table whenever it holds more than one.
    filas_provincia = tabla[tabla['province'] == x['province']]

    def casos_del_dia(dias_atras):
        # Daily PCR+ cases `dias_atras` days before the row's date (NaN if absent)
        fecha = fecha_actual - timedelta(days=dias_atras)
        valores = filas_provincia.loc[filas_provincia['date'] == fecha, 'daily_cases_PCR'].values
        return valores[0] if len(valores) else float('nan')

    numerador = casos_del_dia(0) + casos_del_dia(1) + casos_del_dia(2)
    denominador = casos_del_dia(5) + casos_del_dia(6) + casos_del_dia(7)

    if denominador == 0:
        # The ratio is undefined; NaN keeps the point out of the plot without an
        # infinite value or a divide-by-zero warning.
        return float('nan')
    return numerador / denominador


# p7 for every row, then its 7-row rolling mean (the first 6 rows have no complete window)
datos['p7'] = datos.apply(calculo_p7, axis=1)
datos['p7_avg7'] = datos['p7'].rolling(window=7).mean()
# Renumber the rows 0..n-1 so that animation frame i corresponds to row i
datos = datos.reset_index(drop=True)

In [23]:
# Colours of the three zones of the diagram, picked from seaborn palettes
color_rojo = sns.color_palette("bright", 10)[3]
color_verde = sns.color_palette("muted", 10)[2]
color_amarillo = sns.color_palette("YlOrRd", 10)[1]
# Axis labels (\u03C1 is the Greek letter rho)
eje_X = ' Incidencia Acumulada a 14 días'
eje_Y = 'Parámetro \u03C1'+'7'

# URL of the report that describes the model
fuente_modelo = 'https://biocomsc.upc.edu/en/shared/20200412_report_web_27.pdf'


# Fixed axis bounds used by update()
# NOTE(review): these look like limits captured from an earlier auto-scaled plot — confirm
y0, y1 = (-0.21951443768357135, 4.609803191354998)

x0, x1 = (-26.666590629339943, 817.5248229355791)
# x values from x0 to x1 in steps of 1, used to draw the 30/x and 100/x zone boundaries
rango = np.arange(x0,x1,1)
def formateamos_eje(ax):
    '''
    Apply the common look to ``ax``: grey background, small tick labels with
    no tick marks, white vertical grid lines drawn below the data, and no
    visible spines.
    '''
    ax.set_facecolor('.8')
    ax.tick_params(labelsize=8, length=0)
    ax.grid(True, axis='x', color='white')
    ax.set_axisbelow(True)
    # Hide the frame around the plotting area
    for borde in ax.spines.values():
        borde.set_visible(False)

    

def init():
    '''
    Initialisation callback for the animation: clears the module-level ``ax``
    and reapplies the common formatting with formateamos_eje.
    '''
    ax.clear()
    formateamos_eje(ax)
    #ax.set_ylim(.2, 6.8)



def update(i):
    '''
    Draw frame ``i`` of the animation on the module-level ``ax``.

    Frame ``i`` shows rows 0..i of ``datos`` as points (x = 14-day cumulative
    incidence, y = 7-row mean of p7), coloured with a "Blues" palette so that
    earlier rows get the lighter shades. Every 15th row, row 7 and the last
    row are labelled with their date.

    The axes are not cleared between frames, so labels and the translucent
    zone fills (alpha 0.02 per frame) accumulate as the animation advances.

    Parameters
    ----------
    i : int
        Frame number, also the positional index of the newest row shown.
    '''
    # Rows 0..i inclusive, so the row labelled below is actually drawn and the
    # last row of the table appears in the final frame.
    visibles = i + 1

    ax.set_xlim(0, x1)
    ax.set_ylim(0, y1)

    colores = sns.color_palette("Blues", visibles)
    ax.scatter(datos[AI][:visibles], datos['p7_avg7'][:visibles],
               color=list(colores), marker='o', s=5)

    ax.grid(True, axis='both')
    ax.set_ylabel(eje_Y, size=14, color='black')
    ax.set_xlabel(eje_X, size=14, color='black')
    ax.yaxis.set_tick_params(labelsize=12, labelcolor=color_avg, width=0)
    ax.xaxis.set_tick_params(labelsize=12)

    style = dict(size=8, color='black', weight='bold')
    if i % 15 == 0 or i == 7 or i == len(datos.index) - 1:
        fila = datos.iloc[i]
        fecha = fila['date']
        # '%-d' is not supported by strftime on every platform; the day number
        # is taken from .day instead, giving the same "Month D" text.
        etiqueta = f"{fecha.strftime('%B')} {fecha.day}"
        ax.annotate(etiqueta,
                    xy=(fila[AI], fila['p7_avg7']),
                    xycoords='data',
                    xytext=(-10, 10),
                    textcoords='offset points',
                    **style)

    # Reference line at y = 1
    ax.hlines(1, x0, x1, colors='black', linestyles='dotted')

    # Three zones bounded by the curves x*y = 30 and x*y = 100.
    # NOTE(review): both facecolor and color are passed; confirm against
    # matplotlib which of the two ends up as the visible fill colour.
    ax.fill_between(rango, 0, 30/rango,
                    facecolor="blue",
                    color=color_verde,
                    alpha=0.02)
    ax.fill_between(rango, 30/rango, 100/rango,
                    facecolor='blue',
                    color=color_amarillo,
                    alpha=0.02)
    ax.fill_between(rango, 100/rango, 10,
                    facecolor="blue",
                    color=color_rojo,
                    alpha=0.02)

    ax.set_title(' Evolución de Effective Potential Growth en Madrid\n Fuente del modelo @biocomsc \n Fuente de los datos #escovid19data', fontsize=12)
    
   

    
# Figure built directly from the Figure class, with a single Axes shared by init() and update()
# NOTE(review): presumably plt.Figure is used instead of plt.figure() so that no static figure is shown inline — confirm
fig = plt.Figure(figsize=(10, 5), dpi=288)
ax = fig.add_subplot()


# One frame per row of datos: init() prepares the axes and update(i) draws frame i
anim = FuncAnimation(fig=fig, func=update, init_func=init, frames=len(datos), 
                     interval=150, repeat=False)

# interval sets the speed of the video: higher --> slower

In [None]:
from IPython.display import HTML
# Render the animation as an HTML5 video and embed it in the cell output
# NOTE(review): this step presumably needs a movie writer such as ffmpeg installed — confirm
html = anim.to_html5_video()
HTML(html)

In [None]:
# Write the animation to an MP4 file (relative path), at the same dpi as the figure
anim.save('modelo_biocomsc_madrid.mp4',dpi=288)