# Os dados

Lendo os dados de todas as cidades disponíveis:

In [1]:

import os 
import pandas as pd


path = "./PG_IMT/DadosEpidemia/UKCities/"
# Keep only CSV files so stray entries in the folder are not parsed as data,
# and sort the listing so the concatenation order (and therefore the order of
# the cities downstream) does not depend on the filesystem.
files = sorted(name for name in os.listdir(path) if name.lower().endswith(".csv"))

dataframes = []
for file in files:
    # The city name is the file name without its extension
    city_name = os.path.splitext(file)[0]
    # Tag every row with its city so the per-city frames can be stacked
    city_frame = pd.read_csv(os.path.join(path, file)).assign(city=city_name)
    dataframes.append(city_frame)

# Stack all the cities in a single frame and discard incomplete rows
data = pd.concat(dataframes, ignore_index=True)
data = data.dropna()
data.head()


Unnamed: 0,time,cases,births,pop,city
0,1944.016427,0,27.269231,28350.0,Bedwellty
1,1944.054757,0,27.148291,28339.031079,Bedwellty
2,1944.093087,0,27.027352,28328.062157,Bedwellty
3,1944.131417,0,26.906413,28317.093236,Bedwellty
4,1944.169747,1,26.785473,28306.124314,Bedwellty


# Comportamento do $R_0$

Nesta seção, iremos utilizar a biblioteca `models`, que concentra todas as funções dos modelos SIR desenvolvidos nas seções anteriores.


## Para uma única cidade




In [2]:
import numpy as np

# Keep only the rows of the city under study: `where` turns every other row
# into NaN and `dropna` then removes them.
is_target_city = data["city"] == "London"
dataset = data.where(is_target_city).dropna()
# dataset = dataset.where(dataset["time"].astype(int) == 1948).dropna()

# Extract the training series as NumPy arrays
t = dataset["time"].to_numpy()
I = dataset["cases"].to_numpy()
B = dataset["births"].to_numpy()
# Series passed to the fit as S: population minus cases plus births
S = dataset["pop"].to_numpy() - I + B
# Mean of the population column, truncated to an integer
N = int(dataset["pop"].mean())

# NOTE(review): this star import runs after the assignments above, so any
# name exported by `models` that collides with them (N, S, I, B, t, ...)
# would overwrite them — confirm that `models` only exposes `ss` and helpers.
from models import *

# Create the SIR model
model = ss.SIR(pop=N, focus=["I"])

# Fit the model on the data with `monteCarlo_multiple`
fit_data = model.monteCarlo_multiple(S, I, B, t)



├─ Windows starting at:  [17, 46, 98, 124, 174, 222, 276, 331, 383, 429, 481]
├─ Windows ending at:    [47, 72, 124, 153, 206, 260, 311, 361, 415, 468, 519]
├─ Window start cases:   [41.0, 88.0, 113.0, 157.0, 308.0, 1521.0, 249.0, 183.0, 194.0, 256.0, 141.0]
│
├──┬ ✣✣✣ New window ➙  1  ✣✣✣
│  ├─ S(0) ─   2550525   I(0) ─   41
│  ├─ beta ─   1   r ─   0.14285714285714285
│  ├─ beta bound ─   0.001  ─  10
│  ├─ r bound ─   0.00014285714285714284  ─  1.4285714285714284
│  │
│  ├─┬─ ⨭ Initializing Monte Carlo ⨮
│  │ ├─ Progress at : 51%
│  │ ├─ Progress at : 77%
│  │ ├─ Progress at : 91%
│  │ └─ Finished! ✓
│  └─ ✣✣✣ ➙  1  ✣✣✣
│
├──┬ ✣✣✣ New window ➙  2  ✣✣✣
│  ├─ S(0) ─   2949891   I(0) ─   88
│  ├─ beta ─   1   r ─   0.14285714285714285
│  ├─ beta bound ─   0.001  ─  10
│  ├─ r bound ─   0.00014285714285714284  ─  1.4285714285714284
│  │
│  ├─┬─ ⨭ Initializing Monte Carlo ⨮
│  │ ├─ Progress at : 51%
│  │ ├─ Progress at : 75%
│  │ ├─ Progress at : 93%
│  │ └─ Finished! ✓
│  └─ ✣✣✣ ➙  2  ✣

In [None]:

from bokeh.models import ColumnDataSource, RangeTool, LinearAxis, Range1d
from bokeh.palettes import brewer, Inferno10
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import column
from bokeh.io import output_notebook

output_notebook()

estimation_data = model.data
full_series = estimation_data["full"]

# Shared source for the full cases series (used by the top and bottom panels)
source = ColumnDataSource(data=dict(date=full_series["t"], value=full_series["I"]))

# Initial view of the top panel: samples 5 to 105 of the full series, clamped
# to the last available sample so shorter series do not raise an IndexError.
n_samples = len(full_series["t"])
view_start = min(5, n_samples - 1)
view_end = min(105, n_samples - 1)

# --- Top panel: reported cases and estimated cases -------------------------
p1 = figure(
  tools="hover",
  x_range=(
      full_series["t"][view_start],
      full_series["t"][view_end]),
  plot_width=700,
  plot_height=300
)

# Plotting the full data
p1.line("date", "value",
  source=source,
  legend_label="Casos",
  line_width=2,
  color="#f4511e",
  line_cap='round',
  line_alpha=0.9
)

# Plotting the windowed original data.
# The loop variable is deliberately not named `dataset`, so the city
# DataFrame defined earlier in the notebook is not overwritten.
for window in estimation_data["data"]["original"]:
    p1.line(window["t"], window["I"],
        legend_label="Casos",
        line_width=4,
        color="#f4511e",
        line_cap='round',
        line_alpha=0.9
    )

# Plotting the estimated data
for window in estimation_data["data"]["simulated"]:
    p1.line(window["t"], window["I"],
        line_dash='dashed',
        legend_label="Estimado",
        line_width=3,
        color="#0288d1",
        line_cap='round',
        line_alpha=0.9
    )

# Building the figure background
p1.grid.grid_line_alpha = 0
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.yaxis.axis_label = "Indivíduos"
p1.xaxis.axis_label = "Ano"

# --- Bottom panel: overview of the full series with the range selector ----
select = figure(title="Drag the middle and edges of the selection box to change the range above",
                  plot_height=130, plot_width=700, y_range=p1.y_range,
                  y_axis_type=None, tools="", toolbar_location=None)

# The range tool drives the x range of the top panel
range_tool = RangeTool(x_range=p1.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.line('date', 'value', source=source,
          line_width=2, color="#f4511e", line_cap='round', line_alpha=0.9)
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool
select.xaxis.axis_label = 'Ano'

# --- Middle panel: Ro per window, sharing the x range of the top panel ----
p = figure(
  tools="hover",
  y_axis_type="log",
  x_range=p1.x_range,
  plot_width=700,
  plot_height=300
)

for key, value in fit_data["results"].items():
    # Start time of this window (keys are used as 1-based window numbers)
    window_start = model.data["time"][int(key) - 1]

    # Correcting the time...
    # NOTE(review): the elapsed time is divided by 365 twice; confirm that
    # this double scaling is intended and not a leftover.
    corr_time = (value["time"] - window_start) / 365
    corr_time = corr_time / 365 + window_start

    # Ro as the ratio between the first and the second fitted parameter
    # (presumably beta / r — confirm the ordering of `pars`).
    # The comprehension variable is not named `p`, to avoid shadowing the figure.
    Ro = [pars[0] / pars[1] for pars in value["pars"]]

    p.line(corr_time, Ro,
           legend_label="Ro",
           line_width=4,
           color="#4527a0",
           line_cap='round',
           line_alpha=0.9)

# Building the figure background
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1


show(column(p1, p, select))


In [None]:

import pickle

# Serialize `fit_data` into a binary pickle file in the working directory
with open('mc_London.pickle', 'wb') as pickle_file:
    pickle.dump(fit_data, pickle_file)
    

### Visualizando os resultados

In [None]:

from bokeh.models import ColumnDataSource, RangeTool, LinearAxis, Range1d
from bokeh.palettes import brewer, Inferno10
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import column
from bokeh.io import output_notebook


estimation_data = model.data
full_series = estimation_data["full"]

# Shared source for the full cases series (used by the top and bottom panels)
source = ColumnDataSource(data=dict(date=full_series["t"], value=full_series["I"]))

# Initial view of the top panel: samples 5 to 105 of the full series, clamped
# to the last available sample so shorter series do not raise an IndexError.
n_samples = len(full_series["t"])
view_start = min(5, n_samples - 1)
view_end = min(105, n_samples - 1)

# --- Top panel: reported cases and estimated cases -------------------------
p1 = figure(
  tools="hover",
  x_range=(
      full_series["t"][view_start],
      full_series["t"][view_end]),
  plot_width=700,
  plot_height=300
)

# Plotting the full data
p1.line("date", "value",
  source=source,
  legend_label="Casos",
  line_width=2,
  color="#f4511e",
  line_cap='round',
  line_alpha=0.9
)

# Plotting the windowed original data.
# The loop variable is deliberately not named `dataset`, so the city
# DataFrame defined earlier in the notebook is not overwritten.
for window in estimation_data["data"]["original"]:
    p1.line(window["t"], window["I"],
        legend_label="Casos",
        line_width=4,
        color="#f4511e",
        line_cap='round',
        line_alpha=0.9
    )

# Plotting the estimated data
for window in estimation_data["data"]["simulated"]:
    p1.line(window["t"], window["I"],
        line_dash='dashed',
        legend_label="Estimado",
        line_width=3,
        color="#0288d1",
        line_cap='round',
        line_alpha=0.9
    )

# Building the figure background
p1.grid.grid_line_alpha = 0
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.yaxis.axis_label = "Indivíduos"
p1.xaxis.axis_label = "Ano"

# --- Bottom panel: overview of the full series with the range selector ----
select = figure(title="Drag the middle and edges of the selection box to change the range above",
                  plot_height=130, plot_width=700, y_range=p1.y_range,
                  y_axis_type=None, tools="", toolbar_location=None)

# The range tool drives the x range of the top panel
range_tool = RangeTool(x_range=p1.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.line('date', 'value', source=source,
          line_width=2, color="#f4511e", line_cap='round', line_alpha=0.9)
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool
select.xaxis.axis_label = 'Ano'

# --- Middle panel: Ro per window, sharing the x range of the top panel ----
p = figure(
  tools="hover",
  y_axis_type="log",
  x_range=p1.x_range,
  plot_width=700,
  plot_height=300
)

for key, value in fit_data["results"].items():
    # Start time of this window (keys are used as 1-based window numbers)
    window_start = model.data["time"][int(key) - 1]

    # Correcting the time...
    # NOTE(review): the elapsed time is divided by 365 twice; confirm that
    # this double scaling is intended and not a leftover.
    corr_time = (value["time"] - window_start) / 365
    corr_time = corr_time / 365 + window_start

    # Ro as the ratio between the first and the second fitted parameter
    # (presumably beta / r — confirm the ordering of `pars`).
    # The comprehension variable is not named `p`, to avoid shadowing the figure.
    Ro = [pars[0] / pars[1] for pars in value["pars"]]

    p.line(corr_time, Ro,
           legend_label="Ro",
           line_width=4,
           color="#4527a0",
           line_cap='round',
           line_alpha=0.9)

# Building the figure background
p.grid.grid_line_alpha = 0
p.ygrid.band_fill_color = "olive"
p.ygrid.band_fill_alpha = 0.1


show(column(p1, p, select))


In [None]:

# Size of the cases figure as [width, height]. The name was previously used
# without being defined in this notebook; 700x300 matches the cases panel
# built in the visualisation cells.
plot_size = [700, 300]

# Iterate over (window key, window results) pairs. Iterating the dict directly
# yields only the keys, which cannot be unpacked into two names.
for key, value in fit_data["results"].items():
    # Start time of this window, indexed as in the visualisation cells
    # (keys are used as 1-based window numbers — confirm).
    window_start = model.data["time"][int(key) - 1]

    # NOTE(review): the elapsed time is divided by 365 twice, and `corr_time`
    # is not used after this loop; confirm whether this draft is still needed.
    corr_time = (value["time"] - window_start)/365
    corr_time = corr_time/365 + window_start



# Cases figure sharing the x range of the Ro figure `p` built previously
p1 = figure(
  tools="hover",
  x_range=p.x_range,
  plot_width=plot_size[0],
  plot_height=plot_size[1]
)

# Plotting the full data
p1.line(estimation_data["full"]["t"], estimation_data["full"]["I"],
  legend_label="Casos",
  line_width=2,
  color="#f4511e",
  line_cap='round',
  line_alpha=0.9
)
# Plotting the windowed original data.
# The loop variable is not named `dataset`, so the city DataFrame defined
# earlier in the notebook is not overwritten.
for window in estimation_data["data"]["original"]:
    p1.line(window["t"], window["I"],
        legend_label="Casos",
        line_width=4,
        color="#f4511e",
        line_cap='round',
        line_alpha=0.9
    )
# Plotting the estimated data
for window in estimation_data["data"]["simulated"]:
    p1.line(window["t"], window["I"],
        line_dash='dashed',
        legend_label="Estimado",
        line_width=3,
        color="#0288d1",
        line_cap='round',
        line_alpha=0.9
    )
# Building the figure background
p1.grid.grid_line_alpha = 0
p1.ygrid.band_fill_color = "olive"
p1.ygrid.band_fill_alpha = 0.1
p1.yaxis.axis_label = "Indivíduos"
p1.xaxis.axis_label = "Ano"
p1.toolbar.autohide = True
# NOTE: the figure is only built here; call `show(p1)` to display it.



## Estimando para todas as cidades

In [None]:

from models import *

# Names of all the cities present in the data
cities = data["city"].unique()

# Fit results, keyed by city name
par_data = {}
for i, city in enumerate(cities):

    # Rows of the current city: `where` blanks the other rows with NaN and
    # `dropna` discards them.
    belongs_to_city = data["city"] == city
    dataset = data.where(belongs_to_city).dropna()

    # Training series as NumPy arrays
    t = dataset["time"].to_numpy()
    I = dataset["cases"].to_numpy()
    B = dataset["births"].to_numpy()
    S = dataset["pop"].to_numpy() - I + B

    # A fresh, non-verbose SIR model for each city
    model = ss.SIR(verbose=False)

    # Fit the model on the city data and keep what `fit_multiple` returns
    par_data[city] = model.fit_multiple(S, I, B, t, out_type=1)

    # Save the summary figure of this city in the summaries folder
    summary_options = dict(
        save_results=True,
        plot_size=[700,500],
        folder_path="./estimation_summaries/",
        file_name=city+"_summary.png",
    )
    model.result_summary(**summary_options)

    print("Finished - ", city, " - ", i+1, " of ", len(cities))


# Obtendo informações de latitude e longitude

In [None]:

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="UK_EpidemicModels_App")
# The public Nominatim service asks for at most one request per second, so the
# lookups are throttled. With its default settings RateLimiter also retries
# failed requests and returns None when they keep failing.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

for city in cities:
    # City names come from file names, where dots stand for spaces
    city_name = city.replace(".", " ")
    location = geocode(city_name)

    if location is None:
        # No match (or the request failed): record NaN coordinates instead of
        # crashing on `None[-1]`. Rows of this city are dropped by any later
        # `dropna()`, so the warning below should not be ignored.
        par_data[city]["lat"] = float("nan")
        par_data[city]["lon"] = float("nan")
        print("- NOT located: ", city_name)
        continue

    par_data[city]["lat"] = location.latitude
    par_data[city]["lon"] = location.longitude
    print("- Located: ", location[-1], "  ", city_name)
    

# Correlação dos modelos

## Interpolando os modelos estimados

In [None]:

import numpy as np
import scipy.signal as scs
from scipy.interpolate import Rbf, InterpolatedUnivariateSpline

# Common time span covered by the estimates of all the cities
first_year = round(min(par_data[city]["time"][0] for city in cities))
last_year = round(max(par_data[city]["time"][-1] for city in cities))

# Number of samples of the common time grid on which every city is resampled
points = 30
time = np.linspace(first_year, last_year, points)

# Long-format table: one row per (city, sampled year)
data_struc = {
    "lat": [],
    "lon": [],
    "city": [],
    "year": [],
    "beta": [],
    "r": [],
}

for city in cities:
    city_fit = par_data[city]

    # Selecting the estimated parameters and their time stamps
    beta_res = city_fit["pars"]["beta"]
    r_res = city_fit["pars"]["r"]
    year = city_fit["time"]

    # Interpolating with Gaussian radial basis functions (signal resampling
    # and univariate splines were the alternatives tried in earlier drafts)
    beta_rbf = Rbf(year, beta_res, function='gaussian', smooth=2)
    r_rbf = Rbf(year, r_res, function='gaussian', smooth=2)

    # Appending this city: constant fields are repeated once per sample,
    # parameters are evaluated on the common time grid
    data_struc["lat"].extend([city_fit["lat"]] * points)
    data_struc["lon"].extend([city_fit["lon"]] * points)
    data_struc["city"].extend([city] * points)
    data_struc["beta"].extend(beta_rbf(time).tolist())
    data_struc["r"].extend(r_rbf(time).tolist())
    data_struc["year"].extend(time.tolist())

# Creating the data frame
df = pd.DataFrame(data_struc)


### Visualizando a interpolação

In [None]:

# Creating the parameters plots: one line per city for the interpolated
# beta and r series, each with a clickable legend on the right.

from bokeh.models   import Legend, ColumnDataSource, RangeTool, LinearAxis, Range1d
from bokeh.palettes import brewer, Inferno256
from bokeh.plotting import figure, show
from bokeh.layouts  import column
from bokeh.io       import output_notebook


p_beta = figure(
    title="Beta Parameter",
    y_axis_type="log",
    plot_width=700,
    plot_height=500
)

p_r = figure(
    title="R Parameter",
    y_axis_type="log",
    plot_width=700,
    plot_height=500
)

legend_it, legend_it_r = [], []
for i, city in enumerate(cities):

    # Spread the cities over the 256-colour palette
    color = Inferno256[int((i/len(cities))*256)]

    df_filt = df.where(df["city"]==city).dropna()
    years = df_filt["year"].to_list()

    # Absolute values are plotted because the y axes are logarithmic
    c = p_beta.line(
        years,
        abs(df_filt["beta"]),
        line_width=4,
        line_cap="round",
        color=color
    )

    cr = p_r.line(
        years,
        abs(df_filt["r"]),
        line_width=4,
        line_cap="round",
        color=color
    )

    legend_it.append((city, [c]))
    legend_it_r.append((city, [cr]))

# Same background and legend set-up for both figures
for fig, legend_items in ((p_beta, legend_it), (p_r, legend_it_r)):
    fig.grid.grid_line_alpha = 0
    fig.ygrid.band_fill_color = "olive"
    fig.ygrid.band_fill_alpha = 0.1
    fig.xaxis.axis_label = "Ano"

    # Legend outside the plot area; clicking an entry hides its line.
    # (The policy used to be set to "mute" and then immediately overridden
    # by "hide"; only the effective value is kept.)
    legend = Legend(items=legend_items, location=(0, -10))
    legend.click_policy = "hide"
    fig.add_layout(legend, "right")

show(column(p_beta, p_r))


## Criando os data frames para a matriz de correlação

In [None]:

# Per-city mappings {year: value}, one dict per parameter
r_data = {}
beta_data = {}

for city in cities:

    df_filt = df.where(df["city"]==city).dropna()
    years = df_filt["year"].to_list()

    # Pair every sampled year with the parameter value at that year
    beta_data[city] = dict(zip(years, df_filt["beta"].to_list()))
    r_data[city] = dict(zip(years, df_filt["r"].to_list()))

# Wide frames: one column per city, one row per year (sorted); the float
# years are scaled by 1000 and truncated to get an integer index
r_df = pd.DataFrame(r_data).sort_index()
r_df.index = (1000 * r_df.index.to_numpy()).astype(int)

beta_df = pd.DataFrame(beta_data).sort_index()
beta_df.index = (1000 * beta_df.index.to_numpy()).astype(int)
beta_df.head()


## Correlação do parâmetro $\beta$

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlation between the columns of `beta_df`
beta_corr = beta_df.corr()

# Annotated heatmap of the correlation matrix, rounded to two decimals
f, ax = plt.subplots(figsize=(18, 15))
heatmap_options = dict(annot=True, cmap="coolwarm", fmt='.2f', linewidths=.05)
hm = sns.heatmap(beta_corr.round(2), ax=ax, **heatmap_options)

# Leave room at the top of the figure for the title
f.subplots_adjust(top=0.93)
t = f.suptitle('Beta Parameter - Features Correlation Heatmap', fontsize=14)


## Correlação do parâmetro $r$

In [None]:

# Pairwise correlation between the columns of `r_df`
r_corr = r_df.corr()

# Annotated heatmap of the correlation matrix, rounded to two decimals
f, ax = plt.subplots(figsize=(18, 15))
heatmap_options = dict(annot=True, cmap="coolwarm", fmt='.2f', linewidths=.05)
hm = sns.heatmap(r_corr.round(2), ax=ax, **heatmap_options)

# Leave room at the top of the figure for the title
f.subplots_adjust(top=0.93)
t = f.suptitle('r Parameter - Features Correlation Heatmap', fontsize=14)


## Correlação do modelo

In [None]:

# Model correlation: element-wise mean of the r and beta correlation matrices
model_corr = (r_corr + beta_corr) / 2

# Annotated heatmap of the averaged matrix, rounded to two decimals
f, ax = plt.subplots(figsize=(18, 15))
heatmap_options = dict(annot=True, cmap="coolwarm", fmt='.2f', linewidths=.05)
hm = sns.heatmap(model_corr.round(2), ax=ax, **heatmap_options)

# Leave room at the top of the figure for the title
f.subplots_adjust(top=0.93)
t = f.suptitle('Model - Features Correlation Heatmap', fontsize=14)
