In [1]:
import sys
sys.path.append('./gustavos_code/initial')
import seir
import pandas as pd
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
%matplotlib inline

# To use PyJulia
# Create the Julia runtime object first (with compiled_modules=False); the
# handle is kept in `jl`.
from julia.api import Julia
jl = Julia(compiled_modules=False)
# NOTE: this import rebinds the name `Julia` from the julia.api class to
# Julia's `Main` module. Every later `Julia.<name>` / `Julia.eval(...)` in this
# notebook therefore goes through `Main`.
from julia import Main as Julia
# Load the Julia source file. The cell output shows that the include evaluates
# to the function `fit_initial`, which initial_conditions calls later.
# NOTE(review): the path is relative — presumably resolved against the
# kernel's working directory; confirm.
Julia.eval('include("la_danza_1.jl")')

<PyCall.jlwrap fit_initial>

In [2]:
# Configuration
# Two time constants; presumably the incubation and infectious periods in
# days (TODO confirm against the model definition).
tinc, tinf = 2.9, 5.2
# Length (in rows, i.e. days of data) of the rolling window used to build the
# observed-infected series. Evaluates to 15 for the values above.
# NOTE(review): the window is the rounded *product* of the two constants —
# confirm that the product (rather than e.g. the sum) is intended.
covid_window = round(tinc * tinf)
# Minimum number of data rows a city must have to be fitted.
min_days = 5

In [3]:
# Load the case table and keep only the rows whose state is "SP".
# NOTE(review): hard-coded absolute path on the author's machine; the
# notebook will not run elsewhere without editing it.
covid_data = pd.read_csv(
    "/home/pjssilva/nuvem/unicamp/compartilhados/ICMCxCovid/Data/covid_with_cities.csv"
)
covid_data = covid_data.loc[covid_data["state"] == "SP"]

# Cities of interest: every distinct city name that has at least one row with
# an estimated 2019 population above 100000.
large_cities = covid_data.loc[
    covid_data["estimated_population_2019"] > 100000, "city"
].unique()

In [4]:
def initial_conditions(city, covid_data, covid_window, min_days, Julia):
    """Fit SEIR initial conditions for one city from its confirmed-case series.

    The rows of `covid_data` belonging to `city` are ordered by date and
    turned into an "observed infected" series: the rolling sum of daily new
    cases over `covid_window` rows, divided by the city population. For the
    first `covid_window` rows (where the rolling window is not yet full) the
    cumulative confirmed count is used instead. The series is then handed to
    the Julia function `fit_initial`.

    Parameters
    ----------
    city : value compared against ``covid_data["city"]`` to select the rows.
    covid_data : pandas.DataFrame with (at least) the columns "city", "date",
        "confirmed" and "estimated_population_2019".
    covid_window : int, length (in rows) of the rolling window.
    min_days : int, minimum number of rows the city must have.
    Julia : object with the PyJulia `Main` interface used here: attribute
        assignment (`observed_I`), `eval(str)` and the attribute `initialc`
        that the evaluated Julia code defines.

    Returns
    -------
    tuple
        ``((S0, E0, I0, R0), observed_I)`` where the first element holds the
        first four entries of Julia's `initialc` and `observed_I` is the
        population-normalised pandas Series that was fitted.

    Raises
    ------
    ValueError
        If the city has no rows at all or fewer than `min_days` rows.
    """
    # Select the city rows and order them chronologically. reset_index() is
    # applied before sorting, so the index labels of the result (and of the
    # returned series) are the pre-sort row positions.
    city_data = covid_data[covid_data["city"] == city].reset_index().sort_values(by=["date"])

    # Validate up front: an unknown/empty city must raise the ValueError that
    # callers catch, not an IndexError from the `.iloc[0]` lookup below.
    n_days = len(city_data)
    if n_days == 0 or n_days < min_days:
        raise ValueError("Not enough data for %s only %d days available" % (city, n_days))

    population = city_data["estimated_population_2019"].iloc[0]
    cumulative = city_data["confirmed"].to_numpy()

    # I am computing the new cases instead of using the new_confirmed column
    # because there is an error at least in the first element for São Paulo.
    # It should be 1.
    # prepend=0 makes the first increment equal to the first (earliest)
    # cumulative count; access is positional, so it does not depend on the
    # index labels left behind by the sort.
    city_data["new_cases"] = np.diff(cumulative, prepend=0)

    observed_I = city_data["new_cases"].rolling(covid_window).sum()
    # The rolling sum is NaN until the window is full; use the cumulative
    # count for those leading rows.
    observed_I.iloc[:covid_window] = cumulative[:covid_window]
    observed_I /= population

    # Hand the series to Julia and run the fit there.
    Julia.observed_I = observed_I.values
    Julia.eval('initialc = fit_initial(observed_I)')
    # Fetch the fitted vector from Julia once instead of once per component.
    initialc = Julia.initialc
    S0, E0, I0, R0 = (initialc[k] for k in range(4))
    return (S0, E0, I0, R0), observed_I

In [5]:
%%time

# Fit the initial conditions of every large city. Cities for which
# initial_conditions raises ValueError are reported and collected in
# `ignored`; they get no entry in `parameters`.
parameters = {}
ignored = []

n_cities = len(large_cities)
for i, c in enumerate(large_cities):
    # Progress line: "<position>/<total> <city>".
    print("{:d}/{:d}".format(i + 1, n_cities), c)
    try:
        # Only the fitted tuple is kept; observed_I is overwritten on every
        # iteration.
        parameters[c], observed_I = initial_conditions(
            c, covid_data, covid_window, min_days, Julia
        )
    except ValueError:
        print("Ignoring ", c, "not enough data.")
        ignored.append(c)

1/68 Americana
2/68 Araçatuba
3/68 Araraquara
4/68 Araras
5/68 Barretos
6/68 Barueri
7/68 Bauru
8/68 Birigui
9/68 Botucatu
10/68 Bragança Paulista
11/68 Campinas
12/68 Carapicuíba
13/68 Catanduva
14/68 Cotia
15/68 Diadema
16/68 Embu Das Artes
17/68 Ferraz De Vasconcelos
18/68 Franca
19/68 Francisco Morato
20/68 Franco Da Rocha
21/68 Guaratinguetá
22/68 Guarujá
23/68 Guarulhos
24/68 Hortolândia
25/68 Indaiatuba
26/68 Itapecerica Da Serra
27/68 Itapetininga
28/68 Itapevi
29/68 Itaquaquecetuba
30/68 Itu
31/68 Jacareí
32/68 Jandira
33/68 Jaú
34/68 Jundiaí
35/68 Limeira
36/68 Marília
37/68 Mauá
38/68 Mogi Das Cruzes
39/68 Mogi Guaçu
40/68 Osasco
41/68 Ourinhos
42/68 Pindamonhangaba
43/68 Piracicaba
44/68 Poá
45/68 Praia Grande
46/68 Presidente Prudente
47/68 Ribeirão Pires
48/68 Ribeirão Preto
49/68 Rio Claro
50/68 Salto
51/68 Santa Bárbara D'Oeste
52/68 Santo André
53/68 Santos
54/68 São Bernardo Do Campo
55/68 São Caetano Do Sul
56/68 São Carlos
57/68 São José Do Rio Preto
58/68 São José 

In [6]:
%%time 

def error(x, y):
    """Return the squared norm of ``x - y``.

    Uses `numpy.linalg.norm` with its default settings (the 2-norm for 1-D
    input, the Frobenius norm for 2-D input).
    """
    residual = x - y
    return la.norm(residual) ** 2

# NOTE(review): module-level city name. `simulate` takes its city as an
# argument and the driver loop at the end of this cell rebinds `c`, so this
# value is not used by the code visible in this cell.
c = "São Paulo"

def simulate(c, covid_data, covid_window, min_days):
    """Run the SEIR model for city `c` from its fitted initial state up to its last data row.

    The initial state is read from the module-level dict `parameters`
    (a KeyError is raised if `c` has no entry there). The number of simulated
    steps equals the number of data rows of the city.

    Parameters
    ----------
    c : city name, matched against ``covid_data["city"]``.
    covid_data : DataFrame with the columns "city", "date", "confirmed" and
        "estimated_population_2019".
    covid_window : rolling-window length used to rebuild the observed series.
    min_days : accepted for signature symmetry; not used in the body.

    Returns
    -------
    tuple
        ``(result[:, -1], last_day)``: the last column of the array returned
        by ``seir.seir(ndays).run(...)`` (presumably the state at the final
        day — TODO confirm) and the "date" value of the city's last row.
    """
    # City rows in chronological order; index labels are the pre-sort positions.
    rows = covid_data[covid_data["city"] == c].reset_index().sort_values(by=["date"])
    population = rows["estimated_population_2019"].iloc[0]
    confirmed = rows["confirmed"]

    # The new cases are recomputed from the cumulative counts instead of using
    # the new_confirmed column, because that column has an error at least in
    # its first element for São Paulo (it should be 1).
    counts = confirmed.values
    increments = counts[1:] - counts[:-1]
    rows["new_cases"] = np.append(confirmed[0], increments)

    # Observed series: rolling sum of new cases, with the leading rows taken
    # from the cumulative count, normalised by population. In the active code
    # below only its length is used; the values serve the commented-out
    # diagnostics at the end.
    observed_I = rows["new_cases"].rolling(covid_window).sum()
    observed_I[:covid_window] = confirmed[:covid_window]
    observed_I /= population

    ndays = len(observed_I)
    S0, E0, I0, R0 = parameters[c]
    model = seir.seir(ndays)
    last_day = rows["date"].iloc[-1]
    print("Simulating", c, "until", last_day)
    result = model.run((S0, E0, I0, R0))
    return result[:, -1], last_day

    # Disabled diagnostics (unreachable after the return above):
    # plt.plot(np.arange(ndays), population*result[2,:], label='seir - Julia')
    # plt.plot(np.arange(ndays), population*observed_I.values, label=c)
    # plt.legend()
    # print("Error =", error(result[2,:], observed_I))
    # print("Feasibility error =", np.abs(1.0 - np.array((S0, E0, I0, R0)).sum()))
    

# Simulate every fitted city up to its last data row and collect the final
# state per city.
# NOTE(review): `last_day` ends up holding the last date of the *last* city
# simulated; it is only representative if all cities share the same last date.
parameters_at_final_day = {}
for c in large_cities:
    # Cities rejected during fitting (collected in `ignored`) have no entry in
    # `parameters`; simulate() would fail with a KeyError on them, so skip.
    if c not in parameters:
        print("Skipping", c, "no initial conditions available.")
        continue
    parameters_at_final_day[c], last_day = simulate(c, covid_data, covid_window, min_days)

Simulating Americana until 2020-04-23
Simulating Araçatuba until 2020-04-23
Simulating Araraquara until 2020-04-23
Simulating Araras until 2020-04-23
Simulating Barretos until 2020-04-23
Simulating Barueri until 2020-04-23
Simulating Bauru until 2020-04-23
Simulating Birigui until 2020-04-23
Simulating Botucatu until 2020-04-23
Simulating Bragança Paulista until 2020-04-23
Simulating Campinas until 2020-04-23
Simulating Carapicuíba until 2020-04-23
Simulating Catanduva until 2020-04-23
Simulating Cotia until 2020-04-23
Simulating Diadema until 2020-04-23
Simulating Embu Das Artes until 2020-04-23
Simulating Ferraz De Vasconcelos until 2020-04-23
Simulating Franca until 2020-04-23
Simulating Francisco Morato until 2020-04-23
Simulating Franco Da Rocha until 2020-04-23
Simulating Guaratinguetá until 2020-04-23
Simulating Guarujá until 2020-04-23
Simulating Guarulhos until 2020-04-23
Simulating Hortolândia until 2020-04-23
Simulating Indaiatuba until 2020-04-23
Simulating Itapecerica Da S

In [7]:
# Turn the {city: final state} mapping into a table with one row per city
# (city name as index) and one column per state component.
# NOTE(review): the name is rebound from a dict to a DataFrame, so re-running
# this cell on its own would feed the DataFrame back into from_dict —
# presumably not intended.
parameters_at_final_day = pd.DataFrame.from_dict(
    parameters_at_final_day,
    orient="index",
    columns=["S0", "E0", "I0", "R0"],
)

In [8]:
# Write the per-city table as CSV, preceded by a "#" comment line recording
# the date held in `last_day`.
# The encoding is fixed to UTF-8: the city names in the index contain
# non-ASCII characters, and without an explicit encoding the result (or a
# UnicodeEncodeError) would depend on the platform's locale.
with open('data/initial_values.csv', 'w', encoding='utf-8') as f:
    f.write("# Initial condition for " + str(last_day) + "\n")
    parameters_at_final_day.to_csv(f)