In [23]:
import sys
sys.path.append('./gustavos_code/initial')
import seir
import pandas as pd
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
%matplotlib inline

# Start the Julia runtime through PyJulia and load the Julia side of the model.
from julia.api import Julia
# NOTE(review): compiled_modules=False is presumably the PyJulia workaround for
# Python builds that are statically linked to libpython — confirm it is still needed.
jl = Julia(compiled_modules=False)
# From here on the name `Julia` no longer refers to the julia.api class imported
# above: it is rebound to Julia's Main module, which is used to set variables
# and evaluate Julia code.
from julia import Main as Julia
# Evaluate the Julia source file. The recorded output of this cell shows that
# the value of the include is the Julia function fit_initial.
Julia.eval('include("la_dança_1.jl")')

<PyCall.jlwrap fit_initial>

In [24]:
# Configuration
# NOTE(review): tinc and tinf are presumably the incubation and infectious
# periods of the SEIR model, in days — confirm against the seir module.
tinc = 2.9
tinf = 5.2
# Size of the rolling window (number of rows) used to accumulate new cases in
# initial_conditions and simulate.
# NOTE(review): this is the rounded *product* of the two values (15 here), not
# their sum (8) — confirm the product is what is intended.
covid_window = int(round(tinc*tinf))
# Minimum number of rows a city must have for initial_conditions to fit it.
min_days = 5

In [25]:
# Load the case table and keep only the rows whose "state" column is "SP".
# NOTE(review): the path is absolute and machine-specific; the notebook only
# runs where this exact file exists.
covid_data = pd.read_csv(
    "/home/pjssilva/nuvem/unicamp/compartilhados/ICMCxCovid/Data/covid_with_cities.csv"
).loc[lambda frame: frame["state"] == "SP"]

# Cities of interest: distinct city names with an estimated 2019 population
# above 100000.
large_cities = covid_data.loc[
    covid_data["estimated_population_2019"] > 100000, "city"
].unique()

In [26]:
def initial_conditions(city, covid_data, covid_window, min_days, Julia):
    """Fit the initial SEIR state of a city from its confirmed-case series.

    The rows of the city are ordered by date, the daily new cases are summed
    over a rolling window of ``covid_window`` rows, the result is divided by
    the city population and the resulting series is handed to the Julia
    function ``fit_initial``.

    Parameters
    ----------
    city
        Value matched against the "city" column of ``covid_data``.
    covid_data : pandas.DataFrame
        Must provide the columns "city", "date", "confirmed" and
        "estimated_population_2019".
    covid_window : int
        Number of rows in the rolling window.
    min_days : int
        Minimum number of rows the city must have to be fitted.
    Julia
        Object used to talk to Julia: ``observed_I`` is assigned on it,
        ``eval`` is called on it, and ``initialc`` is read back from it.

    Returns
    -------
    tuple
        ``((S0, E0, I0, R0), observed_I)`` where the first item holds the
        first four entries of ``initialc`` and ``observed_I`` is the
        population-normalised pandas Series that was sent to Julia.

    Raises
    ------
    ValueError
        If the city has fewer than ``min_days`` rows, including no rows at all.
    """
    # Sort first and rebuild the index afterwards, so that label 0 and
    # position 0 both denote the earliest date.
    city_data = (
        covid_data[covid_data["city"] == city]
        .sort_values(by=["date"])
        .reset_index(drop=True)
    )

    # Validate before touching any row: an empty selection must surface as the
    # ValueError callers already handle, not as an IndexError from .iloc[0].
    n_days = len(city_data)
    if n_days == 0 or n_days < min_days:
        raise ValueError("Not enough data for %s only %d days available" % (city, n_days))

    population = city_data["estimated_population_2019"].iloc[0]
    cumulative = city_data["confirmed"].values

    # I am computing the new cases instead of using the new_confirmed column because
    # there is error at least in the first element for São Paulo. It should be 1.
    new_cases = np.append(cumulative[0], cumulative[1:] - cumulative[:-1])
    city_data["new_cases"] = new_cases

    observed_I = city_data["new_cases"].rolling(covid_window).sum()
    # The first covid_window - 1 rolling sums are NaN (incomplete window); the
    # cumulative count is used for the first covid_window rows instead.
    observed_I.iloc[:covid_window] = cumulative[:covid_window]
    observed_I = observed_I / population

    # Hand the series to Julia as a global and let fit_initial estimate the state.
    Julia.observed_I = observed_I.values
    Julia.eval('initialc = fit_initial(observed_I)')
    initial_state = tuple(Julia.initialc[k] for k in range(4))
    return initial_state, observed_I

In [27]:
%%time

# Fit the initial conditions of every large city. A city for which
# initial_conditions raises ValueError is reported and recorded in `ignored`
# instead of receiving an entry in `parameters`.
parameters = {}
ignored = []

n_cities = len(large_cities)
for i, c in enumerate(large_cities):
    print("%d/%d" % (i + 1, n_cities), c)
    try:
        parameters[c], observed_I = initial_conditions(
            c, covid_data, covid_window, min_days, Julia
        )
    except ValueError:
        print("Ignoring ", c, "not enough data.")
        ignored.append(c)

1/57 Americana
2/57 Araçatuba
3/57 Araraquara
4/57 Araras
5/57 Barretos
6/57 Barueri
7/57 Bauru
8/57 Birigui
9/57 Botucatu
10/57 Bragança Paulista
11/57 Campinas
12/57 Carapicuíba
13/57 Catanduva
14/57 Cotia
15/57 Diadema
16/57 Franca
17/57 Francisco Morato
18/57 Guaratinguetá
19/57 Guarujá
20/57 Guarulhos
21/57 Hortolândia
22/57 Indaiatuba
23/57 Itapetininga
24/57 Itapevi
25/57 Itaquaquecetuba
26/57 Itu
27/57 Jacareí
28/57 Jandira
29/57 Jaú
30/57 Jundiaí
31/57 Limeira
32/57 Marília
33/57 Mauá
34/57 Mogi Guaçu
35/57 Osasco
36/57 Ourinhos
37/57 Pindamonhangaba
38/57 Piracicaba
39/57 Poá
40/57 Praia Grande
41/57 Presidente Prudente
42/57 Ribeirão Pires
43/57 Ribeirão Preto
44/57 Rio Claro
45/57 Salto
46/57 Santo André
47/57 Santos
48/57 São Carlos
49/57 São Paulo
50/57 São Vicente
51/57 Sertãozinho
52/57 Sorocaba
53/57 Sumaré
54/57 Suzano
55/57 Taubaté
56/57 Várzea Paulista
57/57 Votorantim
CPU times: user 7.91 s, sys: 133 ms, total: 8.05 s
Wall time: 8.17 s


In [28]:
%%time 

def error(x, y):
    """Return the square of la.norm (default arguments) of the difference x - y."""
    residual = x - y
    return la.norm(residual)**2

# NOTE(review): nothing in this cell reads this value before the
# `for c in large_cities` loop at the end of the cell rebinds `c` (simulate
# receives its own `c` argument); it looks like a leftover from a single-city run.
c = "São Paulo"

def simulate(c, covid_data, covid_window, min_days, initial_parameters=None):
    """Simulate from the computed initial parameters until the last day.

    Parameters
    ----------
    c
        Value matched against the "city" column of ``covid_data``.
    covid_data : pandas.DataFrame
        Must provide the columns "city" and "date".
    covid_window, min_days
        Accepted so existing calls keep working; the simulation itself does
        not depend on them.
    initial_parameters : mapping, optional
        Maps a city to its ``(S0, E0, I0, R0)`` tuple. When omitted, the
        notebook-level ``parameters`` dictionary is used, as before.

    Returns
    -------
    tuple
        ``(result[:, -1], last_day)``: the last column of the array returned
        by the seir model's ``run`` and the last "date" value of the city.

    Raises
    ------
    IndexError
        If the city has no rows in ``covid_data``.
    KeyError
        If the city has no entry in the initial-parameters mapping.
    """
    city_data = covid_data[covid_data["city"] == c].sort_values(by=["date"])

    # One simulated step per available row of the city.
    ndays = len(city_data)
    last_day = city_data["date"].iloc[-1]

    if initial_parameters is None:
        # Backward-compatible default: the dictionary filled by the fitting cell.
        initial_parameters = parameters
    S0, E0, I0, R0 = initial_parameters[c]

    covid = seir.seir(ndays)
    print("Simulating", c, "until", last_day)
    result = covid.run((S0, E0, I0, R0))
    # NOTE(review): presumably rows are the compartments and columns the days,
    # so the last column is the state on the final day — confirm against seir.
    return result[:, -1], last_day
    

# Advance every fitted city to its last reported day.
parameters_at_final_day = {}
for c in large_cities:
    if c not in parameters:
        # Cities rejected by the fitting step (listed in `ignored`) have no
        # entry in `parameters`; simulating them would raise KeyError.
        continue
    # `last_day` is overwritten on every iteration, so after the loop it holds
    # the last day of the final simulated city only.
    parameters_at_final_day[c], last_day = simulate(c, covid_data, covid_window, min_days)

Simulating Americana until 2020-04-19
Simulating Araçatuba until 2020-04-19
Simulating Araraquara until 2020-04-19
Simulating Araras until 2020-04-19
Simulating Barretos until 2020-04-19
Simulating Barueri until 2020-04-19
Simulating Bauru until 2020-04-19
Simulating Birigui until 2020-04-19
Simulating Botucatu until 2020-04-19
Simulating Bragança Paulista until 2020-04-19
Simulating Campinas until 2020-04-19
Simulating Carapicuíba until 2020-04-19
Simulating Catanduva until 2020-04-19
Simulating Cotia until 2020-04-19
Simulating Diadema until 2020-04-19
Simulating Franca until 2020-04-19
Simulating Francisco Morato until 2020-04-19
Simulating Guaratinguetá until 2020-04-19
Simulating Guarujá until 2020-04-19
Simulating Guarulhos until 2020-04-19
Simulating Hortolândia until 2020-04-19
Simulating Indaiatuba until 2020-04-19
Simulating Itapetininga until 2020-04-19
Simulating Itapevi until 2020-04-19
Simulating Itaquaquecetuba until 2020-04-19
Simulating Itu until 2020-04-19
Simulating 

In [29]:
# Turn the {city: state} dictionary into a table with one row per city.
# The name is rebound from the dictionary to the DataFrame, so this cell
# expects the dictionary produced by the simulation cell as its input.
parameters_at_final_day = pd.DataFrame.from_dict(
    parameters_at_final_day,
    orient="index",
    columns=["S0", "E0", "I0", "R0"],
)

In [30]:
# The row labels are city names containing non-ASCII characters, so the
# encoding is pinned to UTF-8 instead of depending on the platform default.
with open('data/initial_values.csv', 'w', encoding='utf-8') as f:
    # The first line of the file is a "#" comment recording the date.
    # NOTE(review): readers of this CSV presumably have to skip it (e.g. with
    # a comment="#" option) — confirm against the consumer.
    f.write("# Initial condition for " + str(last_day) + "\n")
    parameters_at_final_day.to_csv(f)