# New case studies for Robot Dance paper

In [None]:
import os
import datetime
from importlib import reload
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pylab as plt
from matplotlib import rc
rc("text", usetex=True)
rc("font", family="serif")

import run_robot
import prepare_data
reload(run_robot)

## Define Subnotification factor

Between the 21st and 29th of July, the city of São Paulo published the results of a survey showing that [17.9% of its population](https://www1.folha.uol.com.br/equilibrioesaude/2020/08/em-sao-paulo-22-dos-moradores-dos-bairros-mais-pobres-ja-pegaram-coronavirus.shtml) had already had Covid-19. Here we use that number to find a reasonable subnotification factor.

In [None]:
# Preliminary parameter set used only in this calibration cell: lockdown level 0.8,
# reproduction number 2.5 and a 30-day horizon.
basic_prm = prepare_data.save_basic_parameters(min_level=0.8, rep=2.5, ndays=30)
# Subnotification factor; presumably tuned by hand so that the evolved state of São Paulo
# matches the seroprevalence figure quoted above -- TODO confirm.
subnot_factor = 11.6
# NOTE(review): the meaning of the positional arguments ("SP", ["SP"], 10000000, factor, 1,
# report file) is defined by prepare_data, which is not visible here.
cities_data = prepare_data.compute_initial_condition_evolve_and_save(basic_prm, "SP", ["SP"], 10000000, subnot_factor, 1, "data/report_covid_with_drs_07_29.csv")
# Display the resulting table
cities_data



Now, we define some important decisions:

* The basic reproduction rate (R0). The original literature and our own estimates suggest 2.5. But this value seems high nowadays, when people are wearing masks, have learned stricter hygiene habits (more hand washing), and practice basic social distancing. I am now trying 1.8.

* Horizon of simulation: we use a little more than one year because after that we should probably have a vaccine and the game changes completely.

* Mean stay in ICU: mean time in ICU, it will also be used to select the right time series to estimate the number of ICU needed below. We are using 7.

* Lockdown level: what is the reproduction level achievable by a strict lockdown. We are using 0.8. Should be smaller than 1.

In [None]:
# Define the basic data for the case studies

# Basic reproduction number
basic_rep = 1.8

# Simulation horizon
# A little more than a year, when the vaccine should be here (14*2*14 = 392 days)
ndays = 14*2*14

# Mean time in ICU
time_icu = 7

# Lockdown level
lock_level = 0.8

# Define basic parameters
basic_prm = prepare_data.save_basic_parameters(min_level=lock_level, rep=basic_rep, time_icu=time_icu, ndays=ndays)

# Compute initial values

# For cities
# cities_data = prepare_data.compute_initial_condition_evolve_and_save(basic_prm, "SP", ["Araçatuba", "São José Do Rio Preto"], 500000, 1)
# cities_data = prepare_data.compute_initial_condition_evolve_and_save(basic_prm, "SP", ["São José Do Rio Preto"], 25000, 6, 1)

# For DRS
# NOTE(review): the literal 000000 is just the integer 0; the empty list presumably selects
# all regions of the DRS report -- confirm against prepare_data.
# subnot_factor comes from the subnotification cell above.
cities_data = prepare_data.compute_initial_condition_evolve_and_save(basic_prm, "SP", [], 000000, subnot_factor, 1, "data/report_covid_with_drs_07_01.csv")

# Sub-groups for figures (region labels used to select rows when plotting)
sp = ["SP"]
sp_so = sp + ["SW"]
masp_names = sp + ["E", "N", "W", "SE", "SW"]

In [None]:
# Problem dimensions: one row per region, one column per simulated day
ncities = len(cities_data.index)
ndays = int(basic_prm["ndays"])

# Target matrix (max infected level): 0.8 for every region and day
target = prepare_data.save_target(cities_data, np.full((ncities, ndays), 0.8))

# Use a force_dif that releases the cities in the end (all ones at this point)
force_dif = np.ones((ncities, ndays))

# Display the city table
cities_data

## Add information on the time series that estimate the need of ICUs

We are using the time series adjusted considering that the mean ICU stay is 7 days (which leads to larger ICU capacity).

In [None]:
# Select the fitted time-series parameters that match the configured mean ICU stay.
# Each array has nine entries; they are labelled rho_min, rho_max, intercept, trend,
# phi_1, phi_2, sigma_omega, state0 and state_less_1 when the DataFrame is built below.
# ts_sp is applied to the São Paulo metropolitan regions, ts_notsp to all the others.
if basic_prm["time_icu"] == 11:
    # Time series adjusted considering the mean ICU time is 11 days
    ts_sp = np.array([0.0074335, 0.01523406, -0.00186355, 0.0, 1.67356018, -0.68192908, np.sqrt(0.00023883),
        0.007682840158843, 0.007536060983504])
    ts_notsp = np.array([0.00520255, 0.01532709, 0.00044498, 0.0, 1.75553282, -0.76360711, np.sqrt(3.567E-05),
        0.005426447471187, 0.005282217308748])
elif basic_prm["time_icu"] == 7:
    # Time series adjusted considering the mean ICU time is 7 days
    ts_sp = np.array([0.01099859, 0.02236023, 0.00370254, 0.0, 1.79119571, -0.80552926, np.sqrt(0.00034005),
        0.011644768910252, 0.011221496171591])
    ts_notsp = np.array([0.0076481, 0.0218084, 0.00367839, 0.0, 1.81361379, -0.82550856, np.sqrt(8.028E-05),
        0.007907216664912, 0.007721801045322])
else:
    # Only the 7-day and 11-day fits are available
    raise NotImplementedError

# Zero-based row positions (hence the -1) of the regions that form the
# Metropolitan area of São Paulo
MASP = np.array([7, 10, 15, 16, 17, 22]) - 1

# Every region starts with the "not SP" parameters; the metropolitan rows are
# then overwritten with the São Paulo ones.
ts_drs = np.tile(ts_notsp, (len(cities_data), 1))
ts_drs[MASP] = ts_sp
ts_drs = pd.DataFrame(
    ts_drs,
    index=cities_data.index,
    columns=[
        "rho_min", "rho_max", "intercept", "trend", "phi_1",
        "phi_2", "sigma_omega", "state0", "state_less_1",
    ],
).assign(confidence=0.9, time_icu=time_icu)

# Append the time-series columns to the city table and display it
cities_data = pd.concat([cities_data, ts_drs], axis=1)
cities_data





In [None]:
pd.set_option("display.width", 120)

def run_a_test(basic_prm, result_file, figure_file, cities_data, M, target, force_dif, pools=None, verbosity=1):
    """Run one case study: optimize, save the results and plot three region groups.

    The optimization is prepared and solved through ``run_robot``. The results
    are then read back from ``result_file`` and one figure is produced for each
    of the groups ``sp``, ``sp_so`` and ``masp_names``.

    Besides its arguments, the function reads the notebook globals
    ``hammer_data``, ``sp``, ``sp_so`` and ``masp_names``.

    Parameters
    ----------
    basic_prm
        Basic parameters, forwarded to every ``run_robot`` call.
    result_file : str
        CSV file written by the optimization and read back here.
    figure_file : str
        Figure path handed to the optimization; its stem (path without
        extension) is also the prefix of the per-group figures.
    cities_data : pandas.DataFrame
        City table; its first ``start_date`` entry is passed to the plots.
    M
        Mobility matrix, forwarded to ``run_robot.prepare_optimization``.
    target
        Target matrix, forwarded to ``run_robot``.
    force_dif
        Forwarded to ``run_robot.prepare_optimization``.
    pools : optional
        Forwarded to ``run_robot.prepare_optimization``.
    verbosity : int, optional
        Forwarded to ``run_robot``.
    """
    run_robot.prepare_optimization(basic_prm, cities_data, M, target, hammer_data, force_dif, pools, verbosity=verbosity)
    run_robot.optimize_and_show_results(basic_prm, figure_file, result_file, cities_data, target, verbosity=verbosity)
    result = pd.read_csv(result_file, index_col=[0, 1])

    # Strip the real extension instead of assuming it has exactly three letters
    figure_stem = os.path.splitext(figure_file)[0]
    # Explicit positional access: plain [0] on a label-indexed Series relies on
    # a deprecated fallback
    start_date = cities_data["start_date"].iloc[0]

    # (group, suffix given to plot_result, suffix given to savefig)
    # NOTE(review): the second group uses "_spso.png" for plot_result but
    # "_sp_so.png" for savefig; both names are kept exactly as they were.
    figures = (
        (sp, "_sp.png", "_sp.png"),
        (sp_so, "_spso.png", "_sp_so.png"),
        (masp_names, "_rmsp.png", "_rmsp.png"),
    )
    for group, plot_suffix, save_suffix in figures:
        run_robot.plot_result(basic_prm, result, figure_stem + plot_suffix,
            hammer_data.loc[group, "duration"].values, start_date, group)
        plt.savefig(figure_stem + save_suffix, dpi=150, bbox_inches='tight')


## Case 1: 14 day window, no alternation, no mobility

In [None]:
# Define mobility matrix.
# max_neighbors=0 is the setting used for this "no mobility" case.
M = prepare_data.convert_mobility_matrix_and_save(cities_data, max_neighbors=0, drs="data/report_drs_mobility.csv")
# Build the hammer data and let run_robot search for a feasible hammer.
# NOTE(review): the meaning of the second argument (0) is defined in prepare_data -- confirm.
hammer_data = prepare_data.save_hammer_data(cities_data, 0, basic_prm["min_level"])
run_robot.find_feasible_hammer(basic_prm, cities_data, M, target, hammer_data, out_file=None, 
    incr_all=True, verbosity=1)
# Display the two mobility entries between SP and SW
M.loc["SP", "SW"], M.loc["SW", "SP"]



In [None]:
%%time
# Case 1: alternation weight set to zero; results and figures go to the files below
basic_prm["alternate"] = 0.0
result_file = "results/window_14_noalt_nomobility.csv"
figure_file = "results/window_14_noalt_nomobility.png"
run_a_test(basic_prm, result_file, figure_file, cities_data, M, target, force_dif)


## Case 2: 14 day window, no alternation, with mobility

In [None]:
# Define mobility matrix (full connection)
M = prepare_data.convert_mobility_matrix_and_save(cities_data, max_neighbors=22, drs="data/report_drs_mobility.csv")
# Rebuild the hammer data and search for a feasible hammer with the new mobility matrix
hammer_data = prepare_data.save_hammer_data(cities_data, 0, basic_prm["min_level"])
run_robot.find_feasible_hammer(basic_prm, cities_data, M, target, hammer_data, out_file=None, 
    incr_all=True, verbosity=1)
# Display the two mobility entries between SP and SW
M.loc["SP", "SW"], M.loc["SW", "SP"]

In [None]:
%%time
# Case 2: alternation weight set to zero, mobility matrix with full connection.
# The result file written here is read again when preparing Case 3.
basic_prm["alternate"] = 0.0
result_file = "results/window_14_noalt_withmobility.csv"
figure_file = "results/window_14_noalt_withmobility.png"
run_a_test(basic_prm, result_file, figure_file, cities_data, M, target, force_dif)

## Case 3: 14 day window, with alternation, with mobility

In [None]:
# Load the rt rows of the "no alternation" solution, indexed by city, to look
# for the moment where that solution decided for full opening.
results = pd.read_csv("results/window_14_noalt_withmobility.csv")
results = (
    results.loc[results["Variable"] == "rt"]
    .drop(columns="Variable")
    .set_index("City")
)

def find_last_opening(rts, rep):
    """Locate the final reopening in the rt trajectory of one region.

    A day counts as "not fully open" when its rt is below 95% of ``rep``.

    Parameters
    ----------
    rts : pandas.Series or array-like
        rt values of a single region, one entry per day.
    rep : float
        Reproduction number that corresponds to a fully open region.

    Returns
    -------
    int
        Position of the last not-fully-open day plus two, or 1 when the
        region is fully open during the whole horizon.
    """
    rts = np.asarray(rts, dtype=float)
    restricted = np.flatnonzero(rts < 0.95*rep)
    if restricted.size == 0:
        # No restricted day at all: behave as if the last one were at
        # position -1, instead of returning a value that depends on where
        # the smallest rt happens to be.
        return 1
    return int(restricted[-1]) + 2

# Turn off alternation two windows after the time needed for opening.
# Row k of results is assumed to describe the same region as row k of force_dif.
for row in range(results.shape[0]):
    opening = find_last_opening(results.iloc[row], basic_prm["rep"])
    cutoff = opening + 2*int(basic_prm["window"])
    force_dif[row, cutoff:] = 0.0
    

In [None]:
%%time
# Set up alternation weight
# Case 3 reuses M and hammer_data from the Case 2 setup together with the
# force_dif matrix modified in the previous cell.
basic_prm["alternate"] = 1.0
result_file = "results/window_14_withalt_withmobility.csv"
figure_file = "results/window_14_withalt_withmobility.png"
run_a_test(basic_prm, result_file, figure_file, cities_data, M, target, force_dif)

## Case 4: 14 day window, no alternation, link SP - SW broken

In [None]:
# Define mobility matrix (full connection)
M = prepare_data.convert_mobility_matrix_and_save(cities_data, max_neighbors=22, drs="data/report_drs_mobility.csv")
# Destroy the link between SP and SW (both directions set to zero)
M.loc["SP", "SW"], M.loc["SW", "SP"] = 0, 0
# Rebuild the hammer data and search for a feasible hammer with the modified matrix
hammer_data = prepare_data.save_hammer_data(cities_data, 0, basic_prm["min_level"])
run_robot.find_feasible_hammer(basic_prm, cities_data, M, target, hammer_data, out_file=None, 
    incr_all=True, verbosity=1)
# Display the two entries to check that the link is really gone
M.loc["SP", "SW"], M.loc["SW", "SP"]

In [None]:
# Reset force_dif to all ones (the Case 3 preparation zeroed part of it in place)
force_dif =  np.ones((ncities, ndays))
# Case 4: no alternation, SP - SW link removed from the mobility matrix
basic_prm["alternate"] = 0.0
result_file = "results/window_14_noalt_withmobility_no_link_sp_sw.csv"
figure_file = "results/window_14_noalt_withmobility_no_link_sp_sw.png"
run_a_test(basic_prm, result_file, figure_file, cities_data, M, target, force_dif)

## Case 5: 14 day window, no alternation, with mobility, ICU shared in metropolitan area from day 1

In [None]:
# Define mobility matrix (full connection)
M = prepare_data.convert_mobility_matrix_and_save(cities_data, max_neighbors=22, drs="data/report_drs_mobility.csv")
# Rebuild the hammer data and search for a feasible hammer with the full matrix
hammer_data = prepare_data.save_hammer_data(cities_data, 0, basic_prm["min_level"])
run_robot.find_feasible_hammer(basic_prm, cities_data, M, target, hammer_data, out_file=None, 
    incr_all=True, verbosity=1)
# Display the two mobility entries between SP and SW
M.loc["SP", "SW"], M.loc["SW", "SP"]

In [None]:
%%time
# ICU pools, written with one-based region numbers: every region outside the
# metropolitan area is alone in its pool, and one extra pool holds the whole
# São Paulo metropolitan area.
pools = [[k] for k in range(1, 23) if k - 1 not in MASP]
pools.append([k + 1 for k in MASP])

# Fresh force_dif and no alternation for this case
force_dif = np.ones((ncities, ndays))
basic_prm["alternate"] = 0.0
result_file = "results/window_14_noalt_withmobility_icushared.csv"
figure_file = "results/window_14_noalt_withmobility_icushared.png"
run_a_test(
    basic_prm, result_file, figure_file, cities_data, M, target, force_dif, pools
)

## Some code to check results

In [None]:
# Ad-hoc check of the shared-ICU solution: plot the planned ICU occupation of
# the metropolitan pool, overlay 1000 random simulations of the ICU need and
# count how often the simulated occupation exceeds the pooled ICU target.

# Configuration
pool = MASP
total_duration = int(basic_prm["ndays"])
first_day = 0 #hammer_data.iloc[pool, 0].min()
last_day = total_duration #first_day + 50 + 1
simulation = pd.read_csv("results/window_14_noalt_withmobility_icushared.csv", index_col=[0, 1])

cities_names = cities_data.iloc[pool].index
population = cities_data["population"]
icu_capacity = cities_data["icu_capacity"]
# Pooled ICU target per day: sum over the pool of target * population * icu_capacity
total_icus = np.array([(target.loc[c]*population.loc[c]*icu_capacity.loc[c]).values for c in cities_names]).sum(axis=0)
total_icus = total_icus[first_day:last_day]

# Plot mean
# NOTE(review): "icus +=" accumulates in place on the object returned by
# simulation.loc; check whether it can alias (and therefore modify) `simulation`.
c = cities_names[0]
icus = simulation.loc[c, "mean_used_icu"]
for c in cities_names[1:]:
    icus += simulation.loc[c, "mean_used_icu"]
plt.plot(icus[first_day:last_day], color="C0", label="ICU occupation")
# Everything that is plotted is also collected here and saved to CSV at the end
simuls = {"mean": icus}

# Plot upper bound
c = cities_names[0]
icus = simulation.loc[c, "upper_used_icu"]
for c in cities_names[1:]:
    icus += simulation.loc[c, "upper_used_icu"]
plt.plot(icus[first_day:last_day], label="", color="C0")
simuls["upper"] = icus

# Make random simulations
# The time series is built from columns 7:-2 of the first pool member's row;
# presumably these are the nine time-series parameters appended to cities_data
# earlier -- TODO confirm the column positions.
time_series = run_robot.SimpleTimeSeries(*cities_data.iloc[pool[0], 7:-2])
total_days = 0
bad_days = 0
for i in range(1000):
    total_days += last_day - first_day
    need_icu = time_series.simulate(total_duration, True)
    used_icus = simulation.loc[cities_names[0], "i"]*need_icu*population[cities_names[0]]
    for c in cities_names[1:]:
        # TODO: Delete below
        # (a new random trajectory is drawn for every city of the pool)
        need_icu = time_series.simulate(total_duration, True)
        used_icus += simulation.loc[c, "i"]*need_icu*population[c]
    # Scale by time_icu / tinf
    used_icus *= basic_prm["time_icu"]/basic_prm["tinf"]
    used_icus = used_icus[first_day:last_day]
    # Count the days of this simulation that exceed the pooled target
    bad_days += (used_icus > total_icus).sum()
    plt.plot(used_icus, label="", alpha=0.025, color="C0")
    simuls[i] = used_icus

print(f"Bad days = {bad_days:d}/{total_days:d} == {bad_days / total_days * 100:f}%")

# Plot results
# NOTE(review): this import repeats a name that the plotting calls above already use.
import matplotlib.pylab as plt
plt.plot(total_icus, color="C3", label="Maximal ICU target")
simuls["target"] = total_icus
# NOTE(review): cities_data["start_date"][0] relies on positional fallback if
# the index holds region labels -- consider .iloc[0].
start_date = pd.Timestamp(cities_data["start_date"][0]) + first_day*pd.to_timedelta("1D")
# Ticks at the start of every second month ("2MS"), always including the start date
ticks = pd.date_range(start_date, start_date + (last_day - first_day)*pd.to_timedelta("1D"), freq="2MS")
ticks = list(ticks)
# Replace the first tick by the start date when it falls within 10 days of it,
# otherwise add the start date as an extra tick
if ticks[0] <= start_date + pd.to_timedelta("10D"):
    ticks[0] = start_date
else:
    ticks = [start_date] + ticks
# Tick positions are day offsets from the start date
plt.gca().set_xticks([(i - start_date).days for i in ticks])
labels = [i.strftime('%m/%Y') for i in ticks]
plt.gca().set_xticklabels(labels, rotation=45, ha='right')
plt.legend()
plt.title("Metropolitan Area of São Paulo - ICU occupation")
plt.savefig("results/icu_usage_with_mobility_sharing.png", dpi=150, bbox_inches='tight')

# Save every plotted series, one per row after the transpose
df = pd.DataFrame(simuls)
df.T.to_csv("results/random_simuls.csv")

In [None]:
# Configuration
def check_icus(basic_prm, cities_data, target, pool=MASP, first_day=0, last_day=-1, simulation_file="results/window_14_noalt_withmobility_icushared.csv", reps=1000):
    """Estimate by random simulation how often a pool exceeds its ICU target.

    For each of ``reps`` repetitions a random ICU-need trajectory is drawn for
    every city of the pool, the resulting ICU usage of the pool is computed
    from the simulated infected ratio (variable ``"i"`` in the simulation
    file), and it is compared day by day with the pooled ICU target
    (``target * population * icu_capacity`` summed over the pool).

    Parameters
    ----------
    basic_prm
        Basic parameters; ``ndays``, ``time_icu`` and ``tinf`` are read.
    cities_data : pandas.DataFrame
        City table with ``population`` and ``icu_capacity`` columns. Columns
        ``7:-2`` of the first pool member's row are passed to
        ``run_robot.SimpleTimeSeries``.
    target : pandas.DataFrame
        Target matrix, one row per city label.
    pool : sequence of int
        Row positions (used with ``iloc``) of the cities that share ICUs.
    first_day, last_day : int
        Bounds of the analysed slice of days. A negative ``last_day`` means
        ``ndays - time_icu``.
    simulation_file : str
        CSV with the optimization results, read with two index columns.
    reps : int
        Number of random repetitions.

    Returns
    -------
    tuple
        ``(n_days, n_violation, mean_violation, mean_pos_violation,
        max_violation)``: number of analysed days, number of days violated in
        at least one repetition, fraction of (day, repetition) pairs in
        violation, the same fraction restricted to the days with some
        violation (NaN when there is none), and the largest per-day fraction
        of violating repetitions.
    """
    total_duration = int(basic_prm["ndays"])
    if last_day < 0:
        last_day = total_duration - int(basic_prm["time_icu"])
    n_days = last_day - first_day
    simulation = pd.read_csv(simulation_file, index_col=[0, 1])

    cities_names = cities_data.iloc[pool].index
    population = cities_data["population"]
    icu_capacity = cities_data["icu_capacity"]
    total_icus = np.array([(target.loc[c]*population.loc[c]*icu_capacity.loc[c]).values for c in cities_names]).sum(axis=0)
    total_icus = total_icus[first_day:last_day]

    # Make random simulations
    time_series = run_robot.SimpleTimeSeries(*cities_data.iloc[pool[0], 7:-2])
    # bad_days[d] counts the repetitions in which day d exceeded the target
    bad_days = np.zeros(n_days)
    for _ in range(reps):
        need_icu = time_series.simulate(total_duration, True)
        used_icus = simulation.loc[cities_names[0], "i"]*need_icu*population[cities_names[0]]
        for c in cities_names[1:]:
            # TODO: Delete below
            need_icu = time_series.simulate(total_duration, True)
            used_icus += simulation.loc[c, "i"]*need_icu*population[c]
        used_icus *= basic_prm["time_icu"]/basic_prm["tinf"]
        used_icus = used_icus[first_day:last_day]
        bad_days += used_icus > total_icus

    total_days = reps*n_days
    mean_violation = bad_days.sum() / total_days
    n_violation = (bad_days > 0).sum()
    if n_violation > 0:
        mean_pos_violation = bad_days.sum() / (reps*n_violation)
    else:
        # No violated day: the ratio is undefined. Return NaN explicitly
        # instead of relying on a 0/0 division and its RuntimeWarning.
        mean_pos_violation = np.nan
    max_violation = bad_days.max() / reps
    return n_days, n_violation, mean_violation, mean_pos_violation, max_violation


In [None]:

def quality_table(pools, pool_names, file_name, basic_prm, cities_data, target, hammer_duration, reps=1000):
    """Build a table with the ICU-violation statistics of several pools.

    ``check_icus`` is run once per pool, starting the analysis at the smallest
    hammer duration among the members of the pool.

    Parameters
    ----------
    pools : list of list of int
        Row positions of the cities in each pool.
    pool_names : list
        Row labels of the returned table, one per pool.
    file_name : str
        Simulation CSV forwarded to ``check_icus``.
    basic_prm, cities_data, target
        Forwarded to ``check_icus``.
    hammer_duration : array-like
        Hammer duration of every city, in the same order as the positions
        used in ``pools``. Both a NumPy array and a pandas Series work.
    reps : int
        Number of random repetitions per pool.

    Returns
    -------
    pandas.DataFrame
        One row per pool with the columns ``# days``, ``Bad days``,
        ``Mean violation``, ``Mean violation in bad days`` and
        ``Max violation``.
    """
    # Pools hold positions, so index a plain array: a label-indexed Series
    # would otherwise try to interpret the integers as labels.
    durations = np.asarray(hammer_duration)

    # Same order as the tuple returned by check_icus
    columns = ("# days", "Bad days", "Mean violation",
               "Mean violation in bad days", "Max violation")
    quality_bench = {name: [] for name in columns}
    for p in pools:
        # Progress output: the pool being processed
        print(p, end=" ")
        measures = check_icus(
            basic_prm, cities_data, target, pool=p, first_day=durations[p].min(),
            simulation_file=file_name, reps=reps)
        for name, value in zip(columns, measures):
            quality_bench[name].append(value)

    print()
    return pd.DataFrame(quality_bench, index=pool_names)



In [None]:
# Metropolitan positions without São Paulo city (position 15)
suburbs = list(MASP)
suburbs.remove(15)

# One single-region pool for every region outside the metropolitan area ...
pools = [[i] for i in range(22) if i not in MASP]
cities_names = list(cities_data.index[[p[0] for p in pools]])
# ... followed by the aggregated pools
pools += [list(MASP), suburbs, [15]]
cities_names += ["MASP", "Suburbs", "São Paulo city"]

# ICU quality table for the solution without ICU sharing
qt1 = quality_table(
    pools, cities_names, "results/window_14_noalt_withmobility.csv",
    basic_prm, cities_data, target, hammer_data["duration"].values, 10000,
)
qt1.to_csv("results/icu_quality_icu_not_shared.csv")
qt1

In [None]:
# ICU quality table for the solution with ICUs shared in the metropolitan area.
# The durations are passed as a plain array, as in the qt1 call, so that the
# integer pools index them by position rather than by label.
qt2 = quality_table(pools, cities_names, 
    "results/window_14_noalt_withmobility_icushared.csv", basic_prm, cities_data, target,
    hammer_data["duration"].values, 10000)
qt2.to_csv("results/icu_quality_icu_shared.csv")
qt2

## Scratch area, you can ignore

In [None]:
# c = MASP[0]
# time_series_data = cities_data.iloc[c, 7:-2]
# confidence = cities_data.iloc[c, -2]
# need_icu, upper_bound = get_rho_icu(ndays, time_series_data, confidence, True)
# plt.plot(need_icu, color="C1")
# plt.plot(upper_bound, color="C1")

# #plt.plot(real_data_sp,color="C3")
# time_series = run_robot.SimpleTimeSeries(*time_series_data)
# for i in range(100):
#     time_series.reset()
#     random_traj = [time_series.iterate(random=True) for i in range(ndays)]
#     plt.plot(random_traj, color="C1", alpha=0.1)

In [None]:
# e_icu_interior, upper_interior = get_rho_icu(ndays, ts_notsp_7, 0.9, True)
# e_icu_masp, upper_masp = get_rho_icu(ndays, ts_sp_7, 0.9, True)
# pre_df = {
#     "SP rho_icu": e_icu_masp, "SP upper bound": upper_masp, 
#     "Interior rho_icu": e_icu_interior, "Interior upper bound": upper_interior
# }
# df = pd.DataFrame(pre_df)
# df.to_csv("rho_icu.csv")
# df
