# Setup

First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "end_to_end_project"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure under IMAGES_PATH.

    Parameters
    ----------
    fig_id : str
        File name (without extension) of the saved figure.
    tight_layout : bool
        When true, call ``plt.tight_layout()`` before saving.
    fig_extension : str
        File extension, also passed to ``savefig`` as the output format.
    resolution : int
        Resolution in dots per inch.
    """
    # savefig() does not create missing directories, so make sure the
    # destination exists before writing.
    os.makedirs(IMAGES_PATH, exist_ok=True)
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

# Get the data

In [2]:
import os
import tarfile
from six.moves import urllib

DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
HOUSING_PATH = os.path.join("datasets", "housing")
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"

def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing tarball and unpack it into ``housing_path``.

    Parameters
    ----------
    housing_url : str
        URL of the ``.tgz`` archive to download.
    housing_path : str
        Directory that receives both the archive and its extracted contents;
        it is created when missing.
    """
    os.makedirs(housing_path, exist_ok=True)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only use this with a source you trust.
    with tarfile.open(tgz_path) as housing_tgz:
        housing_tgz.extractall(path=housing_path)

In [3]:
fetch_housing_data()

In [4]:
import pandas as pd
pd.set_option('display.max_rows', 500)

#def load_housing_data(housing_path=HOUSING_PATH):
#    csv_path = os.path.join(housing_path, "housing.csv")
#    return pd.read_csv(csv_path)

DATASET_PATH = "datasets/uci/"
def load_uci_data(path, dataset_path=None, **read_csv_kwargs):
    """Read a CSV file located under the dataset directory.

    Parameters
    ----------
    path : str
        File name relative to the dataset directory. An absolute path is used
        as-is (``os.path.join`` semantics).
    dataset_path : str, optional
        Base directory; defaults to the module-level ``DATASET_PATH``.
    **read_csv_kwargs
        Extra keyword arguments forwarded to ``pandas.read_csv``
        (for example ``sep`` or ``header``).

    Returns
    -------
    pandas.DataFrame
    """
    base_dir = DATASET_PATH if dataset_path is None else dataset_path
    csv_path = os.path.join(base_dir, path)
    return pd.read_csv(csv_path, **read_csv_kwargs)

In [5]:
#housing = load_housing_data()
#housing.head()

#Otros candidatos a agregar:

# Producto 11: Incidencia por region
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto11

# Producto 26: Casos nuevos con sintomas por region
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto26

# Producto 27: Casos nuevos sin sintomas por region
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto27

# Producto 38: Casos fallecidos por comuna (usar CasosFallecidosPorComuna_std)
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto38

# -------------------------------------------------------------------------------

# Product 8: COVID ICU patients (RM)
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto8
uci_region = load_uci_data("UCI_T.csv")

# Product 19: active cases (RM)
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto19
casos_activos = load_uci_data("ActivosComuna_std.csv")  # new

# Product 20: number of ventilators
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto20
vent_disp = load_uci_data("NumeroVentiladores_T.csv")

# Product 23: critical patients
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto23
uci_covid_nacional = load_uci_data("PacientesCriticos_T.csv")

# Product 24: daily hospital beds
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto24
camas = load_uci_data("CamasHospital_Diario_T.csv")

# Product 35: comorbidity
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto35
comorb_nacional = load_uci_data("Comorbilidad_T.csv")

# Product 37: deaths
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto37
defunc_nacional = load_uci_data("Defunciones_T.csv")

# Product 38: deaths by comuna
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto38
defunc_regional = load_uci_data("CasosFallecidosComuna_std.csv")

# Product 44: weekly evolution of hospital discharges of COVID-19 patients
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto44
egresos_nacional = load_uci_data("Egresos_std.csv")  # new

# Product 52: hospital beds (RM)
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto52
camas_region = load_uci_data("CamasHospital_Regional_std.csv")

# Product 74: "Paso a paso" plan
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto74
paso_a_paso = load_uci_data("paso_a_paso_T.csv")

# Product 76: progress of the COVID-19 vaccination campaign
# https://github.com/MinCiencia/Datos-COVID19/tree/master/output/producto76
# Read without a header row; uses the same base directory as every other
# dataset instead of repeating the literal path.
Vacunacion = pd.read_csv(os.path.join(DATASET_PATH, "vacunacion_t.txt"), sep=",", header=None)

## Limpieza de datos

In [7]:
from statistics import mode


def _index_by(frame, column="Fecha"):
    """Return a copy of ``frame`` indexed by ``column`` with an unnamed index."""
    return frame.set_index(column).rename_axis(None)


# --- Regional hospital beds: keep the Metropolitana rows, one column per "Serie" ---
camas_region = camas_region[camas_region["Region"].str.match("Metropolitana")]
# Select only the value column: tuple-style selection on a groupby
# (["Fecha", "Serie", "Casos"]) is no longer accepted by pandas 2.x.
camas_region = camas_region.groupby(["Fecha", "Serie"])["Casos"].sum().reset_index()
camas_region = camas_region.pivot_table(index=['Fecha'], columns=['Serie'], values='Casos').fillna(0)
camas_region = camas_region.drop(columns="Camas base (2019)")
camas_region.index.name = None

# --- National hospital beds (also available, but the regional ones are preferred) ---
camas.columns = ['Fecha', 'Camas basicas nac.', 'Camas medias nac.', 'Camas UTI nac.', 'Camas UCI nac.']
camas = _index_by(camas)

# --- Comorbidity: drop the columns at positions 1-11, rename the rest ---
comorb_nacional = comorb_nacional.drop(columns=comorb_nacional.columns[[*range(1, 12)]])
comorb_nacional.columns = ['Comorbilidad', 'Hipertensión arterial', 'Diabetes', 'Obesidad', 'Asma',
       'Enfermedad cardiovascular', 'Enfermedad pulmonar crónica',
       'Cardiopatía crónica', 'Enfermedad renal crónica',
       'Enfermedad neurológica crónica', 'Inmunocomprometido',
       'Enfermedad hepática crónica']
comorb_nacional = comorb_nacional.drop(index=0)
comorb_nacional = _index_by(comorb_nacional, "Comorbilidad")

# --- National deaths: a single snapshot column is used ---
# .copy() so the renaming below never acts on a view of the loaded frame.
defunc_nacional = defunc_nacional[["Fecha", "Defunciones_2020-07-16"]].copy()
defunc_nacional.columns = ["Fecha", "Defunciones nacional"]
defunc_nacional = _index_by(defunc_nacional)

# --- Regional deaths: row-wise sum over a fixed block of columns ---
# NOTE(review): the row offset (4) and the column window (95:148) are positional;
# presumably they skip metadata rows and select the Metropolitana comunas - confirm
# against the CSV layout.
defunc_regional = defunc_regional.iloc[4:]
# Index by the "Region" values but keep the column itself, so the positional
# column window below still refers to the same columns.
defunc_regional = defunc_regional.set_index(defunc_regional["Region"])
defunc_regional = defunc_regional.iloc[:, 95:148].apply(pd.to_numeric)
defunc_regional = defunc_regional.sum(axis=1, numeric_only=True).rename("Defunciones regional")
defunc_regional.index.name = None

# --- Ventilators ---
vent_disp.columns = ["Fecha", "Total vent.", "Vent. ocupados", "Vent. disponibles"]
vent_disp = _index_by(vent_disp)

# --- National critical patients ---
uci_covid_nacional.columns = ["Fecha", "Pac. criticos nacional"]
uci_covid_nacional = _index_by(uci_covid_nacional)

# --- ICU patients: the "Region" column holds the row labels, "Metropolitana" the values ---
uci_region = uci_region[["Region", "Metropolitana"]].copy()
uci_region.columns = ["Fecha", "Casos diarios region"]
uci_region = uci_region.drop(index=[0, 1])
uci_region = _index_by(uci_region)

# --- Active cases: total per date over the Metropolitana rows ---
casos_activos = casos_activos.loc[:, ["Region", "Fecha", "Casos activos"]]
casos_activos.columns = ["Region", "Fecha", "Casos activos region"]
casos_activos = casos_activos[casos_activos["Region"].str.match("Metropolitana")]
casos_activos = casos_activos.groupby("Fecha")[["Casos activos region"]].sum()
casos_activos.index.name = None

# TODO: egresos_nacional still needs treatment before it can be used
# (candidate columns: "Fecha Publicación", "Egresos").

KeyboardInterrupt: 

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Discard the rows labelled 1, 2 and 3.
paso_a_paso = paso_a_paso.drop(index=[1, 2, 3])
paso_a_paso

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Use the "codigo_region" column as the row index and call that index "Fecha".
paso_a_paso = paso_a_paso.set_index("codigo_region").rename_axis("Fecha")
paso_a_paso

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# For every row, summarise each region by the most frequent value among its
# columns. Column labels are a region code, optionally followed by ".<n>"
# (e.g. "13", "13.1", "13.2", ...), so the columns of a region are found from
# the code prefix instead of being typed out one by one.

# (region code used in the column labels, name of the resulting column)
REGION_COLUMNS = [
    ("15", "Arica y Parinacota"),
    ("1", "Tarapacá"),
    ("2", "Antofagasta"),
    ("3", "Atacama"),
    ("4", "Coquimbo"),
    ("5", "Valparaíso"),
    ("13", "Metropolitana"),
    ("6", "O'Higgins"),
    ("7", "Maule"),
    ("16", "Nuble"),
    ("8", "Biobío"),
    ("9", "Araucania"),
    ("14", "Los Rios"),
    ("10", "Los Lagos"),
    ("11", "Aysén"),
    ("12", "Magallanes"),
]

# Snapshot the labels first so the columns added below are never scanned.
source_columns = list(paso_a_paso.columns)
region_modes = []
for region_code, region_name in REGION_COLUMNS:
    region_cols = [c for c in source_columns if str(c).split(".")[0] == region_code]
    if not region_cols:
        raise KeyError("No columns found for region code " + region_code)
    row_modes = [mode(row) for row in paso_a_paso[region_cols].itertuples(index=False, name=None)]
    region_modes.append((region_name, row_modes))

for region_name, row_modes in region_modes:
    paso_a_paso[region_name] = row_modes




In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Drop every column whose label is a region code ("1".."16"), optionally
# followed by ".<n>". Selecting by prefix replaces a hand-maintained list of
# several hundred labels and keeps the cell re-runnable.
region_codes = {str(code) for code in range(1, 17)}
comuna_columns = [c for c in paso_a_paso.columns if str(c).split(".")[0] in region_codes]
paso_a_paso = paso_a_paso.drop(columns=comuna_columns)

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Remove the row labelled "region_residencia".
paso_a_paso = paso_a_paso.drop(index=["region_residencia"])

#paso_a_paso

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Map each step code (stored as a string) to a numeric weight.
# NOTE(review): the weights look like the share of confinement associated with
# each step - confirm their origin.
LOCKDOWN_WEIGHT = {'1': 0.98, '2': 0.45, '3': 0.25, '4': 0.25, '5': 0.25}

# One vectorized replacement instead of visiting every cell through .iloc,
# which is extremely slow on a frame of this size.
paso_a_paso = paso_a_paso.replace(LOCKDOWN_WEIGHT)

paso_a_paso

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Row-wise mean over all columns, stored as a new column.
promedio_encierro = paso_a_paso.mean(axis=1)
paso_a_paso["Promedio nacional de encierro"] = promedio_encierro
#while(i<len(paso_a_paso)):

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Keep only the averaged column (still as a one-column DataFrame).
paso_a_paso = paso_a_paso.loc[:, ["Promedio nacional de encierro"]]


In [None]:
paso_a_paso

In [None]:
# Vaccination data: keep the first four columns, name them, drop the rows
# labelled 0 and 1, and index by date.
Vacunacion = Vacunacion[[0, 1, 2, 3]].copy()
Vacunacion.columns = ["Fecha", "Primera_V", "Segunda_V", "V_Unica"]
Vacunacion = Vacunacion.drop(index=[0, 1]).set_index("Fecha")

# Turn each running total into a per-row increment: value minus the previous
# row's value, with the first row compared against 0. Values are parsed as
# float and truncated to int, as int(float(x)) would do.
# Done column-wise instead of Series[i] lookups, which rely on the deprecated
# integer-position fallback of a label-indexed Series.
vacunacion_acum = Vacunacion[["Primera_V", "Segunda_V", "V_Unica"]].astype(float).astype(int)
Vacunacion = vacunacion_acum - vacunacion_acum.shift(1, fill_value=0)

In [None]:
comorb_nacional.columns

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Outer-join every cleaned dataset on its index, in this fixed order
# (later cells address columns by position).
frames_to_join = [
    comorb_nacional,
    camas_region,
    defunc_nacional,
    defunc_regional,
    vent_disp,
    uci_covid_nacional,
    uci_region,
    casos_activos,
    paso_a_paso,
    Vacunacion,
]
data = camas
for frame in frames_to_join:
    data = pd.merge(data, frame, how='outer', left_index=True, right_index=True)

# Keep rows from 2020-04-14 onwards, parse the index as dates and coerce
# every column to numeric (unparseable values become NaN).
data = data.loc['2020-04-14':]
data.index = pd.to_datetime(data.index)
data = data.apply(pd.to_numeric, errors='coerce')

In [None]:
data

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Rows without a lockdown average get 0. A single fillna replaces the
# row-by-row chained assignment (data[col][i] = 0), which is not guaranteed
# to write back into `data`.
data["Promedio nacional de encierro"] = data["Promedio nacional de encierro"].fillna(0)

In [None]:
#tmp_data = data[["Primera_V", "Segunda_V", "V_Unica"]].loc[data["Primera_V"].isnull()].replace(to_replace = np.nan, value = 0, inplace = True)
#tmp_data.append(data[["Primera_V", "Segunda_V", "V_Unica"]].loc[not data["Primera_V"].isnull()])
#data

#data[["Primera_V", "Segunda_V", "V_Unica"]][~data["Primera_V"].isnull()]
# Wherever "Primera_V" is missing, set the three vaccination columns to 0.
# Uses one .loc assignment instead of per-row chained assignment
# (data[col][i] = 0), which is not guaranteed to write back into `data`.
vacunacion_cols = ["Primera_V", "Segunda_V", "V_Unica"]
sin_vacunacion = data["Primera_V"].isnull()
data.loc[sin_vacunacion, vacunacion_cols] = 0


In [None]:
#uci_region
# VER UMBRAL y proporción de NaNs
#data.columns
# Fraction of missing values per column, as a one-column frame (column 0).
data_rows = data.shape[0]
nan_proportions = data.isnull().sum().div(data_rows).to_frame()
# Columns with at least 15% of their values missing.
data_non_imputable = nan_proportions[nan_proportions[0] >= 0.15]
data_non_imputable
#type(nan_proportions)

#data1.apply(lambda x: x.interpolate())

# Hacer



In [None]:
# Columns with less than 15% missing values. Strict "<" so that a column at
# exactly 15% is not listed both here and among the columns selected with ">= 0.15".
inputable = nan_proportions.loc[nan_proportions[0] < 0.15]
inputable

In [None]:
#data = data.assign(Hipertensión_arterial_interp=data["Hipertensión arterial"].interpolate(method='spline', order=3, inplace=True))

# Results are assigned back to `data` explicitly: calling
# data[col].interpolate(..., inplace=True) acts on a column selection and is
# not guaranteed to modify `data` itself.

# Columns with <15% missing values: second-order spline.
spline2_cols = [
    "Camas basicas nac.",
    "Camas medias nac.",
    "Camas UTI nac.",
    "Camas UCI nac.",
    "Camas UCI habilitadas",
    "Camas UCI ocupadas COVID-19",
    "Camas UCI ocupadas no COVID-19",
]
for col in spline2_cols:
    data[col] = data[col].interpolate(method='spline', order=2)

# Columns with >15% missing values.
# The rolling median used to be computed and thrown away; here it fills the
# gaps of the column (existing values are left as they are).
# NOTE(review): confirm that gap-filling, not smoothing, is what was intended.
rolling_median_cols = ["Pac. criticos nacional", "Defunciones nacional"]
for col in rolling_median_cols:
    data[col] = data[col].fillna(data[col].rolling(24, min_periods=1).median())

linear_cols = [
    "Hipertensión arterial",
    "Diabetes",
    "Obesidad",
    "Asma",
    "Enfermedad cardiovascular",
    "Enfermedad pulmonar crónica",
    "Cardiopatía crónica",
    "Enfermedad renal crónica",
    "Enfermedad neurológica crónica",
    "Inmunocomprometido",
    "Enfermedad hepática crónica",
    "Defunciones regional",
]
for col in linear_cols:
    data[col] = data[col].interpolate(method='linear')

data["Casos activos region"] = data["Casos activos region"].interpolate(method='spline', order=3)

#data["Pac. criticos nacional"].interpolate(method='time', inplace=True)
#data["Hipertensión arterial"].interpolate(method='time', inplace=True)
#data["Diabetes"].interpolate(method='time', inplace=True)
#data["Obesidad"].interpolate(method='time', inplace=True)
#data["Asma"].interpolate(method='time', inplace=True)
#data["Enfermedad cardiovascular"].interpolate(method='time', inplace=True)
#data["Enfermedad pulmonar crónica"].interpolate(method='time', inplace=True)
#data["Cardiopatía crónica"].interpolate(method='time', inplace=True)
#data["Enfermedad renal crónica"].interpolate(method='time', inplace=True)
#data["Enfermedad neurológica crónica"].interpolate(method='time', inplace=True)
#data["Inmunocomprometido"].interpolate(method='time', inplace=True)
#data["Enfermedad hepática crónica"].interpolate(method='time', inplace=True)
#data["Defunciones nacional"].interpolate(method='time', inplace=True)
#data["Casos activos region"].interpolate(method='time', inplace=True)

#for col in list(inputable.index):
#    print(col)
#    data = inputData(col, data)

In [None]:
data.apply(lambda x: x.isnull().sum() / data_rows, axis=0).to_frame()

In [None]:
data.isnull().sum(axis=1)

In [None]:
# Keep only the rows whose index label falls between these two dates.
data = data.loc['2020-06-12':'2021-06-30']

In [None]:
data.isnull().sum(axis=1)

## Visualización de los datos

In [None]:
#ESTE
data.iloc[:,26]

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 10]
plt.rcParams['figure.dpi'] = 200

In [None]:
# Vaccination series (columns at positions 27-29).
fig, ax1 = plt.subplots(1, 1)
vacunas_cols = [27, 28, 29]
ax1.plot(data.iloc[:, vacunas_cols])
ax1.legend(('Primera dosis', 'Segunda Dosis', 'Dosis unica'), fontsize=15, loc=4)
ax1.set_title("Vacunas", fontsize=30)
ax1.set_xlabel("Fecha", fontsize=16)
ax1.set_ylabel("Cantidad de vacunados", fontsize=16)

In [None]:
# National bed counts (columns at positions 0-3).
fig, ax1 = plt.subplots(1, 1)
camas_cols = [0, 1, 2, 3]
camas_labels = ('Camas basicas nac.', 'Camas medias nac.', 'Camas UTI nac.',
                'Camas UCI nac.')
ax1.plot(data.iloc[:, camas_cols])
ax1.legend(camas_labels, fontsize=15, loc=4)
ax1.set_title("Camas nacionales", fontsize=30)
ax1.set_xlabel("Fecha", fontsize=16)
ax1.set_ylabel("Cantidad camas", fontsize=16)

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# ICU beds and critical patients (left axis) against the lockdown average
# (right axis).
fig, ax2 = plt.subplots(1, 1)
camas_plot = ax2.plot(data.iloc[:,[15,16,17,23]])
# Labels are set through each Axes object: after twinx() the pyplot helpers
# act on the twin axis, which used to overwrite the right-hand label and
# leave the left axis unlabeled.
ax2.set_title("Camas UCI nacional", fontsize=30)
ax2.set_xlabel("Fecha", fontsize=16)
ax2.set_ylabel("Cantidad camas", fontsize=16)

ax5 = ax2.twinx()
pasos_plot = ax5.plot(data.iloc[:,26], color="red")
ax5.set_yticks([1])
ax5.set_ylabel("Paso a paso (% de encierro)", fontsize=16)

# One legend covering the lines of both axes.
plot = camas_plot + pasos_plot
ax2.legend(plot, ['Camas UCI habilitadas',
       'Camas UCI ocupadas COVID-19', 'Camas UCI ocupadas no COVID-19',
       'Pac. críticos nacionales', 'Paso a paso'], fontsize=15)

In [None]:
data.iloc[:,26]

In [None]:
# Comorbidity series (columns at positions 4-14) on a logarithmic scale.
fig, ax3 = plt.subplots(1, 1)
comorb_cols = list(range(4, 15))
comorb_labels = ('Hipertensión arterial', 'Diabetes', 'Obesidad',
                 'Asma', 'Enfermedad cardiovascular', 'Enfermedad pulmonar crónica',
                 'Cardiopatía crónica', 'Enfermedad renal crónica',
                 'Enfermedad neurológica crónica', 'Inmunocomprometido',
                 'Enfermedad hepática crónica')
ax3.plot(data.iloc[:, comorb_cols])
ax3.legend(comorb_labels)
ax3.set_yscale('log')
ax3.set_title("Enfermedades", fontsize=30)
ax3.set_xlabel("Fecha", fontsize=16)
ax3.set_ylabel("Personas con comorbilidad", fontsize=16)

In [None]:
# Cell flagged by the author ("ESTE" = "this one").
# Ventilator series (left axis) against the lockdown average (right axis).
fig, ax4 = plt.subplots(1, 1)
ventiladores_plot = ax4.plot(data.iloc[:, [20, 21, 22]])
ax4.set_title("Ventiladores mecánicos", fontsize=30)
ax4.set_xlabel("Fecha", fontsize=16)
ax4.set_ylabel("Cantidad ventiladores", fontsize=16)

ax5 = ax4.twinx()
pasos_plot = ax5.plot(data.iloc[:, 26], color="red")
ax5.set_yticks([1])
ax5.set_ylabel("Paso a paso (% de encierro)", fontsize=16)

# Single legend built from the line handles of both axes.
plot = ventiladores_plot + pasos_plot
legend_labels = ["Total vent.", "Vent. ocupados", "Vent. disponibles", "Paso a paso"]
ax4.legend(plot, legend_labels, fontsize=16)

In [None]:
# Daily cases (left axis) and active cases (right axis).
fig, ax5 = plt.subplots(1, 1)
diarios_plot = ax5.plot(data.iloc[:, 24])
ax5.set_title("Casos COVID-19 Región Metropolitana", fontsize=30)
ax5.set_xlabel("Fecha", fontsize=16)
ax5.set_ylabel("Casos diarios", fontsize=16)

ax6 = ax5.twinx()
activos_plot = ax6.plot(data.iloc[:, 25], color="green")

plot = diarios_plot + activos_plot
legend_labels = ["Casos diario región", "Casos activos región"]
ax5.legend(plot, legend_labels, fontsize=15, loc=4)
ax6.set_ylabel("Casos activos", fontsize=16)

In [None]:
# Deaths (left axis) against occupied ventilators (right axis).
fig, ax7 = plt.subplots(1, 1)
defunciones_plot = ax7.plot(data.iloc[:,[18,19]])
plt.title("Ventiladores ocupados en base a defunciones", fontsize=30)
plt.xlabel("Fecha", fontsize=16)
plt.ylabel("Cantidad defunciones", fontsize=16)
ax8 = ax7.twinx()
# Selected by name: position 20 holds "Total vent.", not the occupied
# ventilators that the title and legend describe.
ventiladores_plot = ax8.plot(data["Vent. ocupados"], color="green")
plot = defunciones_plot + ventiladores_plot
ax8.legend(plot, ["Defunciones nacionales", "Defunciones regionales", "Ventiladores ocupados"], fontsize=15)
plt.ylabel("Cantidad ventiladores", fontsize=16)

## Limpieza de variables
### Se eliminan variables con datos faltantes incluso después de la interpolación.
Candidatos: 
 - Pacientes críticos
 - Defunciones nacional 

In [None]:
# Remove the two columns named in the markdown above.
columnas_incompletas = ["Pac. criticos nacional", "Defunciones nacional"]
data = data.drop(columns=columnas_incompletas)

Revisando que no queden datos faltantes

In [None]:
data.isnull().sum(axis=1)

## Visualización y análisis de correlación

In [None]:

# Pearson correlation between every pair of columns, drawn as a heatmap.
corr = data.corr(method="pearson")
fig = plt.figure()
ax = fig.add_subplot(111)
# Fixed colour range so that -1 and +1 map to the two ends of the colormap.
cax = ax.matshow(corr,cmap='coolwarm', vmin=-1, vmax=1)
fig.colorbar(cax)
# One tick per column, labelled with the column names on both axes.
ticks = np.arange(0,len(data.columns),1)
ax.set_xticks(ticks)
plt.xticks(rotation=90)
ax.set_yticks(ticks)
ax.set_xticklabels(data.columns)
ax.set_yticklabels(data.columns)
plt.show()

In [None]:
corr

## Preparación de los datos

### Pacientes críticos acumulados a diarios

In [None]:
defunc_ix = list(data.columns).index("Defunciones regional")

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Target column; every other column is used as a feature.
y_lab = "Camas UCI nac."

# Chronological split: rows up to and including position `ind` (70% mark)
# form the training set, the remaining rows the test set.
ind = int(data.shape[0]*0.7)
ind_name = data.index[ind]
ind_name_nxt = data.index[ind+1]

# Fit the scaler on the training rows only and then apply it to the whole
# frame, so the test rows do not influence the min/max used for scaling.
# Test values may therefore fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data.loc[:ind_name])
X = pd.DataFrame(scaler.transform(data), columns=data.columns, index=data.index)

X_train = X.loc[:ind_name, X.columns != y_lab]
X_test = X.loc[ind_name_nxt:, X.columns != y_lab]
y_train = X.loc[:ind_name, y_lab]
y_test = X.loc[ind_name_nxt:, y_lab]

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import FunctionTransformer
#from sklearn.preprocessing import StandardScaler

#defunc_ix, bedrooms_ix, population_ix, household_ix = [
#    list(housing.columns).index(col)
#    for col in ("total_rooms", "total_bedrooms", "population", "households")]
defunc_ix = list(X_train.columns).index("Defunciones regional")
# = data.iloc[:, defunc_ix]
class DataFormatter(BaseEstimator, TransformerMixin):
    """Helper with array-level preprocessing steps used by this notebook."""

    def __init__(self, add_bedrooms_per_room = True): # no *args or **kwargs
        # Not read by any method of this class; kept so existing constructor
        # calls remain valid.
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """No-op; returns ``self``."""
        return self  # nothing else to do

    def acum_to_daily(self, X, y=None, column_ix=None):
        """Replace one cumulative column by its row-to-row increments.

        Parameters
        ----------
        X : 2-D array
            Input data. It is copied; the caller's array is left untouched.
        y : ignored
        column_ix : int, optional
            Position of the cumulative column. Defaults to the module-level
            ``defunc_ix``.

        Returns
        -------
        numpy.ndarray
            Copy of ``X`` in which row ``k`` of the selected column holds
            ``value[k] - value[k-1]``; the first row, which has no
            predecessor, holds 0.
        """
        ix = defunc_ix if column_ix is None else column_ix
        # Work on a copy: X is often a view of a DataFrame's data, and writing
        # into it would silently modify that frame as well.
        X = np.array(X, copy=True)
        acumulado = X[:, ix].copy()
        diario = np.zeros_like(acumulado)
        # Each increment stays aligned with the row it belongs to; an array
        # with zero or one rows simply yields no increments.
        diario[1:] = acumulado[1:] - acumulado[:-1]
        X[:, ix] = diario
        return X

    def scale_data(self, X, y=None):
        """Min-max scale ``X`` to [0, 1] with a scaler fitted on ``X`` itself."""
        scaler = MinMaxScaler(feature_range=(0, 1))
        data_scaled = scaler.fit_transform(X)
        return data_scaled

formatter = DataFormatter()


In [None]:
data_prec_train = formatter.acum_to_daily(X_train.values)
data_prec_test = formatter.acum_to_daily(X_test.values)

In [None]:
#data_prec_train_norm = formatter.scale_data(data_prec_train)
#data_prec_test_norm = formatter.scale_data(data_prec_test)

In [None]:
# Wrap the processed arrays back into DataFrames that carry the labels of the
# frames they were computed from.
x_train = pd.DataFrame(data_prec_train, index=X_train.index, columns=X_train.columns.tolist())
x_test = pd.DataFrame(data_prec_test, index=X_test.index, columns=X_test.columns.tolist())

In [None]:
x_train.shape

In [None]:
y_train.shape

## Eliminación de variables por importancia

In [None]:
from xgboost import XGBRegressor

# The target is a continuous (min-max scaled) quantity, so a regressor is the
# appropriate estimator; a classifier would treat every distinct value as a class.
model = XGBRegressor()
# Fit on the processed features (x_train), the same frame the reduced feature
# sets further down are derived from.
model.fit(x_train, y_train)
importances = pd.DataFrame(data={
    'Attribute': x_train.columns,
    'Importance': model.feature_importances_
})
importances = importances.sort_values(by='Importance', ascending=False)
importances

In [None]:
#ESTE
# Same set of columns is removed from the train and the test frame.
cols_to_drop = [
    "Segunda_V",
    "Enfermedad hepática crónica",
    "Enfermedad neurológica crónica",
    "Enfermedad renal crónica",
    "Cardiopatía crónica",
    "Enfermedad pulmonar crónica",
    "Enfermedad cardiovascular",
    "Asma",
    "Obesidad",
    "Diabetes",
    "V_Unica",
]
x_train2 = x_train.drop(columns=cols_to_drop)
x_test2 = x_test.drop(columns=cols_to_drop)

In [None]:
# Refit on the reduced feature set and rank the remaining columns.
model.fit(x_train2, y_train)
importances = (
    pd.DataFrame({
        'Attribute': x_train2.columns,
        'Importance': model.feature_importances_,
    })
    .sort_values(by='Importance', ascending=False)
)
importances

In [None]:
# Hacer plot de importancia

## Eliminar variables correlacionadas

In [None]:
corr = x_train2.corr(method="pearson")
corr

In [None]:

# Absolute correlations, masked to the strict upper triangle (k=1 excludes the
# diagonal) so every pair of columns is inspected once.
cor_matrix = corr.abs()
# Builtin bool instead of the np.bool alias, which newer NumPy releases no longer provide.
upper_tri = cor_matrix.where(np.triu(np.ones(cor_matrix.shape),k=1).astype(bool))
#upper_tri.iloc[4,:] > 0.90
#print(upper_tri)
# Columns having at least one correlation above 0.90 with an earlier column.
to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.90)]
print(to_drop)

#data_dropped = x_train.copy()
#data_dropped = data_dropped.drop(to_drop, axis=1)

In [None]:
#ESTE
# Greedy removal of highly correlated features.
# Columns are visited in order; each surviving column is kept and every other
# column whose Pearson correlation with it is >= 0.90 is marked for removal.
# Every column is used as the reference in turn, not only the first one.
# Pairwise Pearson correlations do not depend on which other columns are
# present, so a single correlation matrix is enough.
corr = x_train2.corr(method="pearson")
corrList = []
for ref_col in corr.columns:
    if ref_col in corrList:
        # Already marked for removal because of an earlier reference column.
        continue
    # Exclude the reference column itself by label rather than by value, so
    # that a distinct column correlated exactly 1.0 is still removed.
    partners = corr.index[(corr[ref_col] >= 0.90) & (corr.index != ref_col)]
    corrList.extend(col for col in partners if col not in corrList)

x_train2 = x_train2.drop(corrList, axis=1)
x_test2 = x_test2.drop(corrList, axis=1)
corr = x_train2.corr(method="pearson")

    

In [None]:
#ESTE
x_train3 = x_train2
x_test3 = x_test2
corr = x_train3.corr(method="pearson")
corr

#### ¿POR QUÉ NO SE ELIMINAN HIPERTENSIÓN ARTERIAL/INMUNOCOMPROMETIDO Y VENT.DISP/TOTAL.VENT? ####

In [None]:
#ESTE
#x_train3 = x_train2.drop(["Hipertensión arterial", "Camas UTI nac.", "Camas UCI ocupadas COVID-19", "Camas UCI habilitadas", 'Camas medias nac.', 'Vent. disponibles', 'Casos activos region'], axis=1)
#x_test3 = x_test2.drop(["Hipertensión arterial", "Camas UTI nac.", "Camas UCI ocupadas COVID-19", "Camas UCI habilitadas", 'Camas medias nac.', 'Vent. disponibles', 'Casos activos region'], axis=1)
#corr = x_train3.corr(method="pearson")
#corr
#to_drop = x_train[["Enfermedad renal crónica", "Camas UCI nac.", ]]

In [None]:
# Refit on the final feature set and rank its columns.
model.fit(x_train3, y_train)
importances = (
    pd.DataFrame({
        'Attribute': x_train3.columns,
        'Importance': model.feature_importances_,
    })
    .sort_values(by='Importance', ascending=False)
)
importances

In [None]:
x_train = x_train3.copy()
x_test = x_test3.copy()

In [None]:
# Join the target onto each feature frame by index (outer join), so from this
# point on x_train / x_test carry the target as an additional column.
# NOTE(review): presumably done so the target is lagged together with the
# features in the "Retrasos" section; any model fitted directly on
# x_train / x_test after this cell sees the target as an input — confirm.
x_train = pd.merge(x_train, y_train, how='outer',left_index=True, right_index=True)
x_test = pd.merge(x_test, y_test, how='outer',left_index=True, right_index=True)

# Modelación

In [None]:
#from sklearn.model_selection import TimeSeriesSplit

#test_size = round(0.25*data.shape[0])
#print(test_size)
#data_x = data.drop("Camas UCI ocupadas COVID-19", axis=1)
#data_y = data["Camas UCI ocupadas COVID-19"].copy()
#split = TimeSeriesSplit(data_x, data_y, test_size=test_size)
#for train_index, test_index in TimeSeriesSplit.split(data_x):
#    print("TRAIN:", train_index, "TEST:", test_index)
#    X_train, X_test = X[train_index], X[test_index]
#    y_train, y_test = y[train_index], y[test_index]

In [None]:
#data = strat_train_set.drop("Camas UCI ocupadas COVID-19", axis=1)
#uci = strat_train_set["Camas UCI ocupadas COVID-19"].copy()

## Modelo de regresión lineal

In [None]:
#import sys
#!{sys.executable} -m pip install sklearn

In [None]:
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression()
lin_reg.fit(x_train3, y_train)

In [None]:
linear_predct = lin_reg.predict(x_test3)
print("Predictions:", linear_predct)

In [None]:
# Turn the prediction array into a Series aligned with the test index so it
# can be compared and concatenated with y_test.
linear_predct = pd.Series(linear_predct)
linear_predct.index = x_test3.index

#print(y_test)
#print(linear_predct.index)
# Correlation between reference and predicted values. The result is neither
# stored nor the last expression of the cell, so it is not displayed.
y_test.corr(linear_predct)

# Side-by-side frame (reference, prediction) used by the plot below.
data_linear = pd.concat([y_test, linear_predct], axis=1)

In [None]:
fig, ax4 = plt.subplots(1, 1)
ax4.plot(data_linear)
ax4.legend(('Valores referencia', 'Valores predichos'), fontsize=15)
plt.title("Predicción Regresión Lineal", fontsize=30)
plt.xlabel("Fecha", fontsize=16)
plt.ylabel("UCI ocupadas", fontsize=16)

In [None]:
from sklearn.metrics import mean_squared_error

lin_mse = mean_squared_error(y_test, linear_predct)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

In [None]:
from sklearn.metrics import mean_absolute_error

lin_mae = mean_absolute_error(y_test, linear_predct)
lin_mae

In [None]:
from sklearn.tree import DecisionTreeRegressor

tree_reg = DecisionTreeRegressor(random_state=42)
tree_reg.fit(x_train, y_train)

In [None]:
tree_predictions = tree_reg.predict(x_test)
tree_mse = mean_squared_error(y_test, tree_predictions)
tree_rmse = np.sqrt(tree_mse)
tree_rmse

## Retrasos

In [None]:
look_back = 2

In [None]:
# Function to create the data set
# Equation f(yt) = f(yt-1)
# convert an array of values into a dataset matrix
def create_dataset(name, dataset, look_back=1):
    """Build a frame of sliding windows over ``dataset``.

    Parameters
    ----------
    name : str
        Prefix for the generated column names.
    dataset : pandas.Series
        Source values; its index supplies the row labels of the result.
    look_back : int, default 1
        Window length, i.e. number of generated columns.

    Returns
    -------
    pandas.DataFrame
        ``len(dataset) - look_back - 1`` rows. Row ``k`` holds
        ``dataset.values[k : k + look_back]`` and is labelled with
        ``dataset.index[k + look_back + 1]``. Columns are named
        ``<name>_t-(look_back-1)`` ... ``<name>_t-0``, left to right.
    """
    values = dataset.values
    # Highest lag first, so the right-most column is "<name>_t-0".
    lag_names = [name + "_t-" + str(lag) for lag in range(look_back - 1, -1, -1)]
    n_windows = len(values) - look_back - 1
    windows = [values[start:start + look_back] for start in range(n_windows)]
    row_labels = dataset.index[(look_back + 1):]
    return pd.DataFrame(windows, columns=lag_names, index=row_labels)

In [None]:
# Insert lagged columns into the training feature set
dt_train = pd.DataFrame()
for col in x_train.columns:
    lagged = create_dataset(col, x_train[col], look_back)
    lag_cols = lagged.columns[:-1]  # drop the last column (the "_t-0" one)
    dt_train[lag_cols] = lagged[lag_cols]
x_train = pd.DataFrame(dt_train)

In [None]:
# Insert lagged columns into the test feature set
dt_test = pd.DataFrame()
for col in x_test.columns:
    lagged = create_dataset(col, x_test[col], look_back)
    lag_cols = lagged.columns[:-1]  # drop the last column (the "_t-0" one)
    dt_test[lag_cols] = lagged[lag_cols]
x_test = pd.DataFrame(dt_test)

In [None]:
# Build the windowed target for train and test and keep only its "_t-0"
# column, so the targets share the row labels produced by create_dataset.
y_lab_t0 = y_lab + "_t-0"

y_train = create_dataset(y_lab, y_train, look_back)
y_train = y_train[[y_lab_t0]]
y_test = create_dataset(y_lab, y_test, look_back)
y_test = y_test[[y_lab_t0]]

## Aplicar Multilayer Perceptron model

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [None]:
# create and fit Multilayer Perceptron model
model = Sequential()
model.add(Dense(12, input_dim=x_train.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=100, batch_size=2, verbose=2)

In [None]:
from math import sqrt
# Estimate model performance
trainScore = model.evaluate(x_train, y_train, verbose=0)
print('Train Score: %.2f MSE (%.2f RMSE)' % (trainScore, sqrt(trainScore)))
testScore = model.evaluate(x_test, y_test, verbose=0)
print('Test Score: %.2f MSE (%.2f RMSE)' % (testScore, sqrt(testScore)))

In [None]:
# Generate predictions for training
trainPredict = model.predict(x_train)
testPredict = model.predict(x_test)

In [None]:
# Shift train predictions for plotting
# Start from an all-NaN float buffer with the shape of the target column.
# np.empty_like would inherit the target's dtype, and an integer dtype
# cannot be filled with NaN.
trainPredictPlot = np.full(data[[y_lab]].shape, np.nan)
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

In [None]:
# Shift test predictions for plotting
# Start from an all-NaN float buffer with the shape of the target column.
# np.empty_like would inherit the target's dtype, and an integer dtype
# cannot be filled with NaN.
testPredictPlot = np.full(data[[y_lab]].shape, np.nan)
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(data)-1, :] = testPredict

In [None]:
trainPredictPlot = pd.DataFrame(trainPredictPlot, index=data.index)
testPredictPlot = pd.DataFrame(testPredictPlot, index=data.index)

In [None]:
# Plot original data and predictions
fig, ax = plt.subplots(figsize=(18,8)) # Tamaño del gráfico
plt.plot(X[[y_lab]], color="blue", label="Original data")

#Título del gráfico
ax.set_title('Predicción del número de camas UCI nacional')

# Define del eje y
ax.set_ylabel('Camas UCI')
plt.grid(True, 'major', 'y', ls='--', lw=1.5, c='k', alpha=.3)

#Define el eje x
plt.tick_params(axis='x', which='major', labelsize=12)

#plt.axis([0, 155,0,650])

plt.plot(trainPredictPlot, color="green", label="Train predict")
plt.plot(testPredictPlot, color="orange", label="Test predict")
ax.legend(loc="lower right", title="", frameon=False)


plt.show()

# LSTM

In [None]:
x_train_temp = x_train
x_test_temp = x_test

In [None]:
# reshape input to be [samples, time steps, features != columns]
# `features` is the column count divided by (look_back-1); the arrays are
# reshaped to (rows, features, look_back-1).
# NOTE(review): this puts `features` on axis 1 and the lag count on axis 2,
# whereas the comment above (and Keras LSTM) order the axes as
# (samples, time steps, features) — confirm the intended axis order.
# NOTE(review): look_back == 1 would divide by zero here.
features = round(x_train_temp.shape[1]/(look_back-1))
x_train_temp = np.reshape(x_train_temp.values, (x_train_temp.shape[0], features, look_back-1))
x_test_temp = np.reshape(x_test_temp.values, (x_test_temp.shape[0], features, look_back-1))

In [None]:
# create and fit the LSTM network
# Two stacked stateful LSTM layers (4 units each) followed by a dense layer
# that emits `look_forward` values per sample.
batch_size = 1
look_forward = 14
model = Sequential()
# return_sequences=True so the second LSTM receives the full sequence.
model.add(LSTM(4, batch_input_shape=(batch_size, features, look_back-1), stateful=True, return_sequences=True))
model.add(LSTM(4, batch_input_shape=(batch_size, features, look_back-1), stateful=True))
# NOTE(review): the output has look_forward (14) units, while the target built
# earlier keeps a single "_t-0" column — confirm that a multi-step target is
# supplied, otherwise all outputs are trained against the same value.
model.add(Dense(look_forward))
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
model.summary()

In [None]:
y_train_temp = y_train
y_test_temp = y_test

In [None]:
for i in range(100):
    model.fit(x_train_temp, y_train_temp.values, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()

In [None]:
# make predictions
# The model is stateful, so its state is reset between the train and test
# passes.
trainPredict = model.predict(x_train_temp, batch_size=batch_size)
model.reset_states()
testPredict = model.predict(x_test_temp, batch_size=batch_size)
# invert predictions
#trainPredict = scaler.inverse_transform(trainPredict)
#y_train = scaler.inverse_transform([y_train])
#testPredict = scaler.inverse_transform(testPredict)
#y_test = scaler.inverse_transform([y_test])
# calculate root mean squared error
# Only output column 0 of the predictions is compared with target column 0.
trainScore = sqrt(mean_squared_error(y_train_temp.values[:,0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = sqrt(mean_squared_error(y_test_temp.values[:,0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
#trainPredict_temp = trainPredict
#testPredict_temp = testPredict

In [None]:
trainPredict = trainPredict[:,0]

In [None]:
testPredict.shape

In [None]:
#today = testPredict.shape[0]
testPredictActual = testPredict[:,0]
testPredictFuture = testPredict[-1,0:]


In [None]:
from datetime import datetime

# Extend the reference series' index with the dates of the forecast horizon.
Y = X[[y_lab]]
last_date = data.index[-1]
# The 14-date range starts at last_date itself.
# NOTE(review): if last_date is already in Y.index, the union below adds only
# 13 new rows; the slice arithmetic in the following cells appears to rely on
# that — confirm before changing `start` or `periods`.
predicted_dates = pd.date_range(start = last_date, periods = 14).to_pydatetime().tolist()
Y = Y.reindex(Y.index.union(predicted_dates))
#new_len = len(trainPredict) + len(predicted_dates)

In [None]:
from copy import deepcopy
# Create datasets for plotting
base = np.empty_like(Y)
base[:] = np.nan

In [None]:
# Create reference data
referencePlot = deepcopy(base[:])
referencePlot[:len(X),:] = X[[y_lab]]
referencePlot = pd.DataFrame(referencePlot, columns=['Reference values'], index=Y.index)

In [None]:
# Create train predict data
trainPredictPlot = deepcopy(base[:])
trainPredictPlot[look_back:len(trainPredict)+look_back,0] = trainPredict[:]
trainPredictPlot = pd.DataFrame(trainPredictPlot, columns=['Predicted Training'], index=Y.index)

In [None]:
# Create test predict data
testPredictPlot = deepcopy(base[:])
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(base)-len(testPredictFuture),0] = testPredictActual[:]
testPredictPlot = pd.DataFrame(testPredictPlot, columns=['Predicted Testing'], index=Y.index)

In [None]:
# Create predicted data
predictedFuturePlot = deepcopy(base[:])
predictedFuturePlot[len(base)-len(testPredictFuture):,0] = testPredictFuture[:]
predictedFuturePlot = pd.DataFrame(predictedFuturePlot, columns=['Predicted Future'],index=Y.index)

In [None]:
plotData = pd.merge(referencePlot, trainPredictPlot, how='outer', left_index=True, right_index=True)
plotData = pd.merge(plotData, testPredictPlot, how='outer', left_index=True, right_index=True)
plotData = pd.merge(plotData, predictedFuturePlot, how='outer', left_index=True, right_index=True)

In [None]:
plotData

In [None]:
 fig = plt.figure(figsize=(18,8))
ax = fig.add_subplot(111)
ax.plot(plotData)
#Título del gráfico
plt.title('Red Neuronal LSTM2', fontsize=30)
plt.xlabel("Fecha", fontsize=16)
plt.ylabel("Camas UCI Normalizado", fontsize=16)
ax.grid(True, 'major', 'y', ls='--', lw=1.5, c='k', alpha=.3)
ax.legend(plotData.columns,loc='upper left')
plt.show()