In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import lightgbm as lgb
import xgboost as xgb
import seaborn as sns
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.api import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
import random


In [None]:
# Load the cleaned long-format table (semicolon separated) and index it by "id".
data = pd.read_csv("cleaned_structured_data.csv", sep=";").set_index(["id"])

In [None]:
# for machine learning
#
# Pivot the long table `data` (indexed by "id", one row per "day") into one row
# per id: for every day 0..106 there is one column per measurement, named
# "<measurement><day>" (e.g. "temperature12"), followed by age, gender and BMI.
# Ids without a row for a given day get NaN in that day's columns.

patient_ids = pd.Index(data.index.unique(), name="id")

# Collect all day columns first and build the frame once; inserting 428 columns
# one at a time fragments the DataFrame and triggers PerformanceWarning.
day_columns = {}
for i in range(107):
    day_rows = data[data["day"] == i]  # rows recorded on day i, still indexed by id
    for measurement in ("ADM", "o2Saturation", "temperature", "bloodPressure"):
        day_columns[measurement + str(i)] = day_rows[measurement]
dataset = pd.DataFrame(day_columns, index=patient_ids)

per_patient = data.groupby("id")
dataset["age"] = per_patient["age"].first()
dataset["gender"] = per_patient["gender"].first()
# Select the column before aggregating: a frame-wide .mean() also tries to
# average the string "gender" column, which raises in pandas >= 2.0.
dataset["BMI"] = per_patient["BMI"].mean()
# Encode gender as 0 for "Man" and 1 for every other value (including missing).
dataset["gender"] = np.where(dataset["gender"] == "Man", 0, 1)

In [None]:
# for traditional time series forecasting
#
# Keep rows with daysOfHospital >= 10 and BMI > 31, then for every remaining id
# store every prefix of its rows with length >= 10.
# time_series_set[k] holds the prefixes of length k + 10.
# NOTE(review): assumes each id's rows are already in chronological order and
# that no id has more than 109 rows (longer records would overflow the 100
# buckets with an IndexError) - confirm against the data.

structured_data = data[data["daysOfHospital"] >= 10].reset_index()
structured_data = structured_data[structured_data["BMI"] > 31]
# The daysOfHospital filter was already applied above, so it is not repeated.
id_list = structured_data["id"].drop_duplicates()
time_series_set = [[] for _ in range(100)]
# Loop names avoid rebinding the loaded frame `data` and the builtin `id`.
for patient_id in id_list:
    patient_data = structured_data[structured_data["id"] == patient_id].reset_index()
    for length in range(10, len(patient_data) + 1):
        time_series_set[length - 10].append(patient_data[:length])

# forecast temperature

In [None]:
# forecast temperature: traditional methods
#
# Every series in time_series_set[i] has its last 3 observations held out. Each
# baseline is fitted on the remaining history and scored with the MSE of its
# 3-step forecast; scores are stored in bucket i of the matching list.

mse_avg_TEMPERATURE = [[] for _ in range(100)]
mse_naive_TEMPERATURE = [[] for _ in range(100)]
mse_ets_TEMPERATURE = [[] for _ in range(100)]
mse_movingavg_TEMPERATURE = [[] for _ in range(100)]
mse_arima_TEMPERATURE = [[] for _ in range(100)]
mse_arima2_TEMPERATURE = [[] for _ in range(100)]
mse_arima3_TEMPERATURE = [[] for _ in range(100)]

for i in range(100):
    for series in time_series_set[i]:

        # Not called `data`, so this cell does not rebind that notebook-level name.
        temperature = series["temperature"]
        history = temperature[:-3]
        actual = temperature[-3:]
        first_step = len(temperature) - 3
        last_step = len(temperature) - 1

        # average: mean of the history repeated for the 3 held-out steps
        y_hat_1 = [sum(history) / len(history)] * 3
        mse_avg_TEMPERATURE[i].append(mean_squared_error(actual, y_hat_1))

        # "naive": ExponentialSmoothing without trend or seasonal component,
        # i.e. simple exponential smoothing rather than a last-value forecast
        y_hat_2 = ExponentialSmoothing(history).fit().predict(first_step, last_step)
        mse_naive_TEMPERATURE[i].append(mean_squared_error(actual, y_hat_2))

        # ets: exponential smoothing with an additive trend
        y_hat_3 = ExponentialSmoothing(history, trend="add").fit().predict(first_step, last_step)
        mse_ets_TEMPERATURE[i].append(mean_squared_error(actual, y_hat_3))

        # ARIMA family; order (0, 0, 1) is the MA(1) model stored as "movingavg"
        for order, buckets in (
            ((0, 0, 1), mse_movingavg_TEMPERATURE),
            ((1, 1, 1), mse_arima_TEMPERATURE),
            ((0, 1, 1), mse_arima2_TEMPERATURE),
            ((1, 1, 0), mse_arima3_TEMPERATURE),
        ):
            forecast = ARIMA(history, order=order).fit().predict(first_step, last_step)
            buckets[i].append(mean_squared_error(actual, forecast))


In [None]:
def build_traindata_temperature(i):
    """Return a 7-day temperature window starting at day ``i``.

    Only rows of ``dataset`` whose temperature on day ``i + 9`` is present are
    kept. Columns are ordered from day ``i + 6`` down to day ``i`` and renamed
    ``temperature6`` ... ``temperature0``.
    """
    lags = range(6, -1, -1)
    has_day = dataset["temperature" + str(i + 9)].notna()
    window = dataset.loc[has_day, ["temperature" + str(i + lag) for lag in lags]]
    window.columns = ["temperature" + str(lag) for lag in lags]
    return window

def build_testdata0_temperature(i):
    """Return the first-step test window of temperatures starting at day ``i``.

    Same layout as the training window (``temperature6`` = day ``i + 6`` ...
    ``temperature0`` = day ``i``), but rows are kept when the temperature on
    day ``i + 8`` is present.
    """
    lags = range(6, -1, -1)
    has_day = dataset["temperature" + str(i + 8)].notna()
    window = dataset.loc[has_day, ["temperature" + str(i + lag) for lag in lags]]
    window.columns = ["temperature" + str(lag) for lag in lags]
    return window

def build_testdata_temperature(train_data, y, i):
    """Advance a temperature window by one day for recursive forecasting.

    Parameters
    ----------
    train_data : DataFrame with columns ``temperature6`` ... ``temperature0``.
        It is not modified.
    y : values written to ``temperature5`` (the callers pass the previous
        step's predictions); assigned positionally, one per row.
    i : day offset; the new ``temperature6`` is read from the ``dataset``
        column for day ``i + 6``, aligned on the index.

    Returns
    -------
    DataFrame with the same columns, where ``temperature0..4`` hold the former
    ``temperature1..5``.
    """
    # Explicit copy: assigning columns on a column-selected slice triggers
    # SettingWithCopyWarning.
    test_data = train_data[train_data.columns].copy()
    # Ascending order, so each column is read before it is overwritten.
    for j in range(5):
        test_data["temperature" + str(j)] = test_data["temperature" + str(j + 1)]
    test_data["temperature5"] = y
    test_data["temperature6"] = dataset["temperature" + str(i + 6)]
    return test_data

In [None]:
# forecast temperature: lightgbm
#
# Expanding-window evaluation: at origin i the model is retrained on all
# training windows built so far, then forecasts 3 steps recursively (each
# prediction is fed back as the newest lag for the next step).

mse_temperature = []
y_list_temperature = []
y_hat_list_temperature = []
mse_list_temperature = pd.Series(dtype=float)
train_data_temperature = pd.DataFrame()

# The original literal listed "metric" and "bagging_freq" twice; Python keeps
# the last occurrence, so only the effective values are written here.
params = {
    "objective": "regression",
    "metric": ["mse"],
    "force_row_wise": True,
    "learning_rate": 0.015,
    "bagging_freq": 2,
    "num_iterations": 200,
    "num_leaves": 100,
    "min_child_samples": 30,
    "min_child_weight": 0.001,
    "bagging_fraction": 0.9,
}

train_frames_temperature = []
squared_errors_temperature = []

# 97 origins, matching the XGBoost cell: each origin reads day column i + 9 and
# `dataset` only has day columns 0..106, so range(100) raised KeyError at i = 98.
for i in range(97):
    # DataFrame.append was removed in pandas 2.0; concatenate the collected windows.
    train_frames_temperature.append(build_traindata_temperature(i))
    train_data_temperature = pd.concat(train_frames_temperature)

    x = train_data_temperature.iloc[:, 1:]
    y = train_data_temperature.iloc[:, 0]

    trainData = lgb.Dataset(data=x, label=y)
    m_lgb = lgb.train(params, trainData)

    # step 1: features are observed values only
    test_data1 = build_testdata0_temperature(i + 1)
    x1 = test_data1.iloc[:, 1:]
    y1 = test_data1.iloc[:, 0]
    y1_hat = m_lgb.predict(x1)

    # step 2: newest lag is the step-1 prediction
    test_data2 = build_testdata_temperature(test_data1, y1_hat, i + 2)
    x2 = test_data2.iloc[:, 1:]
    y2 = test_data2.iloc[:, 0]
    y2_hat = m_lgb.predict(x2)

    # step 3: newest lag is the step-2 prediction
    test_data3 = build_testdata_temperature(test_data2, y2_hat, i + 3)
    x3 = test_data3.iloc[:, 1:]
    y3 = test_data3.iloc[:, 0]
    y3_hat = m_lgb.predict(x3)

    y_list_temperature.extend(list(y1))
    y_list_temperature.extend(list(y2))
    y_list_temperature.extend(list(y3))

    y_hat_list_temperature.extend(list(y1_hat))
    y_hat_list_temperature.extend(list(y2_hat))
    y_hat_list_temperature.extend(list(y3_hat))

    # per-row squared errors, indexed like the test rows
    squared_errors_temperature.extend([(y1 - y1_hat) ** 2, (y2 - y2_hat) ** 2, (y3 - y3_hat) ** 2])

    # row-weighted mean of the three per-step MSEs for this origin
    mse_temperature.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

mse_list_temperature = pd.concat(squared_errors_temperature)

In [None]:
# forecast temperature: xgboost
#
# Same expanding-window, 3-step recursive evaluation as the LightGBM cell,
# with an XGBRegressor refitted at every origin.

mse_xgboost_temperature = []
y_list_xgboost_temperature = []
y_hat_list_xgboost_temperature = []
train_data_xgboost_temperature = pd.DataFrame()
mse_list_xgboost_temperature = pd.Series(dtype=float)

params = {
    'max_depth': 2,
    'n_estimators': 100,
    'learning_rate': 0.1,
}

train_frames_xgboost_temperature = []
squared_errors_xgboost_temperature = []

for i in range(97):
    # DataFrame.append was removed in pandas 2.0; concatenate the collected windows.
    train_frames_xgboost_temperature.append(build_traindata_temperature(i))
    train_data_xgboost_temperature = pd.concat(train_frames_xgboost_temperature)

    x = train_data_xgboost_temperature.iloc[:, 1:]
    y = train_data_xgboost_temperature.iloc[:, 0]

    model = xgb.XGBRegressor(**params)
    model.fit(x, y)

    # step 1: features are observed values only
    test_data1 = build_testdata0_temperature(i + 1)
    x1 = test_data1.iloc[:, 1:]
    y1 = test_data1.iloc[:, 0]
    y1_hat = model.predict(x1)

    # step 2: newest lag is the step-1 prediction
    test_data2 = build_testdata_temperature(test_data1, y1_hat, i + 2)
    x2 = test_data2.iloc[:, 1:]
    y2 = test_data2.iloc[:, 0]
    y2_hat = model.predict(x2)

    # step 3: newest lag is the step-2 prediction
    test_data3 = build_testdata_temperature(test_data2, y2_hat, i + 3)
    x3 = test_data3.iloc[:, 1:]
    y3 = test_data3.iloc[:, 0]
    y3_hat = model.predict(x3)

    y_list_xgboost_temperature.extend(list(y1))
    y_list_xgboost_temperature.extend(list(y2))
    y_list_xgboost_temperature.extend(list(y3))

    y_hat_list_xgboost_temperature.extend(list(y1_hat))
    y_hat_list_xgboost_temperature.extend(list(y2_hat))
    y_hat_list_xgboost_temperature.extend(list(y3_hat))

    # per-row squared errors, indexed like the test rows
    squared_errors_xgboost_temperature.extend([(y1 - y1_hat) ** 2, (y2 - y2_hat) ** 2, (y3 - y3_hat) ** 2])

    # row-weighted mean of the three per-step MSEs for this origin
    mse_xgboost_temperature.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

mse_list_xgboost_temperature = pd.concat(squared_errors_xgboost_temperature)


In [None]:
# Temperature MSE per method. Classical baselines first (mean of the per-series
# MSEs over all buckets), then LightGBM and XGBoost over all pooled predictions.
for buckets in (
    mse_avg_TEMPERATURE,
    mse_naive_TEMPERATURE,
    mse_ets_TEMPERATURE,
    mse_movingavg_TEMPERATURE,
    mse_arima_TEMPERATURE,
    mse_arima2_TEMPERATURE,
    mse_arima3_TEMPERATURE,
):
    total = sum(sum(bucket) for bucket in buckets)
    count = sum(len(bucket) for bucket in buckets)
    print(total / count)
print(mean_squared_error(y_list_temperature, y_hat_list_temperature))
print(mean_squared_error(y_list_xgboost_temperature, y_hat_list_xgboost_temperature))

In [None]:
# NOTE(review): scratch arithmetic (0.37 squared); presumably an RMSE of 0.37
# converted to an MSE for comparison with the values above - confirm or delete.
0.37 * 0.37

In [None]:
# Temperature RMSE per method: square roots of the MSE figures, in the same
# order (seven classical baselines, then LightGBM, then XGBoost).
for buckets in (
    mse_avg_TEMPERATURE,
    mse_naive_TEMPERATURE,
    mse_ets_TEMPERATURE,
    mse_movingavg_TEMPERATURE,
    mse_arima_TEMPERATURE,
    mse_arima2_TEMPERATURE,
    mse_arima3_TEMPERATURE,
):
    pooled_mse = sum(sum(bucket) for bucket in buckets) / sum(len(bucket) for bucket in buckets)
    print(pooled_mse ** 0.5)
print(mean_squared_error(y_list_temperature, y_hat_list_temperature) ** 0.5)
print(mean_squared_error(y_list_xgboost_temperature, y_hat_list_xgboost_temperature) ** 0.5)

# forecast blood pressure

In [None]:
# forecast blood pressure: traditional methods
#
# Every series in time_series_set[i] has its last 3 observations held out. Each
# baseline is fitted on the remaining history and scored with the MSE of its
# 3-step forecast; scores are stored in bucket i of the matching list.

mse_avg_BLOODPRESSURE = [[] for _ in range(100)]
mse_naive_BLOODPRESSURE = [[] for _ in range(100)]
mse_ets_BLOODPRESSURE = [[] for _ in range(100)]
mse_movingavg_BLOODPRESSURE = [[] for _ in range(100)]
mse_arima_BLOODPRESSURE = [[] for _ in range(100)]
mse_arima2_BLOODPRESSURE = [[] for _ in range(100)]
mse_arima3_BLOODPRESSURE = [[] for _ in range(100)]

for i in range(100):
    for series in time_series_set[i]:

        # Not called `data`, so this cell does not rebind that notebook-level name.
        blood_pressure = series["bloodPressure"]
        history = blood_pressure[:-3]
        actual = blood_pressure[-3:]
        first_step = len(blood_pressure) - 3
        last_step = len(blood_pressure) - 1

        # average: mean of the history repeated for the 3 held-out steps
        y_hat_1 = [sum(history) / len(history)] * 3
        mse_avg_BLOODPRESSURE[i].append(mean_squared_error(actual, y_hat_1))

        # "naive": ExponentialSmoothing without trend or seasonal component,
        # i.e. simple exponential smoothing rather than a last-value forecast
        y_hat_2 = ExponentialSmoothing(history).fit().predict(first_step, last_step)
        mse_naive_BLOODPRESSURE[i].append(mean_squared_error(actual, y_hat_2))

        # ets: exponential smoothing with an additive trend
        y_hat_3 = ExponentialSmoothing(history, trend="add").fit().predict(first_step, last_step)
        mse_ets_BLOODPRESSURE[i].append(mean_squared_error(actual, y_hat_3))

        # ARIMA family; order (0, 0, 1) is the MA(1) model stored as "movingavg"
        for order, buckets in (
            ((0, 0, 1), mse_movingavg_BLOODPRESSURE),
            ((1, 1, 1), mse_arima_BLOODPRESSURE),
            ((0, 1, 1), mse_arima2_BLOODPRESSURE),
            ((1, 1, 0), mse_arima3_BLOODPRESSURE),
        ):
            forecast = ARIMA(history, order=order).fit().predict(first_step, last_step)
            buckets[i].append(mean_squared_error(actual, forecast))


In [None]:
def build_traindata_bloodPressure(i):
    """Return a 7-day blood-pressure window starting at day ``i``.

    Only rows of ``dataset`` whose blood pressure on day ``i + 9`` is present
    are kept. Columns are ordered from day ``i + 6`` down to day ``i`` and
    renamed ``bloodPressure6`` ... ``bloodPressure0``.
    """
    lags = range(6, -1, -1)
    has_day = dataset["bloodPressure" + str(i + 9)].notna()
    window = dataset.loc[has_day, ["bloodPressure" + str(i + lag) for lag in lags]]
    window.columns = ["bloodPressure" + str(lag) for lag in lags]
    return window

def build_testdata0_bloodPressure(i):
    """Return the first-step test window of blood pressures starting at day ``i``.

    Same layout as the training window (``bloodPressure6`` = day ``i + 6`` ...
    ``bloodPressure0`` = day ``i``), but rows are kept when the blood pressure
    on day ``i + 8`` is present.
    """
    lags = range(6, -1, -1)
    has_day = dataset["bloodPressure" + str(i + 8)].notna()
    window = dataset.loc[has_day, ["bloodPressure" + str(i + lag) for lag in lags]]
    window.columns = ["bloodPressure" + str(lag) for lag in lags]
    return window

def build_testdata_bloodPressure(train_data, y, i):
    """Advance a blood-pressure window by one day for recursive forecasting.

    Parameters
    ----------
    train_data : DataFrame with columns ``bloodPressure6`` ... ``bloodPressure0``.
        It is not modified.
    y : values written to ``bloodPressure5`` (the callers pass the previous
        step's predictions); assigned positionally, one per row.
    i : day offset; the new ``bloodPressure6`` is read from the ``dataset``
        column for day ``i + 6``, aligned on the index.

    Returns
    -------
    DataFrame with the same columns, where ``bloodPressure0..4`` hold the
    former ``bloodPressure1..5``.
    """
    # Explicit copy: assigning columns on a column-selected slice triggers
    # SettingWithCopyWarning.
    test_data = train_data[train_data.columns].copy()
    # Ascending order, so each column is read before it is overwritten.
    for j in range(5):
        test_data["bloodPressure" + str(j)] = test_data["bloodPressure" + str(j + 1)]
    test_data["bloodPressure5"] = y
    test_data["bloodPressure6"] = dataset["bloodPressure" + str(i + 6)]
    return test_data


In [None]:
# forecast bloodPressure: lightgbm
#
# Expanding-window evaluation: at origin i the model is retrained on all
# training windows built so far, then forecasts 3 steps recursively (each
# prediction is fed back as the newest lag for the next step).

mse_bloodPressure = []
y_list_bloodPressure = []
y_hat_list_bloodPressure = []
mse_list_bloodPressure = pd.Series(dtype=float)
train_data_bloodPressure = pd.DataFrame()

# The original literal listed "metric" and "bagging_freq" twice; Python keeps
# the last occurrence, so only the effective values are written here.
params = {
    "objective": "regression",
    "metric": ["mse"],
    "force_row_wise": True,
    "learning_rate": 0.015,
    "bagging_freq": 2,
    "num_iterations": 200,
    "num_leaves": 100,
    "min_child_samples": 30,
    "min_child_weight": 0.001,
    "bagging_fraction": 0.9,
}

train_frames_bloodPressure = []
squared_errors_bloodPressure = []

# 97 origins, matching the XGBoost cell: each origin reads day column i + 9 and
# `dataset` only has day columns 0..106, so range(100) raised KeyError at i = 98.
for i in range(97):
    # DataFrame.append was removed in pandas 2.0; concatenate the collected windows.
    train_frames_bloodPressure.append(build_traindata_bloodPressure(i))
    train_data_bloodPressure = pd.concat(train_frames_bloodPressure)

    x = train_data_bloodPressure.iloc[:, 1:]
    y = train_data_bloodPressure.iloc[:, 0]

    trainData = lgb.Dataset(data=x, label=y)
    m_lgb = lgb.train(params, trainData)

    # step 1: features are observed values only
    test_data1 = build_testdata0_bloodPressure(i + 1)
    x1 = test_data1.iloc[:, 1:]
    y1 = test_data1.iloc[:, 0]
    y1_hat = m_lgb.predict(x1)

    # step 2: newest lag is the step-1 prediction
    test_data2 = build_testdata_bloodPressure(test_data1, y1_hat, i + 2)
    x2 = test_data2.iloc[:, 1:]
    y2 = test_data2.iloc[:, 0]
    y2_hat = m_lgb.predict(x2)

    # step 3: newest lag is the step-2 prediction
    test_data3 = build_testdata_bloodPressure(test_data2, y2_hat, i + 3)
    x3 = test_data3.iloc[:, 1:]
    y3 = test_data3.iloc[:, 0]
    y3_hat = m_lgb.predict(x3)

    y_list_bloodPressure.extend(list(y1))
    y_list_bloodPressure.extend(list(y2))
    y_list_bloodPressure.extend(list(y3))

    y_hat_list_bloodPressure.extend(list(y1_hat))
    y_hat_list_bloodPressure.extend(list(y2_hat))
    y_hat_list_bloodPressure.extend(list(y3_hat))

    # per-row squared errors, indexed like the test rows
    squared_errors_bloodPressure.extend([(y1 - y1_hat) ** 2, (y2 - y2_hat) ** 2, (y3 - y3_hat) ** 2])

    # row-weighted mean of the three per-step MSEs for this origin
    mse_bloodPressure.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

mse_list_bloodPressure = pd.concat(squared_errors_bloodPressure)


In [None]:
# forecast bloodPressure: xgboost
#
# Same expanding-window, 3-step recursive evaluation as the LightGBM cell,
# with an XGBRegressor refitted at every origin.

mse_xgboost_bloodPressure = []
y_list_xgboost_bloodPressure = []
y_hat_list_xgboost_bloodPressure = []
train_data_xgboost_bloodPressure = pd.DataFrame()
mse_list_xgboost_bloodPressure = pd.Series(dtype=float)

params = {
    'max_depth': 2,
    'n_estimators': 100,
    'learning_rate': 0.1,
}

train_frames_xgboost_bloodPressure = []
squared_errors_xgboost_bloodPressure = []

for i in range(97):
    # DataFrame.append was removed in pandas 2.0; concatenate the collected windows.
    train_frames_xgboost_bloodPressure.append(build_traindata_bloodPressure(i))
    train_data_xgboost_bloodPressure = pd.concat(train_frames_xgboost_bloodPressure)

    x = train_data_xgboost_bloodPressure.iloc[:, 1:]
    y = train_data_xgboost_bloodPressure.iloc[:, 0]

    model = xgb.XGBRegressor(**params)
    model.fit(x, y)

    # step 1: features are observed values only
    test_data1 = build_testdata0_bloodPressure(i + 1)
    x1 = test_data1.iloc[:, 1:]
    y1 = test_data1.iloc[:, 0]
    y1_hat = model.predict(x1)

    # step 2: newest lag is the step-1 prediction
    test_data2 = build_testdata_bloodPressure(test_data1, y1_hat, i + 2)
    x2 = test_data2.iloc[:, 1:]
    y2 = test_data2.iloc[:, 0]
    y2_hat = model.predict(x2)

    # step 3: newest lag is the step-2 prediction
    test_data3 = build_testdata_bloodPressure(test_data2, y2_hat, i + 3)
    x3 = test_data3.iloc[:, 1:]
    y3 = test_data3.iloc[:, 0]
    y3_hat = model.predict(x3)

    y_list_xgboost_bloodPressure.extend(list(y1))
    y_list_xgboost_bloodPressure.extend(list(y2))
    y_list_xgboost_bloodPressure.extend(list(y3))

    y_hat_list_xgboost_bloodPressure.extend(list(y1_hat))
    y_hat_list_xgboost_bloodPressure.extend(list(y2_hat))
    y_hat_list_xgboost_bloodPressure.extend(list(y3_hat))

    # per-row squared errors, indexed like the test rows
    squared_errors_xgboost_bloodPressure.extend([(y1 - y1_hat) ** 2, (y2 - y2_hat) ** 2, (y3 - y3_hat) ** 2])

    # row-weighted mean of the three per-step MSEs for this origin
    mse_xgboost_bloodPressure.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

mse_list_xgboost_bloodPressure = pd.concat(squared_errors_xgboost_bloodPressure)


In [None]:
# Blood-pressure MSE per method. Classical baselines first (mean of the
# per-series MSEs over all buckets), then LightGBM and XGBoost over all pooled
# predictions.
for buckets in (
    mse_avg_BLOODPRESSURE,
    mse_naive_BLOODPRESSURE,
    mse_ets_BLOODPRESSURE,
    mse_movingavg_BLOODPRESSURE,
    mse_arima_BLOODPRESSURE,
    mse_arima2_BLOODPRESSURE,
    mse_arima3_BLOODPRESSURE,
):
    total = sum(sum(bucket) for bucket in buckets)
    count = sum(len(bucket) for bucket in buckets)
    print(total / count)
print(mean_squared_error(y_list_bloodPressure, y_hat_list_bloodPressure))
print(mean_squared_error(y_list_xgboost_bloodPressure, y_hat_list_xgboost_bloodPressure))


In [None]:
# Blood-pressure RMSE per method: square roots of the MSE figures, in the same
# order (seven classical baselines, then LightGBM, then XGBoost).
for buckets in (
    mse_avg_BLOODPRESSURE,
    mse_naive_BLOODPRESSURE,
    mse_ets_BLOODPRESSURE,
    mse_movingavg_BLOODPRESSURE,
    mse_arima_BLOODPRESSURE,
    mse_arima2_BLOODPRESSURE,
    mse_arima3_BLOODPRESSURE,
):
    pooled_mse = sum(sum(bucket) for bucket in buckets) / sum(len(bucket) for bucket in buckets)
    print(pooled_mse ** 0.5)
print(mean_squared_error(y_list_bloodPressure, y_hat_list_bloodPressure) ** 0.5)
print(mean_squared_error(y_list_xgboost_bloodPressure, y_hat_list_xgboost_bloodPressure) ** 0.5)

# forecast o2saturation


In [None]:
# forecast o2Saturation: traditional methods
#
# Every series in time_series_set[i] has its last 3 observations held out. Each
# baseline is fitted on the remaining history and scored with the MSE of its
# 3-step forecast; scores are stored in bucket i of the matching list.

mse_avg_O2SATURATION = [[] for _ in range(100)]
mse_naive_O2SATURATION = [[] for _ in range(100)]
mse_ets_O2SATURATION = [[] for _ in range(100)]
mse_movingavg_O2SATURATION = [[] for _ in range(100)]
mse_arima_O2SATURATION = [[] for _ in range(100)]
mse_arima2_O2SATURATION = [[] for _ in range(100)]
mse_arima3_O2SATURATION = [[] for _ in range(100)]

for i in range(100):
    for series in time_series_set[i]:

        # Not called `data`, so this cell does not rebind that notebook-level name.
        o2_saturation = series["o2Saturation"]
        history = o2_saturation[:-3]
        actual = o2_saturation[-3:]
        first_step = len(o2_saturation) - 3
        last_step = len(o2_saturation) - 1

        # average: mean of the history repeated for the 3 held-out steps
        y_hat_1 = [sum(history) / len(history)] * 3
        mse_avg_O2SATURATION[i].append(mean_squared_error(actual, y_hat_1))

        # "naive": ExponentialSmoothing without trend or seasonal component,
        # i.e. simple exponential smoothing rather than a last-value forecast
        y_hat_2 = ExponentialSmoothing(history).fit().predict(first_step, last_step)
        mse_naive_O2SATURATION[i].append(mean_squared_error(actual, y_hat_2))

        # ets: exponential smoothing with an additive trend
        y_hat_3 = ExponentialSmoothing(history, trend="add").fit().predict(first_step, last_step)
        mse_ets_O2SATURATION[i].append(mean_squared_error(actual, y_hat_3))

        # ARIMA family; order (0, 0, 1) is the MA(1) model stored as "movingavg"
        for order, buckets in (
            ((0, 0, 1), mse_movingavg_O2SATURATION),
            ((1, 1, 1), mse_arima_O2SATURATION),
            ((0, 1, 1), mse_arima2_O2SATURATION),
            ((1, 1, 0), mse_arima3_O2SATURATION),
        ):
            forecast = ARIMA(history, order=order).fit().predict(first_step, last_step)
            buckets[i].append(mean_squared_error(actual, forecast))


In [None]:
def build_traindata_o2Saturation(i):
    """Return a 7-day o2Saturation window starting at day ``i``.

    Only rows of ``dataset`` whose o2Saturation on day ``i + 9`` is present are
    kept. Columns are ordered from day ``i + 6`` down to day ``i`` and renamed
    ``o2Saturation6`` ... ``o2Saturation0``.
    """
    lags = range(6, -1, -1)
    has_day = dataset["o2Saturation" + str(i + 9)].notna()
    window = dataset.loc[has_day, ["o2Saturation" + str(i + lag) for lag in lags]]
    window.columns = ["o2Saturation" + str(lag) for lag in lags]
    return window

def build_testdata0_o2Saturation(i):
    """Return the first-step test window of o2Saturation starting at day ``i``.

    Same layout as the training window (``o2Saturation6`` = day ``i + 6`` ...
    ``o2Saturation0`` = day ``i``), but rows are kept when the o2Saturation on
    day ``i + 8`` is present.
    """
    lags = range(6, -1, -1)
    has_day = dataset["o2Saturation" + str(i + 8)].notna()
    window = dataset.loc[has_day, ["o2Saturation" + str(i + lag) for lag in lags]]
    window.columns = ["o2Saturation" + str(lag) for lag in lags]
    return window

def build_testdata_o2Saturation(train_data, y, i):
    """Advance an o2Saturation window by one day for recursive forecasting.

    Parameters
    ----------
    train_data : DataFrame with columns ``o2Saturation6`` ... ``o2Saturation0``.
        It is not modified.
    y : values written to ``o2Saturation5`` (the callers pass the previous
        step's predictions); assigned positionally, one per row.
    i : day offset; the new ``o2Saturation6`` is read from the ``dataset``
        column for day ``i + 6``, aligned on the index.

    Returns
    -------
    DataFrame with the same columns, where ``o2Saturation0..4`` hold the former
    ``o2Saturation1..5``.
    """
    # Explicit copy: assigning columns on a column-selected slice triggers
    # SettingWithCopyWarning.
    test_data = train_data[train_data.columns].copy()
    # Ascending order, so each column is read before it is overwritten.
    for j in range(5):
        test_data["o2Saturation" + str(j)] = test_data["o2Saturation" + str(j + 1)]
    test_data["o2Saturation5"] = y
    test_data["o2Saturation6"] = dataset["o2Saturation" + str(i + 6)]
    return test_data


In [None]:
# forecast o2Saturation: lightgbm
#
# Expanding-window evaluation: at origin i the model is retrained on all
# training windows built so far, then forecasts 3 steps recursively (each
# prediction is fed back as the newest lag for the next step).

mse_o2Saturation = []
y_list_o2Saturation = []
y_hat_list_o2Saturation = []
mse_list_o2Saturation = pd.Series(dtype=float)
train_data_o2Saturation = pd.DataFrame()

# The original literal listed "metric" and "bagging_freq" twice; Python keeps
# the last occurrence, so only the effective values are written here.
params = {
    "objective": "regression",
    "metric": ["mse"],
    "force_row_wise": True,
    "learning_rate": 0.015,
    "bagging_freq": 2,
    "num_iterations": 200,
    "num_leaves": 100,
    "min_child_samples": 30,
    "min_child_weight": 0.001,
    "bagging_fraction": 0.9,
}

train_frames_o2Saturation = []
squared_errors_o2Saturation = []

# 97 origins, matching the XGBoost cell: each origin reads day column i + 9 and
# `dataset` only has day columns 0..106, so range(100) raised KeyError at i = 98.
for i in range(97):
    # DataFrame.append was removed in pandas 2.0; concatenate the collected windows.
    train_frames_o2Saturation.append(build_traindata_o2Saturation(i))
    train_data_o2Saturation = pd.concat(train_frames_o2Saturation)

    x = train_data_o2Saturation.iloc[:, 1:]
    y = train_data_o2Saturation.iloc[:, 0]

    trainData = lgb.Dataset(data=x, label=y)
    m_lgb = lgb.train(params, trainData)

    # step 1: features are observed values only
    test_data1 = build_testdata0_o2Saturation(i + 1)
    x1 = test_data1.iloc[:, 1:]
    y1 = test_data1.iloc[:, 0]
    y1_hat = m_lgb.predict(x1)

    # step 2: newest lag is the step-1 prediction
    test_data2 = build_testdata_o2Saturation(test_data1, y1_hat, i + 2)
    x2 = test_data2.iloc[:, 1:]
    y2 = test_data2.iloc[:, 0]
    y2_hat = m_lgb.predict(x2)

    # step 3: newest lag is the step-2 prediction
    test_data3 = build_testdata_o2Saturation(test_data2, y2_hat, i + 3)
    x3 = test_data3.iloc[:, 1:]
    y3 = test_data3.iloc[:, 0]
    y3_hat = m_lgb.predict(x3)

    y_list_o2Saturation.extend(list(y1))
    y_list_o2Saturation.extend(list(y2))
    y_list_o2Saturation.extend(list(y3))

    y_hat_list_o2Saturation.extend(list(y1_hat))
    y_hat_list_o2Saturation.extend(list(y2_hat))
    y_hat_list_o2Saturation.extend(list(y3_hat))

    # per-row squared errors, indexed like the test rows
    squared_errors_o2Saturation.extend([(y1 - y1_hat) ** 2, (y2 - y2_hat) ** 2, (y3 - y3_hat) ** 2])

    # row-weighted mean of the three per-step MSEs for this origin
    mse_o2Saturation.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

mse_list_o2Saturation = pd.concat(squared_errors_o2Saturation)


In [None]:
# forecast o2Saturation: xgboost
#
# Same expanding-window, 3-step recursive evaluation as the LightGBM cell,
# with an XGBRegressor refitted at every origin.

mse_xgboost_o2Saturation = []
y_list_xgboost_o2Saturation = []
y_hat_list_xgboost_o2Saturation = []
train_data_xgboost_o2Saturation = pd.DataFrame()
mse_list_xgboost_o2Saturation = pd.Series(dtype=float)

params = {
    'max_depth': 2,
    'n_estimators': 100,
    'learning_rate': 0.1,
}

train_frames_xgboost_o2Saturation = []
squared_errors_xgboost_o2Saturation = []

for i in range(97):
    # DataFrame.append was removed in pandas 2.0; concatenate the collected windows.
    train_frames_xgboost_o2Saturation.append(build_traindata_o2Saturation(i))
    train_data_xgboost_o2Saturation = pd.concat(train_frames_xgboost_o2Saturation)

    x = train_data_xgboost_o2Saturation.iloc[:, 1:]
    y = train_data_xgboost_o2Saturation.iloc[:, 0]

    model = xgb.XGBRegressor(**params)
    model.fit(x, y)

    # step 1: features are observed values only
    test_data1 = build_testdata0_o2Saturation(i + 1)
    x1 = test_data1.iloc[:, 1:]
    y1 = test_data1.iloc[:, 0]
    y1_hat = model.predict(x1)

    # step 2: newest lag is the step-1 prediction
    test_data2 = build_testdata_o2Saturation(test_data1, y1_hat, i + 2)
    x2 = test_data2.iloc[:, 1:]
    y2 = test_data2.iloc[:, 0]
    y2_hat = model.predict(x2)

    # step 3: newest lag is the step-2 prediction
    test_data3 = build_testdata_o2Saturation(test_data2, y2_hat, i + 3)
    x3 = test_data3.iloc[:, 1:]
    y3 = test_data3.iloc[:, 0]
    y3_hat = model.predict(x3)

    y_list_xgboost_o2Saturation.extend(list(y1))
    y_list_xgboost_o2Saturation.extend(list(y2))
    y_list_xgboost_o2Saturation.extend(list(y3))

    y_hat_list_xgboost_o2Saturation.extend(list(y1_hat))
    y_hat_list_xgboost_o2Saturation.extend(list(y2_hat))
    y_hat_list_xgboost_o2Saturation.extend(list(y3_hat))

    # per-row squared errors, indexed like the test rows
    squared_errors_xgboost_o2Saturation.extend([(y1 - y1_hat) ** 2, (y2 - y2_hat) ** 2, (y3 - y3_hat) ** 2])

    # row-weighted mean of the three per-step MSEs for this origin
    mse_xgboost_o2Saturation.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

mse_list_xgboost_o2Saturation = pd.concat(squared_errors_xgboost_o2Saturation)


In [None]:
# o2Saturation MSE per method. Classical baselines first (mean of the
# per-series MSEs over all buckets), then LightGBM and XGBoost over all pooled
# predictions.
for buckets in (
    mse_avg_O2SATURATION,
    mse_naive_O2SATURATION,
    mse_ets_O2SATURATION,
    mse_movingavg_O2SATURATION,
    mse_arima_O2SATURATION,
    mse_arima2_O2SATURATION,
    mse_arima3_O2SATURATION,
):
    total = sum(sum(bucket) for bucket in buckets)
    count = sum(len(bucket) for bucket in buckets)
    print(total / count)
print(mean_squared_error(y_list_o2Saturation, y_hat_list_o2Saturation))
print(mean_squared_error(y_list_xgboost_o2Saturation, y_hat_list_xgboost_o2Saturation))


In [None]:
# o2Saturation RMSE per method: square roots of the MSE figures, in the same
# order (seven classical baselines, then LightGBM, then XGBoost).
for buckets in (
    mse_avg_O2SATURATION,
    mse_naive_O2SATURATION,
    mse_ets_O2SATURATION,
    mse_movingavg_O2SATURATION,
    mse_arima_O2SATURATION,
    mse_arima2_O2SATURATION,
    mse_arima3_O2SATURATION,
):
    pooled_mse = sum(sum(bucket) for bucket in buckets) / sum(len(bucket) for bucket in buckets)
    print(pooled_mse ** 0.5)
print(mean_squared_error(y_list_o2Saturation, y_hat_list_o2Saturation) ** 0.5)
print(mean_squared_error(y_list_xgboost_o2Saturation, y_hat_list_xgboost_o2Saturation) ** 0.5)


# forecast ADM

In [None]:
# RMSE for each of the three forecast steps, then for all three pooled.
# NOTE(review): _y1 ... _y3_ are not assigned anywhere in the visible cells;
# presumably lists of actual / predicted values per step - confirm their origin.
for actual, predicted in ((_y1, _y1_), (_y2, _y2_), (_y3, _y3_)):
    print(mean_squared_error(actual, predicted) ** 0.5)
print(mean_squared_error(_y3 + _y2 + _y1, _y3_ + _y2_ + _y1_) ** 0.5)

In [None]:
# Mean MSE per bucket (first 50 buckets of time_series_set) for every method,
# plotted as one line per method.
# NOTE(review): the mse_*_ADM lists are created by the "# traditional" cell
# further down, so this cell only works after that cell has run. `mse` and
# `mse_xgboost` are not defined in the visible cells - confirm their origin.

def _bucket_means(buckets, limit=50):
    """Return the mean of each of the first `limit` buckets (NaN when a bucket is empty)."""
    # An empty bucket used to raise ZeroDivisionError; NaN leaves a gap in the plot instead.
    return [sum(x) / len(x) if len(x) else float("nan") for x in buckets[:limit]]

a = pd.DataFrame({"naive": _bucket_means(mse_naive_ADM)
                  ,"mean": _bucket_means(mse_avg_ADM)
                  ,"exponential smoothing": _bucket_means(mse_ets_ADM)
                  ,"ARIMA(1,1,1)": _bucket_means(mse_arima_ADM)
                  ,"ARIMA(0,1,1)": _bucket_means(mse_arima2_ADM)
                  ,"ARIMA(1,1,0)": _bucket_means(mse_arima3_ADM)
                  ,"Light GBM": mse[:50]
                  ,"Xgboost": mse_xgboost[:50]})
a.plot(figsize=(12,8))

In [None]:
# traditional
#
# Every series in time_series_set[i] has its last 3 ADM observations held out.
# Each baseline is fitted on the remaining history and scored with the MSE of
# its 3-step forecast; scores are stored in bucket i of the matching list.

mse_avg_ADM = [[] for _ in range(100)]
mse_naive_ADM = [[] for _ in range(100)]
mse_ets_ADM = [[] for _ in range(100)]
mse_movingavg_ADM = [[] for _ in range(100)]
mse_arima_ADM = [[] for _ in range(100)]
mse_arima2_ADM = [[] for _ in range(100)]
mse_arima3_ADM = [[] for _ in range(100)]

# Actual (y1, y2, y3) and ARIMA(1, 1, 0)-predicted (y1_, y2_, y3_) values for
# the first, second and third held-out step of every series.
y1 = []
y1_ = []
y2 = []
y2_ = []
y3 = []
y3_ = []

for i in range(100):
    for series in time_series_set[i]:

        # Not called `data`, so this cell does not rebind that notebook-level name.
        adm = series["ADM"]
        history = adm[:-3]
        actual = adm[-3:]
        first_step = len(adm) - 3
        last_step = len(adm) - 1

        # average: mean of the history repeated for the 3 held-out steps
        y_hat_1 = [sum(history) / len(history)] * 3
        mse_avg_ADM[i].append(mean_squared_error(actual, y_hat_1))

        # "naive": ExponentialSmoothing without trend or seasonal component,
        # i.e. simple exponential smoothing rather than a last-value forecast
        y_hat_2 = ExponentialSmoothing(history).fit().predict(first_step, last_step)
        mse_naive_ADM[i].append(mean_squared_error(actual, y_hat_2))

        # ets: exponential smoothing with an additive trend
        y_hat_3 = ExponentialSmoothing(history, trend="add").fit().predict(first_step, last_step)
        mse_ets_ADM[i].append(mean_squared_error(actual, y_hat_3))

        # ARIMA family; order (0, 0, 1) is the MA(1) model stored as "movingavg"
        for order, buckets in (
            ((0, 0, 1), mse_movingavg_ADM),
            ((1, 1, 1), mse_arima_ADM),
            ((0, 1, 1), mse_arima2_ADM),
        ):
            forecast = ARIMA(history, order=order).fit().predict(first_step, last_step)
            buckets[i].append(mean_squared_error(actual, forecast))

        # ARIMA(1, 1, 0) is kept separate because its forecasts feed y1_..y3_.
        y_hat_8 = ARIMA(history, order=(1, 1, 0)).fit().predict(first_step, last_step)
        mse_arima3_ADM[i].append(mean_squared_error(actual, y_hat_8))

        y1.append(adm.values[-3])
        y1_.append(y_hat_8.values[-3])
        y2.append(adm.values[-2])
        y2_.append(y_hat_8.values[-2])
        y3.append(adm.values[-1])
        y3_.append(y_hat_8.values[-1])

In [None]:
def build_traindata(i, frame=None):
    """Build the ADM training table for the six-day window starting at day ``i``.

    Rows are the patients whose ``ADM`` value on day ``i + 9`` is not NaN.
    The first column (``ADM6``) is the regression target, taken from day
    ``i + 6``; the remaining columns are the static features, the lagged
    readings renamed relative to the window, and 3-/6-day means that end on
    day ``i + 5``.

    Parameters
    ----------
    i : int
        First day of the feature window.
    frame : pandas.DataFrame, optional
        Wide per-patient table to read from. Defaults to the notebook-level
        ``dataset`` so existing ``build_traindata(i)`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        An independent copy; the source table is never modified.
    """
    source = dataset if frame is None else frame

    def window_mean(metric, n_days):
        # Mean over days i+5, i+4, ... (n_days of them); NaN if any day is missing.
        days = [source[metric + str(i + 5 - back)] for back in range(n_days)]
        return sum(days[1:], days[0]) / n_days

    eligible = source[source["ADM" + str(i + 9)].notna()]
    selected = ["ADM" + str(i + 6), 'age', 'gender', 'BMI', "temperature" + str(i + 5), "temperature" + str(i + 4), "bloodPressure" + str(i + 5), "o2Saturation" + str(i + 5)]
    selected += ["ADM" + str(i + lag) for lag in range(5, -1, -1)]
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the source (avoids SettingWithCopyWarning).
    train_data = eligible[selected].copy()
    train_data.columns = ["ADM6", 'age', 'gender', 'BMI', "temperature5","temperature4", "bloodPressure5", "o2Saturation5", "ADM5", "ADM4", "ADM3", "ADM2", "ADM1", "ADM0"]

    # The means are computed on the full source table and aligned on the index
    # when assigned, so only the eligible rows are kept.
    for metric in ("temperature", "bloodPressure", "o2Saturation"):
        train_data[metric + "_3days"] = window_mean(metric, 3)
        train_data[metric + "_6days"] = window_mean(metric, 6)
    train_data["ADM_6days"] = window_mean("ADM", 6)
    train_data["ADM_3days"] = window_mean("ADM", 3)

    return train_data

def build_testdata0(i, frame=None):
    """Build the first-day ADM test table for the six-day window starting at day ``i``.

    Same layout as the training table (target ``ADM6`` from day ``i + 6``
    first, then static features, lagged readings and 3-/6-day means ending on
    day ``i + 5``), but rows are the patients whose ``ADM`` value on day
    ``i + 8`` is not NaN.

    Parameters
    ----------
    i : int
        First day of the feature window.
    frame : pandas.DataFrame, optional
        Wide per-patient table to read from. Defaults to the notebook-level
        ``dataset`` so existing ``build_testdata0(i)`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        An independent copy; the source table is never modified.
    """
    source = dataset if frame is None else frame

    def window_mean(metric, n_days):
        # Mean over days i+5, i+4, ... (n_days of them); NaN if any day is missing.
        days = [source[metric + str(i + 5 - back)] for back in range(n_days)]
        return sum(days[1:], days[0]) / n_days

    eligible = source[source["ADM" + str(i + 8)].notna()]
    selected = ["ADM" + str(i + 6), 'age', 'gender', 'BMI', "temperature" + str(i + 5), "temperature" + str(i + 4), "bloodPressure" + str(i + 5), "o2Saturation" + str(i + 5)]
    selected += ["ADM" + str(i + lag) for lag in range(5, -1, -1)]
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the source (avoids SettingWithCopyWarning).
    train_data = eligible[selected].copy()
    train_data.columns = ["ADM6", 'age', 'gender', 'BMI', "temperature5", "temperature4", "bloodPressure5", "o2Saturation5", "ADM5", "ADM4", "ADM3", "ADM2", "ADM1", "ADM0"]

    # The means are computed on the full source table and aligned on the index
    # when assigned, so only the eligible rows are kept.
    for metric in ("temperature", "bloodPressure", "o2Saturation"):
        train_data[metric + "_3days"] = window_mean(metric, 3)
        train_data[metric + "_6days"] = window_mean(metric, 6)
    train_data["ADM_6days"] = window_mean("ADM", 6)
    train_data["ADM_3days"] = window_mean("ADM", 3)
    return train_data

def build_testdata(train_data, y, i, frame=None):
    """Roll an ADM feature table forward by one day using a prediction.

    The lag columns are shifted (``ADM0 <- ADM1`` ... ``ADM4 <- ADM5``), the
    newest lag ``ADM5`` is filled with ``y``, the target column ``ADM6`` is
    refreshed from column ``"ADM" + str(i + 6)`` of the wide table, and the
    3-/6-day ADM means are recomputed from the shifted lags. All other
    columns are carried over unchanged.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Feature table of the previous forecast step (columns ``ADM0``..``ADM6``
        plus the other features). It is not modified.
    y : array-like
        Predictions for the previous step, one per row of ``train_data``.
    i : int
        Offset such that the new target is read from day ``i + 6``.
    frame : pandas.DataFrame, optional
        Wide per-patient table to read the target from. Defaults to the
        notebook-level ``dataset`` so existing three-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        A new table with the same columns as ``train_data``.
    """
    source = dataset if frame is None else frame
    # Work on an explicit copy so the caller's table is left untouched and the
    # assignments below do not trigger SettingWithCopyWarning.
    test_data = train_data.copy()
    for j in range(5):
        test_data["ADM" + str(j)] = test_data["ADM" + str(j + 1)]
    test_data["ADM5"] = y
    # Aligned on the index, so only the rows present in test_data are filled.
    test_data["ADM6"] = source["ADM" + str(i + 6)]
    lags = [test_data["ADM" + str(j)] for j in range(6)]
    test_data["ADM_6days"] = (lags[0] + lags[1] + lags[2] + lags[3] + lags[4] + lags[5]) / 6
    test_data["ADM_3days"] = (lags[3] + lags[4] + lags[5]) / 3

    return test_data

In [None]:
# light gbm

# Accumulators filled by the training/forecast loop in this cell.
mse = []            # one weighted MSE per loop iteration
y_list = []         # pooled actual values (days 1-3)
y_hat_list = []     # pooled predictions (days 1-3)
# Explicit dtype: a bare pd.Series() leaves the dtype of the empty series
# to a version-dependent default.
mse_list = pd.Series(dtype=float)
# Training tables, grown by one window per loop iteration.
train_data = pd.DataFrame()
train_data_temperature = pd.DataFrame()
train_data_bloodPressure = pd.DataFrame()
train_data_o2Saturation = pd.DataFrame()

# Per-day actuals (_y1.._y3) and predictions (_y1_.._y3_).
_y1 = []
_y1_ = []
_y2 = []
_y2_ = []
_y3 = []
_y3_ = []

# LightGBM parameters. The original literal listed "metric" and
# "bagging_freq" twice; in a dict literal the last value wins, so only the
# effective values (["mse"] and 2) are kept here.
params = {
          "objective" : "regression",
          "force_row_wise" : True,
          "learning_rate" : 0.015,
          "metric": ["mse"],
          'num_iterations' : 200,
          'num_leaves': 100,
          'min_child_samples': 30,
          'min_child_weight': 0.001,
          'bagging_fraction': 0.9,
          'bagging_freq': 2
}

# XGBoost parameters used for the o2Saturation helper model.
params_xgboost = {
            'max_depth': 2,
            'n_estimators': 100,
            'learning_rate': 0.1
}

# Feature columns fed to the ADM model (selected by name from the feature tables).
cols = ['age', 'gender', 'BMI', 'temperature5', 'bloodPressure5','o2Saturation5', 'ADM5', 'ADM4', 'ADM3','ADM2', 'ADM1', 'ADM0', 'temperature_3days', 'temperature_6days', 'bloodPressure_3days', 'bloodPressure_6days', 'o2Saturation_3days', 'o2Saturation_6days', 'ADM_6days', 'ADM_3days']

# Expanding-window evaluation: at step i every model is refit on all windows
# 0..i, then ADM is forecast recursively for three days, feeding the
# predictions of the vital-sign helper models back in as features.
for i in range(97):
    # train model
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    # temperature
    train_data_temperature = pd.concat([train_data_temperature, build_traindata_temperature(i)])
    x_temperature = train_data_temperature.iloc[:,1:]
    y_temperature = train_data_temperature.iloc[:,0]
    trainData_temperature = lgb.Dataset(data=x_temperature,label=y_temperature)
    m_lgb_temperature = lgb.train(params, trainData_temperature)
    # bloodPressure
    train_data_bloodPressure = pd.concat([train_data_bloodPressure, build_traindata_bloodPressure(i)])
    x_bloodPressure = train_data_bloodPressure.iloc[:,1:]
    y_bloodPressure = train_data_bloodPressure.iloc[:,0]
    trainData_bloodPressure = lgb.Dataset(data=x_bloodPressure,label=y_bloodPressure)
    m_lgb_bloodPressure = lgb.train(params, trainData_bloodPressure)
    #o2Saturation
    train_data_o2Saturation = pd.concat([train_data_o2Saturation, build_traindata_o2Saturation(i)])
    x_o2Saturation = train_data_o2Saturation.iloc[:,1:]
    y_o2Saturation = train_data_o2Saturation.iloc[:,0]
    model_o2Saturation=xgb.XGBRegressor(**params_xgboost)
    model_o2Saturation.fit(x_o2Saturation, y_o2Saturation)
    # ADM
    train_data = pd.concat([train_data, build_traindata(i)])
    x = train_data.iloc[:,1:][cols]
    y = train_data.iloc[:,0]
    trainData = lgb.Dataset(data=x,label=y)
    m_lgb = lgb.train(params, trainData)

    # forecast day1
    # temperature
    test_data1_temperature = build_testdata0_temperature(i+1)
    x1_temperature = test_data1_temperature.iloc[:,1:]
    y1_temperature = test_data1_temperature.iloc[:,0]
    y1_hat_temperature = m_lgb_temperature.predict(x1_temperature)
    x1_temperature["pred"] = y1_hat_temperature
    # bloodPressure
    test_data1_bloodPressure = build_testdata0_bloodPressure(i+1)
    x1_bloodPressure = test_data1_bloodPressure.iloc[:,1:]
    y1_bloodPressure = test_data1_bloodPressure.iloc[:,0]
    y1_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure)
    x1_bloodPressure["pred"] = y1_hat_bloodPressure
    # o2Saturation
    test_data1_o2Saturation = build_testdata0_o2Saturation(i+1)
    x1_o2Saturation = test_data1_o2Saturation.iloc[:,1:]
    y1_o2Saturation = test_data1_o2Saturation.iloc[:,0]
    y1_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation)
    x1_o2Saturation["pred"] = y1_hat_o2Saturation
    # ADM
    test_data1 = build_testdata0(i+1)
    x1 = test_data1.iloc[:,1:][cols]
    y1 = test_data1.iloc[:,0]
    y1_hat = m_lgb.predict(x1)
    x1["pred"] = y1_hat

    # forecast day2
    # NOTE(review): the three helper models below are fed
    # x1_*.iloc[:, :-1], i.e. the day-1 features with the "pred" column
    # dropped, so each y2_hat_* repeats the day-1 prediction while the freshly
    # built x2_* features go unused as model input. Confirm whether
    # predict(x2_*) was intended; left unchanged because the *_temperature /
    # *_bloodPressure / *_o2Saturation builders are not visible here.
    # temperature
    test_data2_temperature = build_testdata_temperature(test_data1_temperature, y1_hat_temperature, i+2)
    x2_temperature = test_data2_temperature.iloc[:,1:]
    y2_temperature = test_data2_temperature.iloc[:,0]
    y2_hat_temperature = m_lgb_temperature.predict(x1_temperature.iloc[:, :-1])
    x2_temperature["pred"] = y2_hat_temperature
    # bloodPressure
    test_data2_bloodPressure = build_testdata_bloodPressure(test_data1_bloodPressure, y1_hat_bloodPressure, i+2)
    x2_bloodPressure = test_data2_bloodPressure.iloc[:,1:]
    y2_bloodPressure = test_data2_bloodPressure.iloc[:,0]
    y2_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure.iloc[:, :-1])
    x2_bloodPressure["pred"] = y2_hat_bloodPressure
    # o2Saturation
    test_data2_o2Saturation = build_testdata_o2Saturation(test_data1_o2Saturation, y1_hat_o2Saturation, i+2)
    x2_o2Saturation = test_data2_o2Saturation.iloc[:,1:]
    y2_o2Saturation = test_data2_o2Saturation.iloc[:,0]
    y2_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation.iloc[:, :-1])
    x2_o2Saturation["pred"] = y2_hat_o2Saturation
    # ADM: roll the lags forward with y1_hat, then replace the vital-sign
    # features with values derived from the day-1 helper predictions.
    test_data2 = build_testdata(test_data1, y1_hat, i+2)
    for vital, aux in (("temperature", x1_temperature), ("bloodPressure", x1_bloodPressure), ("o2Saturation", x1_o2Saturation)):
        test_data2[vital + "5"] = aux["pred"]
        test_data2[vital + "_3days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"]) / 3
        test_data2[vital + "_6days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"] + aux[vital + "3"] + aux[vital + "2"] + aux[vital + "1"]) / 6
    x2 = test_data2.iloc[:,1:][cols]
    y2 = test_data2.iloc[:,0]
    y2_hat = m_lgb.predict(x2)
    x2["pred"] = y2_hat

    # forecast day3
    # ADM
    # i+3 (was i+2): build_testdata reads the target from day offset + 6, so
    # day 3 must use i+3 to get day i+9; with i+2 the day-3 ground truth was
    # just a second copy of the day-2 ground truth.
    test_data3 = build_testdata(test_data2, y2_hat, i+3)
    for vital, aux in (("temperature", x2_temperature), ("bloodPressure", x2_bloodPressure), ("o2Saturation", x2_o2Saturation)):
        test_data3[vital + "5"] = aux["pred"]
        test_data3[vital + "_3days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"]) / 3
        test_data3[vital + "_6days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"] + aux[vital + "3"] + aux[vital + "2"] + aux[vital + "1"]) / 6
    x3 = test_data3.iloc[:,1:][cols]
    y3 = test_data3.iloc[:,0]
    y3_hat = m_lgb.predict(x3)
    x3["pred"] = y3_hat

    # Pooled actuals / predictions over all three days.
    y_list.extend(list(y1))
    y_list.extend(list(y2))
    y_list.extend(list(y3))

    y_hat_list.extend(list(y1_hat))
    y_hat_list.extend(list(y2_hat))
    y_hat_list.extend(list(y3_hat))

    # Per-day actuals / predictions.
    _y1 += (list(y1))
    _y2 += (list(y2))
    _y3 += (list(y3))

    _y1_ += (list(y1_hat))
    _y2_ += (list(y2_hat))
    _y3_ += (list(y3_hat))

    # Per-row squared errors (pd.concat replaces the removed Series.append).
    mse_list = pd.concat([mse_list, (y1 - x1["pred"]) ** 2, (y2 - x2["pred"]) ** 2, (y3 - x3["pred"]) ** 2])

    # MSE of this iteration, weighting each day by its number of rows.
    mse.append((mean_squared_error(y1,y1_hat) * len(test_data1) + mean_squared_error(y2,y2_hat) * len(test_data2) + mean_squared_error(y3,y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))
    

In [None]:
# xgboost

# Accumulators filled by the training/forecast loop in this cell.
mse_xgboost = []                  # one weighted MSE per loop iteration
y_list_xgboost = []               # pooled actual values (days 1-3)
y_hat_list_xgboost = []           # pooled predictions (days 1-3)
y_list_bi_xgboost = []            # actual "value went down" flags, day by day
y_hat_list_bi_xgboost = []        # predicted "value went down" flags, day by day
y_list_bi_xgboost_3day = []       # actual flags against the 3-day mean
y_hat_list_bi_xgboost_3day = []   # predicted flags against the 3-day mean
# Explicit dtype: a bare pd.Series() leaves the dtype of the empty series
# to a version-dependent default.
mse_list_xgboost = pd.Series(dtype=float)
# Training tables, grown by one window per loop iteration.
train_data = pd.DataFrame()
train_data_temperature = pd.DataFrame()
train_data_bloodPressure = pd.DataFrame()
train_data_o2Saturation = pd.DataFrame()

# Per-day actuals (_y1.._y3) and predictions (_y1_.._y3_).
_y1 = []
_y1_ = []
_y2 = []
_y2_ = []
_y3 = []
_y3_ = []

# XGBoost parameters for the main (ADM) regressor.
params0 = {
            'max_depth': 3,
            'n_estimators': 50,
            'learning_rate': 0.09
}

# LightGBM parameters for the temperature / bloodPressure helper models.
# The original literal listed "metric" and "bagging_freq" twice; in a dict
# literal the last value wins, so only the effective values (["mse"] and 2)
# are kept here.
params = {
          "objective" : "regression",
          "force_row_wise" : True,
          "learning_rate" : 0.015,
          "metric": ["mse"],
          'num_iterations' : 200,
          'num_leaves': 100,
          'min_child_samples': 30,
          'min_child_weight': 0.001,
          'bagging_fraction': 0.9,
          'bagging_freq': 2
}

# XGBoost parameters used for the o2Saturation helper model.
params_xgboost = {
            'max_depth': 2,
            'n_estimators': 100,
            'learning_rate': 0.1
}

# Feature columns fed to the ADM model (selected by name from the feature tables).
cols = ['age', 'gender', 'BMI', 'temperature5', 'bloodPressure5','o2Saturation5', 'ADM5', 'ADM4', 'ADM3','ADM2', 'ADM1', 'ADM0', 'temperature_3days', 'temperature_6days', 'bloodPressure_3days', 'bloodPressure_6days', 'o2Saturation_3days', 'o2Saturation_6days', 'ADM_6days', 'ADM_3days']

# Expanding-window evaluation: at step i every model is refit on all windows
# 0..i, then ADM is forecast recursively for three days, feeding the
# predictions of the vital-sign helper models back in as features.
for i in range(97):
    # train model
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    # temperature
    train_data_temperature = pd.concat([train_data_temperature, build_traindata_temperature(i)])
    x_temperature = train_data_temperature.iloc[:,1:]
    y_temperature = train_data_temperature.iloc[:,0]
    trainData_temperature = lgb.Dataset(data=x_temperature,label=y_temperature)
    m_lgb_temperature = lgb.train(params, trainData_temperature)
    # bloodPressure
    train_data_bloodPressure = pd.concat([train_data_bloodPressure, build_traindata_bloodPressure(i)])
    x_bloodPressure = train_data_bloodPressure.iloc[:,1:]
    y_bloodPressure = train_data_bloodPressure.iloc[:,0]
    trainData_bloodPressure = lgb.Dataset(data=x_bloodPressure,label=y_bloodPressure)
    m_lgb_bloodPressure = lgb.train(params, trainData_bloodPressure)
    #o2Saturation
    train_data_o2Saturation = pd.concat([train_data_o2Saturation, build_traindata_o2Saturation(i)])
    x_o2Saturation = train_data_o2Saturation.iloc[:,1:]
    y_o2Saturation = train_data_o2Saturation.iloc[:,0]
    model_o2Saturation=xgb.XGBRegressor(**params_xgboost)
    model_o2Saturation.fit(x_o2Saturation, y_o2Saturation)
    # ADM
    # Build the current window once; it is both appended to the training set
    # and used for y0 (the last known actual value before the forecast days).
    current_window = build_traindata(i)
    train_data = pd.concat([train_data, current_window])
    x = train_data.iloc[:,1:][cols]
    y = train_data.iloc[:,0]
    y0 = current_window.iloc[:,0]
    model=xgb.XGBRegressor(**params0)
    model.fit(x, y)

    # forecast day1
    # temperature
    test_data1_temperature = build_testdata0_temperature(i+1)
    x1_temperature = test_data1_temperature.iloc[:,1:]
    y1_temperature = test_data1_temperature.iloc[:,0]
    y1_hat_temperature = m_lgb_temperature.predict(x1_temperature)
    x1_temperature["pred"] = y1_hat_temperature
    # bloodPressure
    test_data1_bloodPressure = build_testdata0_bloodPressure(i+1)
    x1_bloodPressure = test_data1_bloodPressure.iloc[:,1:]
    y1_bloodPressure = test_data1_bloodPressure.iloc[:,0]
    y1_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure)
    x1_bloodPressure["pred"] = y1_hat_bloodPressure
    # o2Saturation
    test_data1_o2Saturation = build_testdata0_o2Saturation(i+1)
    x1_o2Saturation = test_data1_o2Saturation.iloc[:,1:]
    y1_o2Saturation = test_data1_o2Saturation.iloc[:,0]
    y1_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation)
    x1_o2Saturation["pred"] = y1_hat_o2Saturation
    # ADM
    test_data1 = build_testdata0(i+1)
    x1 = test_data1.iloc[:,1:][cols]
    y1 = test_data1.iloc[:,0]
    y1_hat = model.predict(x1)
    x1["pred"] = y1_hat

    # forecast day2
    # NOTE(review): the three helper models below are fed
    # x1_*.iloc[:, :-1], i.e. the day-1 features with the "pred" column
    # dropped, so each y2_hat_* repeats the day-1 prediction while the freshly
    # built x2_* features go unused as model input. Confirm whether
    # predict(x2_*) was intended; left unchanged because the *_temperature /
    # *_bloodPressure / *_o2Saturation builders are not visible here.
    # temperature
    test_data2_temperature = build_testdata_temperature(test_data1_temperature, y1_hat_temperature, i+2)
    x2_temperature = test_data2_temperature.iloc[:,1:]
    y2_temperature = test_data2_temperature.iloc[:,0]
    y2_hat_temperature = m_lgb_temperature.predict(x1_temperature.iloc[:, :-1])
    x2_temperature["pred"] = y2_hat_temperature
    # bloodPressure
    test_data2_bloodPressure = build_testdata_bloodPressure(test_data1_bloodPressure, y1_hat_bloodPressure, i+2)
    x2_bloodPressure = test_data2_bloodPressure.iloc[:,1:]
    y2_bloodPressure = test_data2_bloodPressure.iloc[:,0]
    y2_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure.iloc[:, :-1])
    x2_bloodPressure["pred"] = y2_hat_bloodPressure
    # o2Saturation
    test_data2_o2Saturation = build_testdata_o2Saturation(test_data1_o2Saturation, y1_hat_o2Saturation, i+2)
    x2_o2Saturation = test_data2_o2Saturation.iloc[:,1:]
    y2_o2Saturation = test_data2_o2Saturation.iloc[:,0]
    y2_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation.iloc[:, :-1])
    x2_o2Saturation["pred"] = y2_hat_o2Saturation
    # ADM: roll the lags forward with y1_hat, then replace the vital-sign
    # features with values derived from the day-1 helper predictions.
    test_data2 = build_testdata(test_data1, y1_hat, i+2)
    for vital, aux in (("temperature", x1_temperature), ("bloodPressure", x1_bloodPressure), ("o2Saturation", x1_o2Saturation)):
        test_data2[vital + "5"] = aux["pred"]
        test_data2[vital + "_3days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"]) / 3
        test_data2[vital + "_6days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"] + aux[vital + "3"] + aux[vital + "2"] + aux[vital + "1"]) / 6
    x2 = test_data2.iloc[:,1:][cols]
    y2 = test_data2.iloc[:,0]
    y2_hat = model.predict(x2)
    x2["pred"] = y2_hat

    # forecast day3
    # ADM
    # i+3 (was i+2): build_testdata reads the target from day offset + 6, so
    # day 3 must use i+3 to get day i+9; with i+2 the day-3 ground truth was
    # just a second copy of the day-2 ground truth.
    test_data3 = build_testdata(test_data2, y2_hat, i+3)
    for vital, aux in (("temperature", x2_temperature), ("bloodPressure", x2_bloodPressure), ("o2Saturation", x2_o2Saturation)):
        test_data3[vital + "5"] = aux["pred"]
        test_data3[vital + "_3days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"]) / 3
        test_data3[vital + "_6days"] = (aux["pred"] + aux[vital + "5"] + aux[vital + "4"] + aux[vital + "3"] + aux[vital + "2"] + aux[vital + "1"]) / 6
    x3 = test_data3.iloc[:,1:][cols]
    y3 = test_data3.iloc[:,0]
    y3_hat = model.predict(x3)
    x3["pred"] = y3_hat

    # Per-day actuals / predictions.
    _y1 += (list(y1))
    _y2 += (list(y2))
    _y3 += (list(y3))

    _y1_ += (list(y1_hat))
    _y2_ += (list(y2_hat))
    _y3_ += (list(y3_hat))

    # Pooled actuals / predictions over all three days.
    y_list_xgboost.extend(list(y1))
    y_list_xgboost.extend(list(y2))
    y_list_xgboost.extend(list(y3))
    y_hat_list_xgboost.extend(list(y1_hat))
    y_hat_list_xgboost.extend(list(y2_hat))
    y_hat_list_xgboost.extend(list(y3_hat))
    # Direction flags: True when the value drops from one day to the next.
    y_list_bi_xgboost.extend(np.array(y0) - np.array(y1) > 0)
    y_list_bi_xgboost.extend(np.array(y1) - np.array(y2) > 0)
    y_list_bi_xgboost.extend(np.array(y2) - np.array(y3) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y0) - np.array(y1_hat) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y1_hat) - np.array(y2_hat) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y2_hat) - np.array(y3_hat) > 0)
    # Direction flags against the mean of the three forecast days.
    y_list_bi_xgboost_3day.extend(np.array(y0) - (np.array(y1) + np.array(y2) + np.array(y3)) / 3  > 0)
    # Fixed: the predicted mean used y1_hat twice and never y3_hat.
    y_hat_list_bi_xgboost_3day.extend(np.array(y0) - (np.array(y1_hat) + np.array(y2_hat) + np.array(y3_hat)) / 3  > 0)
    # Per-row squared errors (pd.concat replaces the removed Series.append).
    mse_list_xgboost = pd.concat([mse_list_xgboost, (y1 - x1["pred"]) ** 2, (y2 - x2["pred"]) ** 2, (y3 - x3["pred"]) ** 2])
    # MSE of this iteration, weighting each day by its number of rows.
    mse_xgboost.append((mean_squared_error(y1,y1_hat) * len(test_data1) + mean_squared_error(y2,y2_hat) * len(test_data2) + mean_squared_error(y3,y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))
    

In [None]:
# Share of direction flags where the xgboost prediction matches the actual
# flag: first for the day-by-day flags, then for the 3-day-mean flags.
daily_hits = np.array(y_hat_list_bi_xgboost) == y_list_bi_xgboost
print(sum(daily_hits) / len(y_list_bi_xgboost))
three_day_hits = np.array(y_hat_list_bi_xgboost_3day) == y_list_bi_xgboost_3day
print(sum(three_day_hits) / len(y_list_bi_xgboost_3day))

In [None]:
# Coin-flip baseline for the day-by-day direction flags. The guesses come from
# a dedicated seeded generator so the displayed accuracy is identical on every
# run and the global `random` state is left alone.
coin = random.Random(0)
random_guess = np.array([coin.choice([True, False]) for _ in y_list_bi_xgboost])
sum(random_guess == y_list_bi_xgboost) / len(y_list_bi_xgboost)

In [None]:
def _rate(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")

# Confusion matrix of the 3-day direction flags (rows: actual, columns:
# predicted). Passing labels pins it to a 2x2 matrix in (False, True) order
# even if one class never occurs.
cm = confusion_matrix(y_list_bi_xgboost_3day, y_hat_list_bi_xgboost_3day, labels=[False, True])
# Row-major order of a 2x2 matrix: [0][0], [0][1], [1][0], [1][1].
TN, FP, FN, TP = (int(cell) for cell in cm.ravel())
# Sensitivity, hit rate, recall, or true positive rate
TPR = _rate(TP, TP + FN)
# Specificity or true negative rate
TNR = _rate(TN, TN + FP)
# Precision or positive predictive value
PPV = _rate(TP, TP + FP)
# Negative predictive value
NPV = _rate(TN, TN + FN)
# Fall out or false positive rate
FPR = _rate(FP, FP + TN)
# False negative rate
FNR = _rate(FN, TP + FN)
# False discovery rate
FDR = _rate(FP, TP + FP)

precision = PPV  # precision: TP / (TP + FP)
recall = TPR  # recall: TP / (TP + FN)

In [None]:
# Display the precision, TP / (TP + FP), of the 3-day direction flags.
precision

In [None]:
# Number of False entries among the actual 3-day direction flags
# (total count minus the number of True entries).
len(y_list_bi_xgboost_3day) - sum(y_list_bi_xgboost_3day)

In [None]:
# NOTE(review): hard-coded counts with no visible source — presumably copied
# from an earlier cell output; confirm, or derive them from variables.
1193 / (1737 + 1737)

In [None]:
# MSE of the xgboost forecasts, pooled over every loop iteration and all
# three forecast days.
mean_squared_error(y_list_xgboost,y_hat_list_xgboost)

In [None]:
# Overall ADM error per method. For the traditional methods the per-bucket
# MSE lists are pooled (sum of all entries / number of entries); the two
# boosted models are scored on their pooled actual / predicted values.
for template, buckets in (
    ("mean : %s", mse_avg_ADM),
    ("naive : %s", mse_naive_ADM),
    ("ets : %s", mse_ets_ADM),
    ("moving average : %s", mse_movingavg_ADM),
    ("arima(1,1,1) : %s", mse_arima_ADM),
    ("arima(0,1,1) : %s", mse_arima2_ADM),
    ("arima(1,1,0) : %s", mse_arima3_ADM),
):
    pooled_error = sum(sum(bucket) for bucket in buckets)
    n_scores = sum(len(bucket) for bucket in buckets)
    print(template % (pooled_error / n_scores))
print("lightgbm : %s" % (mean_squared_error(y_list,y_hat_list)))
print("xgboost : %s" % (mean_squared_error(y_list_xgboost,y_hat_list_xgboost)))

In [None]:
# Feature importances (at most 30 features) of `model`, i.e. whichever
# XGBRegressor the xgboost loop fitted last.
xgb.plot_importance(model, max_num_features=30)


In [None]:
# Feature importances (at most 30 features) of `m_lgb`, i.e. whichever
# booster the light gbm loop trained last.
lgb.plot_importance(m_lgb, max_num_features=30)


# forecast ETN

In [None]:
# RMSE for forecast day 1, day 2, day 3, and for all three days pooled.
# _y1.._y3 / _y1_.._y3_ hold whatever the most recently run model loop stored.
for actual_values, predicted_values in (
    (_y1, _y1_),
    (_y2, _y2_),
    (_y3, _y3_),
    (_y3 + _y2 + _y1, _y3_ + _y2_ + _y1_),
):
    print(mean_squared_error(actual_values, predicted_values) ** (1/2))

In [None]:
def _bucket_means(buckets):
    """Mean of each bucket; NaN for an empty bucket instead of a ZeroDivisionError."""
    return [sum(x)/len(x) if x else float("nan") for x in buckets]

# Mean MSE per series-length bucket (first 50 buckets) for every method.
# NOTE(review): the mse_*_ETN lists are created by the "# traditional" cell
# further down, and `mse` / `mse_xgboost` hold whatever the last model loops
# produced — run those cells for ETN first, or this plot mixes stale results.
a = pd.DataFrame({"naive": _bucket_means(mse_naive_ETN[:50])
                  ,"mean": _bucket_means(mse_avg_ETN[:50])
                  ,"exponential smoothing": _bucket_means(mse_ets_ETN[:50])
                  ,"ARIMA(1,1,1)": _bucket_means(mse_arima_ETN[:50])
                  ,"ARIMA(0,1,1)": _bucket_means(mse_arima2_ETN[:50])
                  ,"ARIMA(1,1,0)": _bucket_means(mse_arima3_ETN[:50])
                  ,"Light GBM": mse[:50]
                  ,"Xgboost": mse_xgboost[:50]})
a.plot(figsize=(12,8))

In [None]:
# traditional

# One list of per-series MSEs for each bucket of time_series_set.
mse_avg_ETN = [[] for _ in range(100)]
mse_naive_ETN = [[] for _ in range(100)]
mse_ets_ETN = [[] for _ in range(100)]
mse_movingavg_ETN = [[] for _ in range(100)]
mse_arima_ETN = [[] for _ in range(100)]
mse_arima2_ETN = [[] for _ in range(100)]
mse_arima3_ETN = [[] for _ in range(100)]

# Held-out actual values (y1..y3) and the matching ARIMA(1,1,0) forecasts
# (y1_..y3_) for forecast days 1, 2 and 3; one entry per evaluated series.
y1, y1_ = [], []
y2, y2_ = [], []
y3, y3_ = [], []


for i in range(100):
    for series in time_series_set[i]:

        data = series["ETN"]
        history = data[:-3]              # every observation except the last three
        actual = data[-3:]               # the three held-out observations
        holdout_start = len(data) - 3    # start/end arguments handed to predict()
        holdout_end = len(data) - 1

        # average: the mean of the history, repeated for all three days
        y_hat_1 = [sum(history) / len(history)] * 3
        mse_avg_ETN[i].append(mean_squared_error(actual, y_hat_1))

        # "naive": ExponentialSmoothing with its default settings (no trend)
        model_2 = ExponentialSmoothing(history)
        model_fit_2 = model_2.fit()
        y_hat_2 = model_fit_2.predict(holdout_start, holdout_end)
        mse_naive_ETN[i].append(mean_squared_error(actual, y_hat_2))

        # ets: ExponentialSmoothing with an additive trend
        model_3 = ExponentialSmoothing(history, trend="add")
        model_fit_3 = model_3.fit()
        y_hat_3 = model_fit_3.predict(holdout_start, holdout_end)
        mse_ets_ETN[i].append(mean_squared_error(actual, y_hat_3))

        # moving average: ARIMA(0, 0, 1)
        model_5 = ARIMA(history, order=(0, 0, 1))
        model_fit_5 = model_5.fit()
        y_hat_5 = model_fit_5.predict(holdout_start, holdout_end)
        mse_movingavg_ETN[i].append(mean_squared_error(actual, y_hat_5))

        # ARIMA(1, 1, 1)
        model_6 = ARIMA(history, order=(1, 1, 1))
        model_fit_6 = model_6.fit()
        y_hat_6 = model_fit_6.predict(holdout_start, holdout_end)
        mse_arima_ETN[i].append(mean_squared_error(actual, y_hat_6))

        # ARIMA(0, 1, 1)
        model_7 = ARIMA(history, order=(0, 1, 1))
        model_fit_7 = model_7.fit()
        y_hat_7 = model_fit_7.predict(holdout_start, holdout_end)
        mse_arima2_ETN[i].append(mean_squared_error(actual, y_hat_7))

        # ARIMA(1, 1, 0)
        model_8 = ARIMA(history, order=(1, 1, 0))
        model_fit_8 = model_8.fit()
        y_hat_8 = model_fit_8.predict(holdout_start, holdout_end)
        mse_arima3_ETN[i].append(mean_squared_error(actual, y_hat_8))

        # Keep the per-day actual / ARIMA(1,1,0) pairs for later inspection.
        y1.append(data.values[-3])
        y1_.append(y_hat_8.values[-3])
        y2.append(data.values[-2])
        y2_.append(y_hat_8.values[-2])
        y3.append(data.values[-1])
        y3_.append(y_hat_8.values[-1])


In [None]:
# for machine learning

# Reload the long-format table: `data` is reused as a scratch name by the
# traditional forecasting loop, so it no longer holds the full table here.
data = pd.read_csv("cleaned_structured_data.csv", sep = ";")
data = data.set_index(["id"])

# Wide table: one row per patient id, one column per (measurement, day).
dataset = pd.DataFrame()
dataset["id"] = data.reset_index()["id"].unique()
dataset = dataset.set_index("id")
for i in range(107):
    day_rows = data[data["day"] == i]
    # Each assignment aligns on the id index; patients without a row for day i get NaN.
    # (The original assigned the ETN column twice per day; once is enough.)
    dataset["ETN" + str(i)] = day_rows["ETN"]
    dataset["o2Saturation" + str(i)] = day_rows["o2Saturation"]
    dataset["temperature" + str(i)] = day_rows["temperature"]
    dataset["bloodPressure" + str(i)] = day_rows["bloodPressure"]
per_patient = data.groupby("id")
dataset["age"] = per_patient.first()["age"]
dataset["gender"] = per_patient.first()["gender"]
# Select BMI before averaging: groupby(...).mean() over every column raises a
# TypeError on pandas >= 2.0 because `gender` holds strings.
dataset["BMI"] = per_patient["BMI"].mean()
# Encode gender as 0 for "Man" and 1 for anything else.
dataset["gender"] = np.where(dataset["gender"] == "Man", 0, 1)


In [None]:
def build_traindata(i, frame=None):
    """Build the ETN training table for the six-day window starting at day ``i``.

    Rows are the patients whose ``ETN`` value on day ``i + 9`` is not NaN.
    The first column (``ETN6``) is the regression target, taken from day
    ``i + 6``; the remaining columns are the static features, the lagged
    readings renamed relative to the window, and 3-/6-day means that end on
    day ``i + 5``.

    Parameters
    ----------
    i : int
        First day of the feature window.
    frame : pandas.DataFrame, optional
        Wide per-patient table to read from. Defaults to the notebook-level
        ``dataset`` so existing ``build_traindata(i)`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        An independent copy; the source table is never modified.
    """
    source = dataset if frame is None else frame

    def window_mean(metric, n_days):
        # Mean over days i+5, i+4, ... (n_days of them); NaN if any day is missing.
        days = [source[metric + str(i + 5 - back)] for back in range(n_days)]
        return sum(days[1:], days[0]) / n_days

    eligible = source[source["ETN" + str(i + 9)].notna()]
    selected = ["ETN" + str(i + 6), 'age', 'gender', 'BMI', "temperature" + str(i + 5), "temperature" + str(i + 4), "bloodPressure" + str(i + 5), "o2Saturation" + str(i + 5)]
    selected += ["ETN" + str(i + lag) for lag in range(5, -1, -1)]
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the source (avoids SettingWithCopyWarning).
    train_data = eligible[selected].copy()
    train_data.columns = ["ETN6", 'age', 'gender', 'BMI', "temperature5","temperature4", "bloodPressure5", "o2Saturation5", "ETN5", "ETN4", "ETN3", "ETN2", "ETN1", "ETN0"]

    # The means are computed on the full source table and aligned on the index
    # when assigned, so only the eligible rows are kept.
    for metric in ("temperature", "bloodPressure", "o2Saturation"):
        train_data[metric + "_3days"] = window_mean(metric, 3)
        train_data[metric + "_6days"] = window_mean(metric, 6)
    train_data["ETN_6days"] = window_mean("ETN", 6)
    train_data["ETN_3days"] = window_mean("ETN", 3)

    return train_data

def build_testdata0(i):
    """Build the ETN evaluation frame for the window that starts at day ``i``.

    Rows are the patients of the module-level ``dataset`` whose
    ``ETN{i+8}`` value is present. Columns are renamed to window-relative
    names: ``ETN6`` (the target, day ``i+6``) comes first, followed by the
    static attributes, the latest covariate readings, the six ETN lags
    ``ETN5``..``ETN0`` and the 3-/6-day trailing means of every signal.

    Args:
        i: Day offset of the oldest lag in the window.

    Returns:
        A new DataFrame, independent of ``dataset``, so callers may add or
        overwrite columns without touching the source table.
    """
    observed = dataset[dataset["ETN" + str(i + 8)].notna()]

    def trailing_mean(signal, offsets):
        # Add the days newest-first so the floating-point result does not
        # depend on how the offsets are grouped.
        total = observed[signal + str(i + offsets[0])]
        for offset in offsets[1:]:
            total = total + observed[signal + str(i + offset)]
        return total / len(offsets)

    last3 = (5, 4, 3)
    last6 = (5, 4, 3, 2, 1, 0)

    # Explicit copy: the selection is derived from a filtered slice, and
    # adding columns to it in place triggers SettingWithCopyWarning.
    test_frame = observed[["ETN" + str(i + 6), 'age', 'gender', 'BMI',
                           "temperature" + str(i + 5), "temperature" + str(i + 4),
                           "bloodPressure" + str(i + 5), "o2Saturation" + str(i + 5),
                           "ETN" + str(i + 5), "ETN" + str(i + 4), "ETN" + str(i + 3),
                           "ETN" + str(i + 2), "ETN" + str(i + 1), "ETN" + str(i)]].copy()
    test_frame.columns = ["ETN6", 'age', 'gender', 'BMI', "temperature5", "temperature4",
                          "bloodPressure5", "o2Saturation5",
                          "ETN5", "ETN4", "ETN3", "ETN2", "ETN1", "ETN0"]

    for signal in ("temperature", "bloodPressure", "o2Saturation"):
        test_frame[signal + "_3days"] = trailing_mean(signal, last3)
        test_frame[signal + "_6days"] = trailing_mean(signal, last6)
    test_frame["ETN_6days"] = trailing_mean("ETN", last6)
    test_frame["ETN_3days"] = trailing_mean("ETN", last3)
    return test_frame

def build_testdata(train_data, y, i):
    """Roll an ETN evaluation frame forward by one day using forecasts ``y``.

    The lags ``ETN1``..``ETN5`` slide down to ``ETN0``..``ETN4``, the newest
    lag ``ETN5`` becomes ``y``, the target ``ETN6`` is re-read from column
    ``ETN{i+6}`` of the module-level ``dataset`` (aligned on the index), and
    the ETN trailing means are recomputed from the shifted lags. Any other
    column is carried over unchanged.

    Args:
        train_data: Frame for the previous forecast day; it is not modified.
        y: Forecasts for that day, one per row in row order.
        i: Day offset such that ``i + 6`` is the day of the new target.

    Returns:
        A new DataFrame with the same columns as ``train_data``.
    """
    # Explicit copy: the columns below are overwritten in place, and doing
    # that on a column selection of the input triggers SettingWithCopyWarning.
    test_data = train_data.copy()

    # Ascending order matters: each lag reads its not-yet-overwritten neighbour.
    for j in range(5):
        test_data["ETN" + str(j)] = test_data["ETN" + str(j + 1)]
    test_data["ETN5"] = y
    test_data["ETN6"] = dataset["ETN" + str(i + 6)]

    test_data["ETN_6days"] = (test_data["ETN0"] + test_data["ETN1"] + test_data["ETN2"]
                              + test_data["ETN3"] + test_data["ETN4"] + test_data["ETN5"]) / 6
    test_data["ETN_3days"] = (test_data["ETN3"] + test_data["ETN4"] + test_data["ETN5"]) / 3

    return test_data


In [None]:
# light gbm

# Walk-forward evaluation of 1-, 2- and 3-day-ahead ETN forecasts with LightGBM.
# For each window start i the models are refit on every training window seen so
# far (0..i); window i+1 is then forecast recursively, feeding each day's
# predictions back in as the newest lag for the following day.

mse = []
y_list = []
y_hat_list = []
mse_list = pd.Series(dtype=float)
train_data = pd.DataFrame()
train_data_temperature = pd.DataFrame()
train_data_bloodPressure = pd.DataFrame()
train_data_o2Saturation = pd.DataFrame()

# Per-window pieces, joined with pd.concat: DataFrame.append and Series.append
# were removed in pandas 2.0.
train_frames = []
train_frames_temperature = []
train_frames_bloodPressure = []
train_frames_o2Saturation = []
squared_error_parts = []

_y1 = []
_y1_ = []
_y2 = []
_y2_ = []
_y3 = []
_y3_ = []

# Each key appears once; "metric" and "bagging_freq" keep the values that were
# effective before (a later duplicate key in a dict literal wins).
params = {
          "objective" : "regression",
          "metric": ["mse"],
          "force_row_wise" : True,
          "learning_rate" : 0.015,
          'num_iterations' : 200,
          'num_leaves': 100,
          'min_child_samples': 30,
          'min_child_weight': 0.001,
          'bagging_fraction': 0.9,
          'bagging_freq': 2
}

params_xgboost = {
            'max_depth': 2,
            'n_estimators': 100,
            'learning_rate': 0.1
}

cols = ['age', 'gender', 'BMI', 'temperature5', 'bloodPressure5','o2Saturation5', 'ETN5', 'ETN4', 'ETN3','ETN2', 'ETN1', 'ETN0', 'temperature_3days', 'temperature_6days', 'bloodPressure_3days', 'bloodPressure_6days', 'o2Saturation_3days', 'o2Saturation_6days', 'ETN_6days', 'ETN_3days']

for i in range(97):
    # train model (column 0 of every frame is the target, the rest are features)
    # temperature
    train_frames_temperature.append(build_traindata_temperature(i))
    train_data_temperature = pd.concat(train_frames_temperature)
    x_temperature = train_data_temperature.iloc[:,1:]
    y_temperature = train_data_temperature.iloc[:,0]
    trainData_temperature = lgb.Dataset(data=x_temperature,label=y_temperature)
    m_lgb_temperature = lgb.train(params, trainData_temperature)
    # bloodPressure
    train_frames_bloodPressure.append(build_traindata_bloodPressure(i))
    train_data_bloodPressure = pd.concat(train_frames_bloodPressure)
    x_bloodPressure = train_data_bloodPressure.iloc[:,1:]
    y_bloodPressure = train_data_bloodPressure.iloc[:,0]
    trainData_bloodPressure = lgb.Dataset(data=x_bloodPressure,label=y_bloodPressure)
    m_lgb_bloodPressure = lgb.train(params, trainData_bloodPressure)
    # o2Saturation
    train_frames_o2Saturation.append(build_traindata_o2Saturation(i))
    train_data_o2Saturation = pd.concat(train_frames_o2Saturation)
    x_o2Saturation = train_data_o2Saturation.iloc[:,1:]
    y_o2Saturation = train_data_o2Saturation.iloc[:,0]
    model_o2Saturation = xgb.XGBRegressor(**params_xgboost)
    model_o2Saturation.fit(x_o2Saturation, y_o2Saturation)
    # ETN
    train_frames.append(build_traindata(i))
    train_data = pd.concat(train_frames)
    x = train_data.iloc[:,1:][cols]
    y = train_data.iloc[:,0]
    trainData = lgb.Dataset(data=x,label=y)
    m_lgb = lgb.train(params, trainData)

    # forecast day1
    # The feature frames are copied before "pred" is attached so the frames
    # returned by the builders stay untouched.
    # temperature
    test_data1_temperature = build_testdata0_temperature(i+1)
    x1_temperature = test_data1_temperature.iloc[:,1:].copy()
    y1_temperature = test_data1_temperature.iloc[:,0]
    y1_hat_temperature = m_lgb_temperature.predict(x1_temperature)
    x1_temperature["pred"] = y1_hat_temperature
    # bloodPressure
    test_data1_bloodPressure = build_testdata0_bloodPressure(i+1)
    x1_bloodPressure = test_data1_bloodPressure.iloc[:,1:].copy()
    y1_bloodPressure = test_data1_bloodPressure.iloc[:,0]
    y1_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure)
    x1_bloodPressure["pred"] = y1_hat_bloodPressure
    # o2Saturation
    test_data1_o2Saturation = build_testdata0_o2Saturation(i+1)
    x1_o2Saturation = test_data1_o2Saturation.iloc[:,1:].copy()
    y1_o2Saturation = test_data1_o2Saturation.iloc[:,0]
    y1_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation)
    x1_o2Saturation["pred"] = y1_hat_o2Saturation
    # ETN
    test_data1 = build_testdata0(i+1)
    x1 = test_data1.iloc[:,1:][cols].copy()
    y1 = test_data1.iloc[:,0]
    y1_hat = m_lgb.predict(x1)
    x1["pred"] = y1_hat

    # forecast day2
    # The covariate models now score the rolled-forward day-2 frames. Scoring
    # the day-1 inputs again would only reproduce the day-1 forecasts.
    # NOTE(review): assumes the build_testdata_* helpers keep the column layout
    # of their input frame - confirm against their definitions.
    # temperature
    test_data2_temperature = build_testdata_temperature(test_data1_temperature, y1_hat_temperature, i+2)
    x2_temperature = test_data2_temperature.iloc[:,1:].copy()
    y2_temperature = test_data2_temperature.iloc[:,0]
    y2_hat_temperature = m_lgb_temperature.predict(x2_temperature)
    x2_temperature["pred"] = y2_hat_temperature
    # bloodPressure
    test_data2_bloodPressure = build_testdata_bloodPressure(test_data1_bloodPressure, y1_hat_bloodPressure, i+2)
    x2_bloodPressure = test_data2_bloodPressure.iloc[:,1:].copy()
    y2_bloodPressure = test_data2_bloodPressure.iloc[:,0]
    y2_hat_bloodPressure = m_lgb_bloodPressure.predict(x2_bloodPressure)
    x2_bloodPressure["pred"] = y2_hat_bloodPressure
    # o2Saturation
    test_data2_o2Saturation = build_testdata_o2Saturation(test_data1_o2Saturation, y1_hat_o2Saturation, i+2)
    x2_o2Saturation = test_data2_o2Saturation.iloc[:,1:].copy()
    y2_o2Saturation = test_data2_o2Saturation.iloc[:,0]
    y2_hat_o2Saturation = model_o2Saturation.predict(x2_o2Saturation)
    x2_o2Saturation["pred"] = y2_hat_o2Saturation
    # ETN: the day-1 covariate forecasts become the newest covariate readings
    test_data2 = build_testdata(test_data1, y1_hat, i+2)
    for name, x_prev in (("temperature", x1_temperature), ("bloodPressure", x1_bloodPressure), ("o2Saturation", x1_o2Saturation)):
        newest_first = [x_prev["pred"]] + [x_prev[name + str(k)] for k in (5, 4, 3, 2, 1)]
        test_data2[name + "5"] = newest_first[0]
        test_data2[name + "_3days"] = sum(newest_first[1:3], newest_first[0]) / 3
        test_data2[name + "_6days"] = sum(newest_first[1:], newest_first[0]) / 6
    x2 = test_data2.iloc[:,1:][cols].copy()
    y2 = test_data2.iloc[:,0]
    y2_hat = m_lgb.predict(x2)
    x2["pred"] = y2_hat

    # forecast day3
    # ETN: offset i+3 makes the target the value one day after the day-2
    # target; with i+2 the day-3 forecast was scored against day-2 actuals.
    test_data3 = build_testdata(test_data2, y2_hat, i+3)
    for name, x_prev in (("temperature", x2_temperature), ("bloodPressure", x2_bloodPressure), ("o2Saturation", x2_o2Saturation)):
        newest_first = [x_prev["pred"]] + [x_prev[name + str(k)] for k in (5, 4, 3, 2, 1)]
        test_data3[name + "5"] = newest_first[0]
        test_data3[name + "_3days"] = sum(newest_first[1:3], newest_first[0]) / 3
        test_data3[name + "_6days"] = sum(newest_first[1:], newest_first[0]) / 6
    x3 = test_data3.iloc[:,1:][cols].copy()
    y3 = test_data3.iloc[:,0]
    y3_hat = m_lgb.predict(x3)
    x3["pred"] = y3_hat

    # pooled actuals / forecasts over all horizons
    y_list.extend(list(y1))
    y_list.extend(list(y2))
    y_list.extend(list(y3))

    y_hat_list.extend(list(y1_hat))
    y_hat_list.extend(list(y2_hat))
    y_hat_list.extend(list(y3_hat))

    # actuals / forecasts per horizon
    _y1 += (list(y1))
    _y2 += (list(y2))
    _y3 += (list(y3))

    _y1_ += (list(y1_hat))
    _y2_ += (list(y2_hat))
    _y3_ += (list(y3_hat))

    squared_error_parts.extend([(y1 - x1["pred"]) ** 2, (y2 - x2["pred"]) ** 2, (y3 - x3["pred"]) ** 2])

    # row-weighted mean of the three per-horizon MSEs for this window
    mse.append((mean_squared_error(y1,y1_hat) * len(test_data1) + mean_squared_error(y2,y2_hat) * len(test_data2) + mean_squared_error(y3,y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

# per-row squared errors of every window and horizon, indexed like the test frames
if squared_error_parts:
    mse_list = pd.concat(squared_error_parts)
    


In [None]:
# xgboost

# Walk-forward evaluation of 1-, 2- and 3-day-ahead ETN forecasts with XGBoost.
# The covariate models (temperature, bloodPressure, o2Saturation) are the same
# as in the LightGBM run; only the ETN model differs. In addition to the
# squared errors, the direction of each day-to-day change is recorded.

mse_xgboost = []
y_list_xgboost = []
y_hat_list_xgboost = []
y_list_bi_xgboost = []
y_hat_list_bi_xgboost = []
y_list_bi_xgboost_3day = []
y_hat_list_bi_xgboost_3day = []
mse_list_xgboost = pd.Series(dtype=float)
train_data = pd.DataFrame()
train_data_temperature = pd.DataFrame()
train_data_bloodPressure = pd.DataFrame()
train_data_o2Saturation = pd.DataFrame()

# Per-window pieces, joined with pd.concat: DataFrame.append and Series.append
# were removed in pandas 2.0.
train_frames = []
train_frames_temperature = []
train_frames_bloodPressure = []
train_frames_o2Saturation = []
squared_error_parts = []

_y1 = []
_y1_ = []
_y2 = []
_y2_ = []
_y3 = []
_y3_ = []

# hyper-parameters of the ETN XGBoost model
params0 = {
            'max_depth': 3,
            'n_estimators': 50,
            'learning_rate': 0.09
}

# LightGBM parameters for the temperature / bloodPressure models. Each key
# appears once; "metric" and "bagging_freq" keep the values that were effective
# before (a later duplicate key in a dict literal wins).
params = {
          "objective" : "regression",
          "metric": ["mse"],
          "force_row_wise" : True,
          "learning_rate" : 0.015,
          'num_iterations' : 200,
          'num_leaves': 100,
          'min_child_samples': 30,
          'min_child_weight': 0.001,
          'bagging_fraction': 0.9,
          'bagging_freq': 2
}

# hyper-parameters of the o2Saturation XGBoost model
params_xgboost = {
            'max_depth': 2,
            'n_estimators': 100,
            'learning_rate': 0.1
}

cols = ['age', 'gender', 'BMI', 'temperature5', 'bloodPressure5','o2Saturation5', 'ETN5', 'ETN4', 'ETN3','ETN2', 'ETN1', 'ETN0', 'temperature_3days', 'temperature_6days', 'bloodPressure_3days', 'bloodPressure_6days', 'o2Saturation_3days', 'o2Saturation_6days', 'ETN_6days', 'ETN_3days']

for i in range(97):
    # train model (column 0 of every frame is the target, the rest are features)
    # temperature
    train_frames_temperature.append(build_traindata_temperature(i))
    train_data_temperature = pd.concat(train_frames_temperature)
    x_temperature = train_data_temperature.iloc[:,1:]
    y_temperature = train_data_temperature.iloc[:,0]
    trainData_temperature = lgb.Dataset(data=x_temperature,label=y_temperature)
    m_lgb_temperature = lgb.train(params, trainData_temperature)
    # bloodPressure
    train_frames_bloodPressure.append(build_traindata_bloodPressure(i))
    train_data_bloodPressure = pd.concat(train_frames_bloodPressure)
    x_bloodPressure = train_data_bloodPressure.iloc[:,1:]
    y_bloodPressure = train_data_bloodPressure.iloc[:,0]
    trainData_bloodPressure = lgb.Dataset(data=x_bloodPressure,label=y_bloodPressure)
    m_lgb_bloodPressure = lgb.train(params, trainData_bloodPressure)
    # o2Saturation
    train_frames_o2Saturation.append(build_traindata_o2Saturation(i))
    train_data_o2Saturation = pd.concat(train_frames_o2Saturation)
    x_o2Saturation = train_data_o2Saturation.iloc[:,1:]
    y_o2Saturation = train_data_o2Saturation.iloc[:,0]
    model_o2Saturation = xgb.XGBRegressor(**params_xgboost)
    model_o2Saturation.fit(x_o2Saturation, y_o2Saturation)
    # ETN
    current_window = build_traindata(i)
    train_frames.append(current_window)
    train_data = pd.concat(train_frames)
    x = train_data.iloc[:,1:][cols]
    y = train_data.iloc[:,0]
    # target of the newest training window: the last value observed before the
    # forecast horizon, used as the reference for the direction labels below
    y0 = current_window.iloc[:,0]
    model = xgb.XGBRegressor(**params0)
    model.fit(x, y)

    # forecast day1
    # The feature frames are copied before "pred" is attached so the frames
    # returned by the builders stay untouched.
    # temperature
    test_data1_temperature = build_testdata0_temperature(i+1)
    x1_temperature = test_data1_temperature.iloc[:,1:].copy()
    y1_temperature = test_data1_temperature.iloc[:,0]
    y1_hat_temperature = m_lgb_temperature.predict(x1_temperature)
    x1_temperature["pred"] = y1_hat_temperature
    # bloodPressure
    test_data1_bloodPressure = build_testdata0_bloodPressure(i+1)
    x1_bloodPressure = test_data1_bloodPressure.iloc[:,1:].copy()
    y1_bloodPressure = test_data1_bloodPressure.iloc[:,0]
    y1_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure)
    x1_bloodPressure["pred"] = y1_hat_bloodPressure
    # o2Saturation
    test_data1_o2Saturation = build_testdata0_o2Saturation(i+1)
    x1_o2Saturation = test_data1_o2Saturation.iloc[:,1:].copy()
    y1_o2Saturation = test_data1_o2Saturation.iloc[:,0]
    y1_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation)
    x1_o2Saturation["pred"] = y1_hat_o2Saturation
    # ETN
    test_data1 = build_testdata0(i+1)
    x1 = test_data1.iloc[:,1:][cols].copy()
    y1 = test_data1.iloc[:,0]
    y1_hat = model.predict(x1)
    x1["pred"] = y1_hat

    # forecast day2
    # The covariate models now score the rolled-forward day-2 frames. Scoring
    # the day-1 inputs again would only reproduce the day-1 forecasts.
    # NOTE(review): assumes the build_testdata_* helpers keep the column layout
    # of their input frame - confirm against their definitions.
    # temperature
    test_data2_temperature = build_testdata_temperature(test_data1_temperature, y1_hat_temperature, i+2)
    x2_temperature = test_data2_temperature.iloc[:,1:].copy()
    y2_temperature = test_data2_temperature.iloc[:,0]
    y2_hat_temperature = m_lgb_temperature.predict(x2_temperature)
    x2_temperature["pred"] = y2_hat_temperature
    # bloodPressure
    test_data2_bloodPressure = build_testdata_bloodPressure(test_data1_bloodPressure, y1_hat_bloodPressure, i+2)
    x2_bloodPressure = test_data2_bloodPressure.iloc[:,1:].copy()
    y2_bloodPressure = test_data2_bloodPressure.iloc[:,0]
    y2_hat_bloodPressure = m_lgb_bloodPressure.predict(x2_bloodPressure)
    x2_bloodPressure["pred"] = y2_hat_bloodPressure
    # o2Saturation
    test_data2_o2Saturation = build_testdata_o2Saturation(test_data1_o2Saturation, y1_hat_o2Saturation, i+2)
    x2_o2Saturation = test_data2_o2Saturation.iloc[:,1:].copy()
    y2_o2Saturation = test_data2_o2Saturation.iloc[:,0]
    y2_hat_o2Saturation = model_o2Saturation.predict(x2_o2Saturation)
    x2_o2Saturation["pred"] = y2_hat_o2Saturation
    # ETN: the day-1 covariate forecasts become the newest covariate readings
    test_data2 = build_testdata(test_data1, y1_hat, i+2)
    for name, x_prev in (("temperature", x1_temperature), ("bloodPressure", x1_bloodPressure), ("o2Saturation", x1_o2Saturation)):
        newest_first = [x_prev["pred"]] + [x_prev[name + str(k)] for k in (5, 4, 3, 2, 1)]
        test_data2[name + "5"] = newest_first[0]
        test_data2[name + "_3days"] = sum(newest_first[1:3], newest_first[0]) / 3
        test_data2[name + "_6days"] = sum(newest_first[1:], newest_first[0]) / 6
    x2 = test_data2.iloc[:,1:][cols].copy()
    y2 = test_data2.iloc[:,0]
    y2_hat = model.predict(x2)
    x2["pred"] = y2_hat

    # forecast day3
    # ETN: offset i+3 makes the target the value one day after the day-2
    # target; with i+2 the day-3 forecast was scored against day-2 actuals.
    test_data3 = build_testdata(test_data2, y2_hat, i+3)
    for name, x_prev in (("temperature", x2_temperature), ("bloodPressure", x2_bloodPressure), ("o2Saturation", x2_o2Saturation)):
        newest_first = [x_prev["pred"]] + [x_prev[name + str(k)] for k in (5, 4, 3, 2, 1)]
        test_data3[name + "5"] = newest_first[0]
        test_data3[name + "_3days"] = sum(newest_first[1:3], newest_first[0]) / 3
        test_data3[name + "_6days"] = sum(newest_first[1:], newest_first[0]) / 6
    x3 = test_data3.iloc[:,1:][cols].copy()
    y3 = test_data3.iloc[:,0]
    y3_hat = model.predict(x3)
    x3["pred"] = y3_hat

    # actuals / forecasts per horizon
    _y1 += (list(y1))
    _y2 += (list(y2))
    _y3 += (list(y3))

    _y1_ += (list(y1_hat))
    _y2_ += (list(y2_hat))
    _y3_ += (list(y3_hat))

    # pooled actuals / forecasts over all horizons
    y_list_xgboost.extend(list(y1))
    y_list_xgboost.extend(list(y2))
    y_list_xgboost.extend(list(y3))
    y_hat_list_xgboost.extend(list(y1_hat))
    y_hat_list_xgboost.extend(list(y2_hat))
    y_hat_list_xgboost.extend(list(y3_hat))

    # direction labels: True where the value is lower than the day before
    y_list_bi_xgboost.extend(np.array(y0) - np.array(y1) > 0)
    y_list_bi_xgboost.extend(np.array(y1) - np.array(y2) > 0)
    y_list_bi_xgboost.extend(np.array(y2) - np.array(y3) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y0) - np.array(y1_hat) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y1_hat) - np.array(y2_hat) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y2_hat) - np.array(y3_hat) > 0)
    # True where the 3-day mean is below the last observed value; the
    # forecast side averages all three forecasts (y1_hat was counted twice
    # and y3_hat left out before)
    y_list_bi_xgboost_3day.extend(np.array(y0) - (np.array(y1) + np.array(y2) + np.array(y3)) / 3  > 0)
    y_hat_list_bi_xgboost_3day.extend(np.array(y0) - (np.array(y1_hat) + np.array(y2_hat) + np.array(y3_hat)) / 3  > 0)

    squared_error_parts.extend([(y1 - x1["pred"]) ** 2, (y2 - x2["pred"]) ** 2, (y3 - x3["pred"]) ** 2])

    # row-weighted mean of the three per-horizon MSEs for this window
    mse_xgboost.append((mean_squared_error(y1,y1_hat) * len(test_data1) + mean_squared_error(y2,y2_hat) * len(test_data2) + mean_squared_error(y3,y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))

# per-row squared errors of every window and horizon, indexed like the test frames
if squared_error_parts:
    mse_list_xgboost = pd.concat(squared_error_parts)
    


In [None]:
# Overall ETN mean squared error per method. The classical baselines store one
# list of per-series MSEs per window, so they are pooled as
# (sum of all entries) / (number of entries).
etn_baselines = (
    ("mean", mse_avg_ETN),
    ("naive", mse_naive_ETN),
    ("ets", mse_ets_ETN),
    ("moving average", mse_movingavg_ETN),
    ("arima(1,1,1)", mse_arima_ETN),
    ("arima(0,1,1)", mse_arima2_ETN),
    ("arima(1,1,0)", mse_arima3_ETN),
)
for label, per_window in etn_baselines:
    total_error = sum(sum(errors) for errors in per_window)
    n_errors = sum(len(errors) for errors in per_window)
    print("%s : %s" % (label, total_error / n_errors))

# The boosted models are scored on their pooled actual / forecast lists.
print("lightgbm : %s" % (mean_squared_error(y_list,y_hat_list)))
print("xgboost : %s" % (mean_squared_error(y_list_xgboost,y_hat_list_xgboost)))


In [None]:
# Share of rows where the XGBoost forecast gets the direction label right:
# first for the day-to-day labels, then for the 3-day-mean label.
print(sum(np.array(y_hat_list_bi_xgboost) == y_list_bi_xgboost) / len(y_list_bi_xgboost))
print(sum(np.array(y_hat_list_bi_xgboost_3day) == y_list_bi_xgboost_3day) / len(y_list_bi_xgboost_3day))
# Coin-flip reference. A locally seeded generator makes the printed value
# reproducible across runs without touching the global random state.
coin = random.Random(0)
print(sum(np.array([coin.choice([True, False]) for _ in y_list_bi_xgboost]) == y_list_bi_xgboost) / len(y_list_bi_xgboost))

# forecast STM

In [None]:
# RMSE for forecast day 1, day 2, day 3 and for the three days pooled,
# using whichever run filled the _y* lists last.
horizon_pairs = (
    (_y1, _y1_),
    (_y2, _y2_),
    (_y3, _y3_),
    (_y3 + _y2 + _y1, _y3_ + _y2_ + _y1_),
)
for actual_values, forecast_values in horizon_pairs:
    print(mean_squared_error(actual_values, forecast_values) ** 0.5)

In [None]:
# Show the per-series MSEs stored at window index 30 for the "naive" STM forecast.
# NOTE(review): mse_naive_STM is created by the "# traditional" STM cell further
# down, so in top-to-bottom order this cell fails or shows values left over
# from an earlier run - confirm the intended cell order.
mse_naive_STM[30]

In [None]:
# Mean MSE per window (first 50 windows) for every STM forecasting method,
# one line per method.
stm_baselines = {
    "naive": mse_naive_STM,
    "mean": mse_avg_STM,
    "exponential smoothing": mse_ets_STM,
    "ARIMA(1,1,1)": mse_arima_STM,
    "ARIMA(0,1,1)": mse_arima2_STM,
    "ARIMA(1,1,0)": mse_arima3_STM,
}
curves = {}
for label, per_window in stm_baselines.items():
    # average the per-series errors inside each window
    curves[label] = [sum(errors) / len(errors) for errors in per_window[:50]]
# the boosted models already hold one pooled value per window
curves["Light GBM"] = mse[:50]
curves["Xgboost"] = mse_xgboost[:50]

a = pd.DataFrame(curves)
a.plot(figsize=(12,8))

In [None]:
# traditional

# Classical baselines for STM. Every series in time_series_set[i] is split into
# a history (all but the last three points) and the last three points, which
# each method forecasts; the 3-point MSE is stored per window index i.

mse_avg_STM = [[] for _ in range(100)]
mse_naive_STM = [[] for _ in range(100)]
mse_ets_STM = [[] for _ in range(100)]
mse_movingavg_STM = [[] for _ in range(100)]
mse_arima_STM = [[] for _ in range(100)]
mse_arima2_STM = [[] for _ in range(100)]
mse_arima3_STM = [[] for _ in range(100)]

_y1 = []
_y1_ = []
_y2 = []
_y2_ = []
_y3 = []
_y3_ = []

# (constructor keyword arguments, result store): "naive" is exponential
# smoothing without a trend, "ets" adds an additive trend
smoothing_runs = (
    ({}, mse_naive_STM),
    ({"trend": "add"}, mse_ets_STM),
)

# (ARIMA order, result store); (0, 0, 1) is the moving-average model.
# (1, 1, 0) stays last because its forecast is the one kept per horizon below.
arima_runs = (
    ((0, 0, 1), mse_movingavg_STM),
    ((1, 1, 1), mse_arima_STM),
    ((0, 1, 1), mse_arima2_STM),
    ((1, 1, 0), mse_arima3_STM),
)

for i in range(100):
    for series in time_series_set[i]:

        data = series["STM"]
        history = data[:-3]
        actual = data[-3:]
        first_step = len(data) - 3
        last_step = len(data) - 1

        # average: repeat the mean of the history for all three days
        history_mean = sum(history) / len(history)
        mse_avg_STM[i].append(mean_squared_error(actual, [history_mean] * 3))

        # naive / ets
        for smoothing_kwargs, store in smoothing_runs:
            smoothing_fit = ExponentialSmoothing(history, **smoothing_kwargs).fit()
            store[i].append(mean_squared_error(actual, smoothing_fit.predict(first_step, last_step)))

        # moving average and the three ARIMA variants
        for order, store in arima_runs:
            arima_forecast = ARIMA(history, order=order).fit().predict(first_step, last_step)
            store[i].append(mean_squared_error(actual, arima_forecast))

        # keep actuals and the ARIMA(1,1,0) forecast per horizon
        for position, actual_store, forecast_store in ((-3, _y1, _y1_), (-2, _y2, _y2_), (-1, _y3, _y3_)):
            actual_store.append(data.values[position])
            forecast_store.append(arima_forecast.values[position])

In [None]:
# for machine learning

# Reload the long table (one row per patient and day) and pivot it into one row
# per patient with columns "<signal><day>" for days 0..106, plus static
# attributes. `data` is re-read because earlier cells rebind that name.
data = pd.read_csv("cleaned_structured_data.csv", sep = ";")
data = data.set_index(["id"])

patient_ids = pd.Index(data.index.unique(), name="id")

# Collect every column first and build the frame once; inserting several
# hundred columns one at a time fragments the DataFrame.
wide_columns = {}
for i in range(107):
    day_rows = data[data["day"] == i]
    for signal in ("STM", "o2Saturation", "temperature", "bloodPressure"):
        # patients without a record on day i get NaN
        wide_columns[signal + str(i)] = day_rows[signal].reindex(patient_ids)
dataset = pd.DataFrame(wide_columns, index=patient_ids)

# Aggregate only the column that is needed: averaging the whole frame raises on
# the string-valued gender column in pandas >= 2.0.
by_patient = data.groupby("id")
dataset["age"] = by_patient["age"].first()
dataset["gender"] = by_patient["gender"].first()
dataset["BMI"] = by_patient["BMI"].mean()
# encode gender: "Man" -> 0, anything else -> 1
dataset["gender"] = np.where(dataset["gender"] == "Man", 0, 1)


In [None]:
def build_traindata(i):
    """Build the STM training frame for the window that starts at day ``i``.

    Rows are the patients of the module-level ``dataset`` whose
    ``STM{i+9}`` value is present. Columns are renamed to window-relative
    names: ``STM6`` (the target, day ``i+6``) comes first, followed by the
    static attributes, the latest covariate readings, the six STM lags
    ``STM5``..``STM0`` and the 3-/6-day trailing means of every signal.

    Args:
        i: Day offset of the oldest lag in the window.

    Returns:
        A new DataFrame, independent of ``dataset``, so callers may add or
        overwrite columns without touching the source table.
    """
    observed = dataset[dataset["STM" + str(i + 9)].notna()]

    def trailing_mean(signal, offsets):
        # Add the days newest-first so the floating-point result does not
        # depend on how the offsets are grouped.
        total = observed[signal + str(i + offsets[0])]
        for offset in offsets[1:]:
            total = total + observed[signal + str(i + offset)]
        return total / len(offsets)

    last3 = (5, 4, 3)
    last6 = (5, 4, 3, 2, 1, 0)

    # Explicit copy: the selection is derived from a filtered slice, and
    # adding columns to it in place triggers SettingWithCopyWarning.
    train_frame = observed[["STM" + str(i + 6), 'age', 'gender', 'BMI',
                            "temperature" + str(i + 5), "temperature" + str(i + 4),
                            "bloodPressure" + str(i + 5), "o2Saturation" + str(i + 5),
                            "STM" + str(i + 5), "STM" + str(i + 4), "STM" + str(i + 3),
                            "STM" + str(i + 2), "STM" + str(i + 1), "STM" + str(i)]].copy()
    train_frame.columns = ["STM6", 'age', 'gender', 'BMI', "temperature5", "temperature4",
                           "bloodPressure5", "o2Saturation5",
                           "STM5", "STM4", "STM3", "STM2", "STM1", "STM0"]

    for signal in ("temperature", "bloodPressure", "o2Saturation"):
        train_frame[signal + "_3days"] = trailing_mean(signal, last3)
        train_frame[signal + "_6days"] = trailing_mean(signal, last6)
    train_frame["STM_6days"] = trailing_mean("STM", last6)
    train_frame["STM_3days"] = trailing_mean("STM", last3)

    return train_frame

def build_testdata0(i):
    """Build the STM evaluation frame for the window that starts at day ``i``.

    Rows are the patients of the module-level ``dataset`` whose
    ``STM{i+8}`` value is present. Columns are renamed to window-relative
    names: ``STM6`` (the target, day ``i+6``) comes first, followed by the
    static attributes, the latest covariate readings, the six STM lags
    ``STM5``..``STM0`` and the 3-/6-day trailing means of every signal.

    Args:
        i: Day offset of the oldest lag in the window.

    Returns:
        A new DataFrame, independent of ``dataset``, so callers may add or
        overwrite columns without touching the source table.
    """
    observed = dataset[dataset["STM" + str(i + 8)].notna()]

    def trailing_mean(signal, offsets):
        # Add the days newest-first so the floating-point result does not
        # depend on how the offsets are grouped.
        total = observed[signal + str(i + offsets[0])]
        for offset in offsets[1:]:
            total = total + observed[signal + str(i + offset)]
        return total / len(offsets)

    last3 = (5, 4, 3)
    last6 = (5, 4, 3, 2, 1, 0)

    # Explicit copy: the selection is derived from a filtered slice, and
    # adding columns to it in place triggers SettingWithCopyWarning.
    test_frame = observed[["STM" + str(i + 6), 'age', 'gender', 'BMI',
                           "temperature" + str(i + 5), "temperature" + str(i + 4),
                           "bloodPressure" + str(i + 5), "o2Saturation" + str(i + 5),
                           "STM" + str(i + 5), "STM" + str(i + 4), "STM" + str(i + 3),
                           "STM" + str(i + 2), "STM" + str(i + 1), "STM" + str(i)]].copy()
    test_frame.columns = ["STM6", 'age', 'gender', 'BMI', "temperature5", "temperature4",
                          "bloodPressure5", "o2Saturation5",
                          "STM5", "STM4", "STM3", "STM2", "STM1", "STM0"]

    for signal in ("temperature", "bloodPressure", "o2Saturation"):
        test_frame[signal + "_3days"] = trailing_mean(signal, last3)
        test_frame[signal + "_6days"] = trailing_mean(signal, last6)
    test_frame["STM_6days"] = trailing_mean("STM", last6)
    test_frame["STM_3days"] = trailing_mean("STM", last3)
    return test_frame

def build_testdata(train_data, y, i):
    """Roll an STM feature frame one day forward for recursive forecasting.

    The STM lag columns are shifted one slot (``STM0`` takes the old ``STM1``,
    ..., ``STM4`` takes the old ``STM5``), ``STM5`` is filled with ``y`` and
    the target column ``STM6`` is reloaded from the module-level ``dataset``
    column ``"STM" + str(i + 6)``. ``STM_6days`` and ``STM_3days`` are then
    recomputed from the shifted lags. Every other column is carried over
    unchanged.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Frame containing at least ``STM0`` .. ``STM5``. It is not modified.
    y : array-like
        Values written to ``STM5``.
    i : int
        Offset selecting which ``dataset`` column becomes the new ``STM6``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the rolled features.
    """
    # Work on an explicit copy: assigning into a column-sliced frame raises
    # SettingWithCopyWarning and obscures whether the caller's frame is touched.
    test_data = train_data.copy()
    # Ascending order matters: each lag must be read before it is overwritten.
    for j in range(5):
        test_data["STM" + str(j)] = test_data["STM" + str(j + 1)]
    test_data["STM5"] = y
    test_data["STM6"] = dataset["STM" + str(i + 6)]
    test_data.loc[:,"STM_6days"] = (test_data["STM0"] + test_data["STM1"] + test_data["STM2"] + test_data["STM3"] + test_data["STM4"] + test_data["STM5"]) / 6
    test_data.loc[:,"STM_3days"] = (test_data["STM3"] + test_data["STM4"] + test_data["STM5"]) / 3

    return test_data


In [None]:
# Display the column layout that build_traindata produces for window 0.
build_traindata(0).columns

In [None]:
# light gbm

# Accumulators filled by the evaluation loop below.
mse = []            # one pooled MSE per loop iteration
y_list = []         # actual STM values, all horizons
y_hat_list = []     # predicted STM values, all horizons
# Explicit dtype: a bare pd.Series() warns and its default dtype differs between pandas versions.
mse_list = pd.Series(dtype=float)
train_data = pd.DataFrame()
train_data_temperature = pd.DataFrame()
train_data_bloodPressure = pd.DataFrame()
train_data_o2Saturation = pd.DataFrame()

# Actual (_yN) and predicted (_yN_) values, kept separately per forecast day N.
_y1 = []
_y1_ = []
_y2 = []
_y2_ = []
_y3 = []
_y3_ = []

# LightGBM parameters. The original literal listed "metric" and "bagging_freq"
# twice; Python keeps the last occurrence, so only those effective values are
# written here.
params = {
    "objective": "regression",
    "metric": ["mse"],
    "force_row_wise": True,
    "learning_rate": 0.015,
    "num_iterations": 200,
    "num_leaves": 100,
    "min_child_samples": 30,
    "min_child_weight": 0.001,
    "bagging_fraction": 0.9,
    "bagging_freq": 2,
}

# Passed to xgb.XGBRegressor for the o2Saturation model in the loop below.
params_xgboost = {
    "max_depth": 2,
    "n_estimators": 100,
    "learning_rate": 0.1,
}

# Feature columns fed to the STM model (selected from the STM feature frames).
cols = [
    'age', 'gender', 'BMI',
    'temperature5', 'bloodPressure5', 'o2Saturation5',
    'STM5', 'STM4', 'STM3', 'STM2', 'STM1', 'STM0',
    'temperature_3days', 'temperature_6days',
    'bloodPressure_3days', 'bloodPressure_6days',
    'o2Saturation_3days', 'o2Saturation_6days',
    'STM_6days', 'STM_3days',
]

# Expanding-window evaluation of the LightGBM STM model.
# At step i every model is refit on the windows 0..i accumulated so far, then
# STM is forecast for three consecutive days. Days 2 and 3 are recursive: the
# STM lags and the vital-sign features are replaced by the previous step's
# predictions. Column 0 of every feature frame is used as the target.
for i in range(97):
    # ---- train models ----
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    # temperature
    train_data_temperature = pd.concat([train_data_temperature, build_traindata_temperature(i)])
    x_temperature = train_data_temperature.iloc[:, 1:]
    y_temperature = train_data_temperature.iloc[:, 0]
    trainData_temperature = lgb.Dataset(data=x_temperature, label=y_temperature)
    m_lgb_temperature = lgb.train(params, trainData_temperature)
    # bloodPressure
    train_data_bloodPressure = pd.concat([train_data_bloodPressure, build_traindata_bloodPressure(i)])
    x_bloodPressure = train_data_bloodPressure.iloc[:, 1:]
    y_bloodPressure = train_data_bloodPressure.iloc[:, 0]
    trainData_bloodPressure = lgb.Dataset(data=x_bloodPressure, label=y_bloodPressure)
    m_lgb_bloodPressure = lgb.train(params, trainData_bloodPressure)
    # o2Saturation
    train_data_o2Saturation = pd.concat([train_data_o2Saturation, build_traindata_o2Saturation(i)])
    x_o2Saturation = train_data_o2Saturation.iloc[:, 1:]
    y_o2Saturation = train_data_o2Saturation.iloc[:, 0]
    model_o2Saturation = xgb.XGBRegressor(**params_xgboost)
    model_o2Saturation.fit(x_o2Saturation, y_o2Saturation)
    # STM
    train_data = pd.concat([train_data, build_traindata(i)])
    x = train_data.iloc[:, 1:][cols]
    y = train_data.iloc[:, 0]
    trainData = lgb.Dataset(data=x, label=y)
    m_lgb = lgb.train(params, trainData)

    # ---- forecast day 1 ----
    # Feature frames are copied before the "pred" column is attached so the
    # assignment never targets a slice of the source frame.
    # temperature
    test_data1_temperature = build_testdata0_temperature(i + 1)
    x1_temperature = test_data1_temperature.iloc[:, 1:].copy()
    y1_temperature = test_data1_temperature.iloc[:, 0]
    y1_hat_temperature = m_lgb_temperature.predict(x1_temperature)
    x1_temperature["pred"] = y1_hat_temperature
    # bloodPressure
    test_data1_bloodPressure = build_testdata0_bloodPressure(i + 1)
    x1_bloodPressure = test_data1_bloodPressure.iloc[:, 1:].copy()
    y1_bloodPressure = test_data1_bloodPressure.iloc[:, 0]
    y1_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure)
    x1_bloodPressure["pred"] = y1_hat_bloodPressure
    # o2Saturation
    test_data1_o2Saturation = build_testdata0_o2Saturation(i + 1)
    x1_o2Saturation = test_data1_o2Saturation.iloc[:, 1:].copy()
    y1_o2Saturation = test_data1_o2Saturation.iloc[:, 0]
    y1_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation)
    x1_o2Saturation["pred"] = y1_hat_o2Saturation
    # STM
    test_data1 = build_testdata0(i + 1)
    x1 = test_data1.iloc[:, 1:][cols].copy()
    y1 = test_data1.iloc[:, 0]
    y1_hat = m_lgb.predict(x1)
    x1["pred"] = y1_hat

    # ---- forecast day 2 ----
    # The vital-sign models must score the rolled day-2 features (x2_*).
    # Re-scoring the day-1 features would just reproduce the day-1 forecast.
    # temperature
    test_data2_temperature = build_testdata_temperature(test_data1_temperature, y1_hat_temperature, i + 2)
    x2_temperature = test_data2_temperature.iloc[:, 1:].copy()
    y2_temperature = test_data2_temperature.iloc[:, 0]
    y2_hat_temperature = m_lgb_temperature.predict(x2_temperature)
    x2_temperature["pred"] = y2_hat_temperature
    # bloodPressure
    test_data2_bloodPressure = build_testdata_bloodPressure(test_data1_bloodPressure, y1_hat_bloodPressure, i + 2)
    x2_bloodPressure = test_data2_bloodPressure.iloc[:, 1:].copy()
    y2_bloodPressure = test_data2_bloodPressure.iloc[:, 0]
    y2_hat_bloodPressure = m_lgb_bloodPressure.predict(x2_bloodPressure)
    x2_bloodPressure["pred"] = y2_hat_bloodPressure
    # o2Saturation
    test_data2_o2Saturation = build_testdata_o2Saturation(test_data1_o2Saturation, y1_hat_o2Saturation, i + 2)
    x2_o2Saturation = test_data2_o2Saturation.iloc[:, 1:].copy()
    y2_o2Saturation = test_data2_o2Saturation.iloc[:, 0]
    y2_hat_o2Saturation = model_o2Saturation.predict(x2_o2Saturation)
    x2_o2Saturation["pred"] = y2_hat_o2Saturation
    # STM: the latest vital-sign value and its 3-/6-day means come from the day-1 forecasts.
    test_data2 = build_testdata(test_data1, y1_hat, i + 2)
    for vital, feats in (("temperature", x1_temperature), ("bloodPressure", x1_bloodPressure), ("o2Saturation", x1_o2Saturation)):
        test_data2[vital + "5"] = feats["pred"]
        test_data2[vital + "_3days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"]) / 3
        test_data2[vital + "_6days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"] + feats[vital + "3"] + feats[vital + "2"] + feats[vital + "1"]) / 6
    x2 = test_data2.iloc[:, 1:][cols].copy()
    y2 = test_data2.iloc[:, 0]
    y2_hat = m_lgb.predict(x2)
    x2["pred"] = y2_hat

    # ---- forecast day 3 ----
    # STM: offset i + 3 loads the day-3 target. Reusing i + 2 here would score
    # the day-3 prediction against the day-2 actuals.
    test_data3 = build_testdata(test_data2, y2_hat, i + 3)
    for vital, feats in (("temperature", x2_temperature), ("bloodPressure", x2_bloodPressure), ("o2Saturation", x2_o2Saturation)):
        test_data3[vital + "5"] = feats["pred"]
        test_data3[vital + "_3days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"]) / 3
        test_data3[vital + "_6days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"] + feats[vital + "3"] + feats[vital + "2"] + feats[vital + "1"]) / 6
    x3 = test_data3.iloc[:, 1:][cols].copy()
    y3 = test_data3.iloc[:, 0]
    y3_hat = m_lgb.predict(x3)
    x3["pred"] = y3_hat

    # ---- collect results ----
    y_list.extend(list(y1))
    y_list.extend(list(y2))
    y_list.extend(list(y3))

    y_hat_list.extend(list(y1_hat))
    y_hat_list.extend(list(y2_hat))
    y_hat_list.extend(list(y3_hat))

    _y1 += list(y1)
    _y2 += list(y2)
    _y3 += list(y3)

    _y1_ += list(y1_hat)
    _y2_ += list(y2_hat)
    _y3_ += list(y3_hat)

    # Per-sample squared errors for all three horizons (Series.append was removed in pandas 2.0).
    mse_list = pd.concat([mse_list, (y1 - x1["pred"]) ** 2, (y2 - x2["pred"]) ** 2, (y3 - x3["pred"]) ** 2])

    # MSE of this iteration pooled over the three horizons, weighted by frame length.
    mse.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))
    


In [None]:
# xgboost

# Accumulators filled by the evaluation loop below.
mse_xgboost = []                  # one pooled MSE per loop iteration
y_list_xgboost = []               # actual STM values, all horizons
y_hat_list_xgboost = []           # predicted STM values, all horizons
y_list_bi_xgboost = []            # boolean flags from consecutive actual values
y_hat_list_bi_xgboost = []        # the same flags computed from predictions
y_list_bi_xgboost_3day = []       # boolean flags against the 3-day mean of actuals
y_hat_list_bi_xgboost_3day = []   # the same flags against the 3-day mean of predictions
# Explicit dtype: a bare pd.Series() warns and its default dtype differs between pandas versions.
mse_list_xgboost = pd.Series(dtype=float)
train_data = pd.DataFrame()
train_data_temperature = pd.DataFrame()
train_data_bloodPressure = pd.DataFrame()
train_data_o2Saturation = pd.DataFrame()

# Actual (_yN) and predicted (_yN_) values, kept separately per forecast day N.
_y1 = []
_y1_ = []
_y2 = []
_y2_ = []
_y3 = []
_y3_ = []

# XGBRegressor parameters for the STM model.
params0 = {
    "max_depth": 3,
    "n_estimators": 50,
    "learning_rate": 0.09,
}

# LightGBM parameters for the temperature and bloodPressure models. The
# original literal listed "metric" and "bagging_freq" twice; Python keeps the
# last occurrence, so only those effective values are written here.
params = {
    "objective": "regression",
    "metric": ["mse"],
    "force_row_wise": True,
    "learning_rate": 0.015,
    "num_iterations": 200,
    "num_leaves": 100,
    "min_child_samples": 30,
    "min_child_weight": 0.001,
    "bagging_fraction": 0.9,
    "bagging_freq": 2,
}

# XGBRegressor parameters for the o2Saturation model.
params_xgboost = {
    "max_depth": 2,
    "n_estimators": 100,
    "learning_rate": 0.1,
}

# Feature columns fed to the STM model (selected from the STM feature frames).
cols = [
    'age', 'gender', 'BMI',
    'temperature5', 'bloodPressure5', 'o2Saturation5',
    'STM5', 'STM4', 'STM3', 'STM2', 'STM1', 'STM0',
    'temperature_3days', 'temperature_6days',
    'bloodPressure_3days', 'bloodPressure_6days',
    'o2Saturation_3days', 'o2Saturation_6days',
    'STM_6days', 'STM_3days',
]

# Expanding-window evaluation of the XGBoost STM model.
# At step i every model is refit on the windows 0..i accumulated so far, then
# STM is forecast for three consecutive days. Days 2 and 3 are recursive: the
# STM lags and the vital-sign features are replaced by the previous step's
# predictions. Column 0 of every feature frame is used as the target.
for i in range(97):
    # ---- train models ----
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    # temperature
    train_data_temperature = pd.concat([train_data_temperature, build_traindata_temperature(i)])
    x_temperature = train_data_temperature.iloc[:, 1:]
    y_temperature = train_data_temperature.iloc[:, 0]
    trainData_temperature = lgb.Dataset(data=x_temperature, label=y_temperature)
    m_lgb_temperature = lgb.train(params, trainData_temperature)
    # bloodPressure
    train_data_bloodPressure = pd.concat([train_data_bloodPressure, build_traindata_bloodPressure(i)])
    x_bloodPressure = train_data_bloodPressure.iloc[:, 1:]
    y_bloodPressure = train_data_bloodPressure.iloc[:, 0]
    trainData_bloodPressure = lgb.Dataset(data=x_bloodPressure, label=y_bloodPressure)
    m_lgb_bloodPressure = lgb.train(params, trainData_bloodPressure)
    # o2Saturation
    train_data_o2Saturation = pd.concat([train_data_o2Saturation, build_traindata_o2Saturation(i)])
    x_o2Saturation = train_data_o2Saturation.iloc[:, 1:]
    y_o2Saturation = train_data_o2Saturation.iloc[:, 0]
    model_o2Saturation = xgb.XGBRegressor(**params_xgboost)
    model_o2Saturation.fit(x_o2Saturation, y_o2Saturation)
    # STM: the current window is built once and reused for y0, the last
    # training target that the boolean flags below compare against.
    train_window = build_traindata(i)
    train_data = pd.concat([train_data, train_window])
    x = train_data.iloc[:, 1:][cols]
    y = train_data.iloc[:, 0]
    y0 = train_window.iloc[:, 0]
    model = xgb.XGBRegressor(**params0)
    model.fit(x, y)

    # ---- forecast day 1 ----
    # Feature frames are copied before the "pred" column is attached so the
    # assignment never targets a slice of the source frame.
    # temperature
    test_data1_temperature = build_testdata0_temperature(i + 1)
    x1_temperature = test_data1_temperature.iloc[:, 1:].copy()
    y1_temperature = test_data1_temperature.iloc[:, 0]
    y1_hat_temperature = m_lgb_temperature.predict(x1_temperature)
    x1_temperature["pred"] = y1_hat_temperature
    # bloodPressure
    test_data1_bloodPressure = build_testdata0_bloodPressure(i + 1)
    x1_bloodPressure = test_data1_bloodPressure.iloc[:, 1:].copy()
    y1_bloodPressure = test_data1_bloodPressure.iloc[:, 0]
    y1_hat_bloodPressure = m_lgb_bloodPressure.predict(x1_bloodPressure)
    x1_bloodPressure["pred"] = y1_hat_bloodPressure
    # o2Saturation
    test_data1_o2Saturation = build_testdata0_o2Saturation(i + 1)
    x1_o2Saturation = test_data1_o2Saturation.iloc[:, 1:].copy()
    y1_o2Saturation = test_data1_o2Saturation.iloc[:, 0]
    y1_hat_o2Saturation = model_o2Saturation.predict(x1_o2Saturation)
    x1_o2Saturation["pred"] = y1_hat_o2Saturation
    # STM
    test_data1 = build_testdata0(i + 1)
    x1 = test_data1.iloc[:, 1:][cols].copy()
    y1 = test_data1.iloc[:, 0]
    y1_hat = model.predict(x1)
    x1["pred"] = y1_hat

    # ---- forecast day 2 ----
    # The vital-sign models must score the rolled day-2 features (x2_*).
    # Re-scoring the day-1 features would just reproduce the day-1 forecast.
    # temperature
    test_data2_temperature = build_testdata_temperature(test_data1_temperature, y1_hat_temperature, i + 2)
    x2_temperature = test_data2_temperature.iloc[:, 1:].copy()
    y2_temperature = test_data2_temperature.iloc[:, 0]
    y2_hat_temperature = m_lgb_temperature.predict(x2_temperature)
    x2_temperature["pred"] = y2_hat_temperature
    # bloodPressure
    test_data2_bloodPressure = build_testdata_bloodPressure(test_data1_bloodPressure, y1_hat_bloodPressure, i + 2)
    x2_bloodPressure = test_data2_bloodPressure.iloc[:, 1:].copy()
    y2_bloodPressure = test_data2_bloodPressure.iloc[:, 0]
    y2_hat_bloodPressure = m_lgb_bloodPressure.predict(x2_bloodPressure)
    x2_bloodPressure["pred"] = y2_hat_bloodPressure
    # o2Saturation
    test_data2_o2Saturation = build_testdata_o2Saturation(test_data1_o2Saturation, y1_hat_o2Saturation, i + 2)
    x2_o2Saturation = test_data2_o2Saturation.iloc[:, 1:].copy()
    y2_o2Saturation = test_data2_o2Saturation.iloc[:, 0]
    y2_hat_o2Saturation = model_o2Saturation.predict(x2_o2Saturation)
    x2_o2Saturation["pred"] = y2_hat_o2Saturation
    # STM: the latest vital-sign value and its 3-/6-day means come from the day-1 forecasts.
    test_data2 = build_testdata(test_data1, y1_hat, i + 2)
    for vital, feats in (("temperature", x1_temperature), ("bloodPressure", x1_bloodPressure), ("o2Saturation", x1_o2Saturation)):
        test_data2[vital + "5"] = feats["pred"]
        test_data2[vital + "_3days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"]) / 3
        test_data2[vital + "_6days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"] + feats[vital + "3"] + feats[vital + "2"] + feats[vital + "1"]) / 6
    x2 = test_data2.iloc[:, 1:][cols].copy()
    y2 = test_data2.iloc[:, 0]
    y2_hat = model.predict(x2)
    x2["pred"] = y2_hat

    # ---- forecast day 3 ----
    # STM: offset i + 3 loads the day-3 target. Reusing i + 2 here would score
    # the day-3 prediction against the day-2 actuals.
    test_data3 = build_testdata(test_data2, y2_hat, i + 3)
    for vital, feats in (("temperature", x2_temperature), ("bloodPressure", x2_bloodPressure), ("o2Saturation", x2_o2Saturation)):
        test_data3[vital + "5"] = feats["pred"]
        test_data3[vital + "_3days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"]) / 3
        test_data3[vital + "_6days"] = (feats["pred"] + feats[vital + "5"] + feats[vital + "4"] + feats[vital + "3"] + feats[vital + "2"] + feats[vital + "1"]) / 6
    x3 = test_data3.iloc[:, 1:][cols].copy()
    y3 = test_data3.iloc[:, 0]
    y3_hat = model.predict(x3)
    x3["pred"] = y3_hat

    # ---- collect results ----
    _y1 += list(y1)
    _y2 += list(y2)
    _y3 += list(y3)

    _y1_ += list(y1_hat)
    _y2_ += list(y2_hat)
    _y3_ += list(y3_hat)

    y_list_xgboost.extend(list(y1))
    y_list_xgboost.extend(list(y2))
    y_list_xgboost.extend(list(y3))
    y_hat_list_xgboost.extend(list(y1_hat))
    y_hat_list_xgboost.extend(list(y2_hat))
    y_hat_list_xgboost.extend(list(y3_hat))

    # Boolean flags: True where the earlier value exceeds the later one.
    y_list_bi_xgboost.extend(np.array(y0) - np.array(y1) > 0)
    y_list_bi_xgboost.extend(np.array(y1) - np.array(y2) > 0)
    y_list_bi_xgboost.extend(np.array(y2) - np.array(y3) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y0) - np.array(y1_hat) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y1_hat) - np.array(y2_hat) > 0)
    y_hat_list_bi_xgboost.extend(np.array(y2_hat) - np.array(y3_hat) > 0)
    # Flags against the mean of the three forecast days. The predicted mean
    # must use y3_hat as its third term (it previously counted y1_hat twice).
    y_list_bi_xgboost_3day.extend(np.array(y0) - (np.array(y1) + np.array(y2) + np.array(y3)) / 3 > 0)
    y_hat_list_bi_xgboost_3day.extend(np.array(y0) - (np.array(y1_hat) + np.array(y2_hat) + np.array(y3_hat)) / 3 > 0)

    # Per-sample squared errors for all three horizons (Series.append was removed in pandas 2.0).
    mse_list_xgboost = pd.concat([mse_list_xgboost, (y1 - x1["pred"]) ** 2, (y2 - x2["pred"]) ** 2, (y3 - x3["pred"]) ** 2])
    # MSE of this iteration pooled over the three horizons, weighted by frame length.
    mse_xgboost.append((mean_squared_error(y1, y1_hat) * len(test_data1) + mean_squared_error(y2, y2_hat) * len(test_data2) + mean_squared_error(y3, y3_hat) * len(test_data3)) / (len(test_data1) + len(test_data2) + len(test_data3)))
    


In [None]:
# STM error summary: one line per forecasting method.
def _pooled_mean(error_groups):
    """Return (sum of every error in every group) / (total number of errors)."""
    grand_total = sum(sum(group) for group in error_groups)
    n_errors = sum(len(group) for group in error_groups)
    return grand_total / n_errors


# Classical baselines: errors are stored as a collection of lists, pooled here.
print("mean : %s" % _pooled_mean(mse_avg_STM))
print("naive : %s" % _pooled_mean(mse_naive_STM))
print("ets : %s" % _pooled_mean(mse_ets_STM))
print("moving average : %s" % _pooled_mean(mse_movingavg_STM))
print("arima(1,1,1) : %s" % _pooled_mean(mse_arima_STM))
print("arima(0,1,1) : %s" % _pooled_mean(mse_arima2_STM))
print("arima(1,1,0) : %s" % _pooled_mean(mse_arima3_STM))

# Tree models: MSE over the flat actual/predicted lists collected in the loops.
lightgbm_mse = mean_squared_error(y_list, y_hat_list)
xgboost_mse = mean_squared_error(y_list_xgboost, y_hat_list_xgboost)
print("lightgbm : %s" % lightgbm_mse)
print("xgboost : %s" % xgboost_mse)


In [None]:
# Share of boolean flags on which the XGBoost predictions agree with the actuals.
print(np.mean(np.array(y_hat_list_bi_xgboost) == np.array(y_list_bi_xgboost)))
# Same agreement rate for the flags built from the three-day mean.
print(np.mean(np.array(y_hat_list_bi_xgboost_3day) == np.array(y_list_bi_xgboost_3day)))
# Coin-flip reference. A dedicated seeded generator keeps the printed baseline
# identical on every Restart & Run All without touching the global `random` state.
coin_rng = random.Random(0)
coin_flips = np.array([coin_rng.choice([True, False]) for _ in y_list_bi_xgboost])
print(np.mean(coin_flips == np.array(y_list_bi_xgboost)))