In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
from functools import reduce
import seaborn as sns; sns.set(rc={'figure.figsize':(15,15)})
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:postgres@127.0.0.1:5555/mimic')
scaler = MinMaxScaler()



In [2]:
sql = """
SELECT 
    EXTRACT(YEAR FROM gen_dischtime) AS year,
    EXTRACT(MONTH FROM gen_dischtime) AS month,
    EXTRACT(DOW FROM gen_dischtime) AS dow,
    EXTRACT(DOY FROM gen_dischtime) AS doy,
    COUNT(DISTINCT subject_id) as y
FROM mimiciii.patient_flow
GROUP BY 
    EXTRACT(YEAR FROM gen_dischtime),
    EXTRACT(MONTH FROM gen_dischtime),
    EXTRACT(DOW FROM gen_dischtime),
    EXTRACT(DOY FROM gen_dischtime)
ORDER BY year, doy;
"""

In [3]:
df = pd.read_sql(sql, engine)
df['wbf'] = df.y.shift(7)
df['ybf'] = df.y.shift(365)
df = df.dropna().reset_index(drop=True)

scaled = scaler.fit_transform(df[['wbf', 'ybf', 'y']].values)

df['wbf'] = scaled[:, 0]
df['ybf'] = scaled[:, 1]
df['y'] = scaled[:, 2]
df.year = df.year.astype(int).astype(str)
df.month = df.month.astype(int).astype(str)
df.dow = df.dow.astype(int)
df.doy = df.doy.astype(int)


In [4]:
df.head()

Unnamed: 0,year,month,dow,doy,y,wbf,ybf
0,2002,1,2,1,0.32,0.36,0.28
1,2002,1,3,2,0.4,0.16,0.48
2,2002,1,4,3,0.56,0.36,0.4
3,2002,1,5,4,0.28,0.48,0.84
4,2002,1,6,5,0.32,0.44,0.36


## 퇴원환자수 예측에 필요한 변수는 무엇일까?

In [5]:
prefix = ['year', 'month']
data_frames = [
    pd.get_dummies(df[prefix], prefix=prefix),
    np.sin(df.doy * np.pi / 180),
    np.cos(df.doy * np.pi / 180),
    np.sin(df.dow * np.pi / 180),
    np.cos(df.dow * np.pi / 180),
    df[['wbf', 'ybf', 'y']]
]
df_merged = reduce(lambda  left,right: pd.merge(left, right, how='inner', left_index=True, right_index=True), 
                   data_frames)



In [6]:
df_merged.shape

(4018, 30)

In [7]:
df_merged.head()

Unnamed: 0,year_2002,year_2003,year_2004,year_2005,year_2006,year_2007,year_2008,year_2009,year_2010,year_2011,...,month_7,month_8,month_9,doy_x,doy_y,dow_x,dow_y,wbf,ybf,y
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0.017452,0.999848,0.034899,0.999391,0.36,0.28,0.32
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0.034899,0.999391,0.052336,0.99863,0.16,0.48,0.4
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0.052336,0.99863,0.069756,0.997564,0.36,0.4,0.56
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0.069756,0.997564,0.087156,0.996195,0.48,0.84,0.28
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0.087156,0.996195,0.104528,0.994522,0.44,0.36,0.32


In [42]:
Y = df.y.values
Y.shape

(4018,)

## 한달 전에 예측한다면?

In [10]:
input_data = df_merged.values

In [23]:
timestep = 30
data_len = df_merged.shape[0]
n_features = df_merged.shape[1]
data_len, timestep, n_features

(4018, 30, 30)

In [35]:
train_size = int(.7 * data_len)
X = []
y = []
for i in range(data_len - timestep - 1):
    t=[]
    for j in range(0,timestep):
        t.append(input_data[[(i + j)], :])
    X.append(t)
    y.append(Y[i + timestep])

In [40]:
X, y= np.array(X), np.array(y)
X_train = X[:train_size] # :train_size + timestep
X_test = X[train_size:]
y_train = y[:train_size]
y_test = y[train_size:]

X_train = X_train.reshape(X_train.shape[0], timestep, n_features)
X_test = X_test.reshape(X_test.shape[0], timestep, n_features)

In [41]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((2812, 30, 30), (1175, 30, 30), (2812,), (1175,))

## 퇴원환자수 예측은 얼마나 정확할까?

In [None]:
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
import tensorflow as tf

In [109]:
# split into train and test sets
values = df_merged.values
training_sample =int( values.shape[0] * 0.7)
train = values[:training_sample, :]
test = values[training_sample:, :]
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]

In [111]:
train_X.shape

(2812, 28)

In [110]:
model_lstm = Sequential()
model_lstm.add(LSTM(75, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
model_lstm.add(LSTM(units=30, return_sequences=True))
model_lstm.add(LSTM(units=30))
model_lstm.add(Dense(units=1))

model_lstm.compile(loss='mae', optimizer='adam')

IndexError: tuple index out of range

## 퇴원환자수 예측에 충분한 기간은 어느정도 일까?