In [15]:
import os
import pandas as pd
import numpy as np
import random
import glob

import torch
import torch.nn as nn
import torch.jit as jit
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm as tq
import warnings
warnings.filterwarnings(action='ignore')

'''functions'''
def sigmoid(x):
    return 1.0/(1+torch.exp(-x))

def clip_grad(grads, max_norm):
    total_norm = 0
    for grad in grads:
        total_norm += torch.sum(grad ** 2)
    total_norm = torch.sqrt(total_norm)

    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for grad in grads:
            grad *= rate
            
def load_batch(X, Y, batch_size, shuffle=True):
    """
    Generates batches with the remainder dropped.

    Do NOT modify this function
    """
    if shuffle:
        permutation = np.random.permutation(X.shape[0])
        X = X[permutation, :]
        Y = Y[permutation, :]
    num_steps = int(X.shape[0])//batch_size
    step = 0
    while step<num_steps:
        X_batch = X[batch_size*step:batch_size*(step+1)]
        Y_batch = Y[batch_size*step:batch_size*(step+1)]
        step+=1
        yield X_batch, Y_batch

# def to_gpu(x):
#     import cupy
#     if type(x) == cupy.ndarray:
#         return x
#     return cupy.asarray(x)

'''loss function'''
class MSELoss:
    def __init__(self):
        self.cache  = None
        self.loss   = None

    def forward(self, yhat, ygt):
        self.cache = yhat-ygt
        self.loss  = torch.sum(torch.sqrt(self.cache**2))
        return self.loss

    def backward(self, dout=1):
        dyhat = dout * 2 * self.cache
        return dyhat # (N, H)    
    
    
# class RMSELoss(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.mse = nn.MSELoss()
        
#     def forward(self,yhat,y):
#         return torch.sqrt(self.mse(yhat,y))
    
    
'''optimizer'''
class custom_SGD:
    def __init__(self, lr=0.01, clip=True, max_norm=10):
        self.lr = lr
        self.clip = clip
        self.max_norm = max_norm
        
    def update(self, params, grads):
        if self.clip:
            clip_grad(grads, self.max_norm)

        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


In [16]:
class Linear:
    def __init__(self, W, b, device):
        '''
        shapes
        x : (N, I)  /  W : (I, O)  /  b : (O, )
        '''
        self.device = device
        self.params = [W.to(device), b.to(device)]
        self.grads  = [
            torch.zeros_like(W).to(device),
            torch.zeros_like(b).to(device)
        ]
        self.x   = None
        self.out = None
    
    
    def forward(self, x):
        x = x.to(device)
        W, b = self.params[0].to(device), self.params[1].to(device)
        self.x = x
        self.out = (x @ W + b).to(device)
        return self.out

    
    def backward(self, dout):
        '''
        shapes - dout : (N, O)
        dx : (N, I)  /  dW : (I, O)  /  db : (O, )
        '''
        dout = dout.to(device)
        W, b = self.params[0].to(device), self.params[1].to(device)
        dW = (self.x.T @ dout).to(device)
        db = torch.sum(dout, axis=0).to(device)
        dx = (dout @ W.T).to(device)
        self.grads[0][...] = dW
        self.grads[1][...] = db
        return dx


class LSTMCell:
    def __init__(self, Wx, Wh, b, device):
        self.device = device
        self.params = [Wx.to(self.device), Wh.to(self.device), b.to(self.device)]
        self.grads  = [
            torch.zeros_like(Wx).to(device),
            torch.zeros_like(Wh).to(device),
            torch.zeros_like(b).to(device)
        ]
        
        self.cache = None
    
    
    def forward(self, x, h_prev, c_prev):
        global f,g,i,A, x_copy ,Wx
        Wx, Wh, b = self.params
        if x.ndim==1:
            x = torch.reshape(x, newshape=(1, x.size))
        x = x.to(device)
        x_copy = x
        Wx_copy = Wx
        h_prev = h_prev.to(device)
        c_prev = c_prev.to(device)
        
        N, H = h_prev.shape
        A = ((x@Wx) + (h_prev@Wh) + b).to(device)

        f = A[:, :H]
        g = A[:, H:2*H]
        i = A[:, 2*H:3*H]
        o = A[:, 3*H:]

        f = sigmoid(f)
        g = torch.tanh(g)
        i = sigmoid(i)
        o = sigmoid(o)

        c_next = f * c_prev + g * i
        h_next = o * torch.tanh(c_next)

        self.cache = (x, h_prev, c_prev, i, f, g, o, c_next)
        return h_next, c_next

    
    def backward(self, dh_next, dc_next):
        Wx, Wh, b = self.params
        x, h_prev, c_prev, i,f,g,o,c_next = self.cache # computed in previous time step

        tanh_c_next  = torch.tanh(c_next)
        dtanh_c_next = dh_next * o
        do           = dh_next * tanh_c_next * o * (1-o)
        
        dsum = dc_next + dtanh_c_next*(1-tanh_c_next**2)

        dc_prev = dsum*f
        df = dsum * c_prev * f * (1-f)
        dg = dsum * i * (1-g**2)
        di = dsum * g * i * (1-i)

        dA  = torch.hstack((df,dg,di,do)).to(device)
        dWh = h_prev.T @ dA
        dWx = x.T @ dA
        db  = torch.sum(dA, axis=0)

        self.grads[0][...] = dWx
        self.grads[1][...] = dWh
        self.grads[2][...] = db

        dx      = dA @ Wx.T # (N, 4H) @ (4H, I) -> (N, I)
        dh_prev = dA @ Wh.T

        return dx, dh_prev, dc_prev

        
class LSTM:
    def __init__(self, Wx, Wh, b, device, stateful=False):
        self.params = [Wx, Wh, b]
        self.grads = [
            torch.zeros_like(Wx),
            torch.zeros_like(Wh),
            torch.zeros_like(b)
        ]
        self.layers = None

        self.h, self.c = None, None
        self.hs = None
        self.dh = None
        self.T  = None
        self.stateful = stateful


    def forward(self, xs):
        Wx, Wh, b = self.params
        N, T, D = xs.shape
        self.T  = T
        H = Wh.shape[0]
        
        self.layers = []
        hs = torch.empty(size=(N,T,H))
        
        if not self.stateful or self.h is None:
            h = torch.zeros(size=(N,H))
        if not self.stateful or self.c is None:
            c = torch.zeros(size=(N,H))
        
        for t in range(T):
            layer = LSTMCell(*self.params, device)
            h, c = layer.forward(xs[:, t, :], h, c)
            hs[:, t, :] = h

            self.layers.append(layer)

        self.hs = hs
        return self.hs

    
    def backward(self, dh_final): # loss from another layer
        Wx, Wh, b = self.params
        N, H      = dh_final.shape
        I         = Wx.shape[0]
        T         = self.T

        dxs = torch.empty(size=(N,T,I))
        dh_cur = dh_final
        dh_prev, dc = 0,0

        grads = [0,0,0]
        for t in reversed(range(T)):
            layer = self.layers[t]
            dx, dh_prev, dc = layer.backward(dh_cur, dc)
            dxs[:, t, :] = dx
            dh_cur = dh_prev + dh_cur

            for idx, grad in enumerate(layer.grads):
                grads[idx] += grad
        
        for idx, grad in enumerate(grads):
            self.grads[idx][...] = grad

        return dxs

### data preprocessing

- timestep이 1440이나 되는데, 굳이 1분단위로 끊어서 볼 이유가 없다!
  - 1시간 단위로 time step 끊어볼 것
  - seasonality 처리
- variable 정리 (feature engineering)
  - 겹치는 variables 하나로 합치기 (ex. 
  - 불필요한 variables 제거 (외부온도, 습도 등)

In [3]:
def preprocessing(X_input, Y_input, X_container, Y_container):    
    y_maxlen = 0
    for x,y in tq(zip(X_input, Y_input)):
        curx = pd.read_csv(x).drop(columns = ["시간"]).fillna(0).values
        x_len = len(curx)//1440
        x_temp = []
        for idx in range(x_len):
            x_temp.append(curx[1440*idx : 1440*(idx+1)])
        x_temp = torch.Tensor(x_temp)
        X_container.append(x_temp)
        y_temp = torch.Tensor(pd.read_csv(y)["rate"].fillna(0).values)
        y_temp = y_temp.reshape(y_temp.size()[0], 1)
        Y_container.append(y_temp)
    return;

all_input_list  = sorted(glob.glob("train_input/*.csv"))
all_target_list = sorted(glob.glob("train_target/*.csv"))
# for training
train_input_list = all_input_list[:50]
train_target_list = all_target_list[:50]
# for validation
test_input_list = all_input_list[50:]
test_target_list = all_target_list[50:]

X_train = []; Y_train = []
X_val  = []; Y_val  = []

# call function
preprocessing(train_input_list, train_target_list, X_train, Y_train)
preprocessing(test_input_list, test_target_list, X_val, Y_val)

# stack X, Y data
X_train = torch.vstack(X_train)
Y_train = torch.vstack(Y_train)
X_val  = torch.vstack(X_val)
Y_val  = np.vstack(Y_val)

winter = pd.read_csv(train_input_list[28])
summer = pd.read_csv(train_input_list[19])
print(f"X_train_shape : {X_train.shape} | Y_train_shape : {Y_train.shape}")
winter.columns

0it [00:00, ?it/s]

0it [00:00, ?it/s]

X_train_shape : torch.Size([1607, 1440, 37]) | Y_train_shape : torch.Size([1607, 1])


Index(['시간', '내부온도관측치', '내부습도관측치', 'CO2관측치', 'EC관측치', '외부온도관측치', '외부습도관측치',
       '펌프상태', '펌프작동남은시간', '최근분무량', '일간누적분무량', '냉방상태', '냉방작동남은시간', '난방상태',
       '난방작동남은시간', '내부유동팬상태', '내부유동팬작동남은시간', '외부환기팬상태', '외부환기팬작동남은시간',
       '화이트 LED상태', '화이트 LED작동남은시간', '화이트 LED동작강도', '레드 LED상태', '레드 LED작동남은시간',
       '레드 LED동작강도', '블루 LED상태', '블루 LED작동남은시간', '블루 LED동작강도', '카메라상태', '냉방온도',
       '난방온도', '기준온도', '난방부하', '냉방부하', '총추정광량', '백색광추정광량', '적색광추정광량',
       '청색광추정광량'],
      dtype='object')

In [4]:
target_columns = [
    "시간", "내부온도관측치", "내부습도관측치", "CO2"
    
]

In [5]:
winter.head(5)

Unnamed: 0,시간,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,펌프작동남은시간,최근분무량,...,카메라상태,냉방온도,난방온도,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량
0,2022-01-27 00:00:00,24.1,65.300003,811.0,0.698426,24.66,34.720001,0,0,0.0,...,0,16.0,14.0,15.0,0.0,50.500002,0.0,0.0,0.0,0.0
1,2022-01-27 00:01:00,24.1,65.400002,808.0,0.699099,24.94,34.480002,0,0,0.0,...,0,16.0,14.0,15.0,0.0,50.500002,0.0,0.0,0.0,0.0
2,2022-01-27 00:02:00,24.1,65.400002,810.0,0.698426,25.28,33.82,0,0,0.0,...,0,16.0,14.0,15.0,0.0,50.500002,0.0,0.0,0.0,0.0
3,2022-01-27 00:03:00,24.1,65.400002,815.0,0.698426,25.58,33.340001,0,0,0.0,...,0,16.0,14.0,15.0,0.0,50.500002,0.0,0.0,0.0,0.0
4,2022-01-27 00:04:00,24.1,65.400002,807.0,0.699099,25.859999,33.040001,0,0,0.0,...,0,16.0,14.0,15.0,0.0,50.500002,0.0,0.0,0.0,0.0


In [23]:
class mymodel():
    def __init__(self, input_dim, hidden_dim, output_dim, optimizer, device):
        self.I = input_dim
        self.H = hidden_dim
        self.O = output_dim
        self.device = device
        
        # initialization
        lstm_Wx = torch.Tensor(self.I, 4*self.H).uniform_(-1/np.sqrt(self.I), 1/np.sqrt(self.I))
        lstm_Wh = torch.Tensor(self.H, 4*self.H).uniform_(-1/np.sqrt(self.H), 1/np.sqrt(self.H))
        lstm_b = torch.zeros(size=(4 * self.H,))
        
        linear_W = torch.Tensor(self.H, self.O).uniform_(-1/np.sqrt(self.H), 1/np.sqrt(self.H))
        linear_b = torch.zeros(size=(self.O,))
        
        # layers
        self.layers = [
            LSTM(lstm_Wx, lstm_Wh, lstm_b, device=self.device, stateful=True),
            Linear(linear_W, linear_b, self.device)
        ]
        self.loss_layer = MSELoss()
        self.lstm   = self.layers[0]
        self.linear = self.layers[1]
        self.optimizer = optimizer

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
            
        self.yhat = None
        self.xs = None

    def predict(self, xs):
        self.xs = xs
        lstm_output = self.lstm.forward(self.xs)
        lstm_yhat   = lstm_output[:, -1, :] # last hidden state        
        yhat = self.linear.forward(lstm_yhat)
        return yhat

    def forward(self, xs, ygt):
        xs = xs.to(self.device)
        ygt = ygt.to(self.device)
        self.xs = xs.to(self.device)
        self.yhat = self.predict(self.xs).to(self.device)
        loss = self.loss_layer.forward(self.yhat, ygt).to(self.device)
        return loss

    def backward(self, dout=1):
        dyhat    = self.loss_layer.backward().to(self.device)
        dh_final = self.linear.backward(dyhat).to(self.device)
        self.lstm.backward(dh_final)
        return
    
    def update(self):
        for layer in reversed(self.layers):
            self.optimizer.update(layer.params, layer.grads)
    
    def train(self, X_train, Y_train, X_val, Y_val,lr, n_epochs, batch_size):        
        for epoch in tq(range(n_epochs)):
            train_loss = 0.0
            len_batch  = 0
            for X_batch, Y_batch in load_batch(X_train, Y_train, batch_size):
                len_batch +=1
                train_loss += self.__step(X_batch, Y_batch)
            train_loss /= len_batch
            print(f"EPOCH {epoch+1} train loss : {train_loss:.4f}")
                        
                
    def __step(self, X_batch, Y_batch):
        # forward step
        loss = self.forward(X_batch, Y_batch)
        # backward step
        self.backward()
        # update
        self.update()
        return loss
            
    
        

In [24]:
'''parameters'''
learning_rate = 0.007
n_epochs = 30
batch_size = 64
max_norm = 10.0
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"current device : {device}")

print(f"X_train shape : {X_train.shape} | Y_train shape :{Y_train.shape}")

N = batch_size
T = X_train.shape[1]
I = X_train.shape[2]
H = 256
O = Y_train.shape[1]


'''model setting'''
optim = custom_SGD(lr=learning_rate, clip=True, max_norm=max_norm)

model = mymodel(
    input_dim=I,
    hidden_dim=H,
    output_dim=O,
    optimizer=optim,
    device=device
)


current device : cuda
X_train shape : torch.Size([1607, 1440, 37]) | Y_train shape :torch.Size([1607, 1])


### train model

In [25]:
model.train(
    X_train=X_train[:400],
    Y_train=Y_train[:400],
    X_val=X_val,
    Y_val=Y_val,
    lr=learning_rate,
    n_epochs=n_epochs,
    batch_size=N
)

  0%|          | 0/30 [00:00<?, ?it/s]

EPOCH 1 train loss : 24.3252
EPOCH 2 train loss : 18.2241
EPOCH 3 train loss : 18.5976
EPOCH 4 train loss : 17.6601
EPOCH 5 train loss : 19.3855
EPOCH 6 train loss : 16.9328
EPOCH 7 train loss : 17.9755
EPOCH 8 train loss : 17.9139
EPOCH 9 train loss : 17.9045
EPOCH 10 train loss : 17.7653
EPOCH 11 train loss : 18.2333
EPOCH 12 train loss : 18.8732
EPOCH 13 train loss : 18.1337
EPOCH 14 train loss : 18.3313
EPOCH 15 train loss : 18.2875
EPOCH 16 train loss : 18.5967
EPOCH 17 train loss : 18.6147
EPOCH 18 train loss : 17.8838
EPOCH 19 train loss : 18.9357
EPOCH 20 train loss : 18.5086
EPOCH 21 train loss : 17.8950
EPOCH 22 train loss : 18.2739
EPOCH 23 train loss : 18.9428
EPOCH 24 train loss : 19.5383
EPOCH 25 train loss : 18.5862
EPOCH 26 train loss : 18.9273
EPOCH 27 train loss : 18.6373
EPOCH 28 train loss : 18.3074
EPOCH 29 train loss : 18.9113
EPOCH 30 train loss : 17.8283
