In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchsummaryX import summary
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
import warnings
import os
warnings.filterwarnings(action='ignore')

In [11]:
# Run on the GPU when torch reports one as available; otherwise use the CPU.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda") if USE_CUDA else torch.device("cpu")

# Module-level MinMaxScaler instance (normal() fits it on each call).
scaler = MinMaxScaler()

# Relative location of the data directory.
path = '../data/'

In [None]:
def normal(data):
    """Fit the module-level ``scaler`` on ``data`` and return the scaled result.

    The scaler is re-fitted on every call, so after the call its fitted
    state reflects only the ``data`` passed here.

    Args:
        data: Input accepted by ``scaler.fit_transform``.

    Returns:
        Whatever ``scaler.fit_transform(data)`` returns.
    """
    return scaler.fit_transform(data)

def renormal(data, scal):
    """Map min-max scaled values back onto the ``[min, max]`` range.

    Args:
        data: Scaled value(s); any object supporting ``*`` and ``+`` with
            the bounds (scalar, array, tensor, ...).
        scal: Indexable whose item 0 is the minimum and item 1 the maximum.

    Returns:
        ``data * (max - min) + min``.
    """
    lower, upper = scal[0], scal[1]
    span = upper - lower
    return data * span + lower

In [None]:
def train(model, days, train_loader, loss_fn, optimizer, epoch):
    """Run one training pass over ``train_loader`` and return the mean batch loss.

    Args:
        model: Callable as ``model(batch, days)``; switched to train mode.
        days: Passed unchanged as the second argument of every forward call.
        train_loader: Iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes ``.dataset``.
        loss_fn: Callable as ``loss_fn(output, target)`` returning a tensor.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress message.

    Returns:
        Sum over batches of ``loss.item() / len(train_loader)``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()

    running_loss = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs, days), labels)
        batch_loss.backward()
        optimizer.step()

        # Progress line on every 30th batch (including the first).
        if step % 30 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * len(inputs), len(train_loader.dataset),
                100. * step / len(train_loader), batch_loss.item()))

        running_loss += batch_loss.item() / len(train_loader)
    return running_loss

In [None]:
def evaluate(model, days, test_loader, loss_fn):
    """Evaluate ``model`` on ``test_loader`` and return loss and fit metrics.

    MSE, RMSE and NSE are computed over the predictions and targets of all
    batches concatenated along dimension 0, not just the final batch.

    Args:
        model: Callable as ``model(batch, days)``; switched to eval mode.
        days: Passed unchanged as the second argument of every forward call.
        test_loader: Iterable of ``(data, target)`` batches exposing
            ``.dataset`` (its length is the loss divisor).
        loss_fn: Callable as ``loss_fn(output, target)`` returning a tensor.

    Returns:
        Tuple ``(test_loss, mse, rmse, nse)``:
            test_loss: sum of the per-batch loss values divided by
                ``len(test_loader.dataset)``.
            mse: ``mean_squared_error(targets, predictions)``.
            rmse: ``np.sqrt(mse)``.
            nse: 0-dim tensor, ``1 - sum((t - p)^2) / sum((t - mean(t))^2)``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    test_loss = 0
    batch_preds = []
    batch_targets = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data, days)
            # Accumulate the per-batch loss.
            test_loss += loss_fn(output, target).item()
            # Keep every batch so the metrics below cover the whole loader.
            batch_preds.append(output)
            batch_targets.append(target)
    # NOTE(review): the divisor is the number of samples, whereas train()
    # divides by the number of batches. Which one is right depends on the
    # reduction used by loss_fn (not visible here) -- confirm with the caller.
    test_loss /= len(test_loader.dataset)

    pred = torch.cat(batch_preds)
    target = torch.cat(batch_targets)
    mse = mean_squared_error(target.cpu().detach().numpy(), pred.cpu().detach().numpy())
    rmse = np.sqrt(mse)
    mean = torch.mean(target)
    nse = 1. - torch.sum(torch.square(target - pred)) / torch.sum(torch.square(target - mean))
    return test_loss, mse, rmse, nse

In [None]:
def save_model(model, path, name):
    """Save ``model`` to ``./model/<path>/<name>.pt`` with ``torch.save``.

    The target directory is created when it does not exist yet, so saving
    into a new sub-folder does not fail.

    Args:
        model: Object handed to ``torch.save`` as-is (the whole object is
            serialized, not only a state dict).
        path: Sub-directory below ``./model/``.
        name: File name without the ``.pt`` extension.
    """
    target = './model/' + path + '/' + name + '.pt'
    # torch.save does not create missing parent directories.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    torch.save(model, target)
    print(name+'.pt model save!!')

In [12]:
def load_model(path, name):
    """Load ``<path>/<name>.pt`` with ``torch.load``, mapped onto the CPU.

    Args:
        path: Directory containing the checkpoint file.
        name: File name without the ``.pt`` extension.

    Returns:
        The object returned by ``torch.load``.
    """
    checkpoint_file = os.path.join(path, name + ".pt")
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    return torch.load(checkpoint_file, map_location=torch.device('cpu'))

In [2]:
def result_view(model_result):
    """Draw one figure per entry of ``model_result`` with its metric curves.

    Each entry is indexed by position and must provide the keys
    ``'feature'``, ``'epochs'``, ``'train_loss'``, ``'test_loss'``,
    ``'test_rmse'`` and ``'test_nse'``. The figure title is
    ``entry['feature'][0]`` and every metric is plotted against
    ``entry['epochs']``.
    """
    curve_names = ('train_loss', 'test_loss', 'test_rmse', 'test_nse')
    for position in range(len(model_result)):
        plt.figure(figsize=(10, 4))
        plt.title(model_result[position]['feature'][0])
        for curve in curve_names:
            plt.plot(model_result[position]['epochs'],
                     model_result[position][curve],
                     label=curve)
        plt.legend()
        plt.show()

In [3]:
def view_model(model, size):
    """Print a ``summary`` of ``model`` for a single zero-filled input.

    The dummy input has shape ``(1, size[0], size[1])`` and dtype float32.
    It is created on the device of the model's first parameter so the call
    also works on CPU-only machines; a model without parameters falls back
    to CUDA when available, else CPU.

    Args:
        model: Module passed to ``summary``.
        size: Indexable providing the two trailing input dimensions.
    """
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Allocate directly on the target device instead of copy-constructing
    # a tensor from another tensor.
    aa = torch.zeros(1, size[0], size[1], dtype=torch.float32, device=device)
    summary(model, aa)

In [4]:
def view_model2(model, size, days):
    """Print a ``summary`` of ``model`` for a zero-filled input plus ``days``.

    The dummy input has shape ``(1, size[0], size[1])`` and dtype float32.
    It is created on the device of the model's first parameter so the call
    also works on CPU-only machines; a model without parameters falls back
    to CUDA when available, else CPU.

    Args:
        model: Module passed to ``summary``.
        size: Indexable providing the two trailing input dimensions.
        days: Forwarded to ``summary`` as the extra positional argument.
    """
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Allocate directly on the target device instead of copy-constructing
    # a tensor from another tensor.
    aa = torch.zeros(1, size[0], size[1], dtype=torch.float32, device=device)
    summary(model, aa, days)

In [5]:
def result_ck(result):
    """Stack the last row of every DataFrame in ``result`` into one frame.

    Args:
        result: Sequence of DataFrames.

    Returns:
        DataFrame holding ``frame.tail(1)`` of each input, in input order
        and with the original index labels. An empty ``result`` yields an
        empty DataFrame.
    """
    last_rows = [frame.tail(1) for frame in result]
    if not last_rows:
        return pd.DataFrame()
    # One concat over the collected rows instead of growing a frame in a loop.
    return pd.concat(last_rows, axis=0)

In [6]:
def get_m_list(tmp):
    """Translate each row's (drop_rate, epochs, batch) into an index 0-11.

    The index is ``4 * d + 2 * e + b`` where ``d``, ``e`` and ``b`` are the
    positions of the row's values in ``(0, 0.2, 0.5)``, ``(15, 30)`` and
    ``(32, 64)`` respectively. Rows with any other value are skipped, so
    the result can be shorter than ``tmp``.

    Args:
        tmp: DataFrame with ``'drop_rate'``, ``'epochs'`` and ``'batch'``
            columns.

    Returns:
        List of integer indices, in row order.
    """
    def position(value, choices):
        # Position of the first choice equal to value; None when none match.
        for pos, choice in enumerate(choices):
            if value == choice:
                return pos
        return None

    m_list = []
    for row_no in range(len(tmp)):
        row = tmp.iloc[row_no]

        drop_pos = position(row['drop_rate'], (0, 0.2, 0.5))
        if drop_pos is None:
            continue
        epoch_pos = position(row['epochs'], (15, 30))
        if epoch_pos is None:
            continue
        batch_pos = position(row['batch'], (32, 64))
        if batch_pos is None:
            continue

        m_list.append(4 * drop_pos + 2 * epoch_pos + batch_pos)
    return m_list

In [7]:
# for cnn
def get_m_list2(tmp):
    """Translate each row's (drop_rate, batch) into an index 0-5.

    The index is ``2 * d + b`` where ``d`` and ``b`` are the positions of
    the row's values in ``(0, 0.2, 0.5)`` and ``(32, 64)`` respectively.
    Rows with any other value are skipped.

    Args:
        tmp: DataFrame with ``'drop_rate'`` and ``'batch'`` columns.

    Returns:
        List of integer indices, in row order.
    """
    def position(value, choices):
        # Position of the first choice equal to value; None when none match.
        for pos, choice in enumerate(choices):
            if value == choice:
                return pos
        return None

    m_list = []
    for row_no in range(len(tmp)):
        row = tmp.iloc[row_no]

        drop_pos = position(row['drop_rate'], (0, 0.2, 0.5))
        if drop_pos is None:
            continue
        batch_pos = position(row['batch'], (32, 64))
        if batch_pos is None:
            continue

        m_list.append(2 * drop_pos + batch_pos)
    return m_list

In [8]:
def max_option(t, w_list):
    """Pick, for every feature in ``w_list``, the row of ``t`` with the highest ``test_nse``.

    Args:
        t: DataFrame with ``'feature'`` and ``'test_nse'`` columns.
        w_list: Feature values to look up, in output order.

    Returns:
        DataFrame with at most one row per feature (none for a feature
        that is absent from ``t``), keeping the original index labels.
        An empty ``w_list`` yields an empty frame with the columns of ``t``.
    """
    best_rows = [
        # NaN scores are sorted to the front so that tail(1) returns the
        # largest real score; a NaN row is chosen only if all are NaN.
        t[t['feature'] == feature].sort_values('test_nse', na_position='first').tail(1)
        for feature in w_list
    ]
    if not best_rows:
        return t.iloc[0:0]
    return pd.concat(best_rows)

In [9]:
# Detect outliers with a Q test
def dixon_q_test(ip):
    """Replace values flagged by a windowed Q-style gap test with NaN.

    Every column except the last three is scanned in consecutive windows of
    24 rows. Within a window the non-missing values are sorted; each value's
    gap to the next larger value (for the largest value: the gap to the
    previous one) is divided by the window's range, and values whose ratio
    exceeds 0.321 are set to NaN.

    A window is skipped when 10 or more of its 24 slots are missing. Rows
    absent from a short trailing window count as missing, so an input whose
    length is not a multiple of 24 no longer raises. Windows whose range is
    zero or not finite are skipped as well.

    Args:
        ip: DataFrame to check; it is not modified.

    Returns:
        A copy of ``ip`` with the flagged cells replaced by NaN.
    """
    window = 24
    # Critical value; per the original author's note: Ministry of
    # Environment standard for 24 samples.
    q = 0.321
    max_missing = 10

    data = ip.copy()
    for col in data.columns[:-3]:
        idx_list = []
        for start in range(0, len(data), window):
            # Missing values are dropped before sorting so they cannot mask
            # the comparison of the largest valid value with its neighbour.
            valid = data[col].iloc[start:start + window].dropna().sort_values()
            if window - len(valid) >= max_missing:
                continue
            value_range = valid.iloc[-1] - valid.iloc[0]
            if not np.isfinite(value_range) or value_range == 0:
                continue
            gaps = np.diff(valid.to_numpy(dtype=float))
            # NOTE(review): a large gap also flags the value just below it
            # (not only the extreme value). Kept as in the original rule --
            # confirm this is intended.
            test_stats = np.append(gaps, gaps[-1]) / value_range
            idx_list.extend(valid.index[test_stats > q])
        data.loc[idx_list, col] = np.nan
    return data