In [None]:
from darts import TimeSeries
from datetime import datetime

from dateutil.parser import parse
from matplotlib.pylab import rcParams
from tqdm import tqdm_notebook as tqdm
from statsmodels.tsa.stattools import adfuller
from torch.utils.tensorboard import SummaryWriter
from darts.utils.statistics import check_seasonality, plot_acf

import os
import torch
import shutil
import warnings
import itertools
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import statsmodels.api as sm
from darts.metrics import mape
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf


# Notebook-wide display configuration.
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Suppress every Python warning from this point on.
warnings.filterwarnings("ignore")
# Apply the 'fivethirtyeight' matplotlib style sheet to all plots.
plt.style.use('fivethirtyeight')
# Default figure size (width, height) = (15, 5).
rcParams['figure.figsize'] = 15, 5

## 1. 데이터 로드

### 1-1. 데이터 변경 사항

- 날짜 인덱스를 6초 간격으로 변경 (1분 당 10개의 데이터)
- TAG와 상관관계가 낮은 MELT_WEIGHT의 경우 제외
- 무의미한 NUM 제외
- 불량 데이터(TAG가 'NG'인 데이터)는 제외함

### 1-2. 데이터 정보
- 총 658133 개
- 결측값은 제외된 불량 데이터(TAG가 'NG'인 데이터) 구간에 해당함

In [None]:
# Load the raw sensor export.
df = pd.read_csv('./public/data/raw_data.csv')

# Replace the index with a synthetic 6-second timeline; the final generated
# timestamp (the `end` bound itself) is dropped.
df.index = pd.date_range(start='3/4/2020', end='5/1/2020', freq='6S')[:-1]

# Encode the label as a boolean: True where TAG equals 'NG'.
df['TAG'] = df['TAG'].eq('NG')

# Remove the columns that are not used downstream, in one call.
df = df.drop(columns=['STD_DT', 'NUM', 'MELT_WEIGHT'])

# Cast every column to float32, then restore TAG to a boolean column.
df = df.astype(np.float32)
df['TAG'] = df['TAG'].astype(bool)

df.info()
df.describe()

In [None]:
# Training set: rows dated in March whose TAG flag is False.
in_march = df.index.month == 3
not_flagged = ~df['TAG']
index = in_march & not_flagged
train_df = df.loc[index]

# Validation set: rows dated April 1-10 (TAG is not filtered here).
in_april = df.index.month == 4
first_ten_days = df.index.day <= 10
val_index = in_april & first_ten_days
val_df = df.loc[val_index]

In [None]:
def _split_label(frame, label='TAG'):
    """Return ``(features, target)`` for ``frame``.

    ``features`` is a shallow copy of ``frame`` without the ``label`` column;
    ``target`` is a one-column DataFrame holding that column.
    """
    features = frame.copy(deep=False)
    target = features.pop(label).to_frame()
    return features, target


train_x_df, train_y_df = _split_label(train_df)
val_x_df, val_y_df = _split_label(val_df)

train_x_df

### 1-3. 시계열 데이터로 변환

- 불량 데이터를 제외하면서 비게 된 시점은 `fill_missing_dates=True`에 의해 결측값(NaN)으로 채워지므로, 이후 단계에서 결측값 처리가 필요함

In [None]:
def _as_series(frame):
    """Wrap ``frame`` in a darts TimeSeries on a 6-second grid.

    ``fill_missing_dates=True`` asks darts to insert the timestamps that are
    absent from ``frame``'s index.
    """
    return TimeSeries.from_dataframe(frame, freq='6S', fill_missing_dates=True)


# NOTE(review): the *_y_df frames hold a boolean TAG column - confirm that the
# installed darts version accepts non-float values here.
train_x_series = _as_series(train_x_df)
train_y_series = _as_series(train_y_df)
val_x_series = _as_series(val_x_df)
val_y_series = _as_series(val_y_df)

train_x_series

### 1-4. Scaling

In [None]:
from darts.dataprocessing.transformers import Scaler
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, robust_scale

# Min-max scaler wrapped for darts; fitted on the training features only and
# then re-used unchanged for the validation features.
scaler = Scaler(scaler=MinMaxScaler())

# Scale, then cast the result to float32.
train_scaled = scaler.fit_transform(train_x_series).astype(np.float32)
val_scaled = scaler.transform(val_x_series).astype(np.float32)

# Print a dtype / non-null summary for each scaled series.
for scaled_series in (train_scaled, val_scaled):
    scaled_series.pd_dataframe().info()

## 2. 모델 최초 학습

### 2-1. 결측치 제외

In [None]:
from darts.utils.missing_values import extract_subseries

# Number of past steps the model reads per sample (used as input_chunk_length).
window_size = 20

# Split the scaled training series into its sub-series.
all_subseries = extract_subseries(train_scaled)

# Keep only sub-series strictly longer than the input window.
train_scaled_list = [
    subseries for subseries in all_subseries
    if len(subseries) >= window_size + 1
]

train_scaled_list

In [None]:
from darts.models import TransformerModel, RNNModel, TCNModel, NBEATSModel, NHiTSModel
from darts.utils.likelihood_models import LaplaceLikelihood

# Name re-used for the checkpoint directory and file, and the base epoch count.
model_name = 'NHiTSModel'
epoch = 10

# Options forwarded to the PyTorch Lightning trainer: GPU accelerator,
# device list [5].
trainer_options = {
    "accelerator": "gpu",
    "devices": [5]
}

# N-HiTS hyper-parameters: read `window_size` past steps, predict one step.
nhits_settings = dict(
    input_chunk_length=window_size,
    output_chunk_length=1,
    num_stacks=3,
    num_blocks=1,
    num_layers=2,
    layer_widths=512,
    pooling_kernel_sizes=None,
    n_freq_downsample=None,
    dropout=0.1,
    activation='ReLU',
    n_epochs=epoch,
    pl_trainer_kwargs=trainer_options,
)

model = NHiTSModel(**nhits_settings)

In [None]:
def createFolder(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Best-effort helper: a failure is reported on stdout instead of being
    raised, so the caller keeps running.

    Args:
        directory: Path of the directory to create (str or path-like).

    Returns:
        None.
    """
    try:
        # exist_ok=True makes the call idempotent without a separate
        # exists() check, so there is no window between check and creation.
        # It still raises when the path exists but is not a directory, which
        # is reported below instead of being silently accepted.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print ('Error: Creating directory. ' +  str(directory))

In [None]:
# Checkpoint location shared by every iteration below.
model_dir = './models/'+ model_name +'/'
checkpoint_path = f'./models/{model_name}/{model_name}.pt'
createFolder(model_dir)

# Fit on one sub-series at a time, saving after each fit and reloading the
# saved checkpoint before every fit except the first.
# NOTE(review): darts `fit` also accepts a sequence of series; confirm whether
# sequential per-sub-series fitting is intended here.
for i, x in enumerate(train_scaled_list):
    if i > 0:
        model = model.load(checkpoint_path)
    # The epoch argument grows by `epoch` with each sub-series.
    epochs_for_fit = epoch * (i + 1)
    model.fit(series=x, val_series=val_scaled, epochs=epochs_for_fit)
    model.save(checkpoint_path)