In [None]:
from darts import TimeSeries
from datetime import datetime

from imblearn.over_sampling import SMOTE
from dateutil.parser import parse
from matplotlib.pylab import rcParams
from tqdm import tqdm_notebook as tqdm
from statsmodels.tsa.stattools import adfuller
from torch.utils.tensorboard import SummaryWriter
from darts.utils.statistics import check_seasonality, plot_acf

import os
import torch
import shutil
import warnings
import itertools
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import statsmodels.api as sm
from darts.metrics import mape
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf


# Notebook-wide display configuration.
%matplotlib inline
# NOTE(review): this hides every warning category for the rest of the kernel
# session, including deprecation warnings from the libraries imported above.
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
# Default figure size as (width, height).
rcParams['figure.figsize'] = 15, 5

In [None]:
# Read the raw log, turn the textual TAG column into a boolean flag that is
# True for rows marked 'NG', and discard the columns that are not used as
# model inputs.
df = pd.read_csv('./public/data/raw_data.csv')
df = (
    df
    .assign(TAG=df['TAG'].eq('NG'))
    .drop(columns=['STD_DT', 'NUM', 'MELT_WEIGHT', 'INSP'])
)

# Index the rows on a regular 6-second grid; `[:-1]` leaves out the final
# grid point. The assignment raises if the number of rows in the file does
# not equal the length of this grid.
df.index = pd.date_range(start='3/4/2020', end='5/1/2020', freq='6S')[:-1]

# Store every column as float32, then restore the label column to bool.
df = df.astype(np.float32).astype({'TAG': bool})

df.info()
df.describe()

In [None]:
val_start_day = 25

# Both subsets come from March only: days before `val_start_day` are used for
# training, the remaining March days for validation. Rows from other months
# end up in neither frame.
is_march = df.index.month == 3
is_validation_day = df.index.day >= val_start_day

train_index = is_march & ~is_validation_day
val_index = is_march & is_validation_day

train_df = df.loc[train_index]
val_df = df.loc[val_index]

train_df, val_df

In [None]:
# Separate the TAG label from the feature columns. `pop` is applied to a
# shallow copy, so the column is removed from the feature frame rather than
# from train_df / val_df themselves.
x_train = train_df.copy(deep=False)
y_train = x_train.pop('TAG').to_frame(name='TAG')

x_val = val_df.copy(deep=False)
y_val = x_val.pop('TAG').to_frame(name='TAG')

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the min/max of every feature from the training split only, then apply
# that same mapping to the validation split. Re-fitting on the validation
# data would scale the two splits differently and leave `scaler` holding
# validation statistics. As a consequence validation values are not
# guaranteed to fall inside [0, 1].
scaler = MinMaxScaler()

train_scaled = scaler.fit_transform(x_train)
val_scaled = scaler.transform(x_val)

train_scaled

In [None]:
# Oversample the scaled training rows with SMOTE (fixed seed). The label
# frame is squeezed to a flat array before being handed to the resampler.
# NOTE(review): the resampled rows are later cut into sliding windows of
# consecutive rows -- confirm that windows containing resampled rows are
# intended.
smote = SMOTE(random_state=0)

train_labels_flat = y_train.values.squeeze()
train_scaled_over, train_y_over = smote.fit_resample(train_scaled, train_labels_flat)

train_scaled_over.shape

In [None]:
def make_dataset(data, label, window_size):
    """Cut aligned features and labels into overlapping fixed-length windows.

    Window ``i`` holds rows ``i`` to ``i + window_size - 1`` of both inputs,
    and consecutive windows are shifted by one row. Every complete window is
    returned, including the one that ends on the last row.

    Args:
        data: Row-indexed features, either a pandas DataFrame/Series or an
            array-like with one or two dimensions.
        label: Labels aligned row-for-row with ``data`` (same length), with
            one or two dimensions.
        window_size: Positive number of consecutive rows per window.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays whose leading shape is
        ``(n_windows, window_size)``, followed by the column dimension of the
        corresponding input when it is two-dimensional.
        ``n_windows`` is ``max(len(data) - window_size + 1, 0)``.

    Raises:
        ValueError: If ``window_size`` is not positive, if an input is not
            one- or two-dimensional (for example because it has already
            been windowed), or if ``data`` and ``label`` differ in length.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")

    features = np.asarray(data)
    labels = np.asarray(label)

    if features.ndim not in (1, 2) or labels.ndim not in (1, 2):
        raise ValueError(
            "data and label must be 1-D or 2-D row-indexed inputs; "
            f"got {features.ndim}-D data and {labels.ndim}-D label"
        )
    if len(features) != len(labels):
        raise ValueError(
            f"data and label must have the same length, "
            f"got {len(features)} and {len(labels)}"
        )

    # One row of indices per window: row i is [i, i + 1, ..., i + window_size - 1].
    n_windows = max(len(features) - window_size + 1, 0)
    row_index = np.arange(n_windows)[:, None] + np.arange(window_size)[None, :]

    # Integer-array indexing gathers all windows at once and returns copies.
    return features[row_index], labels[row_index]

In [None]:
# Wrap the resampled / scaled arrays back into DataFrames with named columns.
# y_val is not rebuilt in this cell and keeps its earlier value.
feature_columns = ['MELT_TEMP', 'MOTORSPEED']

x_train = pd.DataFrame(train_scaled_over, columns=feature_columns)
y_train = pd.DataFrame(train_y_over, columns=['TAG'])

x_val = pd.DataFrame(val_scaled, columns=feature_columns)

In [None]:
from torch.utils.data import DataLoader, TensorDataset

window_size = 10

# Cut both splits into sliding windows. The frame names are rebound to the
# windowed arrays, so this cell cannot be re-run on its own: the frames have
# to be rebuilt first.
train_windows = make_dataset(x_train, y_train, window_size)
val_windows = make_dataset(x_val, y_val, window_size)

x_train, y_train = train_windows
x_val, y_val = val_windows

In [None]:
from tqdm import tqdm
from torch.nn import BCELoss

h_size = 8


class MockUpModel(nn.Module):
    """Bidirectional LSTM that emits one probability per time step.

    The LSTM runs with ``batch_first=True``, so inputs are expected as
    ``(batch, sequence, input_size)``. Each time step's LSTM output (forward
    and backward states concatenated) goes through two linear layers with a
    ReLU in between and a final sigmoid, giving an output of shape
    ``(batch, sequence, 1)`` with values in ``[0, 1]``.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden size per direction. When ``None`` the
            module-level ``h_size`` is read at construction time.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout applied between stacked LSTM layers.

    Calling the constructor without arguments builds the same layers with
    the same parameter names as before these arguments existed, so existing
    ``state_dict`` checkpoints still load.
    """

    def __init__(self, input_size=2, hidden_size=None, num_layers=2, dropout=0.1):
        super().__init__()
        if hidden_size is None:
            hidden_size = h_size
        self.model = nn.ModuleDict({
            'lstm': nn.LSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                dropout=dropout,
                num_layers=num_layers,
                batch_first=True,
                bidirectional=True
            ),
            # The bidirectional LSTM concatenates both directions, hence * 2.
            'linear1': nn.Linear(in_features=hidden_size * 2, out_features=hidden_size),
            'linear2': nn.Linear(in_features=hidden_size, out_features=1),
            'relu1': nn.ReLU(),
            'sigmoid': nn.Sigmoid()
        })

    def forward(self, x):
        """Return per-time-step probabilities for a batch of sequences."""
        out, _ = self.model['lstm'](x)
        out = self.model['linear1'](out)
        out = self.model['relu1'](out)
        out = self.model['linear2'](out)
        out = self.model['sigmoid'](out)
        return out

In [None]:
train_bs = 64
val_bs = 1024


def _ordered_loader(array, batch_size):
    """Serve `array` as float tensors, in order, `batch_size` rows at a time."""
    return DataLoader(
        dataset=torch.FloatTensor(array),
        batch_size=batch_size,
        shuffle=False
    )


# Features and labels live in separate loaders that are zipped together by
# the training loop; they only stay aligned because none of them shuffles and
# each pair shares a batch size.
x_train_dataloader = _ordered_loader(x_train, train_bs)
y_train_dataloader = _ordered_loader(y_train, train_bs)

x_val_dataloader = _ordered_loader(x_val, val_bs)
y_val_dataloader = _ordered_loader(y_val, val_bs)

In [None]:
# Prefer the GPU this notebook was written for (index 4). Fall back to the
# default GPU, then to the CPU, so the cell also runs on smaller machines
# instead of failing when the model is moved to a missing device.
gpu_index = 4
if torch.cuda.is_available() and torch.cuda.device_count() > gpu_index:
    device = torch.device(f"cuda:{gpu_index}")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = MockUpModel().to(device)
lr = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

loss_fn = BCELoss()

n_epochs = 50
checkpoint_path = './detection_models/model_upgrade.pt'
# torch.save does not create missing directories.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Best (lowest) mean validation loss seen so far.
min_valid = 1e9

for i in range(n_epochs):
    # --- training pass ---
    batch_loss = 0.
    model.train()
    # zip() has no length, so the total is passed explicitly for the progress bar.
    train_batches = tqdm(
        zip(x_train_dataloader, y_train_dataloader),
        total=len(x_train_dataloader)
    )
    for x, y in train_batches:
        optimizer.zero_grad()
        out = model(x.to(device))
        loss = loss_fn(out, y.to(device))
        loss.backward()
        optimizer.step()
        batch_loss += loss.item()

    # --- validation pass (no gradients) ---
    model.eval()
    valid_loss = 0.
    with torch.no_grad():
        for x, y in zip(x_val_dataloader, y_val_dataloader):
            valid_loss += loss_fn(model(x.to(device)), y.to(device)).item()

    # Report the mean loss per batch.
    batch_loss /= len(x_train_dataloader)
    valid_loss /= len(x_val_dataloader)

    # Keep the weights of the epoch with the lowest validation loss.
    if min_valid >= valid_loss:
        min_valid = valid_loss
        torch.save(model.state_dict(), checkpoint_path)
    print(f'{i}: loss: {batch_loss}, valid: {valid_loss}')

In [None]:
best_model = MockUpModel()

# Load the checkpoint that the training loop writes
# ('./detection_models/model_upgrade.pt'). `best_model` stays on the CPU, so
# the stored tensors are mapped there; without `map_location` torch.load
# tries to restore them onto the device they were saved from.
best_model.load_state_dict(
    torch.load('./detection_models/model_upgrade.pt', map_location='cpu')
)
best_model.eval()

In [None]:
# x_val / y_val were already turned into windowed arrays when `window_size`
# was defined, so they are used as they are. They must not be passed through
# make_dataset a second time.
x_test_dataloader = DataLoader(
    dataset=torch.FloatTensor(x_val),
    batch_size=1,
    shuffle=False
)

y_test_dataloader = DataLoader(
    dataset=torch.FloatTensor(y_val),
    batch_size=1,
    shuffle=False
)

In [None]:
# Print the result for every validation window, one window per step.
# NOTE(review): `load_model` is not defined in any cell visible here --
# confirm where it comes from (possibly `best_model` was intended).
for features, targets in zip(x_test_dataloader, y_test_dataloader):
    print(load_model(features, targets))