In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from darts import TimeSeries
from torch.nn import Transformer, Sequential

In [4]:
# Load the raw sensor log.
df = pd.read_csv('./public/data/raw_data.csv')

# Turn the textual tag into a boolean label: True where the raw value is 'NG'.
df['TAG'] = df['TAG'].eq('NG')

# Regular 6-second timestamps starting 2020-03-04; the final stamp
# (2020-05-01 00:00:00) is dropped so the range ends on 2020-04-30 23:59:54.
timestamps = pd.date_range(start='3/4/2020', end='5/1/2020', freq='6S')
df.index = timestamps[:-1]

# Discard the columns that are not used as model inputs.
df = df.drop(columns=['STD_DT', 'NUM', 'MELT_WEIGHT'])

# Everything becomes float32 first, then the label goes back to bool.
df = df.astype(np.float32).astype({'TAG': bool})

df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 835200 entries, 2020-03-04 00:00:00 to 2020-04-30 23:59:54
Freq: 6S
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   MELT_TEMP   835200 non-null  float32
 1   MOTORSPEED  835200 non-null  float32
 2   INSP        835200 non-null  float32
 3   TAG         835200 non-null  bool   
dtypes: bool(1), float32(3)
memory usage: 16.7 MB


Unnamed: 0,MELT_TEMP,MOTORSPEED,INSP
count,835200.0,835200.0,835200.0
mean,509.200714,459.78302,3.194854
std,128.277512,639.436401,0.011822
min,308.0,0.0,3.17
25%,430.0,119.0,3.19
50%,469.0,168.0,3.19
75%,502.0,218.0,3.2
max,832.0,1804.0,3.23


In [6]:
# Training rows: every timestamp in March, plus days 1-14 of any other month.
in_march = df.index.month == 3
in_first_two_weeks = df.index.day <= 14
index = in_march | in_first_two_weeks

# Boolean-mask selection; the complement of the mask is the test set.
train_df, test_df = df[index], df[~index]

# NOTE: train_x_df is the same object as train_df, so popping 'TAG' below
# removes the label column from train_df as well (likewise for the test pair).
train_x_df = train_df
train_y_df = pd.DataFrame(train_x_df.pop('TAG'), columns=['TAG'])

test_x_df = test_df
test_y_df = pd.DataFrame(test_x_df.pop('TAG'), columns=['TAG'])

In [7]:
def _as_series(frame):
    """Wrap a DataFrame as a darts TimeSeries on a 6-second grid, filling missing dates."""
    return TimeSeries.from_dataframe(frame, freq='6S', fill_missing_dates=True)


train_x_series = _as_series(train_x_df)
train_y_series = _as_series(train_y_df)
test_x_series = _as_series(test_x_df)
test_y_series = _as_series(test_y_df)

len(train_x_series)

604800

In [8]:
from darts.dataprocessing.transformers import Scaler
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, robust_scale

# darts Scaler wrapping a scikit-learn MinMaxScaler.
minmax = MinMaxScaler()
scaler = Scaler(scaler=minmax)

# The scaler is fitted on the training features only ...
train_scaled = scaler.fit_transform(train_x_series)
train_scaled = train_scaled.astype(np.float32)

# ... and the already-fitted scaler is then applied to the test features.
test_scaled = scaler.transform(test_x_series)
test_scaled = test_scaled.astype(np.float32)

train_scaled.pd_dataframe().info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 604800 entries, 2020-03-04 00:00:00 to 2020-04-14 23:59:54
Freq: 6S
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   MELT_TEMP   604800 non-null  float32
 1   MOTORSPEED  604800 non-null  float32
 2   INSP        604800 non-null  float32
dtypes: float32(3)
memory usage: 11.5 MB


In [9]:
from torch.utils.data import DataLoader, Dataset

class WindowDataset(Dataset):
    """Sliding-window dataset pairing a block of past rows with a later target.

    Sample ``k`` is ``(X[k : k + input_window], Y[k + input_window + 1])``.

    The windows are exposed as a zero-copy strided view over ``X`` instead of
    being copied one by one into a new array, so memory use no longer grows
    with ``input_window``.

    NOTE(review): the target sits one row *after* the row that immediately
    follows the window (``Y[i + 1]`` where the window ends at ``i - 1``), so
    ``Y[i]`` is never used. That offset is kept exactly as it was; confirm it
    is the intended forecasting horizon and not an off-by-one.

    Args:
        X: array-like of shape ``(T, ...)`` holding the input rows.
        Y: array-like with at least ``T`` entries along axis 0 (targets).
        input_window: number of consecutive rows per sample (>= 1).

    Attributes:
        x: read-only view of shape ``(n, input_window, ...)``.
        y: targets aligned with ``x`` (length ``n``).
        len: number of samples, ``max(T - 1 - input_window, 0)``.

    Raises:
        ValueError: if ``input_window < 1`` or ``Y`` is shorter than ``X``.
    """

    def __init__(self, X, Y, input_window=10):
        if input_window < 1:
            raise ValueError("input_window must be >= 1, got {}".format(input_window))

        X = np.asarray(X)
        Y = np.asarray(Y)

        # Same sample count as iterating i over range(input_window, len(X) - 1).
        n_samples = max(len(X) - 1 - input_window, 0)
        if n_samples and len(Y) < len(X):
            raise ValueError(
                "Y must have at least as many rows as X ({} < {})".format(len(Y), len(X))
            )

        if n_samples:
            # sliding_window_view appends the window axis last:
            # (T - w + 1, ..., w). Move it next to the sample axis to obtain
            # (T - w + 1, w, ...), then keep only windows that have a target.
            windows = np.lib.stride_tricks.sliding_window_view(X, input_window, axis=0)
            self.x = np.moveaxis(windows, -1, 1)[:n_samples]
        else:
            # Too few rows for even one sample: expose an empty, well-shaped array.
            self.x = np.empty((0, input_window) + X.shape[1:], dtype=X.dtype)

        first_target = input_window + 1
        self.y = Y[first_target:first_target + n_samples]
        self.len = n_samples

    def __getitem__(self, i):
        # Copy the window so callers receive a contiguous, writable array
        # (self.x itself is a read-only, non-contiguous view).
        return np.array(self.x[i]), self.y[i]

    def __len__(self):
        return self.len

In [75]:
import math

class TFModel(nn.Module):
    """Transformer-encoder model that maps a window of feature rows to one value.

    Pipeline in ``forward``:
      1. ``encoder``: per-time-step MLP lifting ``n_features`` to ``d_model``.
      2. ``pos_encoder``: positional encoding along the time axis.
      3. ``transformer_encoder``: ``nlayers`` encoder layers with ``srcmask``.
      4. ``linear``: per-time-step MLP reducing ``d_model`` to a single value.
      5. ``linear2``: MLP over the ``window`` axis reducing it to one output.

    Args:
        window: sequence length the model is built for (input size of ``linear2``).
        d_model: transformer embedding width.
        nhead: number of attention heads.
        nlayers: number of stacked encoder layers.
        dropout: dropout used in the encoder layers and positional encoding.
        n_features: number of input features per time step (default 3, the
            value that used to be hard-coded).
    """

    def __init__(self, window, d_model, nhead, nlayers, dropout=0.5, n_features=3):
        super(TFModel, self).__init__()
        # Kept as an attribute so existing state_dict keys remain valid.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dropout=dropout
        )

        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer,
            num_layers=nlayers
        )

        self.pos_encoder = PositionalEncoding(
            d_model,
            dropout
        )

        # Per-time-step embedding: n_features -> d_model.
        self.encoder = nn.Sequential(
            nn.Linear(n_features, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, d_model)
        )

        # Per-time-step head: d_model -> 1.
        self.linear = nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, 1)
        )

        # Head across the time axis: window -> 1.
        self.linear2 = nn.Sequential(
            nn.Linear(window, window // 2),
            nn.ReLU(),
            nn.Linear(window // 2, 1)
        )

    def generate_square_subsequent_mask(self, sz):
        """Return a ``(sz, sz)`` float mask: 0.0 on/below the diagonal, -inf above it."""
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

    def forward(self, src, srcmask):
        """Run the model.

        Args:
            src: tensor of shape ``(batch, window, n_features)``.
            srcmask: ``(window, window)`` attention mask, e.g. from
                ``generate_square_subsequent_mask``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        src = self.encoder(src)  # (batch, window, d_model)
        # The positional encoding indexes its table with dim 0, so the tensor
        # must be sequence-first *before* it is applied. Applying it to the
        # batch-first tensor would encode the batch index, not the time step.
        src = self.pos_encoder(src.transpose(0, 1))  # (window, batch, d_model)
        output = self.transformer_encoder(src, srcmask).transpose(0, 1)  # (batch, window, d_model)
        output = self.linear(output)[:, :, 0]  # (batch, window)
        output = self.linear2(output)  # (batch, 1)
        return output

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding followed by dropout.

    The encoding table is stored as a non-trainable buffer ``pe`` of shape
    ``(max_len, 1, d_model)``. ``forward`` slices it with ``x.size(0)``, so the
    input must be sequence-first: ``(seq_len, batch, d_model)``.

    Args:
        d_model: embedding width; odd values are supported.
        dropout: dropout probability applied after adding the encoding.
        max_len: number of positions pre-computed in the table.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns;
        # trim the cosine block so the assignment shapes match.
        pe[:, 1::2] = torch.cos(angles)[:, : d_model // 2]
        pe = pe.unsqueeze(0).transpose(0, 1)  # (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding for positions ``0 .. x.size(0) - 1`` and apply dropout."""
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

def gen_attention_mask(x):
    """Return a boolean tensor that is True wherever ``x`` equals 0."""
    return torch.eq(x, 0)

In [83]:
# Number of consecutive time steps fed to the model per sample.
window = 10 * 60

# Plain NumPy arrays with singleton axes squeezed out of the darts series.
train_features = train_scaled.values().squeeze()
train_labels = train_y_series.values().squeeze()

train_dataset = WindowDataset(train_features, train_labels, window)

train_loader = DataLoader(dataset=train_dataset, batch_size=4)

In [84]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA instead of failing at .to(device).
if torch.cuda.is_available():
    # Release cached blocks left over from earlier runs in this kernel.
    torch.cuda.empty_cache()
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

lr = 1e-3
# Positional arguments: window, d_model=512, nhead=8, nlayers=4, dropout=0.1.
model = TFModel(window, 512, 8, 4, 0.1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [85]:
from tqdm import tqdm

epoch = 1000
model.train()
progress = tqdm(range(epoch))

# The causal mask depends only on the sequence length, so build it once and
# reuse it; it is rebuilt only if a batch arrives with a different length.
src_mask = None

for i in progress:
    batchloss = 0.0
    for (inputs, outputs) in train_loader:
        optimizer.zero_grad()

        seq_len = inputs.shape[1]
        if src_mask is None or src_mask.size(0) != seq_len:
            src_mask = model.generate_square_subsequent_mask(seq_len).to(device)

        result = model(inputs.to(device), src_mask)
        # NOTE(review): the target is a boolean tag regressed with MSELoss;
        # confirm that a regression loss is intended for this label.
        loss = criterion(result[:, 0], outputs.float().to(device))
        loss.backward()
        optimizer.step()

        # Accumulate a detached value: adding `loss` itself would keep every
        # iteration's autograd history reachable for the whole epoch.
        batchloss += loss.detach()

    # float() accepts both the initial 0.0 and an accumulated tensor; max(..., 1)
    # avoids dividing by zero when the loader yields no batches.
    mean_loss = float(batchloss) / max(len(train_loader), 1)
    progress.set_description("loss: {:0.6f}".format(mean_loss))

  0%|          | 0/1000 [00:00<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 44.00 MiB (GPU 0; 15.78 GiB total capacity; 11.75 GiB already allocated; 14.41 GiB free; 11.83 GiB reserved in total by PyTorch)