In [1]:
import random
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.nn import Transformer
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
import torch.backends.cudnn as cudnn

from tqdm.notebook import tqdm

import wandb

In [2]:
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)
np.random.seed(0)
cudnn.benchmark = False
cudnn.deterministic = True
random.seed(0)

if torch.cuda.is_available(): device = torch.device("cuda")
elif torch.backends.mps.is_available(): device = torch.device("mps")
else: device = torch.device('cpu')

In [3]:
wandb.init()
wandb.run.name = 'transformer'
wandb.run.save()

[34m[1mwandb[0m: Currently logged in as: [33mqja1998[0m. Use [1m`wandb login --relogin`[0m to force relogin




True

In [69]:
class GasDataset(Dataset):
    def __init__(self, df, seq_len, out_len, year=None):
        self.seq_len = seq_len
        self.out_len = out_len

        self.df = df
        if year is not None:
            self.df = self.df.loc[self.df['year'] == year]
    
    def __len__(self):
        return len(self.df) - self.seq_len - self.out_len

    def __getitem__(self, idx):
        input_end = idx + self.seq_len
        output_end = input_end + self.out_len

        x_data = self.df.iloc[idx:input_end, :]
        y_data = self.df.iloc[input_end:output_end, :]

        date_x, x = x_data.date, torch.from_numpy(np.array(x_data[['supply']], dtype=np.float32))
        date_y, y = y_data.date, torch.from_numpy(np.array(y_data[['supply']], dtype=np.float32))
        
        return x, y

In [70]:
class TransformerModel(nn.Module):
    def __init__(self,d_model, seq_len, out_len, nhead, nhid, nlayers, model_type, dropout=0.5):
        super(TransformerModel, self).__init__()
        
        self.model_type = model_type
        self.d_model = d_model

        self.pos_encoder = PositionalEncoding(d_model, dropout)

        if model_type == "enc-dec":
            self.embedding = nn.Linear(1, d_model)
            self.transformer = Transformer(d_model=d_model, nhead=nhead, dim_feedforward=nhid, num_encoder_layers=nlayers, num_decoder_layers=nlayers,dropout=dropout)
            self.linear = nn.Linear(d_model, 1)
        
        else:
            if model_type == "enc":
                encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
                self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=nlayers)

            elif model_type == "dec":
                encoder_layer = nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
                self.transformer_encoder = nn.TransformerDecoder(encoder_layer, num_layers=nlayers)
            
            self.embedding = nn.Sequential(
                nn.Linear(1, d_model//2),
                nn.ReLU(),
                nn.Linear(d_model//2, d_model)
            )
            
            self.fc =  nn.Sequential(
                nn.Linear(d_model, d_model//2),
                nn.ReLU(),
                nn.Linear(d_model//2, 1),
            )

            self.fc2 = nn.Sequential(
                nn.Linear(seq_len, (seq_len + out_len)//2),
                nn.ReLU(),
                nn.Linear((seq_len + out_len)//2, out_len)
            )

    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def forward(self, src, tgt, srcmask, tgtmask):
        if self.model_type == "enc-dec":
            src = self.embedding(src) * math.sqrt(self.d_model)
            src = self.pos_encoder(src)
            tgt = self.embedding(tgt) * math.sqrt(self.d_model)
            tgt = self.pos_encoder(tgt)

            output = self.transformer(src.transpose(0,1), tgt.transpose(0,1), srcmask, tgtmask)
            output = self.linear(output)

        else:
            src = self.embedding(src) * math.sqrt(self.d_model)
            src = self.pos_encoder(src)
            output = self.transformer_encoder(src.transpose(0,1), srcmask).transpose(0,1)
            output = self.fc(output)[:,:,0]
            output = self.fc2(output)

        return output

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

def gen_attention_mask(x):
    mask = torch.eq(x, 0)
    return mask

In [71]:
train_dataset[-14]

(tensor([], size=(0, 1)),
 tensor([[ 587.9580],
         [ 210.4197],
         [ -28.8697],
         [-372.3413],
         [-211.9683],
         [ 197.3100],
         [ -81.8040]]))

In [72]:
seq_len = 14
out_len = 7
train_rate = 0.8
df = pd.read_csv("data/korea/kor_gas_day.csv")
df = df.loc[df['type'] == 'A']
train_len = int(len(df) * train_rate)

df_train, df_val = df.iloc[:train_len], df.iloc[train_len:]
train_dataset = GasDataset(df_train, seq_len, out_len)
train_loader = DataLoader(train_dataset, batch_size=64)

lr = 1e-3
model = TransformerModel(256, 14, 7, 8, 256, 2, 'enc-dec', 0.1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

model.eval()
for batch_idx, (x, y) in enumerate(train_loader):
    print(x.shape, y.shape)
    x = x.to(device)
    y = y.to(device)
    src_mask = model.generate_square_subsequent_mask(x.shape[1]).to(device)
    tgt_mask = model.generate_square_subsequent_mask(y.shape[1]).to(device)
    output = model(x, y, src_mask, tgt_mask).transpose(0, 1)
    print(output.shape)
    break

torch.Size([64, 14, 1]) torch.Size([64, 7, 1])
torch.Size([64, 7, 1])


In [73]:
def train(model, train_loader, val_data, optimizer, criterion, epoch):
    model.train()
    for i in tqdm(range(epoch)):
        for batch_idx, (x, y) in enumerate(train_loader):
            x = x.to(device)
            y = y.to(device)
            
            optimizer.zero_grad()
            
            src_mask = model.generate_square_subsequent_mask(x.shape[1]).to(device)
            tgt_mask = model.generate_square_subsequent_mask(y.shape[1]).to(device)
            output = model(x, y, src_mask, tgt_mask).transpose(0, 1)
            
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            wandb.log({
                "epoch": i,
                "Loss": loss,
                'x': x,
                'y': y
            })
        if i % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i, epoch,
                100. * i / epoch, loss.item()))

    if val_data is not None:
        model.eval()
        loss = 0
        true_val = []
        predictions = []
        with torch.no_grad():
            for batch_idx, (x, y) in enumerate(val_data):
                
                x = x.unsqueeze(0).to(device)
                y = y.unsqueeze(0).to(device)
                src_mask = model.generate_square_subsequent_mask(x.shape[1]).to(device)
                tgt_mask = model.generate_square_subsequent_mask(y.shape[1]).to(device)
                
                output = model(x, y, src_mask, tgt_mask).transpose(0, 1)

                loss += criterion(output, y)
                print(y.shape, output.shape)
                for i in y.squeeze().cpu().numpy():
                    true_val.append(float(i))
                for i in output.squeeze().cpu().numpy():
                    predictions.append(float(i))
        

        print('\nTest set: Average loss: {:.4f}'.format(loss / len(val_data)))
        
        plt.figure(figsize=(30,15))
        x = np.arange(len(true_val))
        plt.subplot(411)
        plt.plot(x, true_val, label='true', c='blue')
        plt.plot(x, predictions, label='predictions', c='red')
        plt.legend()

        true_val_cumsum = np.cumsum(true_val)

        tmp1 = predictions[:]
        for i, v in enumerate(true_val_cumsum[:-1]):
            tmp1[i + 1] = v + tmp1[i + 1]
            
        plt.subplot(412)
        plt.plot(x, true_val_cumsum, label='true', c='blue')
        plt.plot(x, predictions, label='predictions', c='red')
        plt.plot(x, np.cumsum(tmp1), label='self cumsum', c='orange')
        plt.legend()

        self_predictions = []
        x = val_data[0][0].to(device)
        y = val_data[0][1].to(device)
        
        with torch.no_grad():
            for i, (_, true_y) in enumerate(val_data):
                output = model(x).squeeze(-1)
                tmp = x[0][1:].unsqueeze(0)
                x = torch.cat([tmp, output.unsqueeze(0)], dim=1)
                y = output

                loss += criterion(output, true_y.unsqueeze(-1).to(device))
                self_predictions.append(float(output.cpu().numpy()))

        print('\nTest set: Average loss: {:.4f}'.format(loss / len(val_data)))
        x = np.arange(len(true_val))
        plt.subplot(413)
        plt.plot(x, true_val, label='true', c='blue')
        plt.plot(x, self_predictions, label='predictions', c='red')
        plt.legend()
        
        tmp2 = self_predictions[:]
        for i, v in enumerate(true_val_cumsum[:-1]):
            tmp2[i + 1] = v + tmp2[i + 1]
        
        plt.subplot(414)
        plt.plot(x, true_val_cumsum, label='true', c='blue')
        plt.plot(x, self_predictions, label='predictions', c='red')
        plt.plot(x, np.cumsum(tmp2), label='self cumsum', c='orange')
        plt.legend()

        return true_val, true_val_cumsum, predictions, self_predictions

In [56]:
seq_len = 14
out_len = 7
train_rate = 0.8

epochs = 1
batch_szie = 512

df = pd.read_csv("data/korea/kor_gas_day.csv")
df = df.loc[df['type'] == 'A']
df.loc[:, ['supply']] = df.loc[:, ['supply']].astype(float).diff()
df = df[1:]
train_len = int(len(df) * train_rate)

df_train, df_val = df.iloc[:train_len], df.iloc[train_len:]
train_dataset = GasDataset(df_train, seq_len, out_len)
train_loader = DataLoader(train_dataset, batch_size=batch_szie, shuffle=True)

val_dataset = GasDataset(df_val, seq_len, out_len)

In [57]:
seq_len = 14
out_len = 7
train_rate = 0.8
df = pd.read_csv("data/korea/kor_gas_day.csv")
df = df.loc[df['type'] == 'A']
train_len = int(len(df) * train_rate)

lr = 1e-3
model = TransformerModel(256, 14, 7, 8, 256, 2, 'enc-dec', 0.1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [58]:
results = train(model, train_loader, val_dataset, optimizer, criterion, epochs)
true_val, true_val_cumsum, predictions, self_predictions = results[:]

  0%|          | 0/1 [00:00<?, ?it/s]

torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size

torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size

torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size([1, 7, 1])
torch.Size([1, 7, 1]) torch.Size

TypeError: iteration over a 0-d array