<a href="https://colab.research.google.com/github/letrongminh/Torch4GANs/blob/main/Transformer_for_TimeSeries_Forecast_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

LSTMs xử lý thông tin tuần tự, như được hiển thị ở trên. Kiến trúc này duy trì trạng thái ẩn được cập nhật với mọi mã thông báo đầu vào mới, đại diện cho toàn bộ trình tự mà nó đã thấy. Về mặt lý thuyết, thông tin rất quan trọng có thể lan truyền qua các chuỗi dài vô hạn. Tuy nhiên, trong thực tế, điều này không đúng như vậy. Do vấn đề về Vanishing Gradient, LSTM cuối cùng sẽ quên các thông tin trước đó.

Trong khi đó, Transformers giữ lại các kết nối trực tiếp với tất cả các thời gian trước đó, cho phép thông tin truyền qua các chuỗi dài hơn nhiều. Tuy nhiên, điều này đòi hỏi một thách thức mới: mô hình sẽ được kết nối trực tiếp với một lượng đầu vào bùng nổ. Để lọc điều quan trọng khỏi điều không quan trọng, Transformers sử dụng một thuật toán gọi là tự chú ý.

**Self-Attention**

Cơ chế chú ý được thiết kế để chỉ tập trung vào các tập hợp con quan trọng nhất của các chuỗi dài tùy ý, có liên quan để hoàn thành một nhiệm vụ nhất định.

Cụ thể, mô hình phải quyết định những chi tiết nào từ các thẻ nhớ trước đó có liên quan để mã hóa mã thông báo hiện tại. Khối tự chú ý mã hóa từng đầu vào mới đối với tất cả các đầu vào khác trước đó, đặt trọng tâm dựa trên tính toán mức độ liên quan đối với mã thông báo hiện tại.



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data Handle/ Data Loader

In [2]:
!pip install icecream

Collecting icecream
  Downloading icecream-2.1.1-py2.py3-none-any.whl (8.1 kB)
Collecting executing>=0.3.1
  Downloading executing-0.8.2-py2.py3-none-any.whl (16 kB)
Collecting colorama>=0.3.9
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting asttokens>=2.0.1
  Downloading asttokens-2.0.5-py2.py3-none-any.whl (20 kB)
Installing collected packages: executing, colorama, asttokens, icecream
Successfully installed asttokens-2.0.5 colorama-0.4.4 executing-0.8.2 icecream-2.1.1


In [46]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import os
import torch
import numpy as np
import random
import matplotlib.pyplot as plt
from joblib import dump

from icecream import ic
import time
import datetime
import torch
import torch.nn as nn
import math

import logging
from joblib import load


In [51]:
class SensorDataset(Dataset):
  """Face Landmarks dataset."""

  def __init__(self, csv_name, root_dir, training_length, forecast_window):
      """
      Args:
          csv_file (string): Path to the csv file.
          root_dir (string): Directory
      """
      
      # load raw data file
      csv_file = os.path.join(root_dir, csv_name)
      self.df = pd.read_csv(csv_file)
      self.root_dir = root_dir
      self.transform = MinMaxScaler()
      self.T = training_length
      self.S = forecast_window

  def __len__(self):
      # return number of sensors
      return len(self.df.groupby(by=["reindexed_id"]))

  # Will pull an index between 0 and __len__. 
  def __getitem__(self, idx):
      
      # Sensors are indexed from 1
      idx = idx+1

      # np.random.seed(0)

      start = np.random.randint(0, len(self.df[self.df["reindexed_id"]==idx]) - self.T - self.S) 
      sensor_number = str(self.df[self.df["reindexed_id"]==idx][["sensor_id"]][start:start+1].values.item())
      index_in = torch.tensor([i for i in range(start, start+self.T)])
      index_tar = torch.tensor([i for i in range(start + self.T, start + self.T + self.S)])
      _input = torch.tensor(self.df[self.df["reindexed_id"]==idx][["humidity", "sin_hour", "cos_hour", "sin_day", "cos_day", "sin_month", "cos_month"]][start : start + self.T].values)
      target = torch.tensor(self.df[self.df["reindexed_id"]==idx][["humidity", "sin_hour", "cos_hour", "sin_day", "cos_day", "sin_month", "cos_month"]][start + self.T : start + self.T + self.S].values)

      # scalar is fit only to the input, to avoid the scaled values "leaking" information about the target range.
      # scalar is fit only for humidity, as the timestamps are already scaled
      # scalar input/output of shape: [n_samples, n_features].
      scaler = self.transform

      scaler.fit(_input[:,0].unsqueeze(-1))
      _input[:,0] = torch.tensor(scaler.transform(_input[:,0].unsqueeze(-1)).squeeze(-1))
      target[:,0] = torch.tensor(scaler.transform(target[:,0].unsqueeze(-1)).squeeze(-1))

      # save the scalar to be used later when inverse translating the data for plotting.
      dump(scaler, 'scalar_item.joblib')

      return index_in, index_tar, _input, target, sensor_number

**Preprocessing**

In [5]:
# encoding the timestamp data cyclically. See Medium Article.
def process_data(source):
  df = pd.read_csv(source)
      
  timestamps = [ts.split('+')[0] for ts in  df['timestamp']]
  timestamps_hour = np.array([float(datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S').hour) for t in timestamps])
  timestamps_day = np.array([float(datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S').day) for t in timestamps])
  timestamps_month = np.array([float(datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S').month) for t in timestamps])

  hours_in_day = 24
  days_in_month = 30
  month_in_year = 12

  df['sin_hour'] = np.sin(2*np.pi*timestamps_hour/hours_in_day)
  df['cos_hour'] = np.cos(2*np.pi*timestamps_hour/hours_in_day)
  df['sin_day'] = np.sin(2*np.pi*timestamps_day/days_in_month)
  df['cos_day'] = np.cos(2*np.pi*timestamps_day/days_in_month)
  df['sin_month'] = np.sin(2*np.pi*timestamps_month/month_in_year)
  df['cos_month'] = np.cos(2*np.pi*timestamps_month/month_in_year)

  return df

train_dataset = process_data('/content/drive/MyDrive/Colab_Notebooks_1/PyTorchtoGANs/Transformer/Data/train_raw.csv')
test_dataset = process_data('/content/drive/MyDrive/Colab_Notebooks_1/PyTorchtoGANs/Transformer/Data/test_raw.csv')

train_dataset.to_csv(r'train_dataset.csv', index=False)
test_dataset.to_csv(r'test_dataset.csv', index=False)

# Model building

**Model**

The architecture is based on the paper "Attention is all you need"

In [42]:
class Transformer(nn.Module):
  # d_model : number of features
  def __init__(self,feature_size=7,num_layers=3,dropout=0):
      super(Transformer, self).__init__()

      self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=7, dropout=dropout)
      self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)        
      self.decoder = nn.Linear(feature_size,1)
      self.init_weights()

  def init_weights(self):
      initrange = 0.1    
      self.decoder.bias.data.zero_()
      self.decoder.weight.data.uniform_(-initrange, initrange)

  def _generate_square_subsequent_mask(self, sz):
      mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
      mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
      return mask

  def forward(self, src, device):
      
      mask = self._generate_square_subsequent_mask(len(src)).to(device)
      output = self.transformer_encoder(src,mask)
      output = self.decoder(output)
      return output
      

**helper**

In [43]:
import os, shutil

# save train or validation loss
def log_loss(loss_val : float, path_to_save_loss : str, train : bool = True):
    if train:
        file_name = "train_loss.txt"
    else:
        file_name = "val_loss.txt"

    path_to_file = path_to_save_loss+file_name
    os.makedirs(os.path.dirname(path_to_file), exist_ok=True)
    with open(path_to_file, "a") as f:
        f.write(str(loss_val)+"\n")
        f.close()

# Exponential Moving Average, https://en.wikipedia.org/wiki/Moving_average
def EMA(values, alpha=0.1):
    ema_values = [values[0]]
    for idx, item in enumerate(values[1:]):
        ema_values.append(alpha*item + (1-alpha)*ema_values[idx])
    return ema_values

# Remove all files from previous executions and re-run the model.
def clean_directory():

    if os.path.exists('save_loss'):
        shutil.rmtree('save_loss')
    if os.path.exists('save_model'): 
        shutil.rmtree('save_model')
    if os.path.exists('save_predictions'): 
        shutil.rmtree('save_predictions')
    os.mkdir("save_loss")
    os.mkdir("save_model")
    os.mkdir("save_predictions")

**Plot**

In [44]:
def plot_loss(path_to_save, train=True):
    plt.rcParams.update({'font.size': 10})
    with open(path_to_save + "/train_loss.txt", 'r') as f:
        loss_list = [float(line) for line in f.readlines()]
    if train:
        title = "Train"
    else:
        title = "Validation"
    EMA_loss = EMA(loss_list)
    plt.plot(loss_list, label = "loss")
    plt.plot(EMA_loss, label="EMA loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.title(title+"_loss")
    plt.savefig(path_to_save+f"/{title}.png")
    plt.close()

def plot_prediction(title, path_to_save, src, tgt, prediction, sensor_number, index_in, index_tar):

    idx_scr = index_in[0, 1:].tolist()
    idx_tgt = index_tar[0].tolist()
    idx_pred = [i for i in range(idx_scr[0] +1, idx_tgt[-1])] #t2 - t61

    plt.figure(figsize=(15,6))
    plt.rcParams.update({"font.size" : 16})

    # connect with last elemenet in src
    # tgt = np.append(src[-1], tgt.flatten())
    # prediction = np.append(src[-1], prediction.flatten())

    # plotting
    plt.plot(idx_scr, src, '-', color = 'blue', label = 'Input', linewidth=2)
    plt.plot(idx_tgt, tgt, '-', color = 'indigo', label = 'Target', linewidth=2)
    plt.plot(idx_pred, prediction,'--', color = 'limegreen', label = 'Forecast', linewidth=2)

    #formatting
    plt.grid(b=True, which='major', linestyle = 'solid')
    plt.minorticks_on()
    plt.grid(b=True, which='minor', linestyle = 'dashed', alpha=0.5)
    plt.xlabel("Time Elapsed")
    plt.ylabel("Humidity (%)")
    plt.legend()
    plt.title("Forecast from Sensor " + str(sensor_number[0]))

    # save
    plt.savefig(path_to_save+f"Prediction_{title}.png")
    plt.close()

def plot_training(epoch, path_to_save, src, prediction, sensor_number, index_in, index_tar):

    # idx_scr = index_in.tolist()[0]
    # idx_tar = index_tar.tolist()[0]
    # idx_pred = idx_scr.append(idx_tar.append([idx_tar[-1] + 1]))

    idx_scr = [i for i in range(len(src))]
    idx_pred = [i for i in range(1, len(prediction)+1)]

    plt.figure(figsize=(15,6))
    plt.rcParams.update({"font.size" : 18})
    plt.grid(b=True, which='major', linestyle = '-')
    plt.grid(b=True, which='minor', linestyle = '--', alpha=0.5)
    plt.minorticks_on()

    plt.plot(idx_scr, src, 'o-.', color = 'blue', label = 'input sequence', linewidth=1)
    plt.plot(idx_pred, prediction, 'o-.', color = 'limegreen', label = 'prediction sequence', linewidth=1)

    plt.title("Teaching Forcing from Sensor " + str(sensor_number[0]) + ", Epoch " + str(epoch))
    plt.xlabel("Time Elapsed")
    plt.ylabel("Humidity (%)")
    plt.legend()
    plt.savefig(path_to_save+f"/Epoch_{str(epoch)}.png")
    plt.close()

def plot_training_3(epoch, path_to_save, src, sampled_src, prediction, sensor_number, index_in, index_tar):

    # idx_scr = index_in.tolist()[0]
    # idx_tar = index_tar.tolist()[0]
    # idx_pred = idx_scr.append(idx_tar.append([idx_tar[-1] + 1]))

    idx_scr = [i for i in range(len(src))]
    idx_pred = [i for i in range(1, len(prediction)+1)]
    idx_sampled_src = [i for i in range(len(sampled_src))]

    plt.figure(figsize=(15,6))
    plt.rcParams.update({"font.size" : 18})
    plt.grid(b=True, which='major', linestyle = '-')
    plt.grid(b=True, which='minor', linestyle = '--', alpha=0.5)
    plt.minorticks_on()

    ## REMOVE DROPOUT FOR THIS PLOT TO APPEAR AS EXPECTED !! DROPOUT INTERFERES WITH HOW THE SAMPLED SOURCES ARE PLOTTED
    plt.plot(idx_sampled_src, sampled_src, 'o-.', color='red', label = 'sampled source', linewidth=1, markersize=10)
    plt.plot(idx_scr, src, 'o-.', color = 'blue', label = 'input sequence', linewidth=1)
    plt.plot(idx_pred, prediction, 'o-.', color = 'limegreen', label = 'prediction sequence', linewidth=1)
    plt.title("Teaching Forcing from Sensor " + str(sensor_number[0]) + ", Epoch " + str(epoch))
    plt.xlabel("Time Elapsed")
    plt.ylabel("Humidity (%)")
    plt.legend()
    plt.savefig(path_to_save+f"/Epoch_{str(epoch)}.png")
    plt.close()

**Inference**

In [24]:
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s %(message)s", datefmt="[%Y-%m-%d %H:%M:%S]")
logger = logging.getLogger(__name__)

def inference(path_to_save_predictions, forecast_window, dataloader, device, path_to_save_model, best_model):

    device = torch.device(device)
    
    model = Transformer().double().to(device)
    model.load_state_dict(torch.load(path_to_save_model+best_model))
    criterion = torch.nn.MSELoss()

    val_loss = 0
    with torch.no_grad():

        model.eval()
        for plot in range(25):

            for index_in, index_tar, _input, target, sensor_number in dataloader:
                
                # starting from 1 so that src matches with target, but has same length as when training
                src = _input.permute(1,0,2).double().to(device)[1:, :, :] # 47, 1, 7: t1 -- t47
                target = target.permute(1,0,2).double().to(device) # t48 - t59

                next_input_model = src
                all_predictions = []

                for i in range(forecast_window - 1):
                    
                    prediction = model(next_input_model, device) # 47,1,1: t2' - t48'

                    if all_predictions == []:
                        all_predictions = prediction # 47,1,1: t2' - t48'
                    else:
                        all_predictions = torch.cat((all_predictions, prediction[-1,:,:].unsqueeze(0))) # 47+,1,1: t2' - t48', t49', t50'

                    pos_encoding_old_vals = src[i+1:, :, 1:] # 46, 1, 6, pop positional encoding first value: t2 -- t47
                    pos_encoding_new_val = target[i + 1, :, 1:].unsqueeze(1) # 1, 1, 6, append positional encoding of last predicted value: t48
                    pos_encodings = torch.cat((pos_encoding_old_vals, pos_encoding_new_val)) # 47, 1, 6 positional encodings matched with prediction: t2 -- t48
                    
                    next_input_model = torch.cat((src[i+1:, :, 0].unsqueeze(-1), prediction[-1,:,:].unsqueeze(0))) #t2 -- t47, t48'
                    next_input_model = torch.cat((next_input_model, pos_encodings), dim = 2) # 47, 1, 7 input for next round

                true = torch.cat((src[1:,:,0],target[:-1,:,0]))
                loss = criterion(true, all_predictions[:,:,0])
                val_loss += loss
            
            val_loss = val_loss/10
            scaler = load('scalar_item.joblib')
            src_humidity = scaler.inverse_transform(src[:,:,0].cpu())
            target_humidity = scaler.inverse_transform(target[:,:,0].cpu())
            prediction_humidity = scaler.inverse_transform(all_predictions[:,:,0].detach().cpu().numpy())
            plot_prediction(plot, path_to_save_predictions, src_humidity, target_humidity, prediction_humidity, sensor_number, index_in, index_tar)

        logger.info(f"Loss On Unseen Dataset: {val_loss.item()}")

In [47]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s %(message)s", datefmt="[%Y-%m-%d %H:%M:%S]")
logger = logging.getLogger(__name__)

def transformer(dataloader, EPOCH, k, frequency, path_to_save_model, path_to_save_loss, path_to_save_predictions, device):

  device = torch.device(device)

  model = Transformer().double().to(device)
  optimizer = torch.optim.Adam(model.parameters())
  # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=200)
  criterion = torch.nn.MSELoss()
  best_model = ""
  min_train_loss = float('inf')

  for epoch in range(EPOCH + 1):

      train_loss = 0
      val_loss = 0

      ## TRAIN -- TEACHER FORCING
      model.train()
      for index_in, index_tar, _input, target, sensor_number in dataloader: # for each data set 
      
          optimizer.zero_grad()

          # Shape of _input : [batch, input_length, feature]
          # Desired input for model: [input_length, batch, feature]

          src = _input.permute(1,0,2).double().to(device)[:-1,:,:] # torch.Size([24, 1, 7])
          target = _input.permute(1,0,2).double().to(device)[1:,:,:] # src shifted by 1.
          prediction = model(src, device) # torch.Size([24, 1, 7])
          loss = criterion(prediction, target[:,:,0].unsqueeze(-1))
          loss.backward()
          optimizer.step()
          # scheduler.step(loss.detach().item())
          train_loss += loss.detach().item()

      if train_loss < min_train_loss:
          torch.save(model.state_dict(), path_to_save_model + f"best_train_{epoch}.pth")
          torch.save(optimizer.state_dict(), path_to_save_model + f"optimizer_{epoch}.pth")
          min_train_loss = train_loss
          best_model = f"best_train_{epoch}.pth"


      if epoch % 100 == 0: # Plot 1-Step Predictions

          logger.info(f"Epoch: {epoch}, Training loss: {train_loss}")
          scaler = load('scalar_item.joblib')
          src_humidity = scaler.inverse_transform(src[:,:,0].cpu()) #torch.Size([35, 1, 7])
          target_humidity = scaler.inverse_transform(target[:,:,0].cpu()) #torch.Size([35, 1, 7])
          prediction_humidity = scaler.inverse_transform(prediction[:,:,0].detach().cpu().numpy()) #torch.Size([35, 1, 7])
          plot_training(epoch, path_to_save_predictions, src_humidity, prediction_humidity, sensor_number, index_in, index_tar)

      train_loss /= len(dataloader)
      log_loss(train_loss, path_to_save_loss, train=True)
      
  plot_loss(path_to_save_loss, train=True)
  return best_model

# **Main**

In [55]:
import argparse

root = '/content/'

def main( epoch = 1500, k = 60, batch_size =32 ,
         frequency = 100, training_length = 24, forecast_window = 12, 
         train_csv = "train_dataset.csv", test_csv = "test_dataset.csv", 
         path_to_save_model = "save_model/", 
         path_to_save_loss = "save_loss/",  
         path_to_save_predictions = "save_predictions/",  
         device = "cpu"):
  
  clean_directory()
  train_dataset = SensorDataset(csv_name = train_csv, root_dir = root, training_length = training_length, forecast_window = forecast_window)
  train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
  test_dataset = SensorDataset(csv_name = test_csv, root_dir = root, training_length = training_length, forecast_window = forecast_window)
  test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

  best_model = transformer(train_dataloader, epoch, k, frequency, path_to_save_model, path_to_save_loss, path_to_save_predictions, device)
  inference(path_to_save_predictions, forecast_window, test_dataloader, device, path_to_save_model, best_model)


main()


[2022-01-16 17:14:46] [INFO] __main__ Epoch: 0, Training loss: 1.4117889722778505
[2022-01-16 17:14:57] [INFO] __main__ Epoch: 100, Training loss: 0.035739494928948096
[2022-01-16 17:15:09] [INFO] __main__ Epoch: 200, Training loss: 0.04626071597698528
[2022-01-16 17:15:19] [INFO] __main__ Epoch: 300, Training loss: 0.02784590172310985
[2022-01-16 17:15:30] [INFO] __main__ Epoch: 400, Training loss: 0.026922289711511924
[2022-01-16 17:15:41] [INFO] __main__ Epoch: 500, Training loss: 0.027998669805422607
[2022-01-16 17:15:52] [INFO] __main__ Epoch: 600, Training loss: 0.018889757267674636
[2022-01-16 17:16:03] [INFO] __main__ Epoch: 700, Training loss: 0.023743670195789636
[2022-01-16 17:16:14] [INFO] __main__ Epoch: 800, Training loss: 0.04001753914921255
[2022-01-16 17:16:25] [INFO] __main__ Epoch: 900, Training loss: 0.02659667900307344
[2022-01-16 17:16:35] [INFO] __main__ Epoch: 1000, Training loss: 0.028530942895002395
[2022-01-16 17:16:46] [INFO] __main__ Epoch: 1100, Training l