In [2]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("./../")

# Library

In [None]:
import torch
import torch.nn as nn
import numpy as np
import time
import math
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd 
from src.algorithms.transformer.time2vec import t2v , SineActivation
import torch
from tqdm import tqdm
torch.cuda.is_available()
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Data

In [3]:
from datetime import datetime 

today = datetime.strftime(datetime.today(),"%Y-%m-%d")
import yfinance as yf
# Request historical data for past 5 years
df = yf.download("^GSPC", start='2017-01-01', end=today)
df.columns = [i.replace(" ", "_") for i in list(df)]
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-03,2251.570068,2263.879883,2245.129883,2257.830078,2257.830078,3770530000
2017-01-04,2261.600098,2272.820068,2261.600098,2270.75,2270.75,3764890000
2017-01-05,2268.179932,2271.5,2260.449951,2269.0,2269.0,3761820000
2017-01-06,2271.139893,2282.100098,2264.060059,2276.97998,2276.97998,3339890000
2017-01-09,2273.590088,2275.48999,2268.899902,2268.899902,2268.899902,3217610000


# Data Preprocessing

## Scaler

In [5]:
from sklearn.preprocessing import MinMaxScaler
scaled_data = []
for col in list(df) :
    min_ , max_ = df[col].min() , df[col].max()
    min_value = 0.9 * min_
    max_value = 1.1 * max_
    scaled_data.append(np.array([min_value , max_value]).reshape(-1,1))
else :
    scaled_info = np.hstack(scaled_data)
    col_order = list(df)
    scaler = MinMaxScaler(feature_range=(0,1))
    scaler.fit(scaled_info)

In [6]:
df[col_order] = scaler.transform(df[col_order].values)

In [7]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-03,0.069097,0.069381,0.08293,0.07484,0.07484,0.268446
2017-01-04,0.072175,0.07212,0.087943,0.0788,0.0788,0.267864
2017-01-05,0.074195,0.071716,0.087593,0.078264,0.078264,0.267548
2017-01-06,0.075103,0.074964,0.088692,0.08071,0.08071,0.224045
2017-01-09,0.075855,0.072939,0.090165,0.078233,0.078233,0.211438


## Add New Features

In [8]:
df = df.reset_index(drop=False)
df['date'] = pd.to_datetime(df['Date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['dayofweek'] = df['date'].dt.dayofweek
df['dayofmonth'] = df['date'].dt.days_in_month
df['dayofyear'] = df['date'].dt.dayofyear
df['weekday'] = df['date'].dt.weekday
df['weekofyear'] = df['date'].dt.weekofyear
df.drop(columns = ["year",'date','Date'], inplace = True)
all_data = pd.get_dummies(df,columns=["month","day","dayofweek","dayofmonth","dayofyear","weekday","weekofyear"])


  df['weekofyear'] = df['date'].dt.weekofyear


In [9]:
all_data.head()

Unnamed: 0,Open,High,Low,Close,Adj_Close,Volume,month_1,month_2,month_3,month_4,...,weekofyear_44,weekofyear_45,weekofyear_46,weekofyear_47,weekofyear_48,weekofyear_49,weekofyear_50,weekofyear_51,weekofyear_52,weekofyear_53
0,0.069097,0.069381,0.08293,0.07484,0.07484,0.268446,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0.072175,0.07212,0.087943,0.0788,0.0788,0.267864,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.074195,0.071716,0.087593,0.078264,0.078264,0.267548,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0.075103,0.074964,0.088692,0.08071,0.08071,0.224045,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0.075855,0.072939,0.090165,0.078233,0.078233,0.211438,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
target_col = "Adj_Close"

y = all_data[target_col].values
y_orig = all_data[target_col].values

In [11]:
all_data.shape

(1312, 480)

In [13]:
all_data.head()

Unnamed: 0,Open,High,Low,Close,Adj_Close,Volume,month_1,month_2,month_3,month_4,...,weekofyear_44,weekofyear_45,weekofyear_46,weekofyear_47,weekofyear_48,weekofyear_49,weekofyear_50,weekofyear_51,weekofyear_52,weekofyear_53
0,0.069097,0.069381,0.08293,0.07484,0.07484,0.268446,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0.072175,0.07212,0.087943,0.0788,0.0788,0.267864,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.074195,0.071716,0.087593,0.078264,0.078264,0.267548,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0.075103,0.074964,0.088692,0.08071,0.08071,0.224045,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0.075855,0.072939,0.090165,0.078233,0.078233,0.211438,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Make TimeSeries Data

In [15]:
x_train = all_data
y_train = all_data[target_col]
WINDOW_SIZE = 20

In [16]:

def create_inout_sequences(input_data,target_data, tw, output_window):
    input_seq = []
    output_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = target_data[i+output_window:i+tw+output_window]
        input_seq.append(train_seq)
        output_seq.append(train_label)
    return np.array(input_seq) , np.array(output_seq)[:,:, np.newaxis]

In [17]:
N_Train_Test_Split = 1000
train = all_data[:N_Train_Test_Split]
valid = all_data[N_Train_Test_Split:]
train.shape , valid.shape

((1000, 480), (312, 480))

In [18]:
x_train_ts , y_train_ts = create_inout_sequences(train.values , train[target_col], tw= WINDOW_SIZE , output_window=1)
x_valid_ts , y_valid_ts = create_inout_sequences(valid.values , valid[target_col], tw= WINDOW_SIZE , output_window=1)

# Torch Function

In [38]:
def evaluate(eval_model, data_loader):
    eval_model.eval() # Turn on the evaluation mode
    total_loss = 0.
    with torch.no_grad():
        for batch_idx , samples in enumerate(data_loader) :
            x_train , y_train = samples
            output = eval_model(x_train)            
            total_loss +=  criterion(output, y_train).cpu().item()
    return total_loss / batch_idx
    
def plot_and_loss(eval_model , data_loader , epoch , folder , device ) :
    eval_model.eval() 
    test_result = torch.Tensor(0)    
    truth = torch.Tensor(0)
    total_loss = 0.
    with torch.no_grad():
        for batch_idx , (x , y) in enumerate(data_loader) :
            pred = model(x.to(device))
            _ = y[:,-1].squeeze().cpu()
            truth = torch.cat((truth, _), 0)
            _ = pred[:,-1].squeeze().cpu()
            total_loss += criterion(pred.cpu(), y.cpu()).item()

            test_result = torch.cat((test_result, _), 0)
        else :
            total_loss /= batch_idx 
            plt.plot(test_result.detach().numpy(),color="red")
            plt.plot(truth.detach().numpy(),color="blue")
            plt.plot((test_result-truth).detach().numpy(),color="green")
            plt.title(f"Loss : {total_loss:.5f}")
            plt.grid(True, which='both')
            plt.axhline(y=0, color='k')
            fig_path = f'{folder}/epoch{epoch:05d}.png'
            filepath = Path(fig_path)
            filepath.parent.mkdir(parents=True, exist_ok=True)
            plt.savefig(filepath)
            plt.close()
            return total_loss 

class TransAm(nn.Module):
    def __init__(self,time_dim = 16 , feature_size=250,num_layers=1,dropout=0.1,nhead=6,device="cpu"):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        
        self.src_mask = None
        self.time_vec = SineActivation(feature_size,time_dim ).to(device)
        output_size = 2*time_dim+feature_size
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=output_size, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)        
        self.decoder = nn.Linear(output_size,1)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1    
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self,src):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask

        time_src = self.time_vec(src)
        src = torch.cat((src, time_src),axis=-1)
        output = self.transformer_encoder(src,self.src_mask)#, self.src_mask)
        output = self.decoder(output)
        return output

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask



class CustomDataset(Dataset): 
  def __init__(self , x_data ,y_data):
    self.x_data = x_data
    self.y_data = y_data
    self.device= device 

  # 총 데이터의 개수를 리턴
  def __len__(self): 
    return len(self.x_data)

  # 인덱스를 입력받아 그에 맵핑되는 입출력 데이터를 파이토치의 Tensor 형태로 리턴
  def __getitem__(self, idx): 
    x = torch.FloatTensor(self.x_data[idx])
    y = torch.FloatTensor(self.y_data[idx])
    return x, y

In [21]:
device = "cuda:1"
model = TransAm(time_dim=16 , feature_size=480 ,num_layers=1,dropout=0.1,nhead=8,device= device)
model = model.to(device)

In [23]:
x_train_ts.shape

(980, 20, 480)

In [24]:
x_train_ts.shape , y_train_ts.shape

((980, 20, 480), (980, 20, 1))

In [25]:
tr_dataset = CustomDataset(x_data= x_train_ts,y_data =y_train_ts)
va_dataset = CustomDataset(x_data= x_valid_ts,y_data =y_valid_ts)
dataloader = DataLoader(tr_dataset, batch_size=2, shuffle=True)


In [26]:
y_train_ts.shape

(980, 20, 1)

In [27]:
_input , _target =  next(iter(dataloader))
_input , _target = _input.to(device) , _target.to(device)

In [29]:
model(_input).shape

torch.Size([2, 20, 1])

In [32]:
lr = 1e-3
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
tr_dataloader = DataLoader(tr_dataset, batch_size=32, shuffle=True)
va_dataloader = DataLoader(va_dataset, batch_size=32, shuffle=False)

In [None]:
epoch = 1 

# Train

In [39]:


n_epoch = 100
total_n_batch = len(dataloader)
pbar = tqdm(range(epoch,n_epoch),desc="start")
va_loss = np.inf
for epoch in pbar :
    model.train()
    total_loss = 0
    for batch_idx , samples in enumerate(dataloader) :
        optimizer.zero_grad()
        x_train , y_train = samples 
        pred = model(x_train.to(device))
        loss = criterion(y_train.to(device), pred)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.7)
        optimizer.step()
        total_loss += loss.item()
        cur_loss = total_loss / (batch_idx+1)
        percent = (batch_idx / total_n_batch) * 100
        pbar.set_description(f"[{epoch:02d}][{percent:05.2f}%] : {cur_loss:.3f} / validation loss : {va_loss:.5f}")
    else :
        pbar.update(1)
        scheduler.step()
    
    if epoch % 10 == 0 == 0:
        va_loss = plot_and_loss(model, va_dataloader,epoch, "time2vec_transformer",device=device)


[50][99.80%] : 0.000 / validation loss : 0.05319:  56%|█████▌    | 50/90 [03:21<02:57,  4.44s/it]