In [1]:
import argparse
import os
from exp.exp_pred import Exp_pred
from exp.exp_mae import Exp_mae

from data.stock_data_handle import Stock_Data
import tools as utils
import time

import pdb
import random
import torch
import numpy as np

fix_seed = 2022
random.seed(fix_seed)
torch.manual_seed(fix_seed)
np.random.seed(fix_seed)

parser = argparse.ArgumentParser(description='[Transformer] Long Sequences Forecasting')

parser.add_argument('--model', type=str, default='Transformer',help='model of the experiment')
parser.add_argument('--project_name', type=str, default='baseline',help='name of the experiment')

parser.add_argument('--data_name', type=str, default='CSI', help='')
parser.add_argument('--data_type', type=str, default='stock', help='stock')
parser.add_argument('--root_path', type=str, default='data/', help='root path of the data file')
parser.add_argument('--full_stock_path', type=str, default='processed_data/CSI/', help='root path of the data file')

parser.add_argument('--exp_type', type=str, default='pred', help='[mae|pred]')

parser.add_argument('--seq_len', type=int, default=60, help='input series length')
parser.add_argument('--label_len', type=int, default=1, help='help series length')
parser.add_argument('--pred_len', type=int, default=1, help='predict series length')

parser.add_argument('--enc_in', type=int, default=96, help='encoder input size: cov+technical indicators')
parser.add_argument('--dec_in', type=int, default=96, help='decoder input size')
parser.add_argument('--c_out', type=int, default=96, help='output size')

parser.add_argument('--short_term_len', type=int, default=1, help='short term prediction len')
parser.add_argument('--long_term_len', type=int, default=5, help='long term prediction len')
parser.add_argument('--pred_type', type=str, default='long_term_len', help='type of prediction')

parser.add_argument('--d_model', type=int, default=128, help='dimension of model')
parser.add_argument('--n_heads', type=int, default=4, help='num of heads')
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
parser.add_argument('--d_ff', type=int, default=256, help='dimension of fcn')

parser.add_argument('--dropout', type=float, default=0.05, help='dropout')

parser.add_argument('--activation', type=str, default='gelu',help='activation')
parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')

parser.add_argument('--rank_alpha', type=float, default=0.1, help='weight of rank loss') # adjust

parser.add_argument('--itr', type=int, default=2, help='each params run iteration')
parser.add_argument('--train_epochs', type=int, default=30, help='train epochs')
parser.add_argument('--batch_size', type=int, default=32, help='input data batch size')
parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
parser.add_argument('--adjust_interval', type=int, default=1, help='lr adjust interval')
parser.add_argument('--des', type=str, default='pred',help='exp description')
parser.add_argument('--loss', type=str, default='mse',help='loss function')
parser.add_argument('--lradj', type=str, default='type1',help='adjust learning rate')

# GPU
parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
parser.add_argument('--gpu', type=int, default=0, help='gpu')
parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')


args = parser.parse_args()
args.project_name="transformer_CSI_predShort"
args.exp_type="pred"
args.root_path="../../data/"
args.full_stock_path="data/CSI"
args.enc_in=10
args.dec_in=10
args.c_out=1
args.pred_type="label_short_term"
args.rank_alpha=1
args.train_epochs=50
args.itr=1
args.adjust_interval=10
args.devices=0
args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False

if args.use_gpu and args.use_multi_gpu:
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    args.gpu = args.device_ids[0]


  from pandas.core import (


The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!



python main.py \
      --project_name transformer_CSI_predShort \
      --exp_type pred \
      --data_name CSI \
      --data_type stock \
      --root_path ../../data/ \
      --full_stock_path CSI/ \
      --seq_len 60 \
      --label_len 1 \
      --pred_len 1 \
      --enc_in 10 \
      --dec_in 10 \
      --c_out 1 \
      --short_term_len 1 \
      --long_term_len 5 \
      --pred_type label_short_term \
      --d_model 128 \
      --n_heads 4 \
      --e_layers 2 \
      --d_layers 1 \
      --d_ff 256 \
      --dropout 0.05 \
      --rank_alpha 1.0 \
      --train_epochs 50 \
      --itr 1 \
      --batch_size 32 \
      --learning_rate 0.0001 \
      --adjust_interval 10 \
      --num_workers 10 \
      --devices 0 \


In [2]:
import multiprocessing
print("CPU cores available:", multiprocessing.cpu_count())


CPU cores available: 20


In [3]:
args.num_workers=0

In [2]:
exp_dict = {'pred': Exp_pred, 'mae': Exp_mae}
data_type_dict = {'stock': Stock_Data}
Exp = exp_dict[args.exp_type]

In [4]:

data =  data_type_dict[args.data_type](
        root_path=args.root_path,
        dataset_name=args.data_name,
        full_stock_path=args.full_stock_path,
        size=[args.seq_len, args.label_len, args.pred_len],
        prediction_len=[args.short_term_len, args.long_term_len]
        )

  df = pd.concat((df, temp_df))


generate technical indicator...
Successfully added technical indicators


  df = df.fillna(method="ffill").fillna(method="bfill")


generate convariate matrix...
data shape:  (2743, 88, 106)
label shape:  (2, 2743, 88)


In [10]:
import config
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
import pandas as pd
ticker_list = config.use_ticker_dict[args.data_name]
border_dates = config.date_dict[args.data_name]
stock_num = len(ticker_list)

full_stock_dir =  '../../data/CSI/'

df = pd.DataFrame([], columns=['date','open','close','high','low','volume','dopen','dclose','dhigh','dlow','dvolume', 'price', 'tic'])
for ticket in ticker_list:
    temp_df = pd.read_csv(os.path.join(full_stock_dir,ticket+'.csv'), usecols=['date', 'open', 'close', 'high', 'low', 'volume', 'dopen', 'dclose', 'dhigh', 'dlow', 'dvolume', 'price'])

    temp_df['date'] = temp_df['date'].apply(lambda x:str(x))
    temp_df['date'] = pd.to_datetime(temp_df['date'])
    temp_df['label_short_term'] = temp_df['close'].pct_change(periods=args.short_term_len).shift(periods=(args.short_term_len))
    temp_df['label_long_term'] = temp_df['close'].pct_change(periods=args.long_term_len).shift(periods=(args.long_term_len))
    temp_df['tic'] = ticket
    df = pd.concat((df, temp_df))
df = df.sort_values(by=['date','tic'])

  df = pd.concat((df, temp_df))


In [11]:

from preprocess import FeatureEngineer
fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=False,
            user_defined_feature = False)

print("generate technical indicator...")
df = fe.preprocess_data(df)

import datetime
# add covariance matrix as states
df=df.sort_values(['date','tic'],ignore_index=True)
df.index = df.date.factorize()[0]

cov_list = []
return_list = []

# look back is one year
print("generate convariate matrix...")
lookback=252
for i in range(lookback,len(df.index.unique())):
    data_lookback = df.loc[i-lookback:i,:]
    price_lookback=data_lookback.pivot_table(index = 'date',columns = 'tic', values = 'close') 
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    covs = return_lookback.cov().values 
    cov_list.append(covs)

df_cov = pd.DataFrame({'date':df.date.unique()[lookback:],'cov_list':cov_list,'return_list':return_list})
df = df.merge(df_cov, on='date')
df = df.sort_values(['date','tic']).reset_index(drop=True)

df['date_str'] = df['date'].apply(lambda x: datetime.datetime.strftime(x,'%Y%m%d'))

In [14]:

dates = df['date_str'].unique().tolist()
boarder1_ = dates.index(border_dates[0])
boarder1 = dates.index(border_dates[1]) 

boarder2_ = dates.index(border_dates[2])
boarder2 = dates.index(border_dates[3]) 

boarder3_ = dates.index(border_dates[4])
boarder3 = dates.index(border_dates[5]) 

boarder_end = [boarder1, boarder2, boarder3]
boarder_start = [boarder1_, boarder2_, boarder3_]

In [30]:

data_cov = cov_list.reshape(-1, stock_num, cov_list.shape[1], cov_list.shape[2]) # [day, num_stocks, num_stocks, num_stocks]
data_technical = data.reshape(-1, stock_num, len(attr)) # [day, stock_num, technical_len]
data_feature = feature_list.reshape(-1, stock_num, len(temporal_feature)) # [day, stock_num, temporal_feature_len=10]
data_close = close_list.reshape(-1, stock_num)

In [32]:
data_cov.shape,data_technical.shape,data_feature.shape,data_close.shape

((2743, 88, 88, 88), (2743, 88, 8), (2743, 88, 10), (2743, 88))

In [33]:


label_short_term = np.array(df['label_short_term'].values.tolist()).reshape(-1, stock_num)
label_long_term = np.array(df['label_long_term'].values.tolist()).reshape(-1, stock_num)

In [34]:

data_all = np.concatenate((data_cov[:, 0, :, :], data_technical, data_feature), axis=-1) # [days, num_stocks, cov+technical_len+feature_len]
label_all = np.stack((label_short_term, label_long_term), axis=0) # [2, days, num_stocks, 1]
data_all.shape,label_all.shape

((2743, 88, 106), (2, 2743, 88))

In [35]:

dates = np.array(dates)
data_close = data_close

print("data shape: ",data_all.shape)
print("label shape: ",label_all.shape)

data shape:  (2743, 88, 106)
label shape:  (2, 2743, 88)


In [5]:
id = utils.generate_id()
ii=0
setting = '{}_{}_{}_alpha{}_sl{}_pl{}_enc{}_cout{}_dm{}_nh{}_el{}_dl{}_df{}_{}_{}_dt{}_id{}'.format(args.exp_type, args.project_name, args.data_name, str(args.rank_alpha).replace('.','_'),
            args.seq_len, args.pred_len, args.enc_in, args.c_out,
            args.d_model, args.n_heads, args.e_layers, args.d_layers, args.d_ff, args.des, ii, args.data_name, id)
print(id)
print(setting)

ddw1metg
pred_transformer_CSI_predShort_CSI_alpha1_sl60_pl1_enc10_cout1_dm128_nh4_el2_dl1_df256_pred_0_dtCSI_idddw1metg


In [6]:
args.data_type

'stock'

In [38]:
from torch.utils.data import Dataset, DataLoader

import config
class DatasetStock_PRED(Dataset):
    def __init__(self, stock: Stock_Data, type='train', feature=config.TEMPORAL_FEATURE, pred_type='label_short_term'):
        super().__init__()
        assert type in ['train', 'test', 'valid']
        assert pred_type in ['label_short_term', 'label_long_term']
        
        pos = stock.type_map[type]

        self.label_type = stock.pred_type_map[pred_type]
        self.start_pos = stock.boarder_start[pos]
        self.end_pos = stock.boarder_end[pos]+1
       

        self.feature_len = len(feature)
        self.feature_day_len = stock.seq_len
        self.data = stock.data_all
        self.label = stock.label_all
        
        self.dates = stock.dates[self.start_pos: self.end_pos]
        self.data_close = stock.data_close[self.start_pos: self.end_pos]

        # pdb.set_trace()

    def __getitem__(self, index):
        position = self.start_pos+index
        seq_x = self.data[position-self.feature_day_len+1:position+1, :, -self.feature_len:].transpose(1,0,2) #[days, num_stocks, feature]-> [num_stocks, days, feature]
        seq_x_dec = seq_x[:, -1:, :]

        seq_y = self.label[self.label_type, index, :]
        return seq_x, seq_x_dec, seq_y
    
    def __len__(self):
        return self.end_pos-self.start_pos#len(self.data)

In [10]:

from tensorboardX import SummaryWriter
log_dir = os.path.join('log', 'pred_'+args.project_name+'_'+str(args.rank_alpha)+'_'+id)
print(log_dir)
writer = SummaryWriter(log_dir=log_dir)
data_all = data

log\pred_transformer_CSI_predShort_1_c2xn1b54


In [7]:
from models.embed import DataEmbedding


In [10]:

dataset_dict = {
    'stock': DatasetStock_PRED,
}


flag="train"
if flag == 'train':
    shuffle_flag = True; drop_last = False; batch_size = args.batch_size
else:
    shuffle_flag = False; drop_last = True; batch_size = args.batch_size

dataset = dataset_dict[args.data_type](data, type=flag, pred_type=args.pred_type)

data_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=shuffle_flag,
    num_workers=0,
    drop_last=drop_last)

dataset, data_loader

label_short_term
59 1936


(<__main__.DatasetStock_PRED at 0x17e1210c850>,
 <torch.utils.data.dataloader.DataLoader at 0x17e1210ce10>)

In [11]:
a,b,c=dataset[0]
a.shape,b.shape,c.shape

((88, 60, 10), (88, 1, 10), (88,))

In [12]:
total_embedding=DataEmbedding(c_in=10,d_model=512,dropout=args.dropout)

In [13]:
pe_x = total_embedding(torch.tensor(a).float())  # Shape will be [1, 100, 512]
pe_x.shape

torch.Size([88, 60, 512])

In [14]:
# Display image and label.
train_features, trainx2,train_labels = next(iter(data_loader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")


Feature batch shape: torch.Size([32, 88, 60, 10])
Labels batch shape: torch.Size([32, 88])


In [25]:
args.devices

0

In [40]:
pe_x.shape

torch.Size([88, 60, 512])

In [16]:
from data.stock_data_handle import Stock_Data,DatasetStock,DatasetStock_PRED
from exp.exp_basic import Exp_Basic
from models.transformer import Transformer_base as Transformer
from tools import EarlyStopping, adjust_learning_rate
from metrics import metric, ranking_loss
import tools as utils
import metrics_object as metrics_object

import numpy as np

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
dataset_dict = {
    'stock': DatasetStock_PRED,
}
model_dict = {
            'Transformer':Transformer,
        }

if args.model=='Transformer':
    model = model_dict[args.model](
        # self.args
        args.enc_in,
        args.dec_in, 
        args.c_out,
        args.d_model, 
        args.n_heads, 
        args.e_layers,
        args.d_layers, 
        args.d_ff,
        args.dropout, 
        args.activation
        # self.device
    )

if args.use_multi_gpu and args.use_gpu:
    model = nn.DataParallel(model, device_ids=args.device_ids)

In [27]:


class Exp_pred(Exp_Basic):
    def __init__(self, args, data_all, id):
        super(Exp_pred, self).__init__(args)
        log_dir = os.path.join('log', 'pred_'+args.project_name+'_'+str(args.rank_alpha)+'_'+id)
        
        self.writer = SummaryWriter(log_dir=log_dir)
        self.data_all = data_all
    
    def _build_model(self):
        model_dict = {
            'Transformer':Transformer,
        }

        if self.args.model=='Transformer':
            model = model_dict[self.args.model](
                # self.args
                self.args.enc_in,
                self.args.dec_in, 
                self.args.c_out,
                self.args.d_model, 
                self.args.n_heads, 
                self.args.e_layers,
                self.args.d_layers, 
                self.args.d_ff,
                self.args.dropout, 
                self.args.activation
                # self.device
            )

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        
        return model.float()

    def _get_data(self, flag):
        args = self.args

        if flag == 'train':
            shuffle_flag = True; drop_last = False; batch_size = args.batch_size
        else:
            shuffle_flag = False; drop_last = True; batch_size = args.batch_size
      
        dataset = dataset_dict[self.args.data_type](self.data_all, type=flag, pred_type=self.args.pred_type)
        
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last)

        return dataset, data_loader

    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim
    
    def _select_criterion(self):
        criterion =  nn.MSELoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion, metric_builders, stage='test'):
        self.model.eval()
        total_loss = []
        metric_objs = [builder(stage) for builder in metric_builders]

        for i, (batch_x1, batch_x2, batch_y) in enumerate(vali_loader):
            bs, stock_num = batch_x1.shape[0], batch_x1.shape[1]
            batch_x1 = batch_x1.reshape(-1, batch_x1.shape[-2], batch_x1.shape[-1]).float().to(self.device)
            batch_x2 = batch_x2.reshape(-1, batch_x2.shape[-2], batch_x2.shape[-1]).float().to(self.device)
            batch_y = batch_y.float().to(self.device)
            
            _, _, output = self.model(batch_x1, batch_x2)

            output = output.reshape(bs,stock_num)
            loss = criterion(output, batch_y) + self.args.rank_alpha * ranking_loss(output, batch_y)

            total_loss.append(loss.item())

            with torch.no_grad():
                for metric in metric_objs:
                    metric.update(output, batch_y)

        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss, metric_objs
        
    def train(self, setting):
        train_data, train_loader = self._get_data(flag = 'train')
        vali_data, vali_loader = self._get_data(flag = 'valid')
        test_data, test_loader = self._get_data(flag = 'test')

        metrics_builders = [
        metrics_object.MIRRTop1,
    ]

        path = os.path.join('./checkpoints/',setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()
        
        train_steps = len(train_loader)        
        model_optim = self._select_optimizer()
        criterion =  self._select_criterion()

        metric_objs = [builder('train') for builder in metrics_builders]

        valid_loss_global = np.inf
        best_model_index = -1

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []
            
            self.model.train()
            for i, (batch_x1, batch_x2, batch_y) in enumerate(train_loader):
                iter_count += 1
                # pdb.set_trace()
                bs, stock_num = batch_x1.shape[0], batch_x1.shape[1]
                batch_x1 = batch_x1.reshape(-1, batch_x1.shape[-2], batch_x1.shape[-1]).float().to(self.device)
                batch_x2 = batch_x2.reshape(-1, batch_x2.shape[-2], batch_x2.shape[-1]).float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                _,_, output = self.model(batch_x1, batch_x2)
            
                output = output.reshape(bs,stock_num)
        
                loss = criterion(output, batch_y) + self.args.rank_alpha * ranking_loss(output, batch_y)
                train_loss.append(loss.item())

                model_optim.zero_grad()
                loss.backward()
                model_optim.step()
                
                if (i+1) % 100==0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time()-time_now)/iter_count
                    left_time = speed*((self.args.train_epochs - epoch)*train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                with torch.no_grad():
                    for metric in metric_objs:
                        metric.update(output, batch_y)


            train_loss = np.average(train_loss)
            valid_loss, valid_metrics = self.vali(vali_data, vali_loader, criterion, metrics_builders, stage='valid')
            test_loss, test_metrics = self.vali(test_data, test_loader, criterion, metrics_builders, stage='test')

            self.writer.add_scalar('Train/loss', train_loss, epoch)
            self.writer.add_scalar('Valid/loss', valid_loss, epoch)
            self.writer.add_scalar('Test/loss', test_loss, epoch)

            # pdb.set_trace()

            all_logs = {
                metric.name: metric.value for metric in metric_objs + valid_metrics + test_metrics
            }
            for name, value in all_logs.items():
                self.writer.add_scalar(name, value.mean(), global_step=epoch)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Valid Loss: {3:.7f} Test Loss: {3:.7f}".format(
                epoch + 1, train_steps, train_loss, valid_loss, test_loss))
            
            torch.save(self.model.state_dict(), path+'/'+'checkpoint_{0}.pth'.format(epoch+1))

            if valid_loss.item() < valid_loss_global:
                best_model_index = epoch+1

            adjust_learning_rate(model_optim, epoch+1, self.args)
            
        best_model_path = path+'/'+'checkpoint_{0}.pth'.format(best_model_index)
        self.model.load_state_dict(torch.load(best_model_path))
        print('best model index: ', best_model_index)
        
        return self.model

    def test(self, setting):
        test_data, test_loader = self._get_data(flag='test')

        outputs = []
        real = []
        
        self.model.eval()

        metrics_builders = [
        metrics_object.MIRRTop1,
        metrics_object.RankIC
    ]
        
        metric_objs = [builder('test') for builder in metrics_builders]
        
        for i, (batch_x1, batch_x2, batch_y) in enumerate(test_loader):
            bs, stock_num = batch_x1.shape[0], batch_x2.shape[1]
            batch_x1 = batch_x1.reshape(-1, batch_x1.shape[-2], batch_x1.shape[-1]).float().to(self.device)
            batch_x2 = batch_x2.reshape(-1, batch_x2.shape[-2], batch_x2.shape[-1]).float().to(self.device)
            batch_y = batch_y.float().to(self.device)

            _,_, output = self.model(batch_x1, batch_x2)

            output = output.reshape(bs,stock_num)

            with torch.no_grad():
                for metric in metric_objs:
                    metric.update(output, batch_y)

        # result save
        folder_path = './results/' + setting +'/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        
        all_logs = {
                metric.name: metric.value for metric in metric_objs
            }
        for name, value in all_logs.items():
            print(name, value.mean())

        return

In [28]:
exp = Exp(args, data, id)

Use GPU: cuda:0
log\pred_transformer_CSI_predShort_1_ddw1metg


In [39]:
from tqdm import tqdm  # Import the tqdm progress bar

train_data, train_loader = exp._get_data(flag='train')
vali_data, vali_loader = exp._get_data(flag='valid')
test_data, test_loader = exp._get_data(flag='test')

metrics_builders = [
    metrics_object.MIRRTop1,
]

path = os.path.join('./checkpoints/', setting)
if not os.path.exists(path):
    os.makedirs(path)

time_now = time.time()

train_steps = len(train_loader)
model_optim = exp._select_optimizer()
criterion = exp._select_criterion()

metric_objs = [builder('train') for builder in metrics_builders]

valid_loss_global = np.inf
best_model_index = -1

for epoch in range(exp.args.train_epochs):
    iter_count = 0
    train_loss = []
    
    exp.model.train()
    # Wrap the training loader with tqdm for a progress bar
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{exp.args.train_epochs}", leave=False)
    for i, (batch_x1, batch_x2, batch_y) in enumerate(pbar):
        iter_count += 1
        bs, stock_num = batch_x1.shape[0], batch_x1.shape[1]
        batch_x1 = batch_x1.reshape(-1, batch_x1.shape[-2], batch_x1.shape[-1]).float().to(exp.device)
        batch_x2 = batch_x2.reshape(-1, batch_x2.shape[-2], batch_x2.shape[-1]).float().to(exp.device)
        batch_y = batch_y.float().to(exp.device)

        _, _, output = exp.model(batch_x1, batch_x2)
        output = output.reshape(bs, stock_num)

        loss = criterion(output, batch_y) + exp.args.rank_alpha * ranking_loss(output, batch_y)
        train_loss.append(loss.item())

        model_optim.zero_grad()
        loss.backward()
        model_optim.step()
        
        # Optionally update the progress bar with the current loss and estimated time remaining
        pbar.set_postfix({"loss": f"{loss.item():.7f}"})
        
        if (i+1) % 100 == 0:
            print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
            speed = (time.time()-time_now)/iter_count
            left_time = speed*((exp.args.train_epochs - epoch)*train_steps - i)
            print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
            iter_count = 0
            time_now = time.time()

        with torch.no_grad():
            for metric in metric_objs:
                metric.update(output, batch_y)

    train_loss = np.average(train_loss)
    valid_loss, valid_metrics = exp.vali(vali_data, vali_loader, criterion, metrics_builders, stage='valid')
    test_loss, test_metrics = exp.vali(test_data, test_loader, criterion, metrics_builders, stage='test')

    exp.writer.add_scalar('Train/loss', train_loss, epoch)
    exp.writer.add_scalar('Valid/loss', valid_loss, epoch)
    exp.writer.add_scalar('Test/loss', test_loss, epoch)

    all_logs = {
        metric.name: metric.value for metric in metric_objs + valid_metrics + test_metrics
    }
    for name, value in all_logs.items():
        exp.writer.add_scalar(name, value.mean(), global_step=epoch)

    print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Valid Loss: {3:.7f} Test Loss: {4:.7f}".format(
        epoch + 1, train_steps, train_loss, valid_loss, test_loss))
    
    torch.save(exp.model.state_dict(), os.path.join(path, f'checkpoint_{epoch+1}.pth'))

    if valid_loss.item() < valid_loss_global:
        best_model_index = epoch + 1
        valid_loss_global = valid_loss.item()  # Update the best validation loss

    adjust_learning_rate(model_optim, epoch+1, exp.args)
    
best_model_path = os.path.join(path, f'checkpoint_{best_model_index}.pth')
exp.model.load_state_dict(torch.load(best_model_path))
print('Best model index:', best_model_index)


label_short_term
59 1936
label_short_term
1693 2421
label_short_term
1995 2664


                                                                          

KeyboardInterrupt: 

In [33]:
train_loss

[0.08326154202222824]

In [22]:

print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
print('Task id: ',id)
start = time.time()
exp.train(setting)
end = time.time()
print("Training Time:",end-start)

print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
exp.test(setting)

>>>>>>>start training : pred_transformer_CSI_predShort_CSI_alpha1_sl60_pl1_enc10_cout1_dm128_nh4_el2_dl1_df256_pred_0_dtCSI_idddw1metg>>>>>>>>>>>>>>>>>>>>>>>>>>
Task id:  ddw1metg
label_short_term
59 1936
label_short_term
1693 2421
label_short_term
1995 2664


KeyboardInterrupt: 