# For: Using Pretrained Model

**Import**

In [1]:
from ray import tune
import torch
from torch import nn
from torch import optim
from torch import functional as F
from einops import rearrange
import os
import pickle
import sys
sys.path.append('/home/tiennv/FPT/FinanceTransformers')
from logger_config import get_logger
from stock_embedder import *
import pandas as pd
from tqdm.notebook import tqdm
import ta
import numpy as np

**Configs**

In [2]:
config = {
    'pretrained_model_dir': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/models_saved/pretrained',
    # Trainer
    'stock_data_file': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/Datasets/technology_ver_1.csv',
    "batch_size": tune.choice([16, 32, 64, 128, 256]),
    'split_ratio': 0.8,
    'calculate_technical_indicators': False,
    "lr": tune.loguniform(1e-4, 1e-2),
    "optimizer": tune.choice(["Adam", "SGD"]),
    "momentum": tune.uniform(0.8, 0.99),
    "weight_decay": tune.loguniform(1e-5, 1e-3),
    "scheduler": tune.choice(["StepLR", "ReduceLROnPlateau"]),
    "step_size": tune.choice([5, 10, 20]),
    "gamma": tune.uniform(0.1, 0.5),
    # Ray Tune:
    'num_samples': 20,
    'epochs': 10,
    'max_num_epochs': 100,
    'gpus_per_trial': 0,
    'grace_period': 1,
    'reduction_factor': 2,
    'device': 'cpu'
}

**Giải thích:**

`max_t=max_num_epochs`: Mỗi thử nghiệm có thể chạy tối đa max_num_epochs epoch.

`grace_period=1`: Scheduler sẽ đợi ít nhất 1 epoch trước khi quyết định loại bỏ các thử nghiệm không tốt.

`reduction_factor=2`: Sau mỗi giai đoạn, scheduler sẽ giảm số lượng thử nghiệm còn lại xuống một nửa, giúp tập trung vào những thử nghiệm hứa hẹn nhất.

In [3]:
model_config = load_model_config(model_dir=config['pretrained_model_dir'])
model_config

{'ts_size': 24,
 'mask_size': 1,
 'num_masks': 3,
 'total_mask_size': 3,
 'hidden_dim': 12,
 'embed_dim': 6,
 'num_layer': 3,
 'z_dim': 6,
 'num_embed': 32,
 'min_val': array([  49.274517,   50.541279,   49.150326,   49.681866,   49.681866,
        7900.      ]),
 'max_val': array([1.22172548e+03, 1.22334874e+03, 1.19986969e+03, 1.21864809e+03,
        1.21864809e+03, 8.27602000e+07]),
 'stock_features': ['Open', 'High', 'Low', 'Close', 'Adj_Close', 'Volume']}

*# Bước 1*


**Define Models Architecture and Data Loader**

*Data Loader*

In [4]:
def train_test_split(data, ratio):
    idx = np.random.permutation(len(data))
    train_idx = idx[:int(ratio * len(data))]
    test_idx = idx[int(ratio * len(data)):]
    train_data = data[train_idx, ...]
    test_data = data[test_idx, ...]
    return train_data, test_data

def load_data(ts_size, data):
    # data.shape = (rows, features)
    
    def sliding_window(ts_size, ori_data):
        # Flipping the data to make chronological data
        ori_data = ori_data[::-1]  # (len(csv), z_dim)
        # Make (len(ori_data), z_dim) into (num_samples, seq_len, z_dim)
        samples = []
        for i in range(len(ori_data) - ts_size):
            single_sample = ori_data[i:i + ts_size]  # (seq_len, z_dim)
            samples.append(single_sample)
        samples = np.array(samples)  # (bs, seq_len, z_dim)
        np.random.shuffle(samples)  # Make it more like i.i.d.
        return samples

    data = sliding_window(ts_size=ts_size, ori_data=data)  # (bs, ts_size, z_dim)
    
    return data

In [5]:
def calculate_technical_indicators(df_passed: pd.DataFrame, rolling_window = 50):
    df = df_passed.copy()
    
    def generate_indicators(df, rolling_window = 50):
        # Calculate technical indicators
        # df['momentum'] = ta.momentum.roc(df['Close'])
        # df['trend'] = ta.trend.sma_indicator(df['Close'])
        # df['volatility'] = ta.volatility.bollinger_mavg(df['Close'])
        # df['volume'] = ta.volume.on_balance_volume(df['Close'], df['Volume'])
        df['stoch'] = ta.momentum.stoch(df['High'], df['Low'], df['Close'])
        df['adx'] = ta.trend.adx(df['High'], df['Low'], df['Close'])
        df['bollinger_hband'] = ta.volatility.bollinger_hband(df['Close'])
        df['mfi'] = ta.volume.money_flow_index(df['High'], df['Low'], df['Close'], df['Volume'])
        df['rsi'] = ta.momentum.rsi(df['Close'])
        df['ma'] = ta.trend.sma_indicator(df['Close'])
        df['std'] = df['Close'].rolling(window=rolling_window).std()
        df['adl'] = ta.volume.acc_dist_index(df['High'], df['Low'], df['Close'], df['Volume'])
        df['williams'] = ta.momentum.williams_r(df['High'], df['Low'], df['Close'])
        df['macd'] = ta.trend.macd(df['Close'])
        df['obv'] = ta.volume.on_balance_volume(df['Close'], df['Volume'])
        df['sar'] = ta.trend.psar_down(df['High'], df['Low'], df['Close']) # Added the 'close' argument
        df['ichimoku_a'] = ta.trend.ichimoku_a(df['High'], df['Low'])
        df['ichimoku_b'] = ta.trend.ichimoku_b(df['High'], df['Low'])

        return df
    
    df = generate_indicators(df=df, rolling_window=rolling_window)
    
    # Fillna
    df = df.fillna(method='ffill')
    df = df.iloc[rolling_window + 1 : ]
    df = df.fillna(method='bfill')
    
    if df.isna().sum().sum() > 0:
        raise Exception('NaN values found')
    
    return df

In [6]:
all_symbols_df = pd.read_csv(config['stock_data_file'], encoding='UTF-8')
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,2010-01-11,JBL,14.340293,17.040001,17.620001,16.940001,17.610001,3682500.0
1,2010-01-11,FSLR,138.240005,138.240005,141.240005,137.770004,141.000000,1575400.0
2,2010-01-11,FI,12.227500,12.227500,12.245000,12.125000,12.190000,5786000.0
3,2010-01-11,IT,20.600000,20.600000,20.690001,20.309999,20.690001,386400.0
4,2010-01-11,MCHP,9.767199,14.230000,14.285000,14.090000,14.245000,4489600.0
...,...,...,...,...,...,...,...,...
261364,2024-04-30,ADI,199.815201,200.610001,204.199997,200.500000,203.630005,2668400.0
261365,2024-04-30,QCOM,165.850006,165.850006,169.240005,165.809998,169.229996,6914200.0
261366,2024-04-30,INTU,625.619995,625.619995,637.919983,625.229980,637.010010,1309700.0
261367,2024-04-30,FTV,75.188156,75.269997,77.105003,75.220001,77.010002,2356300.0


In [7]:
all_symbols_df['Date'] = pd.to_datetime(all_symbols_df['Date'])
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,2010-01-11,JBL,14.340293,17.040001,17.620001,16.940001,17.610001,3682500.0
1,2010-01-11,FSLR,138.240005,138.240005,141.240005,137.770004,141.000000,1575400.0
2,2010-01-11,FI,12.227500,12.227500,12.245000,12.125000,12.190000,5786000.0
3,2010-01-11,IT,20.600000,20.600000,20.690001,20.309999,20.690001,386400.0
4,2010-01-11,MCHP,9.767199,14.230000,14.285000,14.090000,14.245000,4489600.0
...,...,...,...,...,...,...,...,...
261364,2024-04-30,ADI,199.815201,200.610001,204.199997,200.500000,203.630005,2668400.0
261365,2024-04-30,QCOM,165.850006,165.850006,169.240005,165.809998,169.229996,6914200.0
261366,2024-04-30,INTU,625.619995,625.619995,637.919983,625.229980,637.010010,1309700.0
261367,2024-04-30,FTV,75.188156,75.269997,77.105003,75.220001,77.010002,2356300.0


In [8]:
all_symbols_df = all_symbols_df.sort_values(by='Date')
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,2010-01-11,JBL,14.340293,17.040001,17.620001,16.940001,17.610001,3682500.0
33,2010-01-11,ADSK,26.250000,26.250000,26.490000,26.070000,26.340000,2151300.0
34,2010-01-11,GRMN,21.318253,34.290001,34.450001,33.520000,34.099998,1997700.0
35,2010-01-11,APH,5.031209,5.661250,5.692500,5.575000,5.680000,6540800.0
36,2010-01-11,GLW,14.353989,20.490000,20.850000,20.219999,20.620001,25617100.0
...,...,...,...,...,...,...,...,...
261313,2024-04-30,AAPL,170.099289,170.330002,174.990005,170.000000,173.330002,65934800.0
261312,2024-04-30,ENPH,108.760002,108.760002,111.949997,108.690002,111.250000,2768300.0
261311,2024-04-30,MSI,339.149994,339.149994,347.070007,338.540009,346.700012,1220800.0
261318,2024-04-30,APH,60.288956,60.384998,61.799999,60.349998,61.000000,7167200.0


*Choose Dates Range*

In [9]:
start_date = '2015-01-01'
end_date = '2016-01-01'

all_symbols_df = all_symbols_df[(all_symbols_df['Date'] >= start_date) & (all_symbols_df['Date'] <= end_date)]
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
82033,2015-01-02,TER,18.421520,19.700001,20.000000,19.469999,19.920000,1030300.0
82039,2015-01-02,LRCX,69.079231,79.449997,80.190002,78.839996,79.870003,830600.0
82038,2015-01-02,ADI,45.454937,55.540001,56.250000,54.970001,55.680000,1323200.0
82037,2015-01-02,MSI,58.383911,66.510002,67.730003,66.360001,67.540001,1077900.0
82036,2015-01-02,CDW,31.416943,34.860001,35.299999,34.599998,35.259998,380200.0
...,...,...,...,...,...,...,...,...
100323,2015-12-31,AMAT,17.243118,18.670000,18.959999,18.670000,18.940001,8685100.0
100322,2015-12-31,NXPI,76.961082,84.250000,85.870003,84.150002,85.430000,2140000.0
100321,2015-12-31,MCHP,20.020269,23.270000,23.924999,23.270000,23.825001,2316200.0
100320,2015-12-31,JBL,21.590118,23.290001,23.629999,23.290001,23.500000,1559900.0


In [10]:
model_config['stock_features']

['Open', 'High', 'Low', 'Close', 'Adj_Close', 'Volume']

In [11]:
all_symbols_df.rename(columns={'Adj Close': 'Adj_Close'}, inplace=True)
all_symbols_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_symbols_df.rename(columns={'Adj Close': 'Adj_Close'}, inplace=True)


Unnamed: 0,Date,Symbol,Adj_Close,Close,High,Low,Open,Volume
82033,2015-01-02,TER,18.421520,19.700001,20.000000,19.469999,19.920000,1030300.0
82039,2015-01-02,LRCX,69.079231,79.449997,80.190002,78.839996,79.870003,830600.0
82038,2015-01-02,ADI,45.454937,55.540001,56.250000,54.970001,55.680000,1323200.0
82037,2015-01-02,MSI,58.383911,66.510002,67.730003,66.360001,67.540001,1077900.0
82036,2015-01-02,CDW,31.416943,34.860001,35.299999,34.599998,35.259998,380200.0
...,...,...,...,...,...,...,...,...
100323,2015-12-31,AMAT,17.243118,18.670000,18.959999,18.670000,18.940001,8685100.0
100322,2015-12-31,NXPI,76.961082,84.250000,85.870003,84.150002,85.430000,2140000.0
100321,2015-12-31,MCHP,20.020269,23.270000,23.924999,23.270000,23.825001,2316200.0
100320,2015-12-31,JBL,21.590118,23.290001,23.629999,23.290001,23.500000,1559900.0


*Create Training and Validation Batches*

In [12]:
train_batches = torch.tensor(data=[])
val_batches = torch.tensor(data=[])

symbols = all_symbols_df['Symbol'].unique()

for symbol in tqdm(symbols):
    df = all_symbols_df[all_symbols_df['Symbol'] == symbol]
    df = df.sort_values(by='Date')
    
    if config['calculate_technical_indicators']:
        df = calculate_technical_indicators(df, rolling_window=model_config['ts_size'])
    
    df = df[model_config['stock_features']]
    data = df.values
    
    train_data, val_data = train_test_split(data=data, ratio=config['split_ratio'])
    
    # Create batches (sliding window)
    train_data = load_data(ts_size=model_config['ts_size'], data=train_data)
    val_data = load_data(ts_size=model_config['ts_size'], data=val_data)
    
    if len(train_data) > 0:
        train_data = normalize(train_data, min_val=model_config['min_val'], max_val=model_config['max_val'])
        train_data = torch.tensor(train_data)
        train_batches = torch.cat(tensors=[train_batches, train_data])
    
    if len(val_data) > 0:
        val_data = normalize(val_data, min_val=model_config['min_val'], max_val=model_config['max_val'])
        val_data = torch.tensor(val_data)
        val_batches = torch.cat(tensors=[val_batches, val_data])

  0%|          | 0/74 [00:00<?, ?it/s]

*Models Architecture*

In [13]:
def get_mini_batch(batch_size, data):
    idx = np.random.permutation(len(data))
    idx = idx[:batch_size]
    data_mini = data[idx, ...]  # (bs, seq_len, z_dim)
    return data_mini

def generate_random_masks(num_samples, ts_size, mask_size, num_masks):
    # xxxo
    # oxxx
    # xxox
    num_patches = int(ts_size // mask_size)

    def single_sample_mask():
        idx = np.random.permutation(num_patches)[:num_masks]
        mask = np.zeros(ts_size, dtype=bool)
        for j in idx:
            mask[j * mask_size:(j + 1) * mask_size] = 1
        return mask

    masks_list = [single_sample_mask() for _ in range(num_samples)]
    masks_list = [torch.tensor(mask) for mask in masks_list]
    masks = torch.stack(masks_list, axis=0)  # (num_samples, ts_size)
    return masks

def generate_pseudo_masks(ts_size, num_samples):
    # xxxx
    # xxxx
    # xxxx
    masks = np.zeros((num_samples, ts_size), dtype=bool)
    return masks

**Ray Tune**

In [14]:
import ray.train
import torch
import torch.nn as nn
import torch.optim as optim
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler



# Bước 2: Hàm training với Ray Tune và validation
def train_model(config, checkpoint_dir=None):
    # ---------------- Get Model ------------
    model = StockEmbedder(config=model_config)
    model = load_model(model=model, model_dir=config['pretrained_model_dir'])
    # ---------------- END OF: Get Model -------------
    
    criterion = torch.nn.MSELoss(reduction='mean')
    optimizer = getattr(optim, config["optimizer"])(model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"])

    # Chỉ dùng momentum nếu optimizer là SGD
    if config["optimizer"] == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=config["lr"], momentum=config["momentum"], weight_decay=config["weight_decay"])

    # Load checkpoint nếu có
    if checkpoint_dir:
        checkpoint = torch.load(checkpoint_dir)
        model.load_state_dict(checkpoint["model_state"])
        optimizer.load_state_dict(checkpoint["optimizer_state"])
    
    
    pseudo_masks = generate_pseudo_masks(ts_size=model.config['ts_size'], num_samples=config['batch_size'])
    
    # ------------------- TRAIN AE ----------------------------
    # for t in tqdm(range(config['epochs'])):
    #     # ------------ Train model: -----------------
    #     # Đặt mô hình về chế độ train để huấn luyện
    #     model.train()
        
    #     x_ori = get_mini_batch(batch_size=config['batch_size'], data=train_batches)  # (bs, ts_size, z_dim)
        
    #     x_ori = torch.tensor(x_ori, dtype=torch.float32).to(config['device'])
    #     x_enc, x_dec = model(x_ori, pseudo_masks, 'ae')
    #     loss = criterion(x_dec, x_ori)
        
    #     optimizer.zero_grad()
    #     loss.backward()
    #     optimizer.step()
        
    #     # -------------- END OF: Train model -----------------
    #     # ------------- Calculate loss on validation set: -----------------------
    #     model.eval()
        
    #     val_loss = 0
        
    #     num_batches = len(val_batches) // config['batch_size']
        
    #     for i in range(num_batches):
    #         # Lấy batch dữ liệu
    #         val_batch = val_batches[i * config['batch_size'] : (i + 1) * config['batch_size']]  # (bs, ts_size, z_dim)
            
    #         x_ori = torch.tensor(val_batch, dtype=torch.float32).to(config['device'])
    #         x_enc, x_dec = model(x_ori, pseudo_masks, 'ae')
    #         loss = criterion(x_dec, x_ori)
            
    #         # Cộng dồn loss cho mỗi batch
    #         val_loss += loss.item()
        
    #     # Tính trung bình của val_loss
    #     if num_batches > 0:
    #         val_loss /= num_batches
    #     else:
    #         val_loss = 1e9
        
    #     # ------------- END OF: Calculate loss on validation set: -----------------------
        
        
    #     # Báo cáo loss trên tập validation cho Ray Tune
    #     ray.train.report({'loss': val_loss})
    
    # ------------------- END OF: TRAIN AE ----------------------------
    
    
    # ------------------- TRAIN EMBED ----------------------------
    # for t in tqdm(range(config['epochs'])):
    #     # ------------ Train model: -----------------
    #     x_ori = get_mini_batch(batch_size=config['batch_size'], data=train_batches)  # (bs, ts_size, z_dim)
        
    #     x_ori = torch.tensor(x_ori, dtype=torch.float32).to(config['device'])
    #     random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])

    #     # Get the target x_ori_enc by Autoencoder
    #     model.eval()
    #     masks = pseudo_masks
    #     x_ori_enc, _ = model(x_ori, pseudo_masks, 'ae')
    #     x_ori_enc = x_ori_enc.clone().detach()  # (bs, ts_size, hidden_dim)
    #     b, l, f = x_ori_enc.size()

    #     model.train()
    #     masks = random_masks
    #     x_enc, x_inter, x_dec = model(x_ori, random_masks, 'mae')

    #     # Only calculate loss for those being masked
    #     x_enc_masked = x_enc[masks, :].reshape(b, -1, f)
    #     x_ori_enc_masked = x_ori_enc[masks, :].reshape(b, -1, f)
    #     loss = criterion(x_enc_masked, x_ori_enc_masked)
    #     # By annotate lines above, we take loss on all patches
    #     # loss = self.criterion(x_enc, x_ori_enc)  # embed_loss
        
    #     optimizer.zero_grad()
    #     loss.backward()
    #     optimizer.step()
        
    #     # -------------- END OF: Train model -----------------
    #     # ------------- Calculate loss on validation set: -----------------------
    #     model.eval()
        
    #     val_loss = 0
        
    #     num_batches = len(val_batches) // config['batch_size']
        
    #     for i in range(num_batches):
    #         # Lấy batch dữ liệu
    #         val_batch = val_batches[i * config['batch_size'] : (i + 1) * config['batch_size']]  # (bs, ts_size, z_dim)
            
    #         x_ori = torch.tensor(val_batch, dtype=torch.float32).to(config['device'])
    #         random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])  # (bs, ts_size)
            
    #         # Get the target x_ori_enc by Autoencoder
    #         masks = pseudo_masks
    #         x_ori_enc, _ = model(x_ori, pseudo_masks, 'ae')
    #         x_ori_enc = x_ori_enc.clone().detach()  # (bs, ts_size, hidden_dim)
    #         b, l, f = x_ori_enc.size()
            
    #         masks = random_masks
    #         x_enc, x_inter, x_dec = model(x_ori, random_masks, 'mae')

    #         # Only calculate loss for those being masked
    #         x_enc_masked = x_enc[masks, :].reshape(b, -1, f)
    #         x_ori_enc_masked = x_ori_enc[masks, :].reshape(b, -1, f)
    #         loss = criterion(x_enc_masked, x_ori_enc_masked)
    #         # By annotate lines above, we take loss on all patches
    #         # loss = self.criterion(x_enc, x_ori_enc)  # embed_loss
            
    #         # Cộng dồn loss cho mỗi batch
    #         val_loss += loss.item()
        
    #     # Tính trung bình của val_loss
    #     if num_batches > 0:
    #         val_loss /= num_batches
    #     else:
    #         val_loss = 1e9
        
    #     # ------------- END OF: Calculate loss on validation set: -----------------------
        
    #     # Báo cáo loss trên tập validation cho Ray Tune
    #     ray.train.report({'loss': val_loss})
        
    # ------------------- END OF: TRAIN EMBED ----------------------------
    
    
    # ------------------- TRAIN RECON ----------------------------
    for t in tqdm(range(config['epochs'])):
        # ------------ Train model: -----------------
        x_ori = get_mini_batch(batch_size=config['batch_size'], data=train_batches)  # (bs, ts_size, z_dim)
        
        x_ori = torch.tensor(x_ori, dtype=torch.float32).to(config['device'])
        random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])  # (bs, ts_size)

        model.train()
        masks = random_masks
        _, x_inter, x_dec = model(x_ori, random_masks, 'mae')
        loss = criterion(x_dec, x_ori)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # -------------- END OF: Train model -----------------
        # ------------- Calculate loss on validation set: -----------------------
        model.eval()
        
        val_loss = 0
        
        num_batches = len(val_batches) // config['batch_size']
        
        for i in range(num_batches):
            # Lấy batch dữ liệu
            val_batch = val_batches[i * config['batch_size'] : (i + 1) * config['batch_size']]  # (bs, ts_size, z_dim)
            
            x_ori = torch.tensor(val_batch, dtype=torch.float32).to(config['device'])
            random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])  # (bs, ts_size)
            
            masks = random_masks
            _, x_inter, x_dec = model(x_ori, random_masks, 'mae')
            loss = criterion(x_dec, x_ori)
            
            # Cộng dồn loss cho mỗi batch
            val_loss += loss.item()
        
        # Tính trung bình của val_loss
        if num_batches > 0:
            val_loss /= num_batches
        else:
            val_loss = 1e9
        
        # ------------- END OF: Calculate loss on validation set: -----------------------
        
        #  Báo cáo loss trên tập validation cho Ray Tune
        ray.train.report({'loss': val_loss})
        
    # ------------------- END OF: TRAIN RECON ----------------------------
        
        

# Bước 3: Cấu hình hyperparameter tuning
def main():
    
    scheduler = ASHAScheduler(
        metric="loss",  # Sử dụng loss trên tập validation để đánh giá
        mode="min",
        max_t=config['max_num_epochs'],
        grace_period=config['grace_period'],
        reduction_factor=config['reduction_factor']
    )
    
    result = tune.run(
        # train_model,
        tune.with_parameters(trainable=train_model),
        resources_per_trial={"cpu": 1, "gpu": config['gpus_per_trial']},
        config=config,
        num_samples=config['num_samples'],
        scheduler=scheduler
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(best_trial.last_result["loss"]))
    
    return best_trial
    
    

if __name__ == "__main__":
    best_trial = main()

2024-09-22 21:59:39,458	INFO worker.py:1786 -- Started a local Ray instance.
2024-09-22 21:59:40,416	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-09-22 21:59:40,418	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
  return torch.load(io.BytesIO(b))


0,1
Current time:,2024-09-22 21:59:58
Running for:,00:00:17.54
Memory:,13.7/62.6 GiB

Trial name,status,loc,batch_size,gamma,lr,momentum,optimizer,scheduler,step_size,weight_decay,iter,total time (s),loss
train_model_4b3ad_00000,TERMINATED,192.168.2.115:2862487,32,0.148858,0.00043122,0.912965,Adam,ReduceLROnPlateau,5,5.48646e-05,10,6.54748,0.0334539
train_model_4b3ad_00001,TERMINATED,192.168.2.115:2862489,256,0.471821,0.000484726,0.960113,SGD,StepLR,5,2.04044e-05,1,2.10994,0.0384892
train_model_4b3ad_00002,TERMINATED,192.168.2.115:2862490,256,0.185782,0.000103949,0.913394,Adam,StepLR,10,0.000194447,1,2.1533,0.0383927
train_model_4b3ad_00003,TERMINATED,192.168.2.115:2862495,32,0.193691,0.00775389,0.927245,Adam,StepLR,5,1.19615e-05,1,2.2115,0.225209
train_model_4b3ad_00004,TERMINATED,192.168.2.115:2862494,16,0.242172,0.00020564,0.861605,Adam,ReduceLROnPlateau,10,0.000358674,1,2.0243,0.0347919
train_model_4b3ad_00005,TERMINATED,192.168.2.115:2862493,32,0.328982,0.00146963,0.957959,SGD,StepLR,5,1.04891e-05,2,2.42563,0.0351425
train_model_4b3ad_00006,TERMINATED,192.168.2.115:2862496,128,0.442288,0.000948089,0.800161,Adam,ReduceLROnPlateau,10,0.000598948,1,2.13594,0.0358585
train_model_4b3ad_00007,TERMINATED,192.168.2.115:2862499,16,0.443678,0.000381704,0.898661,Adam,ReduceLROnPlateau,10,0.000104699,10,6.51936,0.0331546
train_model_4b3ad_00008,TERMINATED,192.168.2.115:2862502,16,0.464757,0.000136277,0.980367,Adam,StepLR,20,3.61004e-05,4,4.71392,0.0347408
train_model_4b3ad_00009,TERMINATED,192.168.2.115:2862506,32,0.138449,0.00402469,0.944095,SGD,ReduceLROnPlateau,20,0.000248221,2,2.85906,0.0351495




[36m(pid=2862502)[0m   return torch.load(io.BytesIO(b))
[36m(train_model pid=2862502)[0m   model.load_state_dict(state_dict=torch.load(f=os.path.join(model_dir, 'model.pth')))


[36m(train_model pid=2862502)[0m   0%|          | 0/10 [00:00<?, ?it/s]


[36m(train_model pid=2862502)[0m   x = torch.tensor(x, dtype=torch.float32)
[36m(train_model pid=2862502)[0m   masks = torch.tensor(masks, dtype=torch.float32)


Trial name,loss
train_model_4b3ad_00000,0.0334539
train_model_4b3ad_00001,0.0384892
train_model_4b3ad_00002,0.0383927
train_model_4b3ad_00003,0.225209
train_model_4b3ad_00004,0.0347919
train_model_4b3ad_00005,0.0351425
train_model_4b3ad_00006,0.0358585
train_model_4b3ad_00007,0.0331546
train_model_4b3ad_00008,0.0347408
train_model_4b3ad_00009,0.0351495


2024-09-22 21:59:58,298	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/tiennv/ray_results/train_model_2024-09-22_21-59-40' in 0.0203s.
2024-09-22 21:59:58,309	INFO tune.py:1041 -- Total run time: 17.89 seconds (17.52 seconds for the tuning loop).


Best trial config: {'pretrained_model_dir': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/models_saved/pretrained', 'stock_data_file': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/Datasets/technology_ver_1.csv', 'batch_size': 16, 'split_ratio': 0.8, 'calculate_technical_indicators': False, 'lr': 0.00038170387616976386, 'optimizer': 'Adam', 'momentum': 0.8986611460040892, 'weight_decay': 0.00010469943492750553, 'scheduler': 'ReduceLROnPlateau', 'step_size': 10, 'gamma': 0.4436783435351249, 'num_samples': 20, 'epochs': 10, 'max_num_epochs': 100, 'gpus_per_trial': 0, 'grace_period': 1, 'reduction_factor': 2, 'device': 'cpu'}
Best trial final validation loss: 0.03315458196871599


In [15]:
best_trial.config

{'pretrained_model_dir': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/models_saved/pretrained',
 'stock_data_file': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/Datasets/technology_ver_1.csv',
 'batch_size': 16,
 'split_ratio': 0.8,
 'calculate_technical_indicators': False,
 'lr': 0.00038170387616976386,
 'optimizer': 'Adam',
 'momentum': 0.8986611460040892,
 'weight_decay': 0.00010469943492750553,
 'scheduler': 'ReduceLROnPlateau',
 'step_size': 10,
 'gamma': 0.4436783435351249,
 'num_samples': 20,
 'epochs': 10,
 'max_num_epochs': 100,
 'gpus_per_trial': 0,
 'grace_period': 1,
 'reduction_factor': 2,
 'device': 'cpu'}

# For: Create New Model

**Import**

In [1]:
from ray import tune
import torch
from torch import nn
from torch import optim
from torch import functional as F
from einops import rearrange
import os
import pickle
import sys
sys.path.append('/home/tiennv/FPT/FinanceTransformers')
from logger_config import get_logger
from stock_embedder import *
import pandas as pd
from tqdm.notebook import tqdm
import ta
import numpy as np

**Configs**

In [2]:
config = {
    # Trainer
    'stock_data_file': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/Datasets/technology_ver_1.csv',
    "batch_size": tune.choice([16, 32, 64, 128, 256]),
    'split_ratio': 0.8,
    'calculate_technical_indicators': True,
    'rolling_window': 30,
    "lr": tune.loguniform(1e-4, 1e-2),
    "optimizer": tune.choice(["Adam", "SGD"]),
    "momentum": tune.uniform(0.8, 0.99),
    "weight_decay": tune.loguniform(1e-5, 1e-3),
    "scheduler": tune.choice(["StepLR", "ReduceLROnPlateau"]),
    "step_size": tune.choice([5, 10, 20]),
    "gamma": tune.uniform(0.1, 0.5),
    # Model:
    'ts_size': tune.choice([24, 60]),
    'mask_size': tune.choice([1, 2]),
    'num_masks': tune.choice([1, 2]),
    'hidden_dim': tune.choice([12, 24]),
    'embed_dim': tune.choice([6, 12, 18]),
    'num_layer': tune.choice([2, 3, 4]),
    'z_dim': 20,
    'num_embed': tune.choice([32, 64]),
    # Ray Tune:
    'num_samples': 20,
    'epochs': 10,
    'max_num_epochs': 100,
    'gpus_per_trial': 0,
    'grace_period': 1,
    'reduction_factor': 2,
    'device': 'cpu'
}

**Giải thích:**

`max_t=max_num_epochs`: Mỗi thử nghiệm có thể chạy tối đa max_num_epochs epoch.

`grace_period=1`: Scheduler sẽ đợi ít nhất 1 epoch trước khi quyết định loại bỏ các thử nghiệm không tốt.

`reduction_factor=2`: Sau mỗi giai đoạn, scheduler sẽ giảm số lượng thử nghiệm còn lại xuống một nửa, giúp tập trung vào những thử nghiệm hứa hẹn nhất.

*# Bước 1*


**Define Models Architecture and Data Loader**

*Data Loader*

In [3]:
def train_test_split(data, ratio):
    idx = np.random.permutation(len(data))
    train_idx = idx[:int(ratio * len(data))]
    test_idx = idx[int(ratio * len(data)):]
    train_data = data[train_idx, ...]
    test_data = data[test_idx, ...]
    return train_data, test_data

def load_data(ts_size, data):
    # data.shape = (rows, features)
    
    def sliding_window(ts_size, ori_data):
        # Flipping the data to make chronological data
        ori_data = ori_data[::-1]  # (len(csv), z_dim)
        # Make (len(ori_data), z_dim) into (num_samples, seq_len, z_dim)
        samples = []
        for i in range(len(ori_data) - ts_size):
            single_sample = ori_data[i:i + ts_size]  # (seq_len, z_dim)
            samples.append(single_sample)
        samples = np.array(samples)  # (bs, seq_len, z_dim)
        np.random.shuffle(samples)  # Make it more like i.i.d.
        return samples

    data = sliding_window(ts_size=ts_size, ori_data=data)  # (bs, ts_size, z_dim)
    
    return data

In [4]:
def calculate_technical_indicators(df_passed: pd.DataFrame, rolling_window = 50):
    df = df_passed.copy()
    
    def generate_indicators(df, rolling_window = 50):
        # Calculate technical indicators
        # df['momentum'] = ta.momentum.roc(df['Close'])
        # df['trend'] = ta.trend.sma_indicator(df['Close'])
        # df['volatility'] = ta.volatility.bollinger_mavg(df['Close'])
        # df['volume'] = ta.volume.on_balance_volume(df['Close'], df['Volume'])
        df['stoch'] = ta.momentum.stoch(df['High'], df['Low'], df['Close'])
        df['adx'] = ta.trend.adx(df['High'], df['Low'], df['Close'])
        df['bollinger_hband'] = ta.volatility.bollinger_hband(df['Close'])
        df['mfi'] = ta.volume.money_flow_index(df['High'], df['Low'], df['Close'], df['Volume'])
        df['rsi'] = ta.momentum.rsi(df['Close'])
        df['ma'] = ta.trend.sma_indicator(df['Close'])
        df['std'] = df['Close'].rolling(window=rolling_window).std()
        df['adl'] = ta.volume.acc_dist_index(df['High'], df['Low'], df['Close'], df['Volume'])
        df['williams'] = ta.momentum.williams_r(df['High'], df['Low'], df['Close'])
        df['macd'] = ta.trend.macd(df['Close'])
        df['obv'] = ta.volume.on_balance_volume(df['Close'], df['Volume'])
        df['sar'] = ta.trend.psar_down(df['High'], df['Low'], df['Close']) # Added the 'close' argument
        df['ichimoku_a'] = ta.trend.ichimoku_a(df['High'], df['Low'])
        df['ichimoku_b'] = ta.trend.ichimoku_b(df['High'], df['Low'])

        return df
    
    df = generate_indicators(df=df, rolling_window=rolling_window)
    
    # Fillna
    df = df.fillna(method='ffill')
    df = df.iloc[rolling_window + 1 : ]
    df = df.fillna(method='bfill')
    
    if df.isna().sum().sum() > 0:
        raise Exception('NaN values found')
    
    return df

In [5]:
all_symbols_df = pd.read_csv(config['stock_data_file'], encoding='UTF-8')
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,2010-01-11,JBL,14.340293,17.040001,17.620001,16.940001,17.610001,3682500.0
1,2010-01-11,FSLR,138.240005,138.240005,141.240005,137.770004,141.000000,1575400.0
2,2010-01-11,FI,12.227500,12.227500,12.245000,12.125000,12.190000,5786000.0
3,2010-01-11,IT,20.600000,20.600000,20.690001,20.309999,20.690001,386400.0
4,2010-01-11,MCHP,9.767199,14.230000,14.285000,14.090000,14.245000,4489600.0
...,...,...,...,...,...,...,...,...
261364,2024-04-30,ADI,199.815201,200.610001,204.199997,200.500000,203.630005,2668400.0
261365,2024-04-30,QCOM,165.850006,165.850006,169.240005,165.809998,169.229996,6914200.0
261366,2024-04-30,INTU,625.619995,625.619995,637.919983,625.229980,637.010010,1309700.0
261367,2024-04-30,FTV,75.188156,75.269997,77.105003,75.220001,77.010002,2356300.0


In [6]:
all_symbols_df['Date'] = pd.to_datetime(all_symbols_df['Date'])
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,2010-01-11,JBL,14.340293,17.040001,17.620001,16.940001,17.610001,3682500.0
1,2010-01-11,FSLR,138.240005,138.240005,141.240005,137.770004,141.000000,1575400.0
2,2010-01-11,FI,12.227500,12.227500,12.245000,12.125000,12.190000,5786000.0
3,2010-01-11,IT,20.600000,20.600000,20.690001,20.309999,20.690001,386400.0
4,2010-01-11,MCHP,9.767199,14.230000,14.285000,14.090000,14.245000,4489600.0
...,...,...,...,...,...,...,...,...
261364,2024-04-30,ADI,199.815201,200.610001,204.199997,200.500000,203.630005,2668400.0
261365,2024-04-30,QCOM,165.850006,165.850006,169.240005,165.809998,169.229996,6914200.0
261366,2024-04-30,INTU,625.619995,625.619995,637.919983,625.229980,637.010010,1309700.0
261367,2024-04-30,FTV,75.188156,75.269997,77.105003,75.220001,77.010002,2356300.0


In [7]:
all_symbols_df = all_symbols_df.sort_values(by='Date')
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,2010-01-11,JBL,14.340293,17.040001,17.620001,16.940001,17.610001,3682500.0
33,2010-01-11,ADSK,26.250000,26.250000,26.490000,26.070000,26.340000,2151300.0
34,2010-01-11,GRMN,21.318253,34.290001,34.450001,33.520000,34.099998,1997700.0
35,2010-01-11,APH,5.031209,5.661250,5.692500,5.575000,5.680000,6540800.0
36,2010-01-11,GLW,14.353989,20.490000,20.850000,20.219999,20.620001,25617100.0
...,...,...,...,...,...,...,...,...
261313,2024-04-30,AAPL,170.099289,170.330002,174.990005,170.000000,173.330002,65934800.0
261312,2024-04-30,ENPH,108.760002,108.760002,111.949997,108.690002,111.250000,2768300.0
261311,2024-04-30,MSI,339.149994,339.149994,347.070007,338.540009,346.700012,1220800.0
261318,2024-04-30,APH,60.288956,60.384998,61.799999,60.349998,61.000000,7167200.0


*Choose Dates Range*

In [8]:
start_date = '2015-01-01'
end_date = '2016-01-01'

all_symbols_df = all_symbols_df[(all_symbols_df['Date'] >= start_date) & (all_symbols_df['Date'] <= end_date)]
all_symbols_df

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
82033,2015-01-02,TER,18.421520,19.700001,20.000000,19.469999,19.920000,1030300.0
82039,2015-01-02,LRCX,69.079231,79.449997,80.190002,78.839996,79.870003,830600.0
82038,2015-01-02,ADI,45.454937,55.540001,56.250000,54.970001,55.680000,1323200.0
82037,2015-01-02,MSI,58.383911,66.510002,67.730003,66.360001,67.540001,1077900.0
82036,2015-01-02,CDW,31.416943,34.860001,35.299999,34.599998,35.259998,380200.0
...,...,...,...,...,...,...,...,...
100323,2015-12-31,AMAT,17.243118,18.670000,18.959999,18.670000,18.940001,8685100.0
100322,2015-12-31,NXPI,76.961082,84.250000,85.870003,84.150002,85.430000,2140000.0
100321,2015-12-31,MCHP,20.020269,23.270000,23.924999,23.270000,23.825001,2316200.0
100320,2015-12-31,JBL,21.590118,23.290001,23.629999,23.290001,23.500000,1559900.0


*Get min_val, max_val*

In [9]:
if config['calculate_technical_indicators']:
    all_symbols_df = calculate_technical_indicators(all_symbols_df, rolling_window=config['rolling_window'])

all_symbols_df

  df = df.fillna(method='ffill')
  df = df.fillna(method='bfill')


Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume,stoch,adx,...,rsi,ma,std,adl,williams,macd,obv,sar,ichimoku_a,ichimoku_b
82045,2015-01-02,CRM,59.070766,59.240002,60.430000,58.509998,59.900002,2796400.0,33.403506,8.275422,...,50.364198,61.096040,39.180109,-1.432365e+07,-66.596494,6.571785,-1.753669e+08,137.689115,83.082499,79.989999
82044,2015-01-02,INTC,28.073418,36.360001,37.160000,36.110001,36.669998,23605600.0,15.855017,7.781564,...,48.396385,61.993540,39.335916,-2.668848e+07,-84.144983,4.027497,-1.989725e+08,137.689115,83.082499,79.989999
82042,2015-01-02,CSCO,20.481958,27.610001,28.120001,27.379999,27.860001,22926500.0,8.835314,7.428013,...,47.629944,61.398540,39.579674,-3.536331e+07,-91.164686,1.290205,-2.218990e+08,137.689115,83.082499,79.989999
82023,2015-01-02,TRMB,26.910000,26.910000,26.959999,26.360001,26.700001,1106000.0,8.343291,7.112710,...,47.565047,51.119374,39.735654,-3.444164e+07,-91.656709,-0.924940,-2.230050e+08,157.309998,83.082499,79.989999
82032,2015-01-02,ZBRA,77.430000,77.430000,78.419998,76.059998,77.989998,411800.0,43.853238,7.068758,...,52.586194,54.786874,39.896284,-3.437534e+07,-56.146762,1.380173,-2.225932e+08,154.690998,85.912499,79.989999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100323,2015-12-31,AMAT,17.243118,18.670000,18.959999,18.670000,18.940001,8685100.0,12.401151,6.391875,...,46.982974,54.133166,35.338450,5.348407e+09,-87.598849,2.842122,-1.239740e+11,98.449997,72.776998,72.776998
100322,2015-12-31,NXPI,76.961082,84.250000,85.870003,84.150002,85.430000,2140000.0,57.972567,6.299500,...,52.080422,60.634499,35.650583,5.346516e+09,-42.027433,4.661001,-1.239719e+11,98.449997,72.776998,72.776998
100321,2015-12-31,MCHP,20.020269,23.270000,23.924999,23.270000,23.825001,2316200.0,15.597683,6.139756,...,47.506477,54.493666,36.123059,5.344200e+09,-84.402317,1.168431,-1.239742e+11,98.449997,72.776998,72.776998
100320,2015-12-31,JBL,21.590118,23.290001,23.629999,23.290001,23.500000,1559900.0,15.611581,5.991423,...,47.508105,51.957000,36.443454,5.342640e+09,-84.388419,-1.579634,-1.239726e+11,144.729996,72.776998,72.776998


In [10]:
stock_features = [column for column in all_symbols_df.columns if column not in ['Date', 'Symbol']]

min_val = all_symbols_df[stock_features].min().values
max_val = all_symbols_df[stock_features].max().values

config['stock_features'] = stock_features
config['z_dim'] = len(stock_features)
config['min_val'] = min_val
config['max_val'] = max_val

print(min_val)
print(max_val)

[ 4.59453315e-01  4.78500009e-01  4.87500012e-01  4.73500013e-01
  4.81249988e-01  0.00000000e+00  0.00000000e+00  3.21805341e+00
  7.38272562e+01  1.73447777e+00  4.23946742e+01  2.48179424e+01
  2.15548392e+01 -2.17302467e+09 -1.00000000e+02 -1.33643311e+01
 -1.23974206e+11  4.64983580e+01  3.42043750e+01  4.47023738e+01]
[ 1.85039658e+02  1.94830002e+02  1.95929993e+02  1.93380005e+02
  1.95000000e+02  1.40524800e+09  1.00000000e+02  9.20426719e+00
  1.82642077e+02  9.92617096e+01  6.50187234e+01  1.00782710e+02
  5.47552574e+01  7.10742327e+09 -0.00000000e+00  2.47550200e+01
 -6.81279730e+07  1.95929993e+02  1.13479998e+02  1.01789998e+02]


*Create Training and Validation Batches*

In [11]:
def create_batches(config):
    train_batches = torch.tensor(data=[])
    val_batches = torch.tensor(data=[])

    symbols = all_symbols_df['Symbol'].unique()

    for symbol in tqdm(symbols):
        df = all_symbols_df[all_symbols_df['Symbol'] == symbol]
        df = df.sort_values(by='Date')
        
        if config['calculate_technical_indicators']:
            df = calculate_technical_indicators(df, rolling_window=config['rolling_window'])
        
        df = df[config['stock_features']]
        data = df.values
        
        train_data, val_data = train_test_split(data=data, ratio=config['split_ratio'])
        
        # Create batches (sliding window)
        train_data = load_data(ts_size=config['ts_size'], data=train_data)
        val_data = load_data(ts_size=config['ts_size'], data=val_data)
        
        if len(train_data) > 0:
            train_data = normalize(train_data, min_val=config['min_val'], max_val=config['max_val'])
            train_data = torch.tensor(train_data)
            train_batches = torch.cat(tensors=[train_batches, train_data])
        
        if len(val_data) > 0:
            val_data = normalize(val_data, min_val=config['min_val'], max_val=config['max_val'])
            val_data = torch.tensor(val_data)
            val_batches = torch.cat(tensors=[val_batches, val_data])
            
    
    return train_batches, val_batches

*Models Architecture*

In [12]:
def get_mini_batch(batch_size, data):
    idx = np.random.permutation(len(data))
    idx = idx[:batch_size]
    data_mini = data[idx, ...]  # (bs, seq_len, z_dim)
    return data_mini

def generate_random_masks(num_samples, ts_size, mask_size, num_masks):
    # xxxo
    # oxxx
    # xxox
    num_patches = int(ts_size // mask_size)

    def single_sample_mask():
        idx = np.random.permutation(num_patches)[:num_masks]
        mask = np.zeros(ts_size, dtype=bool)
        for j in idx:
            mask[j * mask_size:(j + 1) * mask_size] = 1
        return mask

    masks_list = [single_sample_mask() for _ in range(num_samples)]
    masks_list = [torch.tensor(mask) for mask in masks_list]
    masks = torch.stack(masks_list, axis=0)  # (num_samples, ts_size)
    return masks

def generate_pseudo_masks(ts_size, num_samples):
    # xxxx
    # xxxx
    # xxxx
    masks = np.zeros((num_samples, ts_size), dtype=bool)
    return masks

**Ray Tune**

In [13]:
import ray.train
import torch
import torch.nn as nn
import torch.optim as optim
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler



# Bước 2: Hàm training với Ray Tune và validation
def train_model(config, checkpoint_dir=None):
    # ---------------- Get Model ------------
    model = StockEmbedder(config={k: config[k] for k in ['ts_size', 'mask_size', 'num_masks', 'hidden_dim', 'embed_dim', 'num_layer', 'z_dim', 'num_embed', 'min_val', 'max_val', 'stock_features']})
    
    train_batches, val_batches = create_batches(config=config)
    # ---------------- END OF: Get Model -------------
    
    criterion = torch.nn.MSELoss(reduction='mean')
    optimizer = getattr(optim, config["optimizer"])(model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"])

    # Chỉ dùng momentum nếu optimizer là SGD
    if config["optimizer"] == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=config["lr"], momentum=config["momentum"], weight_decay=config["weight_decay"])

    # Load checkpoint nếu có
    if checkpoint_dir:
        checkpoint = torch.load(checkpoint_dir)
        model.load_state_dict(checkpoint["model_state"])
        optimizer.load_state_dict(checkpoint["optimizer_state"])
    
    
    pseudo_masks = generate_pseudo_masks(ts_size=model.config['ts_size'], num_samples=config['batch_size'])
    
    # ------------------- TRAIN AE ----------------------------
    # for t in tqdm(range(config['epochs'])):
    #     # ------------ Train model: -----------------
    #     # Đặt mô hình về chế độ train để huấn luyện
    #     model.train()
        
    #     x_ori = get_mini_batch(batch_size=config['batch_size'], data=train_batches)  # (bs, ts_size, z_dim)
        
    #     x_ori = torch.tensor(x_ori, dtype=torch.float32).to(config['device'])
    #     x_enc, x_dec = model(x_ori, pseudo_masks, 'ae')
    #     loss = criterion(x_dec, x_ori)
        
    #     optimizer.zero_grad()
    #     loss.backward()
    #     optimizer.step()
        
    #     # -------------- END OF: Train model -----------------
    #     # ------------- Calculate loss on validation set: -----------------------
    #     model.eval()
        
    #     val_loss = 0
        
    #     num_batches = len(val_batches) // config['batch_size']
        
    #     for i in range(num_batches):
    #         # Lấy batch dữ liệu
    #         val_batch = val_batches[i * config['batch_size'] : (i + 1) * config['batch_size']]  # (bs, ts_size, z_dim)
            
    #         x_ori = torch.tensor(val_batch, dtype=torch.float32).to(config['device'])
    #         x_enc, x_dec = model(x_ori, pseudo_masks, 'ae')
    #         loss = criterion(x_dec, x_ori)
            
    #         # Cộng dồn loss cho mỗi batch
    #         val_loss += loss.item()
        
    #     # Tính trung bình của val_loss
    #     if num_batches > 0:
    #         val_loss /= num_batches
    #     else:
    #         val_loss = 1e9
        
    #     # ------------- END OF: Calculate loss on validation set: -----------------------
        
        
    #     # Báo cáo loss trên tập validation cho Ray Tune
    #     ray.train.report({'loss': val_loss})
    
    # ------------------- END OF: TRAIN AE ----------------------------
    
    
    # ------------------- TRAIN EMBED ----------------------------
    # for t in tqdm(range(config['epochs'])):
    #     # ------------ Train model: -----------------
    #     x_ori = get_mini_batch(batch_size=config['batch_size'], data=train_batches)  # (bs, ts_size, z_dim)
        
    #     x_ori = torch.tensor(x_ori, dtype=torch.float32).to(config['device'])
    #     random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])

    #     # Get the target x_ori_enc by Autoencoder
    #     model.eval()
    #     masks = pseudo_masks
    #     x_ori_enc, _ = model(x_ori, pseudo_masks, 'ae')
    #     x_ori_enc = x_ori_enc.clone().detach()  # (bs, ts_size, hidden_dim)
    #     b, l, f = x_ori_enc.size()

    #     model.train()
    #     masks = random_masks
    #     x_enc, x_inter, x_dec = model(x_ori, random_masks, 'mae')

    #     # Only calculate loss for those being masked
    #     x_enc_masked = x_enc[masks, :].reshape(b, -1, f)
    #     x_ori_enc_masked = x_ori_enc[masks, :].reshape(b, -1, f)
    #     loss = criterion(x_enc_masked, x_ori_enc_masked)
    #     # By annotate lines above, we take loss on all patches
    #     # loss = self.criterion(x_enc, x_ori_enc)  # embed_loss
        
    #     optimizer.zero_grad()
    #     loss.backward()
    #     optimizer.step()
        
    #     # -------------- END OF: Train model -----------------
    #     # ------------- Calculate loss on validation set: -----------------------
    #     model.eval()
        
    #     val_loss = 0
        
    #     num_batches = len(val_batches) // config['batch_size']
        
    #     for i in range(num_batches):
    #         # Lấy batch dữ liệu
    #         val_batch = val_batches[i * config['batch_size'] : (i + 1) * config['batch_size']]  # (bs, ts_size, z_dim)
            
    #         x_ori = torch.tensor(val_batch, dtype=torch.float32).to(config['device'])
    #         random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])  # (bs, ts_size)
            
    #         # Get the target x_ori_enc by Autoencoder
    #         masks = pseudo_masks
    #         x_ori_enc, _ = model(x_ori, pseudo_masks, 'ae')
    #         x_ori_enc = x_ori_enc.clone().detach()  # (bs, ts_size, hidden_dim)
    #         b, l, f = x_ori_enc.size()
            
    #         masks = random_masks
    #         x_enc, x_inter, x_dec = model(x_ori, random_masks, 'mae')

    #         # Only calculate loss for those being masked
    #         x_enc_masked = x_enc[masks, :].reshape(b, -1, f)
    #         x_ori_enc_masked = x_ori_enc[masks, :].reshape(b, -1, f)
    #         loss = criterion(x_enc_masked, x_ori_enc_masked)
    #         # By annotate lines above, we take loss on all patches
    #         # loss = self.criterion(x_enc, x_ori_enc)  # embed_loss
            
    #         # Cộng dồn loss cho mỗi batch
    #         val_loss += loss.item()
        
    #     # Tính trung bình của val_loss
    #     if num_batches > 0:
    #         val_loss /= num_batches
    #     else:
    #         val_loss = 1e9
        
    #     # ------------- END OF: Calculate loss on validation set: -----------------------
        
    #     # Báo cáo loss trên tập validation cho Ray Tune
    #     ray.train.report({'loss': val_loss})
        
    # ------------------- END OF: TRAIN EMBED ----------------------------
    
    
    # ------------------- TRAIN RECON ----------------------------
    for t in tqdm(range(config['epochs'])):
        # ------------ Train model: -----------------
        x_ori = get_mini_batch(batch_size=config['batch_size'], data=train_batches)  # (bs, ts_size, z_dim)
        
        x_ori = torch.tensor(x_ori, dtype=torch.float32).to(config['device'])
        random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])  # (bs, ts_size)

        model.train()
        masks = random_masks
        _, x_inter, x_dec = model(x_ori, random_masks, 'mae')
        loss = criterion(x_dec, x_ori)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # -------------- END OF: Train model -----------------
        # ------------- Calculate loss on validation set: -----------------------
        model.eval()
        
        val_loss = 0
        
        num_batches = len(val_batches) // config['batch_size']
        
        for i in range(num_batches):
            # Lấy batch dữ liệu
            val_batch = val_batches[i * config['batch_size'] : (i + 1) * config['batch_size']]  # (bs, ts_size, z_dim)
            
            x_ori = torch.tensor(val_batch, dtype=torch.float32).to(config['device'])
            random_masks = generate_random_masks(num_samples=config['batch_size'], ts_size=model.config['ts_size'], mask_size=model.config['mask_size'], num_masks=model.config['num_masks'])  # (bs, ts_size)
            
            masks = random_masks
            _, x_inter, x_dec = model(x_ori, random_masks, 'mae')
            loss = criterion(x_dec, x_ori)
            
            # Cộng dồn loss cho mỗi batch
            val_loss += loss.item()
        
        # Tính trung bình của val_loss
        if num_batches > 0:
            val_loss /= num_batches
        else:
            val_loss = 1e9
        
        # ------------- END OF: Calculate loss on validation set: -----------------------
        
        #  Báo cáo loss trên tập validation cho Ray Tune
        ray.train.report({'loss': val_loss})
        
    # ------------------- END OF: TRAIN RECON ----------------------------
        
        

# Bước 3: Cấu hình hyperparameter tuning
def main():
    
    scheduler = ASHAScheduler(
        metric="loss",  # Sử dụng loss trên tập validation để đánh giá
        mode="min",
        max_t=config['max_num_epochs'],
        grace_period=config['grace_period'],
        reduction_factor=config['reduction_factor']
    )
    
    result = tune.run(
        # train_model,
        tune.with_parameters(trainable=train_model),
        resources_per_trial={"cpu": 1, "gpu": config['gpus_per_trial']},
        config=config,
        num_samples=config['num_samples'],
        scheduler=scheduler
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(best_trial.last_result["loss"]))
    
    return best_trial
    
    

if __name__ == "__main__":
    best_trial = main()

2024-09-22 22:01:55,969	INFO worker.py:1786 -- Started a local Ray instance.
2024-09-22 22:01:56,982	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-09-22 22:01:56,984	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-09-22 22:02:20
Running for:,00:00:22.95
Memory:,13.4/62.6 GiB

Trial name,status,loc,batch_size,embed_dim,gamma,hidden_dim,lr,mask_size,momentum,num_embed,num_layer,num_masks,optimizer,scheduler,step_size,ts_size,weight_decay,iter,total time (s),loss
train_model_9c899_00000,TERMINATED,192.168.2.115:2875302,128,6,0.457633,24,0.00719801,1,0.877649,64,4,2,Adam,StepLR,20,60,4.94267e-05,10,14.5636,1000000000.0
train_model_9c899_00001,TERMINATED,192.168.2.115:2875303,128,12,0.294301,12,0.000619912,2,0.975541,64,3,2,Adam,ReduceLROnPlateau,10,24,0.000450539,1,11.725,1.88365e+16
train_model_9c899_00002,TERMINATED,192.168.2.115:2875304,32,12,0.370728,12,0.00250421,1,0.857546,32,2,1,Adam,StepLR,5,60,2.90297e-05,10,13.6344,1000000000.0
train_model_9c899_00003,TERMINATED,192.168.2.115:2875307,64,12,0.180369,24,0.000803313,1,0.843203,64,4,2,Adam,ReduceLROnPlateau,5,60,1.58034e-05,10,14.5352,1000000000.0
train_model_9c899_00004,TERMINATED,192.168.2.115:2875309,32,12,0.17396,24,0.00117239,1,0.954146,32,4,2,Adam,StepLR,10,24,0.000518312,1,11.9768,1.87191e+16
train_model_9c899_00005,TERMINATED,192.168.2.115:2875308,64,18,0.374203,12,0.00296235,1,0.838773,32,4,1,Adam,ReduceLROnPlateau,5,24,3.51074e-05,2,12.7858,1.83271e+16
train_model_9c899_00006,TERMINATED,192.168.2.115:2875310,128,18,0.467479,12,0.0081818,1,0.827437,32,2,1,SGD,StepLR,5,24,3.3139e-05,8,14.3509,1.3062e+16
train_model_9c899_00007,TERMINATED,192.168.2.115:2875311,32,6,0.412069,12,0.00965453,2,0.800118,32,3,2,Adam,StepLR,20,24,0.000732101,1,11.5415,1.85775e+16
train_model_9c899_00008,TERMINATED,192.168.2.115:2875313,32,6,0.363492,24,0.000130471,2,0.846465,32,4,2,Adam,StepLR,20,24,4.22666e-05,2,12.8614,1.84786e+16
train_model_9c899_00009,TERMINATED,192.168.2.115:2875315,32,12,0.222401,24,0.000397912,2,0.890696,32,4,1,SGD,ReduceLROnPlateau,10,24,2.24808e-05,2,12.9012,1.82612e+16


[36m(train_model pid=2875302)[0m   0%|          | 0/74 [00:00<?, ?it/s]


[36m(train_model pid=2875303)[0m   x = torch.tensor(x, dtype=torch.float32)
[36m(train_model pid=2875303)[0m   masks = torch.tensor(masks, dtype=torch.float32)


[36m(train_model pid=2875303)[0m   0%|          | 0/10 [00:00<?, ?it/s][32m [repeated 20x across cluster][0m


Trial name,loss
train_model_9c899_00000,1000000000.0
train_model_9c899_00001,1.88365e+16
train_model_9c899_00002,1000000000.0
train_model_9c899_00003,1000000000.0
train_model_9c899_00004,1.87191e+16
train_model_9c899_00005,1.83271e+16
train_model_9c899_00006,1.3062e+16
train_model_9c899_00007,1.85775e+16
train_model_9c899_00008,1.84786e+16
train_model_9c899_00009,1.82612e+16


2024-09-22 22:02:20,028	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/tiennv/ray_results/train_model_2024-09-22_22-01-57' in 0.0442s.
2024-09-22 22:02:20,057	INFO tune.py:1041 -- Total run time: 23.07 seconds (22.90 seconds for the tuning loop).


Best trial config: {'stock_data_file': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/Datasets/technology_ver_1.csv', 'batch_size': 128, 'split_ratio': 0.8, 'calculate_technical_indicators': True, 'rolling_window': 30, 'lr': 0.007198013043215177, 'optimizer': 'Adam', 'momentum': 0.8776486243327521, 'weight_decay': 4.942669666368748e-05, 'scheduler': 'StepLR', 'step_size': 20, 'gamma': 0.45763252076024297, 'ts_size': 60, 'mask_size': 1, 'num_masks': 2, 'hidden_dim': 24, 'embed_dim': 6, 'num_layer': 4, 'z_dim': 20, 'num_embed': 64, 'num_samples': 20, 'epochs': 10, 'max_num_epochs': 100, 'gpus_per_trial': 0, 'grace_period': 1, 'reduction_factor': 2, 'device': 'cpu', 'stock_features': ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume', 'stoch', 'adx', 'bollinger_hband', 'mfi', 'rsi', 'ma', 'std', 'adl', 'williams', 'macd', 'obv', 'sar', 'ichimoku_a', 'ichimoku_b'], 'min_val': array([ 4.59453315e-01,  4.78500009e-01,  4.87500012e-01,  4.73500013e-01,
        4.8

In [14]:
best_trial.config

{'stock_data_file': '/home/tiennv/FPT/FinanceTransformers/Models_Development/Stock_Embedder/Datasets/technology_ver_1.csv',
 'batch_size': 128,
 'split_ratio': 0.8,
 'calculate_technical_indicators': True,
 'rolling_window': 30,
 'lr': 0.007198013043215177,
 'optimizer': 'Adam',
 'momentum': 0.8776486243327521,
 'weight_decay': 4.942669666368748e-05,
 'scheduler': 'StepLR',
 'step_size': 20,
 'gamma': 0.45763252076024297,
 'ts_size': 60,
 'mask_size': 1,
 'num_masks': 2,
 'hidden_dim': 24,
 'embed_dim': 6,
 'num_layer': 4,
 'z_dim': 20,
 'num_embed': 64,
 'num_samples': 20,
 'epochs': 10,
 'max_num_epochs': 100,
 'gpus_per_trial': 0,
 'grace_period': 1,
 'reduction_factor': 2,
 'device': 'cpu',
 'stock_features': ['Adj Close',
  'Close',
  'High',
  'Low',
  'Open',
  'Volume',
  'stoch',
  'adx',
  'bollinger_hband',
  'mfi',
  'rsi',
  'ma',
  'std',
  'adl',
  'williams',
  'macd',
  'obv',
  'sar',
  'ichimoku_a',
  'ichimoku_b'],
 'min_val': array([ 4.59453315e-01,  4.78500009e-01