<a href="https://colab.research.google.com/github/khanhvietdm-glitch/BTC-Trading/blob/main/Take_the_trained_model_to_run_on_test_data_or_new_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Take the trained model and run it on test data or new data.
# Uses data up to 21-05-2025.
#!/usr/bin/env python3
# apply_saved_policy.py
# Requires: torch, pandas, numpy, scikit-learn, gym, stable-baselines3
!pip install stable_baselines3
!pip install ta
!pip install 'shimmy>=2.0'
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import gym
from gym import spaces
from stable_baselines3 import DQN

# --- 1. Read the data & compute the indicators exactly as during training ---
def add_technical(df):
    """Return a copy of *df* extended with the indicator columns.

    Columns added, all derived from ``df['close']``: ``log_ret``, ``rsi``
    (14-row rolling means of gains and losses), ``sma_5``, ``sma_10``,
    ``boll_up`` and ``boll_lb`` (20-row rolling mean plus/minus two rolling
    standard deviations).

    Every row containing a NaN in any column is dropped before returning, so
    the warm-up rows of the rolling windows are not part of the result.
    The input frame itself is left unmodified.
    """
    out = df.copy()
    close = out['close']

    # Log return of the close relative to the previous row.
    out['log_ret'] = np.log(close).diff()

    # RSI built from simple rolling means of the up-moves and down-moves.
    change = close.diff()
    avg_gain = change.clip(lower=0).rolling(14).mean()
    # A zero average loss is replaced by a tiny value to avoid dividing by zero.
    avg_loss = (-change.clip(upper=0)).rolling(14).mean().replace(0, 1e-6)
    out['rsi'] = 100 - 100 / (1 + avg_gain / avg_loss)

    # Simple moving averages.
    for column, span in (('sma_5', 5), ('sma_10', 10)):
        out[column] = close.rolling(span).mean()

    # Bollinger bands around the 20-row mean.
    band_window = close.rolling(20)
    middle = band_window.mean()
    deviation = band_window.std()
    out['boll_up'] = middle + 2 * deviation
    out['boll_lb'] = middle - 2 * deviation

    return out.dropna()

# Load the whole workbook so the train/test split can be redone here.
# The workbook is opened in a `with` block so its file handle is closed once
# the sheet has been read.
with pd.ExcelFile('/content/crypto_daily_data.xlsx') as xls:
    full_df = pd.read_excel(xls, 'BTC_Daily', parse_dates=['time']).set_index('time').sort_index()
full_df = add_technical(full_df)

# 1.1 split
# NOTE(review): assuming 'time' parsed into a DatetimeIndex, label slicing is
# inclusive at both ends, so a row dated exactly `split_date` lands in BOTH
# train_df and test_df. Left as-is so the scaler/PCA fit stays unchanged —
# confirm against the training script before changing it.
split_date = '2022-01-01'
train_df = full_df[:split_date]
test_df = full_df[split_date:]

# --- 2. Rebuild the scaler & PCA from the training split ---
features = ['log_ret', 'rsi', 'sma_5', 'sma_10', 'boll_up', 'boll_lb', 'volume']
scaler = MinMaxScaler()
train_vals = scaler.fit_transform(train_df[features])
# The test split is only transformed, never used for fitting.
test_vals = scaler.transform(test_df[features])

pca = PCA(n_components=5)
train_pca = pca.fit_transform(train_vals)
test_pca = pca.transform(test_vals)

# --- 3. Re-define the environment ---
class FSRLEnv(gym.Env):
    """Environment whose five discrete actions each pick a rule-based trading signal.

    At every step the chosen rule yields a signal in {-1, 0, 1}; the reward is
    ``signal * log_ret`` of the current row and the running cash balance is
    multiplied by ``exp(reward)``. The observation is the previous ``window``
    rows of ``pca_arr`` flattened into one float32 vector.

    Actions:
        0 -- RSI thresholds (long below 30, short above 70, otherwise flat)
        1 -- ``sma_5`` versus ``sma_10``
        2 -- close versus the ``boll_lb`` / ``boll_up`` bands
        3 -- sign of the change in close over ``LOOKBACK`` rows
        4 -- close versus the volume-weighted average close of the previous
             ``LOOKBACK`` rows

    Args:
        df: frame with the columns close, volume, log_ret, rsi, sma_5, sma_10,
            boll_up and boll_lb; its index supplies the values stored in
            ``times``.
        pca_arr: 2-D feature array addressed with the same row positions as
            ``df``.
        window: number of past rows per observation; also the position of the
            first row that is stepped.
        cash_start: initial cash balance.

    Raises:
        ValueError: if ``pca_arr`` has fewer rows than ``df``, or ``df`` has
            no row left to step after the ``window`` warm-up rows.
    """

    # Number of rows the momentum and VWAP rules look back.
    LOOKBACK = 20

    def __init__(self, df, pca_arr, window=20, cash_start=1e6):
        super().__init__()
        # Fail early with a clear message instead of an IndexError (or a
        # silently truncated observation) on the first step.
        if len(pca_arr) < len(df):
            raise ValueError(
                f"pca_arr has {len(pca_arr)} rows but df has {len(df)}; "
                "the feature array must cover every row of df"
            )
        if len(df) <= window:
            raise ValueError(
                f"df has {len(df)} rows; more than window={window} rows are "
                "needed to take at least one step"
            )
        self.df           = df.reset_index(drop=True)
        self.times_index  = df.index   # original index, kept for the `times` log
        self.pca_arr      = pca_arr
        self.window       = window
        self.cash_start   = cash_start
        self.action_space = spaces.Discrete(5)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(window * pca_arr.shape[1],),
            dtype=np.float32
        )
        self.reset()

    def reset(self):
        """Rewind to the first steppable row, clear the logs and return the first observation."""
        self.t       = self.window
        self.cash    = self.cash_start
        self.nav     = [self.cash]
        self.times   = []
        self.signals = []
        return self._get_state()

    def _get_state(self):
        """Return the ``window`` rows preceding ``t`` as one flat float32 vector."""
        # Cast so the observation matches the dtype declared in observation_space.
        return self.pca_arr[self.t-self.window:self.t].astype(np.float32).ravel()

    def step(self, action):
        """Apply the rule selected by *action* to the current row.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is
            ``signal * log_ret`` of the current row, ``done`` becomes true once
            the last row of ``df`` has been consumed, and ``info`` is an empty
            dict.
        """
        action = int(action)   # the caller may pass a numpy scalar/array; plain int is required
        row    = self.df.iloc[self.t]
        # Clamp the look-back start at 0: with window < LOOKBACK a negative
        # position would silently wrap around to the end of the series.
        lookback_start = max(self.t - self.LOOKBACK, 0)

        # NOTE(review): every rule reads the same row whose log_ret is then
        # credited, i.e. the signal sees the close it is rewarded on. This
        # looks like look-ahead; it is left unchanged so the environment stays
        # the one the saved model was presumably trained in — confirm before
        # relying on the resulting NAV.
        if   action == 0:
            signal = 1 if row.rsi < 30 else (-1 if row.rsi > 70 else 0)
        elif action == 1:
            signal = 1 if row.sma_5 > row.sma_10 else (-1 if row.sma_5 < row.sma_10 else 0)
        elif action == 2:
            signal = 1 if row.close < row.boll_lb else (-1 if row.close > row.boll_up else 0)
        elif action == 3:
            mom    = (row.close / self.df.close.iloc[lookback_start]) - 1
            signal = 1 if mom > 0 else -1
        else:
            prices = self.df.close.iloc[lookback_start:self.t]
            vols   = self.df.volume.iloc[lookback_start:self.t]
            vwap   = (prices * vols).sum() / vols.sum()
            signal = 1 if row.close < vwap else -1

        # Compound the cash balance by the signed log return.
        ret       = signal * row.log_ret
        self.cash *= np.exp(ret)
        self.nav.append(self.cash)

        # Keep a per-step history for later export.
        self.signals.append(signal)
        self.times.append(self.times_index[self.t])
        self.t += 1
        done = self.t >= len(self.df)
        return self._get_state(), ret, done, {}

# --- 4. Build the test environment and load the trained SB3 model ---
env_test = FSRLEnv(test_df, test_pca)
# DQN.load is used (rather than constructing a policy by hand) so that SB3
# does the environment wrapping itself.
model = DQN.load("/content/fsrl_crypto_model.zip", env=env_test)

# --- 5. Run inference over the test set ---
actions = []
navs = []
times = []
obs = env_test.reset()
done = False
while True:
    action, _ = model.predict(obs, deterministic=True)
    obs, _, done, _ = env_test.step(action)
    # Record the chosen action together with what the environment just logged.
    actions.append(int(action))
    navs.append(env_test.nav[-1])
    times.append(env_test.times[-1])
    if done:
        break

# --- 6. Export to Excel ---
strategy_map = dict(enumerate([
    'RSI-Strategy',
    'SMA-Crossover',
    'Bollinger-Reversion',
    'Momentum-20d',
    'VWAP-Reversion',
]))
strategy_names = [strategy_map[code] for code in actions]
df_out = pd.DataFrame({
    'time': times,
    'action': actions,
    'strategy': strategy_names,
    'nav': navs,
})
df_out.to_excel('test_policy_signals.xlsx', index=False)




FileNotFoundError: [Errno 2] No such file or directory: '/content/crypto_daily_data.xlsx'

In [None]:
!pip install stable_baselines3
!pip install ta
!pip install 'shimmy>=2.0'

In [None]:
# Take the trained model and run it on test data or new data.
# Uses data up to 14-06-2025.

import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import gym
from gym import spaces
from stable_baselines3 import DQN

# --- 1. Compute the indicators exactly as during training ---
def add_technical(df):
    """Add the indicator columns to a copy of *df* and drop incomplete rows.

    All indicators are computed from ``df['close']``:

    * ``log_ret`` -- difference of consecutive log closes
    * ``rsi`` -- from 14-row rolling means of gains and losses
    * ``sma_5`` / ``sma_10`` -- rolling means over 5 and 10 rows
    * ``boll_up`` / ``boll_lb`` -- 20-row rolling mean plus/minus two rolling
      standard deviations

    Rows with a NaN in any column are removed from the returned frame; the
    frame passed in is not modified.
    """
    enriched = df.copy()
    price = enriched['close']

    enriched['log_ret'] = np.log(price).diff()

    # Split the row-to-row move into its positive and negative parts.
    move = price.diff()
    mean_up = move.clip(lower=0).rolling(14).mean()
    mean_down = (-move.clip(upper=0)).rolling(14).mean()
    # Guard the ratio below against a zero denominator.
    mean_down = mean_down.replace(0, 1e-6)
    enriched['rsi'] = 100 - 100 / (1 + mean_up / mean_down)

    enriched['sma_5'] = price.rolling(5).mean()
    enriched['sma_10'] = price.rolling(10).mean()

    rolling_20 = price.rolling(20)
    centre = rolling_20.mean()
    width = rolling_20.std()
    enriched['boll_up'] = centre + 2 * width
    enriched['boll_lb'] = centre - 2 * width

    return enriched.dropna()

# Read the whole workbook, then split it into train/test.
# The workbook is opened in a `with` block so its file handle is closed once
# the sheet has been read.
with pd.ExcelFile('/content/btc_daily_data 14-06.xlsx') as xls:
    full_df = pd.read_excel(xls, 'BTC_Daily', parse_dates=['time']).set_index('time').sort_index()
full_df = add_technical(full_df)

# NOTE(review): assuming 'time' parsed into a DatetimeIndex, label slicing is
# inclusive at both ends, so a row dated exactly `split_date` lands in BOTH
# train_df and test_df — confirm this is intended.
split_date = '2025-05-23'
train_df = full_df[:split_date]
test_df = full_df[split_date:]

# --- 2. Scale & PCA ---
# NOTE(review): the scaler and PCA are refit here on everything up to
# `split_date`. If the saved model was trained with a different split, the
# features it receives may not match what it saw in training — confirm.
features = ['log_ret', 'rsi', 'sma_5', 'sma_10', 'boll_up', 'boll_lb', 'volume']
scaler = MinMaxScaler()
train_vals = scaler.fit_transform(train_df[features])
# The test split is only transformed, never used for fitting.
test_vals = scaler.transform(test_df[features])

pca = PCA(n_components=5)
train_pca = pca.fit_transform(train_vals)
test_pca = pca.transform(test_vals)

# --- 3. Define the FSRL environment ---
class FSRLEnv(gym.Env):
    """Environment whose five discrete actions each pick a rule-based trading signal.

    At every step the chosen rule yields a signal in {-1, 0, 1}; the reward is
    ``signal * log_ret`` of the current row and the running cash balance is
    multiplied by ``exp(reward)``. The observation is the previous ``window``
    rows of ``pca_arr`` flattened into one float32 vector.

    Actions:
        0 -- RSI thresholds (long below 30, short above 70, otherwise flat)
        1 -- ``sma_5`` versus ``sma_10``
        2 -- close versus the ``boll_lb`` / ``boll_up`` bands
        3 -- sign of the change in close over ``LOOKBACK`` rows
        4 -- close versus the volume-weighted average close of the previous
             ``LOOKBACK`` rows

    Args:
        df: frame with the columns close, volume, log_ret, rsi, sma_5, sma_10,
            boll_up and boll_lb; its index supplies the values stored in
            ``times``.
        pca_arr: 2-D feature array addressed with the same row positions as
            ``df``.
        window: number of past rows per observation; also the position of the
            first row that is stepped.
        cash_start: initial cash balance.

    Raises:
        ValueError: if ``pca_arr`` has fewer rows than ``df``, or ``df`` has
            no row left to step after the ``window`` warm-up rows.
    """

    # Number of rows the momentum and VWAP rules look back.
    LOOKBACK = 20

    def __init__(self, df, pca_arr, window=20, cash_start=1e6):
        super().__init__()
        # Fail early with a clear message instead of an IndexError (or a
        # silently truncated observation) on the first step.
        if len(pca_arr) < len(df):
            raise ValueError(
                f"pca_arr has {len(pca_arr)} rows but df has {len(df)}; "
                "the feature array must cover every row of df"
            )
        if len(df) <= window:
            raise ValueError(
                f"df has {len(df)} rows; more than window={window} rows are "
                "needed to take at least one step"
            )
        self.df           = df.reset_index(drop=True)
        self.times_index  = df.index   # original index, kept for the `times` log
        self.pca_arr      = pca_arr
        self.window       = window
        self.cash_start   = cash_start
        self.action_space = spaces.Discrete(5)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(window * pca_arr.shape[1],),
            dtype=np.float32
        )
        self.reset()

    def reset(self):
        """Rewind to the first steppable row, clear the logs and return the first observation."""
        self.t        = self.window
        self.cash     = self.cash_start
        self.nav      = [self.cash]
        self.times    = []
        self.signals  = []
        return self._get_state()

    def _get_state(self):
        """Return the ``window`` rows preceding ``t`` as one flat float32 vector."""
        # Cast so the observation matches the dtype declared in observation_space.
        return self.pca_arr[self.t-self.window:self.t].astype(np.float32).ravel()

    def step(self, action):
        """Apply the rule selected by *action* to the current row.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is
            ``signal * log_ret`` of the current row, ``done`` becomes true once
            the last row of ``df`` has been consumed, and ``info`` is an empty
            dict.
        """
        action = int(action)   # the caller may pass a numpy scalar/array; plain int is required
        row    = self.df.iloc[self.t]
        # Clamp the look-back start at 0: with window < LOOKBACK a negative
        # position would silently wrap around to the end of the series.
        lookback_start = max(self.t - self.LOOKBACK, 0)

        # Choose the signal.
        # NOTE(review): every rule reads the same row whose log_ret is then
        # credited, i.e. the signal sees the close it is rewarded on. This
        # looks like look-ahead; it is left unchanged so the environment stays
        # the one the saved model was presumably trained in — confirm before
        # relying on the resulting NAV.
        if   action == 0:
            signal = 1 if row.rsi < 30 else (-1 if row.rsi > 70 else 0)
        elif action == 1:
            signal = 1 if row.sma_5 > row.sma_10 else (-1 if row.sma_5 < row.sma_10 else 0)
        elif action == 2:
            signal = 1 if row.close < row.boll_lb else (-1 if row.close > row.boll_up else 0)
        elif action == 3:
            mom    = (row.close / self.df.close.iloc[lookback_start]) - 1
            signal = 1 if mom > 0 else -1
        else:
            prices = self.df.close.iloc[lookback_start:self.t]
            vols   = self.df.volume.iloc[lookback_start:self.t]
            vwap   = (prices * vols).sum() / vols.sum()
            signal = 1 if row.close < vwap else -1

        # Update the NAV: compound the cash balance by the signed log return.
        ret = signal * row.log_ret
        self.cash *= np.exp(ret)
        self.nav.append(self.cash)

        # Keep a per-step history for later export.
        self.signals.append(signal)
        self.times.append(self.times_index[self.t])

        self.t += 1
        done = self.t >= len(self.df)
        return self._get_state(), ret, done, {}

# --- 4. Build the test environment and load the trained model ---
env_test = FSRLEnv(test_df, test_pca)
model = DQN.load("/content/fsrl_crypto_model.zip", env=env_test)

# --- 5. Inference over the test set ---
actions, signals, navs, times = [], [], [], []
obs = env_test.reset()
done = False

while True:
    action, _ = model.predict(obs, deterministic=True)
    obs, _, done, _ = env_test.step(action)

    # Record the chosen action together with what the environment just logged.
    actions.append(int(action))
    signals.append(env_test.signals[-1])
    navs.append(env_test.nav[-1])
    times.append(env_test.times[-1])

    if done:
        break

# --- 6. Export the results to Excel ---
strategy_map = dict(enumerate([
    'RSI-Strategy',
    'SMA-Crossover',
    'Bollinger-Reversion',
    'Momentum-20d',
    'VWAP-Reversion',
]))

strategy_names = [strategy_map[code] for code in actions]
df_out = pd.DataFrame({
    'time': times,
    'action': actions,
    'signal': signals,
    'strategy': strategy_names,
    'nav': navs,
})
df_out.to_excel('test_policy_signals_with_signalbegin 23-05-2025.xlsx', index=False)


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


