# Notebook cell export (originally "In [None]:").
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
from stable_baselines3 import SAC
import gym
from gym import spaces
import matplotlib.pyplot as plt
import random

# Single seed shared by every random number generator used in this script,
# so that repeated runs draw the same random streams.
SEED = 42

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Seed the GPU generators too, but only when a CUDA device is present.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

def load_and_preprocess(file_name):
    """Load a price CSV, sort it by date and min-max scale every feature.

    The returned frame is ordered ``Date``, ``Close``, then the remaining
    columns, so ``frame.drop(columns=["Date"]).values`` has the scaled close
    price in column 0. The scaler is fitted with the same feature order
    (``Close`` first).

    Args:
        file_name: Path of a CSV file with a ``Date`` column, a ``Close``
            column and otherwise only numeric columns.

    Returns:
        A ``(data, scaler)`` tuple: the scaled, chronologically sorted
        DataFrame and the fitted ``MinMaxScaler``.

    Raises:
        KeyError: If the CSV has no ``Date`` or no ``Close`` column.
    """
    data = pd.read_csv(file_name)
    data["Date"] = pd.to_datetime(data["Date"])
    data = data.sort_values(by="Date")

    # Close goes first because downstream code treats feature column 0 as
    # the price (prediction target and trading price).
    feature_cols = ["Close"] + [
        c for c in data.columns if c not in ("Date", "Close")
    ]

    # NOTE(review): the scaler is fitted on every row of the file, so any
    # later train/test split shares min/max statistics across both parts.
    scaler = MinMaxScaler()
    data[feature_cols] = scaler.fit_transform(data[feature_cols])

    # Assigning back by name keeps the CSV's column order; reorder explicitly
    # so the positional layout matches the scaler's feature order.
    return data[["Date"] + feature_cols], scaler

def create_sequences(data, sequence_length=30):
    """Build sliding windows and their next-step targets.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        sequence_length: Number of consecutive rows in each window.

    Returns:
        ``(X, y)`` where ``X[k]`` is ``data[k:k + sequence_length]`` and
        ``y[k]`` is column 0 of the row immediately after that window.
    """
    window_starts = range(len(data) - sequence_length)
    windows = [data[start:start + sequence_length] for start in window_starts]
    targets = [data[start + sequence_length, 0] for start in window_starts]
    return np.array(windows), np.array(targets)

class BiLSTMModel(nn.Module):
    """Bidirectional LSTM followed by a linear head producing one value.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked LSTM layers. It is
            only forwarded to the LSTM when ``num_layers > 1``.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers. With a single
        # layer a non-zero value does nothing except trigger a UserWarning,
        # so pass 0.0 in that case; the computation is unchanged.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                            batch_first=True, dropout=lstm_dropout,
                            bidirectional=True)
        # Both directions are concatenated, hence hidden_dim * 2 inputs.
        self.fc = nn.Linear(hidden_dim * 2, 1)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor laid out as (batch, time, features), since the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, 1) computed from the LSTM output at the
            last time step.
        """
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

def compute_next_day_predictions(data_array, seq_len, model, device):
    """Attach to every row the model's forecast made from data up to that row.

    Each window ``data_array[i:i + seq_len]`` is fed to ``model`` and the
    result is stored at the index of the window's last row. Rows that come
    before the first complete window are filled with the first forecast.

    Args:
        data_array: 2-D array of rows to slide the window over.
        seq_len: Number of rows per model input window (must be >= 1).
        model: Callable torch module returning one value per input sequence.
        device: Torch device on which the forward pass runs.

    Returns:
        ``float32`` array of length ``len(data_array)``. It is all zeros when
        there are fewer than ``seq_len`` rows.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")

    model.eval()
    T = len(data_array)
    preds_full = np.zeros(T, dtype=np.float32)
    if T < seq_len:
        # Not a single complete window: nothing to predict from.
        return preds_full

    # "+ 1" includes the window ending on the very last row; no target is
    # needed here, so that row can get a forecast as well.
    n_windows = T - seq_len + 1
    windows = np.stack([data_array[i:i + seq_len] for i in range(n_windows)])

    X = torch.tensor(windows, dtype=torch.float32).to(device)
    with torch.no_grad():
        # reshape(-1) keeps a 1-D array even when there is only one window
        # (a bare squeeze() would collapse it to a 0-d scalar).
        preds = model(X).cpu().numpy().reshape(-1)

    # Window i ends at row i + seq_len - 1, which is where its forecast goes.
    preds_full[seq_len - 1:] = preds
    # Back-fill the warm-up rows by position rather than by searching for
    # non-zero values, so a genuine 0.0 forecast is not treated as missing.
    preds_full[:seq_len - 1] = preds[0]
    return preds_full

class TradingEnv(gym.Env):
    """Single-asset trading environment over a fixed 2-D feature array.

    The observation is a window of ``sequence_length`` consecutive rows and
    the action is one number in [-1, 1] used as the position size. The reward
    of a step is the position times the change of column 0 (the price)
    between the newest row the agent had seen and the next row, minus a fee
    proportional to the change in position.

    ``reset`` returns ``(obs, info)`` and ``step`` returns the 5-tuple
    ``(obs, reward, terminated, truncated, info)``.

    Args:
        data: 2-D array-like of rows, with the price in column 0.
        sequence_length: Number of rows in each observation window.
        fee: Cost per unit of position change.

    Raises:
        ValueError: If ``data`` is not 2-D or has fewer rows than
            ``sequence_length``.
    """

    def __init__(self, data, sequence_length=30, fee=0.001):
        super().__init__()
        # Store float32 so observations match the declared space dtype.
        data = np.asarray(data, dtype=np.float32)
        if data.ndim != 2:
            raise ValueError("data must be a 2-D array of shape (rows, features)")
        if len(data) < sequence_length:
            raise ValueError("data has fewer rows than sequence_length")

        self.data = data
        self.seq_len = sequence_length
        self.fee = fee
        self.total_steps = len(data) - sequence_length
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(sequence_length, data.shape[1]),
            dtype=np.float32
        )
        self.reset()

    def reset(self, seed=None, options=None):
        """Rewind to the first window and restore the initial portfolio.

        Returns:
            ``(obs, info)`` with the first observation and an empty dict.
        """
        self.step_idx = 0
        self.last_action = 0.0
        self.portfolio = 1.0
        return self._obs(), {}

    def _obs(self):
        """Return the window of ``seq_len`` rows starting at ``step_idx``."""
        return self.data[self.step_idx:self.step_idx + self.seq_len]

    def step(self, action):
        """Apply ``action`` as the position for the next price move.

        Args:
            action: Array-like whose first element is the position size.

        Returns:
            ``(obs, reward, terminated, truncated, info)``; ``truncated`` is
            always ``False`` and ``info["portfolio_value"]`` is the running
            portfolio value.
        """
        position = float(np.asarray(action, dtype=np.float32).reshape(-1)[0])

        self.step_idx += 1
        done = self.step_idx >= self.total_steps
        reward = 0.0
        if not done:
            # The action was chosen from the window ending at row
            # `newest - 1`, so it must be paid on the move into row `newest`,
            # the first row the agent had not seen. Using rows
            # step_idx - 1 -> step_idx would reward a price change that was
            # already inside the observed window.
            newest = self.step_idx + self.seq_len - 1
            p_now = self.data[newest, 0]
            p_prev = self.data[newest - 1, 0]
            fee = self.fee * abs(position - self.last_action)
            # Plain float keeps reward and portfolio free of numpy scalars.
            reward = float(position * (p_now - p_prev) - fee)
            self.last_action = position
        # The terminal transition carries zero reward.
        self.portfolio += reward
        obs = self._obs() if not done else self.data[-self.seq_len:]
        return obs, reward, done, False, {"portfolio_value": self.portfolio}

def evaluate(env, model):
    """Run one deterministic episode and record the portfolio value.

    Args:
        env: Environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step(action)`` returns ``(obs, reward, terminated, truncated,
            info)`` with ``info["portfolio_value"]`` set.
        model: Object with ``predict(obs, deterministic=True)`` returning
            ``(action, state)``.

    Returns:
        Array of portfolio values: the starting value 1.0 followed by one
        entry per environment step.
    """
    obs, _ = env.reset()
    values = [1.0]
    finished = False
    while not finished:
        action, _ = model.predict(obs, deterministic=True)
        obs, _, terminated, truncated, info = env.step(action)
        values.append(info["portfolio_value"])
        # Stop on either flag: an episode that ends by truncation would
        # otherwise never leave the loop.
        finished = bool(terminated) or bool(truncated)
    return np.array(values)

def metrics(vals):
    """Summarise a portfolio value series.

    Args:
        vals: 1-D numpy array of portfolio values, one per step.

    Returns:
        Tuple ``(cumulative_return, sharpe, sortino, max_drawdown)``.
        The Sharpe value is mean step return over its standard deviation and
        the Sortino value divides by the standard deviation of the negative
        step returns; each is 0 when its denominator is not positive.
    """
    step_returns = np.diff(vals) / vals[:-1]
    total_return = vals[-1] / vals[0] - 1

    volatility = np.std(step_returns)
    if volatility > 0:
        sharpe_ratio = np.mean(step_returns) / volatility
    else:
        sharpe_ratio = 0

    losses = step_returns[step_returns < 0]
    sortino_ratio = 0
    if len(losses) > 0:
        downside = np.std(losses)
        if downside > 0:
            sortino_ratio = np.mean(step_returns) / downside

    # Drawdown is measured against the running maximum of the series.
    running_peak = np.maximum.accumulate(vals)
    max_drawdown = np.min(vals / running_peak - 1)
    return total_return, sharpe_ratio, sortino_ratio, max_drawdown

if __name__ == "__main__":
    # End-to-end experiment: train a Bi-LSTM forecaster on 2017-2022, append
    # its forecasts to the 2023 features, then compare a SAC agent that sees
    # the forecasts ("hybrid") with one that does not ("simple").
    seq_len = 30

    # NOTE(review): load_and_preprocess fits its scaler on every row of the
    # file, so the 2023 rows influence the scaling of the training years.
    data, _ = load_and_preprocess("BTC.csv")

    years = data["Date"].dt.year
    train = data[(years >= 2017) & (years <= 2022)]
    test = data[years == 2023]

    train_np = train.drop(columns=["Date"]).values
    test_np = test.drop(columns=["Date"]).values

    X_train, y_train = create_sequences(train_np, seq_len)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    bilstm = BiLSTMModel(X_train.shape[2], 224, 1, 0.14).to(device)
    opt = torch.optim.Adam(bilstm.parameters(), lr=0.0019)
    loss_fn = nn.MSELoss()

    loader = DataLoader(
        TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                      torch.tensor(y_train, dtype=torch.float32)),
        batch_size=32, shuffle=True
    )

    for _ in range(50):
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            # Squeeze only the trailing axis: a bare squeeze() would also
            # drop the batch axis when the final batch has a single sample,
            # leaving prediction and target with different shapes.
            loss = loss_fn(bilstm(xb).squeeze(-1), yb)
            loss.backward()
            opt.step()

    # Forecast column appended as the last feature of the hybrid data set.
    preds = compute_next_day_predictions(test_np, seq_len, bilstm, device)
    hybrid_test = np.hstack([test_np, preds.reshape(-1, 1)])

    env_hybrid = TradingEnv(hybrid_test, seq_len)
    env_simple = TradingEnv(test_np, seq_len)

    sac_hybrid = SAC("MlpPolicy", env_hybrid, verbose=1, seed=SEED)
    sac_simple = SAC("MlpPolicy", env_simple, verbose=1, seed=SEED)

    # NOTE(review): both agents are trained and then evaluated on the same
    # 2023 environments, so the figures below are in-sample results.
    sac_hybrid.learn(50000)
    sac_simple.learn(50000)

    v_h = evaluate(env_hybrid, sac_hybrid)
    v_s = evaluate(env_simple, sac_simple)

    # Compute each metric tuple once instead of once per table cell.
    m_h = metrics(v_h)
    m_s = metrics(v_s)
    print(pd.DataFrame({
        "Method": ["Bi-LSTM SAC", "Simple SAC"],
        "Cumulative Return": [m_h[0], m_s[0]],
        "Sharpe": [m_h[1], m_s[1]],
        "Sortino": [m_h[2], m_s[2]],
        "Max Drawdown": [m_h[3], m_s[3]]
    }))

    # One x value per environment step, taken from the trailing dates of the
    # test set rather than from a hard-coded calendar range, so gaps or a
    # different start date in the data are reflected on the axis.
    n_steps = len(v_h) - 1
    dates = test["Date"].iloc[len(test) - n_steps:].to_numpy()

    # Growth relative to the starting value; equal to the cumulative product
    # of (1 + step return).
    plt.plot(dates, v_h[1:] / v_h[0], label="Bi-LSTM SAC")
    plt.plot(dates, v_s[1:] / v_s[0], label="Simple SAC")
    plt.legend()
    plt.grid()
    plt.show()
