# Import Packages

In [None]:
import pandas as pd
import numpy as np
import sys
import os
from ray import tune
import torch

# Make the project root importable so the `utils` package can be found.
# A notebook has no __file__, so the kernel's working directory is used and its
# parent directory is assumed to be the project root.
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:  # keep sys.path free of duplicates when the cell is re-run
    sys.path.append(parent_dir)

# Import project helpers for tuning, evaluation and plotting from utils
from utils.tune import tune_dqn_with_ray, tune_ppo_with_ray
from utils.eval import best_dqn_rollout, best_ppo_rollout
from utils.visualize import pnl_plot

# Summarise the runtime environment in one call: import status, PyTorch version,
# whether CUDA is available, and the directory that was added to sys.path.
print(
    "✅ All imports successful!",
    f"PyTorch version: {torch.__version__}",
    f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}",
    f"Parent directory added: {parent_dir}",
    sep="\n",
)

# Loading Data

In [None]:
# Load the feature table. The path is relative to the notebook's working directory.
data_path = "../data/features.csv"
data = pd.read_csv(data_path)

print(f"Data shape: {data.shape}")
print(f"Data columns: {data.columns.tolist()}")
print("\nFirst few rows:")
print(data.head())

# Column groups used to assemble the input matrix of each experiment below.
ESSENTIAL_FEATURES = ['closed_price', 'scaled_volume_WETH', 'scaled_volume_USDC']
MICROSTRUCTURE_FEATURES = ['volume_imbalance', 'scaled_total_volume', 'n_swap', 'interval_swap', 'liquidity', 'tick', 'R_ewma', 'volatility_ewm']
TECHNICAL_FEATURES = ['ma24', 'ma168', 'bb_upper', 'bb_middle', 'bb_lower', 'adxr', 'dx', 'n_mb']
# One-hot regime columns. They are expected to come from one-hot encoding
# km_label_micro / km_label_all in a later cell (assumes the labels are 0, 1, 2 — TODO confirm).
REGIME_LABEL_BASED_ON_MICROSTURCTURE_FEATURES = ["km_label_micro_0", "km_label_micro_1", "km_label_micro_2"]
REGIME_LABEL_BASED_ON_ALL_FEATURES = ["km_label_all_0", "km_label_all_1", "km_label_all_2"]

print(f"\nEssential Features: {ESSENTIAL_FEATURES}")
print(f"Microstructure Features: {MICROSTRUCTURE_FEATURES}")
print(f"Technical Features: {TECHNICAL_FEATURES}")

In [None]:
def one_hot_encode_label_columns(df: pd.DataFrame, label_column_names: list[str]) -> pd.DataFrame:
    """
    Return a copy of ``df`` with one-hot indicator columns appended for each label column.

    - The input frame is not modified and the original label columns are kept.
    - Indicator columns are named ``<col>_<class>`` and stored as int8.
    - NaN labels get no indicator column of their own (``dummy_na=False``).
    - Idempotent: if a column with an indicator's name already exists (for
      example because this function was already applied to the frame), it is
      replaced instead of being appended a second time, so column names stay unique.

    Args:
        df: Source table.
        label_column_names: Names of the categorical label columns to encode.

    Returns:
        A new DataFrame containing the original columns plus the indicator columns.

    Raises:
        KeyError: If a requested label column is not present.
    """
    out = df.copy()
    for col in label_column_names:
        if col not in out.columns:
            raise KeyError(f"Column '{col}' not found in DataFrame.")
        # If you want a column for NaN as well, set dummy_na=True
        dummies = pd.get_dummies(out[col], prefix=col, prefix_sep="_", dtype="int8", dummy_na=False)
        # Drop stale indicator columns first; otherwise re-running the encoding
        # would leave duplicate column names that break later column selection.
        stale = [name for name in dummies.columns if name in out.columns]
        if stale:
            out = out.drop(columns=stale)
        out = pd.concat([out, dummies], axis=1)
    return out

# Append one-hot regime columns (<col>_<class>) for both clustering label columns.
data = one_hot_encode_label_columns(data, ["km_label_micro", "km_label_all"])

# Fail fast if the regime columns selected by the later experiments were not
# produced (for example because the label values are not exactly 0/1/2).
_expected_regime_columns = REGIME_LABEL_BASED_ON_MICROSTURCTURE_FEATURES + REGIME_LABEL_BASED_ON_ALL_FEATURES
_missing_regime_columns = [c for c in _expected_regime_columns if c not in data.columns]
assert not _missing_regime_columns, f"Missing regime columns: {_missing_regime_columns}"

# Show a preview rather than dumping the whole table.
data.head()

## 1. Essential Features

In [None]:
# Build the input matrix for experiment 1: essential features only.
# The "time" column is kept separately and is not part of the numeric input.
numeric_data = data[ESSENTIAL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the first TRAIN_TEST_RATIO of the rows are used for training,
# the remainder for testing (assumes rows are in time order — TODO confirm).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# One training episode spans the whole training split.
TRAIN_EPISODE_LEN = int(train_data.shape[0])

# Environment settings handed to the tuning helpers for training.
TRAIN_ENV_KW = {
    "init_value": 10_000.0,
    "liquidation_value": 0.0,
    "gas_cost": 5.0,
    "fee_tier": 0.0005,
    "max_steps": TRAIN_EPISODE_LEN,
    "start_index": 0,
}

# Passed as num_samples to both tuning helpers.
num_samples = 10

# DQN search space (passed as param_space to tune_dqn_with_ray).
param_space_dqn = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 256)]),
    "lr": tune.loguniform(1e-5, 5e-4),
    "batch_size": tune.choice([64, 128, 256]),
    "gamma": tune.uniform(0.96, 0.995),
    "tau": tune.loguniform(1e-4, 1e-2),
    "horizon_len": tune.choice([128, 256, 512, 1024]),
    "eps": tune.uniform(0.30, 0.80),
    "buffer": tune.qlograndint(int(2e5), int(1e6), int(1e4)),
    "repeat": tune.qrandint(1, 2, 1),  # qrandint bounds are inclusive: 1 or 2
    "use_per": tune.choice([False, True]),
    "passes": tune.choice([16, 32, 64]),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    "seed": tune.randint(0, 1_000_000),
}

# PPO search space (passed as param_space to tune_ppo_with_ray).
param_space_ppo = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 128)]),
    "lr": tune.loguniform(1e-5, 3e-4),
    "batch_size": tune.choice([1024, 2048, 4096]),
    "gamma": tune.uniform(0.96, 0.999),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    # horizon_len always mirrors the sampled episode_len of the same trial.
    "horizon_len": tune.sample_from(lambda spec: spec.config["episode_len"]),
    "repeat": tune.qrandint(2, 6, 1),
    "gae_lambda": tune.uniform(0.90, 0.98),
    "clip_ratio": tune.uniform(0.15, 0.35),
    "entropy_coef": tune.loguniform(1e-3, 5e-2),
    "vf_coef": tune.uniform(0.3, 0.7),
    "passes": tune.choice([16, 32, 64]),
    "seed": tune.randint(0, 1_000_000),
}

# Root directory for Ray Tune results; the evaluation cells read from it.
storage_root = "./ray_results"

In [6]:
# Hyperparameter search for DQN on the essential-features training split.
experiment_name = "dqn_univ3_search_1"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_dqn,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

2025-09-19 01:12:12,065	INFO tune.py:1041 -- Total run time: 3378.56 seconds (3377.13 seconds for the tuning loop).
Resume experiment with: Tuner.restore(path="/Users/seitahuang/Desktop/AMM_RL/experiments/ray_results/dqn_univ3_search_1", trainable=...)


Best final_equity: 10000.0
Best config: {'net_dims': (64, 64), 'lr': 2.9808598161803766e-05, 'batch_size': 64, 'gamma': 0.9901619534833068, 'tau': 0.0015189073695423617, 'horizon_len': 1024, 'eps': 0.5799339572196087, 'buffer': 390000, 'repeat': 1, 'use_per': False, 'passes': 32, 'episode_len': 20792, 'seed': 69494}
Best logdir: /Users/seitahuang/Desktop/AMM_RL/experiments/ray_results/dqn_univ3_search_1/trainable_ray_c0016_00008_8_batch_size=64,buffer=390000,episode_len=20792,eps=0.5799,gamma=0.9902,horizon_len=1024,lr=0.0000,net_d_2025-09-19_00-15-53


In [None]:
# Hyperparameter search for PPO on the essential-features training split.
experiment_name = "ppo_univ3_search_1"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_ppo,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Environment settings for the evaluation rollouts; max_steps covers every row
# of the full feature table (train and test rows together).
ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=len(data),
    start_index=0,
)

# NOTE(review): result_root is reassigned by each rollout cell below, and
# action_dim is not read anywhere in the visible notebook.
result_root = "./result"
action_dim = 5


In [None]:
# Roll out the DQN policy from search 1 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/dqn_univ3_search_1"
result_root = "./result_dqn"

res_dqn_1 = best_dqn_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_dqn_1 = res_dqn_1["result"]["dataframe"]

In [None]:
# Roll out the PPO policy from search 1 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/ppo_univ3_search_1"
result_root = "./result_ppo"

res_ppo_1 = best_ppo_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_ppo_1 = res_ppo_1["result"]["dataframe"]

## Visualization

In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the DQN rollout.
t, y, a = (result_dqn_1[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the PPO rollout.
t, y, a = (result_ppo_1[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


## 2. Essential Features and Microstructure Features

In [None]:
# Build the input matrix for experiment 2: essential + microstructure features.
# The "time" column is kept separately and is not part of the numeric input.
numeric_data = data[ESSENTIAL_FEATURES + MICROSTRUCTURE_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the first TRAIN_TEST_RATIO of the rows are used for training,
# the remainder for testing (assumes rows are in time order — TODO confirm).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# One training episode spans the whole training split.
TRAIN_EPISODE_LEN = int(train_data.shape[0])

# Environment settings handed to the tuning helpers for training.
TRAIN_ENV_KW = {
    "init_value": 10_000.0,
    "liquidation_value": 0.0,
    "gas_cost": 5.0,
    "fee_tier": 0.0005,
    "max_steps": TRAIN_EPISODE_LEN,
    "start_index": 0,
}

# Passed as num_samples to both tuning helpers.
num_samples = 10

# DQN search space (passed as param_space to tune_dqn_with_ray).
param_space_dqn = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 256)]),
    "lr": tune.loguniform(1e-5, 5e-4),
    "batch_size": tune.choice([64, 128, 256]),
    "gamma": tune.uniform(0.96, 0.995),
    "tau": tune.loguniform(1e-4, 1e-2),
    "horizon_len": tune.choice([128, 256, 512, 1024]),
    "eps": tune.uniform(0.30, 0.80),
    "buffer": tune.qlograndint(int(2e5), int(1e6), int(1e4)),
    "repeat": tune.qrandint(1, 2, 1),  # qrandint bounds are inclusive: 1 or 2
    "use_per": tune.choice([False, True]),
    "passes": tune.choice([16, 32, 64]),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    "seed": tune.randint(0, 1_000_000),
}

# PPO search space (passed as param_space to tune_ppo_with_ray).
param_space_ppo = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 128)]),
    "lr": tune.loguniform(1e-5, 3e-4),
    "batch_size": tune.choice([1024, 2048, 4096]),
    "gamma": tune.uniform(0.96, 0.999),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    # horizon_len always mirrors the sampled episode_len of the same trial.
    "horizon_len": tune.sample_from(lambda spec: spec.config["episode_len"]),
    "repeat": tune.qrandint(2, 6, 1),
    "gae_lambda": tune.uniform(0.90, 0.98),
    "clip_ratio": tune.uniform(0.15, 0.35),
    "entropy_coef": tune.loguniform(1e-3, 5e-2),
    "vf_coef": tune.uniform(0.3, 0.7),
    "passes": tune.choice([16, 32, 64]),
    "seed": tune.randint(0, 1_000_000),
}

# Root directory for Ray Tune results; the evaluation cells read from it.
storage_root = "./ray_results"

In [None]:
# Hyperparameter search for DQN on the essential + microstructure training split.
experiment_name = "dqn_univ3_search_2"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_dqn,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# Hyperparameter search for PPO on the essential + microstructure training split.
experiment_name = "ppo_univ3_search_2"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_ppo,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Environment settings for the evaluation rollouts; max_steps covers every row
# of the full feature table (train and test rows together).
ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=len(data),
    start_index=0,
)

# NOTE(review): result_root is reassigned by each rollout cell below, and
# action_dim is not read anywhere in the visible notebook.
result_root = "./result"
action_dim = 5


In [None]:
# Roll out the DQN policy from search 2 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/dqn_univ3_search_2"
result_root = "./result_dqn"

res_dqn_2 = best_dqn_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_dqn_2 = res_dqn_2["result"]["dataframe"]

In [None]:
# Roll out the PPO policy from search 2 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/ppo_univ3_search_2"
result_root = "./result_ppo"

res_ppo_2 = best_ppo_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_ppo_2 = res_ppo_2["result"]["dataframe"]

## Visualization

In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the DQN rollout.
t, y, a = (result_dqn_2[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the PPO rollout.
t, y, a = (result_ppo_2[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


## 3. Essential Features, Microstructure Features and Technical Features

In [None]:
# Build the input matrix for experiment 3: essential + microstructure + technical features.
# The "time" column is kept separately and is not part of the numeric input.
numeric_data = data[ESSENTIAL_FEATURES + MICROSTRUCTURE_FEATURES + TECHNICAL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the first TRAIN_TEST_RATIO of the rows are used for training,
# the remainder for testing (assumes rows are in time order — TODO confirm).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# One training episode spans the whole training split.
TRAIN_EPISODE_LEN = int(train_data.shape[0])

# Environment settings handed to the tuning helpers for training.
# Unlike experiments 1 and 2, gas_cost is 0 here (matching this section's ENV_KW).
TRAIN_ENV_KW = {
    "init_value": 10_000.0,
    "liquidation_value": 0.0,
    "gas_cost": 0,
    "fee_tier": 0.0005,
    "max_steps": TRAIN_EPISODE_LEN,
    "start_index": 0,
}

# Passed as num_samples to both tuning helpers.
num_samples = 10

# DQN search space (passed as param_space to tune_dqn_with_ray).
param_space_dqn = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 256)]),
    "lr": tune.loguniform(1e-5, 5e-4),
    "batch_size": tune.choice([64, 128, 256]),
    "gamma": tune.uniform(0.96, 0.995),
    "tau": tune.loguniform(1e-4, 1e-2),
    "horizon_len": tune.choice([128, 256, 512, 1024]),
    "eps": tune.uniform(0.30, 0.80),
    "buffer": tune.qlograndint(int(2e5), int(1e6), int(1e4)),
    "repeat": tune.qrandint(1, 2, 1),  # qrandint bounds are inclusive: 1 or 2
    "use_per": tune.choice([False, True]),
    "passes": tune.choice([2, 4]),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    "seed": tune.randint(0, 1_000_000),
}

# PPO search space (passed as param_space to tune_ppo_with_ray).
param_space_ppo = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 128)]),
    "lr": tune.loguniform(1e-5, 3e-4),
    "batch_size": tune.choice([1024, 2048, 4096]),
    "gamma": tune.uniform(0.96, 0.999),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    # horizon_len always mirrors the sampled episode_len of the same trial.
    "horizon_len": tune.sample_from(lambda spec: spec.config["episode_len"]),
    "repeat": tune.qrandint(2, 6, 1),
    "gae_lambda": tune.uniform(0.90, 0.98),
    "clip_ratio": tune.uniform(0.15, 0.35),
    "entropy_coef": tune.loguniform(1e-3, 5e-2),
    "vf_coef": tune.uniform(0.3, 0.7),
    "passes": tune.choice([2, 4]),
    "seed": tune.randint(0, 1_000_000),
}

# Root directory for Ray Tune results; the evaluation cells read from it.
storage_root = "./ray_results"

In [None]:
# Hyperparameter search for DQN on the essential + microstructure + technical training split.
experiment_name = "dqn_univ3_search_3"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_dqn,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# Hyperparameter search for PPO on the essential + microstructure + technical training split.
experiment_name = "ppo_univ3_search_3"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_ppo,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Environment settings for the evaluation rollouts; max_steps covers every row
# of the full feature table (train and test rows together).
ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=0.0,
    fee_tier=0.0005,
    max_steps=len(data),
    start_index=0,
)

# NOTE(review): result_root is reassigned by each rollout cell below, and
# action_dim is not read anywhere in the visible notebook.
result_root = "./result"
action_dim = 5

In [None]:
# Roll out the DQN policy from search 3 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/dqn_univ3_search_3"
result_root = "./result_dqn"

res_dqn_3 = best_dqn_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_dqn_3 = res_dqn_3["result"]["dataframe"]

In [None]:
# Roll out the PPO policy from search 3 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/ppo_univ3_search_3"
result_root = "./result_ppo"

res_ppo_3 = best_ppo_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_ppo_3 = res_ppo_3["result"]["dataframe"]

## Visualization

In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the DQN rollout.
t, y, a = (result_dqn_3[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the PPO rollout.
t, y, a = (result_ppo_3[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


## 4. Essential Features and Regime Label Based on Microstructure Features

In [None]:
# Build the input matrix for experiment 4: essential features + one-hot regime
# labels derived from the microstructure clustering.
# The "time" column is kept separately and is not part of the numeric input.
numeric_data = data[ESSENTIAL_FEATURES + REGIME_LABEL_BASED_ON_MICROSTURCTURE_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the first TRAIN_TEST_RATIO of the rows are used for training,
# the remainder for testing (assumes rows are in time order — TODO confirm).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# One training episode spans the whole training split.
TRAIN_EPISODE_LEN = int(train_data.shape[0])

# Environment settings handed to the tuning helpers for training.
# NOTE(review): init_value was 100_000.0 in this section only; it is aligned here
# with this section's evaluation ENV_KW and with every other experiment (10_000.0).
# Restore 100_000.0 if the larger starting value was intentional.
TRAIN_ENV_KW = {
    "init_value": 10_000.0,
    "liquidation_value": 0.0,
    "gas_cost": 0,
    "fee_tier": 0.0005,
    "max_steps": TRAIN_EPISODE_LEN,
    "start_index": 0,
}

# Passed as num_samples to both tuning helpers.
num_samples = 50

# DQN search space (passed as param_space to tune_dqn_with_ray).
param_space_dqn = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 256)]),
    "lr": tune.loguniform(1e-5, 5e-4),
    "batch_size": tune.choice([64, 128, 256]),
    "gamma": tune.uniform(0.96, 0.995),
    "tau": tune.loguniform(1e-4, 1e-2),
    "horizon_len": tune.choice([128, 256, 512, 1024]),
    "eps": tune.uniform(0.30, 0.80),
    "buffer": tune.qlograndint(int(2e5), int(1e6), int(1e4)),
    "repeat": tune.qrandint(1, 2, 1),  # qrandint bounds are inclusive: 1 or 2
    "use_per": tune.choice([False, True]),
    "passes": tune.choice([2]),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    "seed": tune.randint(0, 1_000_000),
}

# PPO search space.
# NOTE(review): "passes" uses the same list as "batch_size" here, whereas the
# other experiments use [16, 32, 64] or [2, 4] — confirm this is intended.
param_space_ppo = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 128)]),
    "lr": tune.loguniform(1e-5, 3e-4),
    "batch_size": tune.choice([128, 256, 512]),
    "gamma": tune.uniform(0.96, 0.999),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    # horizon_len always mirrors the sampled episode_len of the same trial.
    "horizon_len": tune.sample_from(lambda spec: spec.config["episode_len"]),
    "repeat": tune.qrandint(2, 6, 1),
    "gae_lambda": tune.uniform(0.90, 0.98),
    "clip_ratio": tune.uniform(0.15, 0.35),
    "entropy_coef": tune.loguniform(1e-3, 5e-2),
    "vf_coef": tune.uniform(0.3, 0.7),
    "passes": tune.choice([128, 256, 512]),
    "seed": tune.randint(0, 1_000_000),
}

# Root directory for Ray Tune results; the evaluation cells read from it.
storage_root = "./ray_results"

In [None]:
# Hyperparameter search for DQN on the essential + microstructure-regime training split.
experiment_name = "dqn_univ3_search_4"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_dqn,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# Hyperparameter search for PPO on the essential + microstructure-regime training split.
experiment_name = "ppo_univ3_search_4"

# param_space_ppo was defined for this experiment but never passed, so the
# helper silently fell back to whatever its default search space is.
results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_ppo,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Environment settings for the evaluation rollouts; max_steps covers every row
# of the full feature table (train and test rows together).
ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=0.0,
    fee_tier=0.0005,
    max_steps=len(data),
    start_index=0,
)

# NOTE(review): result_root is reassigned by each rollout cell below, and
# action_dim is not read anywhere in the visible notebook.
result_root = "./result"
action_dim = 5

In [None]:
# Roll out the DQN policy from search 4 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/dqn_univ3_search_4"
result_root = "./result_dqn"

res_dqn_4 = best_dqn_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_dqn_4 = res_dqn_4["result"]["dataframe"]

In [None]:
# Roll out the PPO policy from search 4 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/ppo_univ3_search_4"
result_root = "./result_ppo"

res_ppo_4 = best_ppo_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_ppo_4 = res_ppo_4["result"]["dataframe"]

## Visualization

In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the DQN rollout.
t, y, a = (result_dqn_4[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the PPO rollout.
t, y, a = (result_ppo_4[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


## 5. Essential Features and Regime Label Based on All Features

In [None]:
# Build the input matrix for experiment 5: essential features + one-hot regime
# labels derived from the clustering on all features.
# The "time" column is kept separately and is not part of the numeric input.
numeric_data = data[ESSENTIAL_FEATURES + REGIME_LABEL_BASED_ON_ALL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the first TRAIN_TEST_RATIO of the rows are used for training,
# the remainder for testing (assumes rows are in time order — TODO confirm).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# One training episode spans the whole training split.
TRAIN_EPISODE_LEN = int(train_data.shape[0])

# Environment settings handed to the tuning helpers for training.
# NOTE(review): gas_cost is 0 here but this section's evaluation ENV_KW uses 5.0;
# every other experiment uses the same gas_cost for training and evaluation —
# confirm which value is intended.
TRAIN_ENV_KW = {
    "init_value": 10_000.0,
    "liquidation_value": 0.0,
    "gas_cost": 0,
    "fee_tier": 0.0005,
    "max_steps": TRAIN_EPISODE_LEN,
    "start_index": 0,
}

# Passed as num_samples to both tuning helpers.
num_samples = 50

# DQN search space (passed as param_space to tune_dqn_with_ray).
param_space_dqn = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 256)]),
    "lr": tune.loguniform(1e-5, 5e-4),
    "batch_size": tune.choice([64, 128, 256]),
    "gamma": tune.uniform(0.96, 0.995),
    "tau": tune.loguniform(1e-4, 1e-2),
    "horizon_len": tune.choice([128, 256, 512, 1024]),
    "eps": tune.uniform(0.30, 0.80),
    "buffer": tune.qlograndint(int(2e5), int(1e6), int(1e4)),
    "repeat": tune.qrandint(1, 2, 1),  # qrandint bounds are inclusive: 1 or 2
    "use_per": tune.choice([False, True]),
    "passes": tune.choice([16, 32, 64]),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    "seed": tune.randint(0, 1_000_000),
}

# PPO search space.
param_space_ppo = {
    "net_dims": tune.choice([(64, 64), (128, 128), (256, 128)]),
    "lr": tune.loguniform(1e-5, 3e-4),
    "batch_size": tune.choice([1024, 2048, 4096]),
    "gamma": tune.uniform(0.96, 0.999),
    "episode_len": tune.choice([TRAIN_EPISODE_LEN]),
    # horizon_len always mirrors the sampled episode_len of the same trial.
    "horizon_len": tune.sample_from(lambda spec: spec.config["episode_len"]),
    "repeat": tune.qrandint(2, 6, 1),
    "gae_lambda": tune.uniform(0.90, 0.98),
    "clip_ratio": tune.uniform(0.15, 0.35),
    "entropy_coef": tune.loguniform(1e-3, 5e-2),
    "vf_coef": tune.uniform(0.3, 0.7),
    "passes": tune.choice([16, 32, 64]),
    "seed": tune.randint(0, 1_000_000),
}

# Root directory for Ray Tune results; the evaluation cells read from it.
storage_root = "./ray_results"

In [None]:
# Hyperparameter search for DQN on the essential + all-features-regime training split.
experiment_name = "dqn_univ3_search_5"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_dqn,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# Hyperparameter search for PPO on the essential + all-features-regime training split.
experiment_name = "ppo_univ3_search_5"

# param_space_ppo was defined for this experiment but never passed, so the
# helper silently fell back to whatever its default search space is.
results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    param_space=param_space_ppo,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Environment settings for the evaluation rollouts; max_steps covers every row
# of the full feature table (train and test rows together).
# NOTE(review): gas_cost is 5.0 here while this section's TRAIN_ENV_KW uses 0 —
# confirm which value is intended.
ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=len(data),
    start_index=0,
)

# NOTE(review): result_root is reassigned by each rollout cell below, and
# action_dim is not read anywhere in the visible notebook.
result_root = "./result"
action_dim = 5

In [None]:
# Roll out the DQN policy from search 5 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/dqn_univ3_search_5"
result_root = "./result_dqn"

res_dqn_5 = best_dqn_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_dqn_5 = res_dqn_5["result"]["dataframe"]

In [None]:
# Roll out the PPO policy from search 5 (presumably the best trial found under
# experiment_dir) on the full feature matrix and keep the resulting table.
experiment_dir = "./ray_results/ppo_univ3_search_5"
result_root = "./result_ppo"

res_ppo_5 = best_ppo_rollout(
    experiment_dir=experiment_dir,
    result_root=result_root,
    ENV_KW=ENV_KW,
    data=numeric_data,
    time_data=time_data,
)
result_ppo_5 = res_ppo_5["result"]["dataframe"]

## Visualization

In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the DQN rollout.
t, y, a = (result_dqn_5[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Plot equity (top) and allocation ratio (bottom) over time for the PPO rollout.
t, y, a = (result_ppo_5[column] for column in ("time", "after_equity", "allocation_ratio"))

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()
