# Import Packages

In [None]:
import pandas as pd
import numpy as np
import sys
import os
import torch

# Add the parent of the working directory to the import path so the `utils`
# package imported below can be found. A notebook has no `__file__`, so the
# parent is derived from the current working directory explicitly instead of
# from the absolute path of a fake '__file__' file name.
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:  # re-running the cell must not stack duplicates
    sys.path.append(parent_dir)

# Import our custom environment from utils
from utils.tune import tune_dqn_with_ray, tune_ppo_with_ray
from utils.eval import best_dqn_rollout, best_ppo_rollout
from utils.visualize import pnl_plot

# Report the runtime (PyTorch build, CUDA availability, import root) in one call;
# each positional argument becomes one output line.
print(
    "✅ All imports successful!",
    f"PyTorch version: {torch.__version__}",
    f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}",
    f"Parent directory added: {parent_dir}",
    sep="\n",
)

# Loading Data

In [None]:
# Read the feature table; the path is relative to the notebook's working directory.
data_path = "../data/features.csv"
data = pd.read_csv(data_path)

# Quick sanity check of what was loaded: size, column names, leading rows.
print(
    f"Data shape: {data.shape}",
    f"Data columns: {data.columns.tolist()}",
    "\nFirst few rows:",
    data.head(),
    sep="\n",
)

# Column groups used to assemble the observation matrix of each experiment.
ESSENTIAL_FEATURES = ['closed_price', 'scaled_volume_WETH', 'scaled_volume_USDC']
MICROSTRUCTURE_FEATURES = [
    'volume_imbalance', 'scaled_total_volume', 'n_swap', 'interval_swap',
    'liquidity', 'tick', 'R_ewma', 'volatility_ewm',
]
TECHNICAL_FEATURES = [
    'ma24', 'ma168', 'bb_upper', 'bb_middle', 'bb_lower', 'adxr', 'dx', 'n_mb',
]
# One-hot regime indicator columns. They are not read from the CSV under these
# names; they are expected to be created by one_hot_encode_label_columns from the
# km_label_micro / km_label_all columns before being selected.
# NOTE: the "MICROSTURCTURE" spelling is kept because other cells reference this name.
REGIME_LABEL_BASED_ON_MICROSTURCTURE_FEATURES = ["km_label_micro_0", "km_label_micro_1", "km_label_micro_2"]
REGIME_LABEL_BASED_ON_ALL_FEATURES = ["km_label_all_0", "km_label_all_1", "km_label_all_2"]

print(f"\nEssential Features: {ESSENTIAL_FEATURES}")
print(f"Microstructure Features: {MICROSTRUCTURE_FEATURES}")
print(f"Technical Features: {TECHNICAL_FEATURES}")

In [None]:
def one_hot_encode_label_columns(df: pd.DataFrame, label_column_names: list[str]) -> pd.DataFrame:
    """
    One-hot encode each column in label_column_names and append the dummy columns.

    - Keeps the original label columns.
    - Output columns are named as <col>_<class>.
    - Uses pandas.get_dummies (no sklearn dependency).
    - Safe to call again on its own output: dummy columns left over from an
      earlier call are replaced rather than duplicated.

    Args:
        df: Input frame; it is copied, never modified in place.
        label_column_names: Names of the categorical label columns to encode.

    Returns:
        A new DataFrame holding the original columns followed by the int8
        dummy columns of every requested label column.

    Raises:
        KeyError: If a requested label column is not present.
    """
    out = df.copy()
    for col in label_column_names:
        if col not in out.columns:
            raise KeyError(f"Column '{col}' not found in DataFrame.")
        # If you want a column for NaN as well, set dummy_na=True
        dummies = pd.get_dummies(out[col], prefix=col, prefix_sep="_", dtype="int8", dummy_na=False)
        # Drop same-named columns from a previous run first; otherwise re-running a
        # notebook cell yields duplicate column labels and later selections by
        # name return more columns than expected.
        stale = [name for name in dummies.columns if name in out.columns]
        out = pd.concat([out.drop(columns=stale), dummies], axis=1)
    return out

# Required preprocessing, not just a usage example: the regime-label feature
# lists (km_label_micro_0..2, km_label_all_0..2) select the indicator columns
# this call is expected to add (assumes the labels take the values 0, 1, 2).
data = one_hot_encode_label_columns(
    df=data,
    label_column_names=["km_label_micro", "km_label_all"],
)

data

## 1. Essential Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# timestamps are kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split without shuffling: the leading TRAIN_TEST_RATIO share of rows is
# the training set, the remainder the test set (assumes rows are in time order).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# Environment keyword arguments used while tuning.
# max_steps is 10000x (training rows - 1); presumably this keeps the step cap from
# ending episodes during the search -- confirm against the environment.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1) * 10_000,
    start_index=0,
)

# Search settings shared by the DQN and PPO tuning cells.
num_samples = 50
storage_root = "./ray_results"


In [None]:
# DQN hyper-parameter search on the current training split. The evaluation
# cell later reads ./ray_results/dqn_univ3_search_1, so keep this name in sync.
experiment_name = "dqn_univ3_search_1"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# PPO hyper-parameter search on the same split. Note that this rebinds
# `results` and `best` from the DQN search cell.
experiment_name = "ppo_univ3_search_1"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Evaluation-time environment settings. TRAIN_ENV_KW is rebound here with
# max_steps equal to (rows - 1) of the split, instead of the enlarged cap used
# for tuning.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1),
    start_index=0,
)

# Same settings for the held-out split; only max_steps differs.
TEST_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(test_data.shape[0] - 1),
    start_index=0,
)

# Arguments shared by the DQN and PPO rollout cells.
result_root = "./result"
action_dim = 5


In [None]:
# Directory of the DQN search to evaluate (storage_root / experiment_name of the
# tuning cell); point it elsewhere to evaluate a different search.
experiment_dir = "./ray_results/dqn_univ3_search_1"

res1_dqn = best_dqn_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result1_dqn, test_result1_dqn = (
    res1_dqn[split]["dataframe"] for split in ("train", "test")
)
all_result1_dqn = pd.concat([train_result1_dqn, test_result1_dqn])
all_result1_dqn

In [None]:
# Directory of the PPO search to evaluate (storage_root / experiment_name of the
# tuning cell).
experiment_dir = "./ray_results/ppo_univ3_search_1"

res1_ppo = best_ppo_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result1_ppo, test_result1_ppo = (
    res1_ppo[split]["dataframe"] for split in ("train", "test")
)
all_result1_ppo = pd.concat([train_result1_ppo, test_result1_ppo])
all_result1_ppo

## Visualization

In [None]:
# Series for the DQN plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result1_dqn["time"],
    all_result1_dqn["step_reward"].cumsum(),
    all_result1_dqn["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Series for the PPO plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result1_ppo["time"],
    all_result1_ppo["step_reward"].cumsum(),
    all_result1_ppo["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()

## 2. Essential Features and Microstructure Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# timestamps are kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES+MICROSTRUCTURE_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split without shuffling: the leading TRAIN_TEST_RATIO share of rows is
# the training set, the remainder the test set (assumes rows are in time order).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# Environment keyword arguments used while tuning.
# max_steps is 10000x (training rows - 1); presumably this keeps the step cap from
# ending episodes during the search -- confirm against the environment.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1) * 10_000,
    start_index=0,
)

# Search settings shared by the DQN and PPO tuning cells.
num_samples = 50
storage_root = "./ray_results"


In [None]:
# DQN hyper-parameter search on the current training split. The evaluation
# cell later reads ./ray_results/dqn_univ3_search_2, so keep this name in sync.
experiment_name = "dqn_univ3_search_2"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# PPO hyper-parameter search on the same split. Note that this rebinds
# `results` and `best` from the DQN search cell.
experiment_name = "ppo_univ3_search_2"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Evaluation-time environment settings. TRAIN_ENV_KW is rebound here with
# max_steps equal to (rows - 1) of the split, instead of the enlarged cap used
# for tuning.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1),
    start_index=0,
)

# Same settings for the held-out split; only max_steps differs.
TEST_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(test_data.shape[0] - 1),
    start_index=0,
)

# Arguments shared by the DQN and PPO rollout cells.
result_root = "./result"
action_dim = 5


In [None]:
# Directory of the DQN search to evaluate (storage_root / experiment_name of the
# tuning cell); point it elsewhere to evaluate a different search.
experiment_dir = "./ray_results/dqn_univ3_search_2"

res2_dqn = best_dqn_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result2_dqn, test_result2_dqn = (
    res2_dqn[split]["dataframe"] for split in ("train", "test")
)
all_result2_dqn = pd.concat([train_result2_dqn, test_result2_dqn])
all_result2_dqn

In [None]:
# Directory of the PPO search to evaluate (storage_root / experiment_name of the
# tuning cell).
experiment_dir = "./ray_results/ppo_univ3_search_2"

res2_ppo = best_ppo_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result2_ppo, test_result2_ppo = (
    res2_ppo[split]["dataframe"] for split in ("train", "test")
)
all_result2_ppo = pd.concat([train_result2_ppo, test_result2_ppo])
all_result2_ppo

## Visualization

In [None]:
# Series for the DQN plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result2_dqn["time"],
    all_result2_dqn["step_reward"].cumsum(),
    all_result2_dqn["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Series for the PPO plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result2_ppo["time"],
    all_result2_ppo["step_reward"].cumsum(),
    all_result2_ppo["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()

## 3. Essential Features, Microstructure Features and Technical Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# timestamps are kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES+MICROSTRUCTURE_FEATURES+TECHNICAL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split without shuffling: the leading TRAIN_TEST_RATIO share of rows is
# the training set, the remainder the test set (assumes rows are in time order).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# Environment keyword arguments used while tuning.
# max_steps is 10000x (training rows - 1); presumably this keeps the step cap from
# ending episodes during the search -- confirm against the environment.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1) * 10_000,
    start_index=0,
)

# Search settings shared by the DQN and PPO tuning cells.
num_samples = 50
storage_root = "./ray_results"


In [None]:
# DQN hyper-parameter search on the current training split. The evaluation
# cell later reads ./ray_results/dqn_univ3_search_3, so keep this name in sync.
experiment_name = "dqn_univ3_search_3"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# PPO hyper-parameter search on the same split. Note that this rebinds
# `results` and `best` from the DQN search cell.
experiment_name = "ppo_univ3_search_3"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Evaluation-time environment settings. TRAIN_ENV_KW is rebound here with
# max_steps equal to (rows - 1) of the split, instead of the enlarged cap used
# for tuning.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1),
    start_index=0,
)

# Same settings for the held-out split; only max_steps differs.
TEST_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(test_data.shape[0] - 1),
    start_index=0,
)

# Arguments shared by the DQN and PPO rollout cells.
result_root = "./result"
action_dim = 5


In [None]:
# Directory of the DQN search to evaluate (storage_root / experiment_name of the
# tuning cell); point it elsewhere to evaluate a different search.
experiment_dir = "./ray_results/dqn_univ3_search_3"

res3_dqn = best_dqn_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result3_dqn, test_result3_dqn = (
    res3_dqn[split]["dataframe"] for split in ("train", "test")
)
all_result3_dqn = pd.concat([train_result3_dqn, test_result3_dqn])
all_result3_dqn

In [None]:
# Directory of the PPO search to evaluate (storage_root / experiment_name of the
# tuning cell).
experiment_dir = "./ray_results/ppo_univ3_search_3"

res3_ppo = best_ppo_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result3_ppo, test_result3_ppo = (
    res3_ppo[split]["dataframe"] for split in ("train", "test")
)
all_result3_ppo = pd.concat([train_result3_ppo, test_result3_ppo])
all_result3_ppo

## Visualization

In [None]:
# Series for the DQN plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result3_dqn["time"],
    all_result3_dqn["step_reward"].cumsum(),
    all_result3_dqn["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Series for the PPO plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result3_ppo["time"],
    all_result3_ppo["step_reward"].cumsum(),
    all_result3_ppo["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()

## 4. Essential Features and Regime Label Based on Microstructure Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# timestamps are kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES+REGIME_LABEL_BASED_ON_MICROSTURCTURE_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split without shuffling: the leading TRAIN_TEST_RATIO share of rows is
# the training set, the remainder the test set (assumes rows are in time order).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# Environment keyword arguments used while tuning.
# max_steps is 10000x (training rows - 1); presumably this keeps the step cap from
# ending episodes during the search -- confirm against the environment.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1) * 10_000,
    start_index=0,
)

# Search settings shared by the DQN and PPO tuning cells.
num_samples = 50
storage_root = "./ray_results"


In [None]:
# DQN hyper-parameter search on the current training split. The evaluation
# cell later reads ./ray_results/dqn_univ3_search_4, so keep this name in sync.
experiment_name = "dqn_univ3_search_4"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# PPO hyper-parameter search on the same split. Note that this rebinds
# `results` and `best` from the DQN search cell.
experiment_name = "ppo_univ3_search_4"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Evaluation-time environment settings. TRAIN_ENV_KW is rebound here with
# max_steps equal to (rows - 1) of the split, instead of the enlarged cap used
# for tuning.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1),
    start_index=0,
)

# Same settings for the held-out split; only max_steps differs.
TEST_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(test_data.shape[0] - 1),
    start_index=0,
)

# Arguments shared by the DQN and PPO rollout cells.
result_root = "./result"
action_dim = 5


In [None]:
# Directory of the DQN search to evaluate (storage_root / experiment_name of the
# tuning cell); point it elsewhere to evaluate a different search.
experiment_dir = "./ray_results/dqn_univ3_search_4"

res4_dqn = best_dqn_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result4_dqn, test_result4_dqn = (
    res4_dqn[split]["dataframe"] for split in ("train", "test")
)
all_result4_dqn = pd.concat([train_result4_dqn, test_result4_dqn])
all_result4_dqn

In [None]:
# Directory of the PPO search to evaluate (storage_root / experiment_name of the
# tuning cell).
experiment_dir = "./ray_results/ppo_univ3_search_4"

res4_ppo = best_ppo_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result4_ppo, test_result4_ppo = (
    res4_ppo[split]["dataframe"] for split in ("train", "test")
)
all_result4_ppo = pd.concat([train_result4_ppo, test_result4_ppo])
all_result4_ppo

## Visualization

In [None]:
# Series for the DQN plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result4_dqn["time"],
    all_result4_dqn["step_reward"].cumsum(),
    all_result4_dqn["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Series for the PPO plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result4_ppo["time"],
    all_result4_ppo["step_reward"].cumsum(),
    all_result4_ppo["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()

## 5. Essential Features and Regime Label Based on All Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# timestamps are kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES+REGIME_LABEL_BASED_ON_ALL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split without shuffling: the leading TRAIN_TEST_RATIO share of rows is
# the training set, the remainder the test set (assumes rows are in time order).
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

## Train

In [None]:
# Environment keyword arguments used while tuning.
# max_steps is 10000x (training rows - 1); presumably this keeps the step cap from
# ending episodes during the search -- confirm against the environment.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1) * 10_000,
    start_index=0,
)

# Search settings shared by the DQN and PPO tuning cells.
num_samples = 50
storage_root = "./ray_results"


In [None]:
# DQN hyper-parameter search on the current training split. The evaluation
# cell later reads ./ray_results/dqn_univ3_search_5, so keep this name in sync.
experiment_name = "dqn_univ3_search_5"

results, best = tune_dqn_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

In [None]:
# PPO hyper-parameter search on the same split. Note that this rebinds
# `results` and `best` from the DQN search cell.
experiment_name = "ppo_univ3_search_5"

results, best = tune_ppo_with_ray(
    train_data,
    train_time_data,
    TRAIN_ENV_KW,
    num_samples=num_samples,
    experiment_name=experiment_name,
    storage_root=storage_root,
)

## Evaluate

In [None]:
# Evaluation-time environment settings. TRAIN_ENV_KW is rebound here with
# max_steps equal to (rows - 1) of the split, instead of the enlarged cap used
# for tuning.
TRAIN_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(train_data.shape[0] - 1),
    start_index=0,
)

# Same settings for the held-out split; only max_steps differs.
TEST_ENV_KW = dict(
    init_value=10_000.0,
    liquidation_value=0.0,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=int(test_data.shape[0] - 1),
    start_index=0,
)

# Arguments shared by the DQN and PPO rollout cells.
result_root = "./result"
action_dim = 5


In [None]:
# Directory of the DQN search to evaluate (storage_root / experiment_name of the
# tuning cell); point it elsewhere to evaluate a different search.
experiment_dir = "./ray_results/dqn_univ3_search_5"

res5_dqn = best_dqn_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result5_dqn, test_result5_dqn = (
    res5_dqn[split]["dataframe"] for split in ("train", "test")
)
all_result5_dqn = pd.concat([train_result5_dqn, test_result5_dqn])
all_result5_dqn

In [None]:
# Directory of the PPO search to evaluate (storage_root / experiment_name of the
# tuning cell).
experiment_dir = "./ray_results/ppo_univ3_search_5"

res5_ppo = best_ppo_rollout(
    train_data, train_time_data, test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# The result holds one entry per split, each with a "dataframe"; stack them
# train-first into a single frame.
train_result5_ppo, test_result5_ppo = (
    res5_ppo[split]["dataframe"] for split in ("train", "test")
)
all_result5_ppo = pd.concat([train_result5_ppo, test_result5_ppo])
all_result5_ppo

## Visualization

In [None]:
# Series for the DQN plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result5_dqn["time"],
    all_result5_dqn["step_reward"].cumsum(),
    all_result5_dqn["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()


In [None]:
# Series for the PPO plot: timestamps, running total of step rewards, and the
# allocation ratio at each step.
t, y, a = (
    all_result5_ppo["time"],
    all_result5_ppo["step_reward"].cumsum(),
    all_result5_ppo["allocation_ratio"],
)

fig = pnl_plot(t, y, a, top_title="Reward Trajectory (All)", bottom_title="Action")
fig.show()