# Import Packages

In [None]:
import pandas as pd
import numpy as np
import sys
import os
import torch

# Add the parent directory to the import path so project-local packages
# can be imported from this folder.
# NOTE: '__file__' is intentionally a quoted string, so the path is resolved
# relative to the current working directory (presumably because the real
# `__file__` is not defined when this runs as a notebook cell).
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath('__file__')))
if parent_dir not in sys.path:  # re-running the cell must not pile up duplicates
    sys.path.append(parent_dir)

# Import ElegantRL components
from elegantrl.agents.AgentPPO import AgentPPO
from elegantrl.agents.AgentDQN import AgentDQN
from elegantrl.train.config import Config
from elegantrl.train.run import train_agent

# Import our custom environment from utils
from utils.env import UniswapV3LiquidityEnv
from utils.tune import tune_ppo_with_ray
from utils.eval import best_ppo_rollout
from utils.visualize import pnl_plot

# Report that the imports resolved, plus the PyTorch version, the compute
# device and the directory that was added to the import path.
print(
    "✅ All imports successful!",
    f"PyTorch version: {torch.__version__}",
    f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}",
    f"Parent directory added: {parent_dir}",
    sep="\n",
)

# Loading Data

In [None]:
# Load the hourly dataset and print a quick overview of it.
data_path = "../data/organized_hourly_data.csv"
data = pd.read_csv(data_path)

print(f"Data shape: {data.shape}")
print(f"Data columns: {data.columns.tolist()}")
print("\nFirst few rows:")
print(data.head())

# Column groups that the experiment sections combine into feature sets.
ESSENTIAL_FEATURES = ['closed_price', 'scaled_volume_WETH', 'scaled_volume_USDC']
MICROSTRUCTURE_FEATURES = ['volume_imbalance', 'scaled_total_volume', 'n_swap', 'interval_swap', 'liquidity', 'tick', 'R_ewma', 'volatility_ewm']
TECHNICAL_FEATURES = ['ma24', 'ma168', 'bb_upper', 'bb_middle', 'bb_lower', 'adxr', 'dx', 'n_mb']
# NOTE(review): both regime-label groups are still empty, so a feature set
# built as ESSENTIAL_FEATURES + <regime group> is identical to
# ESSENTIAL_FEATURES alone until regime-label column names are added here.
# The misspelling "MICROSTURCTURE" is kept because the name is referenced
# elsewhere in this notebook.
REGIME_LABEL_BASED_ON_MICROSTURCTURE_FEATURES = []
REGIME_LABEL_BASED_ON_ALL_FEATURES = []

print(f"\nEssential Features: {ESSENTIAL_FEATURES}")
print(f"Microstructure Features: {MICROSTRUCTURE_FEATURES}")
print(f"Technical Features: {TECHNICAL_FEATURES}")

## 1. Essential Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# the time column is kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the leading TRAIN_TEST_RATIO share of the rows is the
# training set, the remaining rows are the test set.
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

In [None]:
# Environment keyword arguments for the search; max_steps is set to the
# number of training rows.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# Settings for the hyper-parameter search of this section.
num_samples = 20
experiment_name = "ppo_univ3_search_1"
storage_root = "./ray_results"

In [None]:
# Run the PPO hyper-parameter search on the training split.
# param_space=None presumably selects the helper's default search space;
# confirm in utils.tune.
results, best = tune_ppo_with_ray(
    train_data=train_data,
    train_time_data=train_time_data,
    TRAIN_ENV_KW=TRAIN_ENV_KW,
    experiment_name=experiment_name,
    storage_root=storage_root,
    num_samples=num_samples,
    param_space=None,
)

In [None]:
# Environment keyword arguments for the rollouts. The env takes `max_steps`
# (not `max_step`); it is set to the row count of the respective split.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# The test settings differ from the training settings only in max_steps.
TEST_ENV_KW = {**TRAIN_ENV_KW, "max_steps": test_data.shape[0]}

# Rollout settings.
# NOTE(review): experiment_dir=None presumably lets the rollout helper pick
# the experiment itself; set an explicit path to a specific PPO search
# directory if a particular run must be used.
result_root = "./result"
experiment_dir = None
action_dim = 5


In [None]:
# Roll out the PPO policy on the train and test splits. The output root,
# experiment directory and action count are taken from the configuration
# variables (instead of repeated literals) so that edits to them take effect.
res1 = best_ppo_rollout(
    train_data, train_time_data,
    test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# Per-split result frames, plus both stacked (train rows first).
train_result1 = res1["train"]["dataframe"]
test_result1 = res1["test"]["dataframe"]
all_result1 = pd.concat([train_result1, test_result1])
all_result1

In [None]:
# Plot the training rollout: time, cumulative reward and allocation ratio.
t, y, a = (train_result1[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Train)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the test rollout: time, cumulative reward and allocation ratio.
t, y, a = (test_result1[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Test)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the stacked train+test results; the reward curve is accumulated from
# the per-step rewards of the combined frame.
t, a = (all_result1[col] for col in ("time", "allocation_ratio"))
y = all_result1["step_reward"].cumsum()

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (All)",
    bottom_title="Action",
)
fig.show()


## 2. Essential Features and Microstructure Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# the time column is kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES+MICROSTRUCTURE_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the leading TRAIN_TEST_RATIO share of the rows is the
# training set, the remaining rows are the test set.
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

In [None]:
# Environment keyword arguments for the search; max_steps is set to the
# number of training rows.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# Settings for the hyper-parameter search of this section.
num_samples = 20
experiment_name = "ppo_univ3_search_2"
storage_root = "./ray_results"

In [None]:
# Run the PPO hyper-parameter search on the training split.
# param_space=None presumably selects the helper's default search space;
# confirm in utils.tune.
results, best = tune_ppo_with_ray(
    train_data=train_data,
    train_time_data=train_time_data,
    TRAIN_ENV_KW=TRAIN_ENV_KW,
    experiment_name=experiment_name,
    storage_root=storage_root,
    num_samples=num_samples,
    param_space=None,
)

In [None]:
# Environment keyword arguments for the rollouts. The env takes `max_steps`
# (not `max_step`); it is set to the row count of the respective split.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# The test settings differ from the training settings only in max_steps.
TEST_ENV_KW = {**TRAIN_ENV_KW, "max_steps": test_data.shape[0]}

# Rollout settings.
# NOTE(review): experiment_dir=None presumably lets the rollout helper pick
# the experiment itself; set an explicit path to a specific PPO search
# directory if a particular run must be used.
result_root = "./result"
experiment_dir = None
action_dim = 5


In [None]:
# Roll out the PPO policy on the train and test splits. The output root,
# experiment directory and action count are taken from the configuration
# variables (instead of repeated literals) so that edits to them take effect.
res2 = best_ppo_rollout(
    train_data, train_time_data,
    test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# Per-split result frames, plus both stacked (train rows first).
train_result2 = res2["train"]["dataframe"]
test_result2 = res2["test"]["dataframe"]
all_result2 = pd.concat([train_result2, test_result2])
all_result2

In [None]:
# Plot the training rollout: time, cumulative reward and allocation ratio.
t, y, a = (train_result2[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Train)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the test rollout: time, cumulative reward and allocation ratio.
t, y, a = (test_result2[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Test)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the stacked train+test results; the reward curve is accumulated from
# the per-step rewards of the combined frame.
t, a = (all_result2[col] for col in ("time", "allocation_ratio"))
y = all_result2["step_reward"].cumsum()

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (All)",
    bottom_title="Action",
)
fig.show()


## 3. Essential Features, Microstructure Features and Technical Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# the time column is kept in a separate array.
numeric_data = data[ESSENTIAL_FEATURES+MICROSTRUCTURE_FEATURES+TECHNICAL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the leading TRAIN_TEST_RATIO share of the rows is the
# training set, the remaining rows are the test set.
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

In [None]:
# Environment keyword arguments for the search; max_steps is set to the
# number of training rows.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# Settings for the hyper-parameter search of this section.
num_samples = 20
experiment_name = "ppo_univ3_search_3"
storage_root = "./ray_results"

In [None]:
# Run the PPO hyper-parameter search on the training split.
# param_space=None presumably selects the helper's default search space;
# confirm in utils.tune.
results, best = tune_ppo_with_ray(
    train_data=train_data,
    train_time_data=train_time_data,
    TRAIN_ENV_KW=TRAIN_ENV_KW,
    experiment_name=experiment_name,
    storage_root=storage_root,
    num_samples=num_samples,
    param_space=None,
)

In [None]:
# Environment keyword arguments for the rollouts. The env takes `max_steps`
# (not `max_step`); it is set to the row count of the respective split.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# The test settings differ from the training settings only in max_steps.
TEST_ENV_KW = {**TRAIN_ENV_KW, "max_steps": test_data.shape[0]}

# Rollout settings.
# NOTE(review): experiment_dir=None presumably lets the rollout helper pick
# the experiment itself; set an explicit path to a specific PPO search
# directory if a particular run must be used.
result_root = "./result"
experiment_dir = None
action_dim = 5


In [None]:
# Roll out the PPO policy on the train and test splits. The output root,
# experiment directory and action count are taken from the configuration
# variables (instead of repeated literals) so that edits to them take effect.
res3 = best_ppo_rollout(
    train_data, train_time_data,
    test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# Per-split result frames, plus both stacked (train rows first).
train_result3 = res3["train"]["dataframe"]
test_result3 = res3["test"]["dataframe"]
all_result3 = pd.concat([train_result3, test_result3])
all_result3

In [None]:
# Plot the training rollout: time, cumulative reward and allocation ratio.
t, y, a = (train_result3[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Train)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the test rollout: time, cumulative reward and allocation ratio.
t, y, a = (test_result3[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Test)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the stacked train+test results; the reward curve is accumulated from
# the per-step rewards of the combined frame.
t, a = (all_result3[col] for col in ("time", "allocation_ratio"))
y = all_result3["step_reward"].cumsum()

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (All)",
    bottom_title="Action",
)
fig.show()


## 4. Essential Features and Regime Label Based on Microstructure Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# the time column is kept in a separate array.
# NOTE(review): the regime-label group is defined as an empty list in the
# data-loading cell, so unless it has been filled in, this selects the
# essential features only.
numeric_data = data[ESSENTIAL_FEATURES+REGIME_LABEL_BASED_ON_MICROSTURCTURE_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the leading TRAIN_TEST_RATIO share of the rows is the
# training set, the remaining rows are the test set.
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

In [None]:
# Environment keyword arguments for the search; max_steps is set to the
# number of training rows.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# Settings for the hyper-parameter search of this section.
num_samples = 20
experiment_name = "ppo_univ3_search_4"
storage_root = "./ray_results"

In [None]:
# Run the PPO hyper-parameter search on the training split.
# param_space=None presumably selects the helper's default search space;
# confirm in utils.tune.
results, best = tune_ppo_with_ray(
    train_data=train_data,
    train_time_data=train_time_data,
    TRAIN_ENV_KW=TRAIN_ENV_KW,
    experiment_name=experiment_name,
    storage_root=storage_root,
    num_samples=num_samples,
    param_space=None,
)

In [None]:
# Environment keyword arguments for the rollouts. The env takes `max_steps`
# (not `max_step`); it is set to the row count of the respective split.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# The test settings differ from the training settings only in max_steps.
TEST_ENV_KW = {**TRAIN_ENV_KW, "max_steps": test_data.shape[0]}

# Rollout settings.
# NOTE(review): experiment_dir=None presumably lets the rollout helper pick
# the experiment itself; set an explicit path to a specific PPO search
# directory if a particular run must be used.
result_root = "./result"
experiment_dir = None
action_dim = 5


In [None]:
# Roll out the PPO policy on the train and test splits. The output root,
# experiment directory and action count are taken from the configuration
# variables (instead of repeated literals) so that edits to them take effect.
res4 = best_ppo_rollout(
    train_data, train_time_data,
    test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# Per-split result frames, plus both stacked (train rows first).
train_result4 = res4["train"]["dataframe"]
test_result4 = res4["test"]["dataframe"]
all_result4 = pd.concat([train_result4, test_result4])
all_result4

In [None]:
# Plot the training rollout: time, cumulative reward and allocation ratio.
t, y, a = (train_result4[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Train)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the test rollout: time, cumulative reward and allocation ratio.
t, y, a = (test_result4[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Test)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the stacked train+test results; the reward curve is accumulated from
# the per-step rewards of the combined frame.
t, a = (all_result4[col] for col in ("time", "allocation_ratio"))
y = all_result4["step_reward"].cumsum()

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (All)",
    bottom_title="Action",
)
fig.show()


## 5. Essential Features and Regime Label Based on All Features

In [None]:
# Extract numeric features for the environment (excluding time column);
# the time column is kept in a separate array.
# NOTE(review): the regime-label group is defined as an empty list in the
# data-loading cell, so unless it has been filled in, this selects the
# essential features only.
numeric_data = data[ESSENTIAL_FEATURES+REGIME_LABEL_BASED_ON_ALL_FEATURES].values
time_data = data[["time"]].values
print(f"\nNumeric data shape: {numeric_data.shape}")
print(f"Sample values: {numeric_data[0]}")

# Ordered split: the leading TRAIN_TEST_RATIO share of the rows is the
# training set, the remaining rows are the test set.
TRAIN_TEST_RATIO = 0.8
TRAIN_TEST_SPLIT_POINT = int(numeric_data.shape[0] * TRAIN_TEST_RATIO)

train_data = numeric_data[:TRAIN_TEST_SPLIT_POINT]
train_time_data = time_data[:TRAIN_TEST_SPLIT_POINT]
print(f"\nTrain data shape: {train_data.shape}")
print(f"Sample values: {train_data[0]}")

test_data = numeric_data[TRAIN_TEST_SPLIT_POINT:]
test_time_data = time_data[TRAIN_TEST_SPLIT_POINT:]
print(f"\nTest data shape: {test_data.shape}")
print(f"Sample values: {test_data[0]}")

In [None]:
# Environment keyword arguments for the search; max_steps is set to the
# number of training rows.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# Settings for the hyper-parameter search of this section.
num_samples = 20
experiment_name = "ppo_univ3_search_5"
storage_root = "./ray_results"

In [None]:
# Run the PPO hyper-parameter search on the training split.
# param_space=None presumably selects the helper's default search space;
# confirm in utils.tune.
results, best = tune_ppo_with_ray(
    train_data=train_data,
    train_time_data=train_time_data,
    TRAIN_ENV_KW=TRAIN_ENV_KW,
    experiment_name=experiment_name,
    storage_root=storage_root,
    num_samples=num_samples,
    param_space=None,
)

In [None]:
# Environment keyword arguments for the rollouts. The env takes `max_steps`
# (not `max_step`); it is set to the row count of the respective split.
TRAIN_ENV_KW = dict(
    total_liquidity=1e17,
    gas_cost=5.0,
    fee_tier=0.0005,
    max_steps=train_data.shape[0],
    start_index=0,
)

# The test settings differ from the training settings only in max_steps.
TEST_ENV_KW = {**TRAIN_ENV_KW, "max_steps": test_data.shape[0]}

# Rollout settings.
# NOTE(review): experiment_dir=None presumably lets the rollout helper pick
# the experiment itself; set an explicit path to a specific PPO search
# directory if a particular run must be used.
result_root = "./result"
experiment_dir = None
action_dim = 5


In [None]:
# Roll out the PPO policy on the train and test splits. The output root,
# experiment directory and action count are taken from the configuration
# variables (instead of repeated literals) so that edits to them take effect.
res5 = best_ppo_rollout(
    train_data, train_time_data,
    test_data, test_time_data,
    TRAIN_ENV_KW, TEST_ENV_KW,
    result_root=result_root,
    experiment_dir=experiment_dir,
    action_dim=action_dim,
)

# Per-split result frames, plus both stacked (train rows first).
train_result5 = res5["train"]["dataframe"]
test_result5 = res5["test"]["dataframe"]
all_result5 = pd.concat([train_result5, test_result5])
all_result5

In [None]:
# Plot the training rollout: time, cumulative reward and allocation ratio.
t, y, a = (train_result5[col] for col in ("time", "cum_reward", "allocation_ratio"))

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Train)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the test rollout: time, cumulative reward and allocation ratio.
# The reward series is read from the `cum_reward` column, the same column the
# test plots of the other experiment sections use, so the plots stay
# comparable across feature sets.
# NOTE(review): assumes `cum_reward` equals the running sum of `step_reward`
# within the test rollout; confirm against the rollout helper.
t = test_result5["time"]
y = test_result5["cum_reward"]
a = test_result5["allocation_ratio"]

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (Test)",
    bottom_title="Action",
    vx=False,
)
fig.show()


In [None]:
# Plot the stacked train+test results; the reward curve is accumulated from
# the per-step rewards of the combined frame.
t, a = (all_result5[col] for col in ("time", "allocation_ratio"))
y = all_result5["step_reward"].cumsum()

fig = pnl_plot(
    t, y, a,
    top_title="Reward Trajectory (All)",
    bottom_title="Action",
)
fig.show()


## 