In [3]:
import os

def report_dir(path: str):
    """Print whether *path* exists and (if yes) list its contents."""
    exists = os.path.exists(path)
    print(f"{path} exists: {exists}")
    if exists:
        print(f"Files in {path}: {os.listdir(path) or '[empty]'}")
    print()

report_dir("./data_normalized")
report_dir("./data_raw")

./data_normalized exists: True
Files in ./data_normalized: ['norm_book_btc_usdt_20250215_20250228.parquet', 'norm_book_btc_usdt_20241115_20241130.parquet', 'norm_book_btc_usdt_20250201_20250215.parquet', 'norm_book_btc_usdt_20241101_20241115.parquet', 'norm_book_btc_usdt_20240915_20240930.parquet', 'norm_book_btc_usdt_20250115_20250131.parquet', 'norm_book_btc_usdt_20241015_20241031.parquet', 'norm_book_btc_usdt_20241201_20241215.parquet', 'norm_book_btc_usdt_20241001_20241015.parquet', 'norm_book_btc_usdt_20250101_20250115.parquet', 'norm_book_btc_usdt_20240901_20240915.parquet', 'norm_book_btc_usdt_20250301_20250305.parquet', 'norm_book_btc_usdt_20250305_20250310.parquet', 'norm_book_btc_usdt_20241215_20241231.parquet']

./data_raw exists: True
Files in ./data_raw: ['book_btc_usdt_20250201_20250215.parquet', 'book_btc_usdt_20241201_20241215.parquet', 'book_btc_usdt_20250215_20250228.parquet', 'book_btc_usdt_20240101_20240115.parquet', 'book_btc_usdt_20241101_20241115.parquet', 'book_

### Download raw and normalized LOB data for prediction.

In [None]:
# 📦 one-time install (does nothing if gdown is already installed)
!pip install -q gdown

import gdown, pathlib, os

# ---------------------------------------------------------------------------
# RAW LOB  (20-level, 6 Hz)
# ---------------------------------------------------------------------------
raw_url   = "https://drive.google.com/file/d/1PDzhgYNHmekLSUsYIw4YWebEW2gaVeV0/view?usp=sharing"
raw_path  = pathlib.Path("data_raw/book_btc_usdt_20250305_20250310.parquet")
raw_path.parent.mkdir(exist_ok=True)

if raw_path.exists() and raw_path.stat().st_size > 0:
    print(f"✔️  {raw_path.name} already exists – download skipped.")
else:
    print(f"⬇️  downloading RAW → {raw_path} …")
    gdown.download(url=raw_url, output=str(raw_path), quiet=False, fuzzy=True)
    print("✅ download finished." if raw_path.stat().st_size else "⚠️ download failed.")

# ---------------------------------------------------------------------------
# NORMALISED LOB  (10-level, z-scored)  – add your own Drive link below
# ---------------------------------------------------------------------------
norm_url  = "https://drive.google.com/file/d/1mC0EEoo5neMkAly5cnzaxo0S-S6lfsUz/view?usp=sharing"  # <-- put the share link here
norm_path = pathlib.Path("data_normalized/norm_book_btc_usdt_20250305_20250310.parquet")
norm_path.parent.mkdir(exist_ok=True)

if norm_path.exists() and norm_path.stat().st_size > 0:
    print(f"✔️  {norm_path.name} already exists – download skipped.")
else:
    print(f"⬇️  downloading NORMALISED → {norm_path} …")
    gdown.download(url=norm_url, output=str(norm_path), quiet=False, fuzzy=True)
    print("✅ download finished." if norm_path.stat().st_size else "⚠️ download failed.")

⬇️  downloading RAW → data_raw/book_btc_usdt_20250305_202503101.parquet …


Downloading...
From (original): https://drive.google.com/uc?id=1PDzhgYNHmekLSUsYIw4YWebEW2gaVeV0
From (redirected): https://drive.google.com/uc?id=1PDzhgYNHmekLSUsYIw4YWebEW2gaVeV0&confirm=t&uuid=a148a6b4-8e6e-41b7-ba89-a957aa7f0201
To: /home/andras/btc-project/szakdolgozat-high-freq-btc-prediction/data_raw/book_btc_usdt_20250305_202503101.parquet
100%|██████████| 487M/487M [02:29<00:00, 3.26MB/s] 


✅ download finished.
⬇️  downloading NORMALISED → data_normalized/norm_book_btc_usdt_20250305_202503101.parquet …


Downloading...
From (original): https://drive.google.com/uc?id=1mC0EEoo5neMkAly5cnzaxo0S-S6lfsUz
From (redirected): https://drive.google.com/uc?id=1mC0EEoo5neMkAly5cnzaxo0S-S6lfsUz&confirm=t&uuid=086c036b-c81f-4dd3-aeb2-c141903881f0
To: /home/andras/btc-project/szakdolgozat-high-freq-btc-prediction/data_normalized/norm_book_btc_usdt_20250305_202503101.parquet
100%|██████████| 439M/439M [02:45<00:00, 2.65MB/s] 

✅ download finished.





## Validation pipeline

In [None]:
from IV_validation import validate_model

validate_model(
    start_date="2025-03-05",
    end_date="2025-03-10",
    model_path="./models/deeplob_single_parallel_f1_0.4369.pt",
    data_dir="./data_normalized"
)

Using cuda
GPU: NVIDIA RTX A500 Laptop GPU
Memory total: 4.29 GB
Using cuda
GPU: NVIDIA RTX A500 Laptop GPU
Memory total: 4.29 GB

=== DeepLOB Model Validation ===
Time period: 2025-03-05 to 2025-03-10
Symbol: BTC-USDT
Model: ./models/deeplob_single_parallel_f1_0.4369.pt
Finding normalized data for BTC-USDT from 2025-03-05 00:00:00 to 2025-03-10 00:00:00...
Found 2 normalized files:
  norm_book_btc_usdt_20250301_20250305.parquet: 2025-03-05 00:00:00 to 2025-03-05 00:00:00
  norm_book_btc_usdt_20250305_20250310.parquet: 2025-03-05 00:00:00 to 2025-03-10 00:00:00
Found 2 files for processing
File information loaded in 0.01s
Initializing model from ./models/deeplob_single_parallel_f1_0.4369.pt...




KeyboardInterrupt: 

# Trading strategy & visualization

In [3]:
import os
# ---------------------------------------------------------------------------
# 🛈  Interactive trading-strategy visualiser
#     (loads DeepLOB predictions + raw prices and overlays the buy/sell logic)
# ---------------------------------------------------------------------------
from V_trading_strategy_visualization import load_and_visualize_strategy

# ── 1. User-configurable parameters ─────────────────────────────────────────
start_date       = "20250305"   # first day (YYYYMMDD) - for prediction file
end_date         = "20250310"   # last  day (YYYYMMDD) - for prediction file
signal_threshold = 3            # consecutive identical signals required
                                # to open / close a position

# Visualization date range (this will be shown in the chart)
vis_start_date = '2025-03-09 22:00:01'
vis_end_date = '2025-03-09 23:59:00'

# Format vis dates for filename (YYYYMMDD_HHMMSS format)
vis_start_fmt = vis_start_date.replace(' ', '_').replace(':', '')[:15]
vis_end_fmt = vis_end_date.replace(' ', '_').replace(':', '')[:15]

# Path to DeepLOB prediction export
predictions_file = (
    "./results/deeplob/"
    f"predictions_deeplob_single_parallel_f1_0_{start_date}_{end_date}.parquet"
)
print(f"▶ Using prediction file  : {predictions_file}")
print(f"▶ Signal-threshold (N)   : {signal_threshold}")

# ── 2. Output folder for HTML charts ────────────────────────────────────────
output_dir = "./results/visualizations"
os.makedirs(output_dir, exist_ok=True)

html_path = os.path.join(
    output_dir,
    f"trading_strategy_signal{signal_threshold}_{vis_start_fmt}_{vis_end_fmt}.html"
)

# ── 3. Generate the plot with date range filtering ───────────────────────────
fig = load_and_visualize_strategy(
    predictions_file = predictions_file,
    signal_threshold = signal_threshold,
    save_path        = html_path,      # interactive Plotly HTML
    date_range=(vis_start_date, vis_end_date)
)

print(f"✅ Chart saved to: {html_path}")

▶ Using prediction file  : ./results/deeplob/predictions_deeplob_single_parallel_f1_0_20250305_20250310.parquet
▶ Signal-threshold (N)   : 3
Loading predictions from ./results/deeplob/predictions_deeplob_single_parallel_f1_0_20250305_20250310.parquet
Loaded data shape: (2784801, 4)
Time range: 2025-03-05 00:00:00.980656640 - 2025-03-09 23:59:27.416447488
Preprocessing data...
Data preprocessing complete.
Generating trades with signal threshold: 3...
Generated 96066 trades

Trading Strategy Results:
Total trades: 96066
Winning trades: 27922 (29.07%)
Simple-sum return: 10.26%
Average return per trade: 0.00%
Average trade duration: 3.03 seconds
Median trade duration: 2.10 seconds
Sharpe ratio: 7.43
Maximum drawdown: -8.19%
Buy and Hold Return: -7.59%
Strategy Outperformance: 17.84%
Total number of trades: 96066
Filtered to 1463 trades in date range (2025-03-09 22:00:01 to 2025-03-09 23:59:00)
Preparation time: 0.01s
Simple-sum return: 10.26% (matches analyze_performance)
Interactive chart