In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
import os

# ----------------------------
# Strategy 2: Bollinger Mean Reversion (1-min)
# - If price is far below rolling mean (z < -ENTER_Z) -> go LONG (expect reversion up)
# - If price is far above rolling mean (z > +ENTER_Z) -> go SHORT (expect reversion down)
# - Exit when z comes back inside +/- EXIT_Z (hysteresis reduces churn)
# ----------------------------

# --- Paths (project root = one level above /notebooks) ---
PROJECT_ROOT = Path(os.getcwd()).resolve().parents[0]
DATA_DIR = PROJECT_ROOT / "data_processed"
G1_TRADE_DIR = DATA_DIR / "group1" / "trade"

print("PROJECT_ROOT:", PROJECT_ROOT)
print("G1_TRADE_DIR:", G1_TRADE_DIR, "exists:", G1_TRADE_DIR.exists())
assert G1_TRADE_DIR.exists(), f"Missing: {G1_TRADE_DIR}"

# --- Load one quarter first (end-to-end sanity check) ---
g1_files = sorted(G1_TRADE_DIR.glob("*.parquet"))
print("Num quarters (G1 trade):", len(g1_files))
assert len(g1_files) > 0, "No parquet files found in group1/trade"

sample_file = g1_files[0]
df = pd.read_parquet(sample_file)

ASSET = "SP"
price = df[ASSET].astype(float).dropna().sort_index()
print("Using file:", sample_file.name)
print("Price rows:", len(price), "from", price.index.min(), "to", price.index.max())

PROJECT_ROOT: C:\Users\LO79RS\HFD_Project
G1_TRADE_DIR: C:\Users\LO79RS\HFD_Project\data_processed\group1\trade exists: True
Num quarters (G1 trade): 7
Using file: data1_2023_Q1.parquet
Price rows: 23120 from 2023-01-03 09:56:00+00:00 to 2023-03-31 16:00:00+00:00


In [3]:
# --- Parameters (1-min data) ---
BOLL_WIN = 120      # rolling window in minutes (memory)
ENTER_Z  = 2.0      # entry threshold (far from mean)
EXIT_Z   = 0.5      # exit threshold (closer to mean)  -> hysteresis
MIN_HOLD = 5

Bollinger inputs (mid, sigma, z-score)

In [4]:
import numpy as np

# Bollinger parameters (1-minute data)
BOLL_WIN = 120  # rolling window in minutes

mid = price.rolling(BOLL_WIN).mean()
sigma = price.rolling(BOLL_WIN).std(ddof=0)

# Avoid division by zero
sigma = sigma.replace(0, np.nan)

z = (price - mid) / sigma
z.name = "zscore"

print("BOLL_WIN:", BOLL_WIN)
print("z-score summary:")
z.describe()

BOLL_WIN: 120
z-score summary:


count    22770.000000
mean         0.141247
std          1.508879
min        -10.908712
25%         -0.896547
50%          0.221401
75%          1.129227
max         10.908712
Name: zscore, dtype: float64

Mean Reversion Entry/Exit

In [5]:
# Mean reversion thresholds
Z_ENTER = 2.0
Z_EXIT = 0.5

raw_pos = pd.Series(0, index=price.index, dtype=int)

# Entry
raw_pos[z > Z_ENTER] = -1
raw_pos[z < -Z_ENTER] = 1

# Exit (flat zone)
raw_pos[(z.abs() < Z_EXIT)] = 0

raw_pos.name = "raw_pos"

print("Z_ENTER:", Z_ENTER, "Z_EXIT:", Z_EXIT)
raw_pos.value_counts(dropna=False)

Z_ENTER: 2.0 Z_EXIT: 0.5


raw_pos
 0    19713
-1     1891
 1     1516
Name: count, dtype: int64

In [6]:
# Build persistent position from raw signal
pos = pd.Series(0, index=price.index, dtype=int)

state = 0
for t in range(len(price)):
    r = raw_pos.iloc[t]

    if state == 0:
        # enter only when signal appears
        if r != 0:
            state = r

    elif state == 1:
        # exit long only when flat signal appears
        if r == 0:
            state = 0

    elif state == -1:
        # exit short only when flat signal appears
        if r == 0:
            state = 0

    pos.iloc[t] = state

pos.name = "pos"

pos.value_counts(dropna=False)

pos
 0    19713
-1     1909
 1     1498
Name: count, dtype: int64

Trade counts (entries/exits/flips)

In [8]:
# Trade statistics based on position changes
prev_pos = pos.shift(1).fillna(0)

n_flips = (pos != prev_pos).sum()
n_entries = ((prev_pos == 0) & (pos != 0)).sum()
n_exits = ((prev_pos != 0) & (pos == 0)).sum()

print("Trade summary:")
print("Total position changes:", n_flips)
print("Entries (0 -> +/-1):", n_entries)
print("Exits (+/-1 -> 0):", n_exits)

Trade summary:
Total position changes: 986
Entries (0 -> +/-1): 493
Exits (+/-1 -> 0): 493


In [9]:
# Direct long <-> short flips (should be zero for clean MR logic)

long_to_short = ((prev_pos == 1) & (pos == -1)).sum()
short_to_long = ((prev_pos == -1) & (pos == 1)).sum()

print("Direct flips summary:")
print("Long -> Short:", long_to_short)
print("Short -> Long:", short_to_long)

Direct flips summary:
Long -> Short: 0
Short -> Long: 0


In [10]:
# PnL backbone (1-minute, position shifted to avoid look-ahead)

dP = price.diff()

# Trade next bar using previous bar's position
pos_lag = pos.shift(1).fillna(0).astype(int)

# Gross PnL in index points (no transaction costs)
pnl_points = pos_lag * dP

print("Gross PnL (points) summary:")
print(pnl_points.describe())

Gross PnL (points) summary:
count    23119.000000
mean         0.007137
std          0.895547
min        -26.031000
25%         -0.000000
50%          0.000000
75%          0.000000
max         50.982000
dtype: float64


In [11]:
# Aggregate 1-minute PnL to daily gross PnL

pnl_daily = pnl_points.resample("1D").sum()

print("Daily gross PnL summary:")
print(pnl_daily.describe())

Daily gross PnL summary:
count    88.000000
mean      1.874898
std      10.423467
min     -21.828000
25%      -2.613250
50%       0.000000
75%       7.655000
max      37.238000
dtype: float64
