In [2]:
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().resolve().parents[0]
SRC_PATH = PROJECT_ROOT / "src"

if str(SRC_PATH) not in sys.path:
    sys.path.insert(0, str(SRC_PATH))

print("Using src from:", SRC_PATH)

Using src from: C:\Users\LO79RS\HFD_Project\src


In [3]:
from io_utils import load_data
from config import CONTRACTS

### Strategy 1 (EMA(20)-EMA(240))
Asset: SP (S&P futures)
- Position space: long / short / flat

Trend definition:
- EMA(20) - EMA(240)

Filter:
- Volatility-scaled threshold using 30-minute rolling std

Interpretation:
- Trade only when the trend signal is strong relative to recent market noise.


In [4]:
import numpy as np
import pandas as pd

G1_TRADE_DIR = Path("../data_processed/group1/trade")
g1_files = sorted(G1_TRADE_DIR.glob("*.parquet"))

sample_file = g1_files[0]
df = load_data(str(sample_file))

# Focus on a single liquid contract
price = df["SP"].astype(float).dropna()

print("Using file:", sample_file.name)
print("Price rows:", price.shape[0])
price.head()

Using file: data1_2023_Q1.parquet
Price rows: 23120


datetime
2023-01-03 09:56:00+00:00    3838.926
2023-01-03 09:57:00+00:00    3837.926
2023-01-03 09:58:00+00:00    3835.875
2023-01-03 09:59:00+00:00    3832.116
2023-01-03 10:00:00+00:00    3831.176
Name: SP, dtype: float64

Strategy parameters (SP, EMA, 30-min rule)

In [5]:
ASSET = "SP"

# EMA settings
EMA_FAST = 20
EMA_SLOW = 240

# Position rule
# We want long/short/flat based on EMA spread
BAND = 0.0  

# Trading rule: minimum holding period before switching position
MIN_HOLD = "30min"

In [6]:
# Use one quarter first to validate end-to-end
G1_TRADE_DIR = Path("../data_processed/group1/trade")
g1_trade_files = sorted(G1_TRADE_DIR.glob("*.parquet"))

sample_file = g1_trade_files[0]
df = load_data(str(sample_file))

price = df[ASSET].astype(float).dropna().sort_index()

ema_fast = price.ewm(span=EMA_FAST, adjust=False).mean()
ema_slow = price.ewm(span=EMA_SLOW, adjust=False).mean()

spread = ema_fast - ema_slow

print("Using file:", sample_file.name)
print("Price rows:", len(price), "from", price.index.min(), "to", price.index.max())

Using file: data1_2023_Q1.parquet
Price rows: 23120 from 2023-01-03 09:56:00+00:00 to 2023-03-31 16:00:00+00:00


Raw signal: long/short/flat

In [7]:
VOL_WIN = 120
K_ENTER = 2.0
K_EXIT  = 0.8  # must be smaller than enter

ret = price.pct_change()
vol = ret.rolling(VOL_WIN).std()

enter_band = K_ENTER * vol
exit_band  = K_EXIT  * vol

raw_pos = np.where(spread > enter_band, 1,
          np.where(spread < -enter_band, -1, 0))
raw_pos = pd.Series(raw_pos, index=price.index).fillna(0).astype(int)

# Build position with hysteresis (enter strong, exit weak)
pos = raw_pos.copy()
pos.iloc[:] = 0

for i in range(1, len(pos)):
    prev = pos.iat[i-1]
    s = spread.iat[i]
    eb = exit_band.iat[i]

    # If raw signal says enter, do it
    if raw_pos.iat[i] != 0:
        pos.iat[i] = raw_pos.iat[i]
    else:
        # Otherwise, keep position unless signal weak enough to exit
        if prev == 1 and s < eb:
            pos.iat[i] = 0
        elif prev == -1 and s > -eb:
            pos.iat[i] = 0
        else:
            pos.iat[i] = prev

In [8]:
pos = raw_pos.astype(int)

In [9]:
n_flips = (pos.diff().fillna(0) != 0).sum()
print("Position changes:", int(n_flips))

Position changes: 136


In [10]:
# --- Flip diagnostics (1-min data) ---

# Ensure pos is a clean int series (in case it isn't yet)
pos = pos.astype(int)

# Previous and current position at each timestamp
prev_pos = pos.shift(1)
curr_pos = pos

# Rows where position changed
flip_mask = curr_pos.ne(prev_pos)
flips = pd.DataFrame({"prev": prev_pos[flip_mask], "curr": curr_pos[flip_mask]}).dropna()

# Count flip types
flip_counts = flips.value_counts().sort_values(ascending=False)
print("Flip types (prev -> curr) counts:")
print(flip_counts)

# Summary buckets
n_total = int(flip_mask.sum())
n_long_to_short = int(((prev_pos == 1) & (curr_pos == -1)).sum())
n_short_to_long = int(((prev_pos == -1) & (curr_pos == 1)).sum())
n_enter = int(((prev_pos == 0) & (curr_pos != 0)).sum())
n_exit  = int(((prev_pos != 0) & (curr_pos == 0)).sum())

print("\nSummary:")
print("Total flips:", n_total)
print("Long -> Short:", n_long_to_short)
print("Short -> Long:", n_short_to_long)
print("Enter (0 -> +/-1):", n_enter)
print("Exit (+/-1 -> 0):", n_exit)

Flip types (prev -> curr) counts:
prev  curr
-1.0   1      67
 1.0  -1      66
 0.0  -1       1
       1       1
 1.0   0       1
Name: count, dtype: int64

Summary:
Total flips: 137
Long -> Short: 66
Short -> Long: 67
Enter (0 -> +/-1): 2
Exit (+/-1 -> 0): 2


PnL backbone (1-minute, position shifted)

In [11]:
# 1-minute price changes (in index points)
dP = price.diff()

# Use previous bar's position to trade the next bar (avoid look-ahead)
pos_lag = pos.shift(1).fillna(0).astype(int)

# Gross PnL in index points
pnl_points = pos_lag * dP

pnl_points.describe()

count    23119.000000
mean         0.006300
std          2.199485
min        -63.973000
25%         -0.506000
50%          0.000000
75%          0.554000
max         55.590000
dtype: float64

In [12]:
# Aggregate 1-minute PnL to daily gross PnL
pnl_daily = pnl_points.resample("1D").sum()

print("Daily gross PnL summary:")
print(pnl_daily.describe())

Daily gross PnL summary:
count     88.000000
mean       1.655239
std       38.856623
min     -122.705000
25%       -6.289000
50%        0.000000
75%       26.018250
max       72.263000
dtype: float64


#### Strategy 2

Strategy parameters (SP, EMA, 30-min rule)

In [13]:
ASSET = "SP"

# EMA settings
EMA_FAST_S2 = 20
EMA_SLOW_S2 = 60

# Position rule
# We want long/short/flat based on EMA spread
BAND_S2 = 0.0  

# Trading rule: minimum holding period before switching position
MIN_HOLD = "30min"

In [14]:
ema_fast_s2 = price.ewm(span=EMA_FAST_S2, adjust=False).mean()
ema_slow_s2 = price.ewm(span=EMA_SLOW_S2, adjust=False).mean()

spread = ema_fast_s2- ema_slow_s2

Raw signal: long/short/flat

In [15]:
VOL_WIN_S2 = 120
K_ENTER_S2 = 2.0
K_EXIT_S2  = 0.8  # must be smaller than enter

ret = price.pct_change()
vol2 = ret.rolling(VOL_WIN_S2).std()

enter_band2 = K_ENTER_S2 * vol
exit_band2  = K_EXIT_S2  * vol

raw_pos2= np.where(spread > enter_band, 1,
          np.where(spread < -enter_band, -1, 0))
raw_pos2 = pd.Series(raw_pos2, index=price.index).fillna(0).astype(int)

# Build position with hysteresis (enter strong, exit weak)
pos2 = raw_pos2.copy()
pos2.iloc[:] = 0

for i in range(1, len(pos2)):
    prev = pos2.iat[i-1]
    s = spread.iat[i]
    eb = exit_band.iat[i]

    # If raw signal says enter, do it
    if raw_pos2.iat[i] != 0:
        pos2.iat[i] = raw_pos2.iat[i]
    else:
        # Otherwise, keep position unless signal weak enough to exit
        if prev == 1 and s < eb:
            pos2.iat[i] = 0
        elif prev == -1 and s > -eb:
            pos2.iat[i] = 0
        else:
            pos2.iat[i] = prev

In [16]:
pos2 = raw_pos2.astype(int)

In [17]:
n_flips = (pos2.diff().fillna(0) != 0).sum()
print("Position changes:", int(n_flips))

Position changes: 290


In [18]:
# --- Flip diagnostics (1-min data) ---

# Ensure pos is a clean int series (in case it isn't yet)
pos2 = pos2.astype(int)

# Previous and current position at each timestamp
prev_pos2 = pos2.shift(1)
curr_pos2 = pos2

# Rows where position changed
flip_mask2 = curr_pos2.ne(prev_pos2)
flips2 = pd.DataFrame({"prev": prev_pos2[flip_mask2], "curr": curr_pos2[flip_mask2]}).dropna()

# Count flip types
flip_counts2 = flips2.value_counts().sort_values(ascending=False)
print("Flip types (prev -> curr) counts:")
print(flip_counts2)

# Summary buckets
n_total2 = int(flip_mask2.sum())
n_long_to_short2 = int(((prev_pos2 == 1) & (curr_pos2 == -1)).sum())
n_short_to_long2 = int(((prev_pos2 == -1) & (curr_pos2 == 1)).sum())
n_enter2 = int(((prev_pos2 == 0) & (curr_pos2 != 0)).sum())
n_exit2  = int(((prev_pos2 != 0) & (curr_pos2 == 0)).sum())

print("\nSummary:")
print("Total flips:", n_total2)
print("Long -> Short:", n_long_to_short2)
print("Short -> Long:", n_short_to_long2)
print("Enter (0 -> +/-1):", n_enter2)
print("Exit (+/-1 -> 0):", n_exit2)

Flip types (prev -> curr) counts:
prev  curr
 1.0  -1      141
-1.0   1      140
       0        3
 0.0   1        3
      -1        2
 1.0   0        1
Name: count, dtype: int64

Summary:
Total flips: 291
Long -> Short: 141
Short -> Long: 140
Enter (0 -> +/-1): 5
Exit (+/-1 -> 0): 5


PnL backbone (1-minute, position shifted)

In [19]:
# 1-minute price changes (in index points)
dP = price.diff()

# Use previous bar's position to trade the next bar (avoid look-ahead)
pos_lag2 = pos2.shift(1).fillna(0).astype(int)

# Gross PnL in index points
pnl_points2 = pos_lag2* dP

pnl_points2.describe()

count    23119.000000
mean         0.005060
std          2.199502
min        -63.973000
25%         -0.510000
50%          0.000000
75%          0.531000
max         55.590000
dtype: float64

In [20]:
# Aggregate 1-minute PnL to daily gross PnL
pnl_daily2 = pnl_points2.resample("1D").sum()

print("Daily gross PnL summary:")
print(pnl_daily2.describe())

Daily gross PnL summary:
count     88.000000
mean       1.329352
std       37.463163
min     -111.039000
25%       -9.510500
50%        0.000000
75%       23.345750
max       78.252000
dtype: float64
