<a href="https://colab.research.google.com/github/sevintanerdi/HFD_project/blob/main/notebooks/out_of_sample_on_group_2_strategy_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!cd /content
!rm -rf HFD_project
!git clone https://github.com/sevintanerdi/HFD_project.git

Cloning into 'HFD_project'...
remote: Enumerating objects: 546, done.[K
remote: Counting objects: 100% (177/177), done.[K
remote: Compressing objects: 100% (97/97), done.[K
remote: Total 546 (delta 104), reused 131 (delta 79), pack-reused 369 (from 1)[K
Receiving objects: 100% (546/546), 29.58 MiB | 17.07 MiB/s, done.
Resolving deltas: 100% (211/211), done.


In [None]:
!pip install quantstats


Collecting quantstats
  Downloading quantstats-0.0.81-py3-none-any.whl.metadata (10 kB)
Downloading quantstats-0.0.81-py3-none-any.whl (90 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.1/90.1 kB[0m [31m494.4 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: quantstats
Successfully installed quantstats-0.0.81


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import quantstats as qs

In [None]:
# Settings
# Out-of-sample quarters to evaluate.
quarters = ["2023_Q2", "2024_Q1", "2024_Q3", "2025_Q3", "2025_Q4"]

POINT_VALUE = 100  # multiplier applied to position * price change when computing PnL
TCOST = 15         # cost charged per unit of position change

In [None]:
# Sharpe-ratio helper
def mySR(x, scale):
    """Return the Sharpe ratio of `x`, scaled by ``sqrt(scale)``.

    Parameters
    ----------
    x : array-like
        Per-period PnL or returns. NaN entries are ignored.
    scale : int or float
        Number of periods per year used for annualisation (the notebook
        passes 252 for daily series).

    Returns
    -------
    float
        ``sqrt(scale) * nanmean(x) / nanstd(x)`` (population standard
        deviation, numpy's default). ``np.nan`` is returned when the standard
        deviation is zero or undefined (constant, empty or all-NaN input), so
        a degenerate series never yields +/-inf or a divide warning.
    """
    sd = np.nanstd(x)
    # A zero/undefined dispersion makes the ratio meaningless; report NaN,
    # mirroring how calmar_from_pnl treats a zero drawdown.
    if not np.isfinite(sd) or sd == 0:
        return np.nan
    return np.sqrt(scale) * np.nanmean(x) / sd

In [None]:
# Per-quarter summary table; starts empty and is populated by the backtest cell.
summary_group2_final = pd.DataFrame(data=None)

In [None]:
def calmar_from_pnl(daily_pnl, scale=252):
    """Calmar-style ratio: annualised mean PnL over the absolute max drawdown.

    Parameters
    ----------
    daily_pnl : pandas.Series
        Per-period PnL values.
    scale : int, default 252
        Factor that annualises the mean per-period PnL.

    Returns
    -------
    float
        ``daily_pnl.mean() * scale / |max drawdown|``, or ``np.nan`` when the
        cumulative PnL never falls below its running maximum.
    """
    equity = daily_pnl.cumsum()
    # Drawdown is measured against the running peak of cumulative PnL.
    # NOTE(review): the peak starts at the first cumulative value, not at 0,
    # so a loss on the very first period is not counted - confirm intended.
    worst_drawdown = (equity - equity.cummax()).min()
    if worst_drawdown == 0:
        return np.nan
    annualised_pnl = daily_pnl.mean() * scale
    return annualised_pnl / abs(worst_drawdown)

In [None]:
# Out-of-sample backtest of the XAU EMA(20, 100) momentum strategy.
# Every accumulator is (re)built inside this cell, so running it again does not
# append duplicate rows to the summary table.
OOS_DATA_DIR = "/content/HFD_project/data_processed/group2_processed_outofsampledata"
OUTPUT_DIR = "/content/HFD_project/outputs"


def _daily_sum(values, timestamps):
    """Sum intraday `values` per calendar date; returns a datetime-indexed Series."""
    daily = pd.Series(values, index=timestamps.date).groupby(level=0).sum()
    daily.index = pd.to_datetime(daily.index)
    return daily


def _save_cumulative_pnl_plot(gross_daily, net_daily, title, path):
    """Plot cumulative gross and net daily PnL and save the figure to `path`."""
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(gross_daily.cumsum(), label="Gross PnL")
    ax.plot(net_daily.cumsum(), label="Net PnL")
    ax.set_title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Cumulative PnL")
    ax.legend()
    fig.savefig(path, dpi=300, bbox_inches="tight")
    plt.close(fig)  # release the figure; one is created per quarter


gross_daily_parts, net_daily_parts, summary_rows = [], [], []

for quarter in quarters:

    data = pd.read_parquet(f"{OOS_DATA_DIR}/data2_{quarter}_processed.parquet")

    pos_flat = data["pos_flat"].values

    fastEMA = data["XAU"].ewm(span=20).mean()
    slowEMA = data["XAU"].ewm(span=100).mean()

    # The signal uses the previous row's EMAs, so the position held over a row
    # only depends on prices observed before it.
    prev_fast = fastEMA.shift(1)
    prev_slow = slowEMA.shift(1)
    pos = np.where(
        prev_fast.notna() & prev_slow.notna(),
        np.where(prev_fast > prev_slow, 1, -1),
        np.nan,
    )
    # Rows flagged pos_flat == 1 are forced to a flat (zero) position.
    pos[pos_flat == 1] = 0

    # Gross PnL per row; rows with an undefined position or price change earn 0.
    raw_pnl = pos * data["XAU"].diff()
    pnl_gross = np.where(np.isnan(raw_pnl), 0, raw_pnl * POINT_VALUE)

    # Transactions are absolute position changes, starting from flat. Rows with
    # an undefined (NaN) position are treated as flat here; otherwise the NaN
    # propagates into the cost, is skipped by the daily sum, and the cost of
    # the first entry trade is silently lost.
    pos_for_costs = np.where(np.isnan(pos), 0, pos)
    ntrans = np.abs(np.diff(pos_for_costs, prepend=0))
    pnl_net = pnl_gross - ntrans * TCOST

    pnl_gross_d = _daily_sum(pnl_gross, data.index)
    pnl_net_d = _daily_sum(pnl_net, data.index)
    ntrans_d = _daily_sum(ntrans, data.index)

    gross_daily_parts.append(pnl_gross_d)
    net_daily_parts.append(pnl_net_d)

    net_sr = mySR(pnl_net_d, 252)
    net_total = pnl_net_d.sum()
    summary_rows.append({
        "quarter": quarter,
        "strategy": "momentum",
        "params": "EMA20-100",
        "gross_SR": mySR(pnl_gross_d, 252),
        "net_SR": net_sr,
        "gross_PnL": pnl_gross_d.sum(),
        "net_PnL": net_total,
        "gross_CR": calmar_from_pnl(pnl_gross_d),
        "net_CR": calmar_from_pnl(pnl_net_d),
        "av_daily_ntrans": ntrans_d.mean(),
        # (net_SR - 0.5) * max(0, log(|net_PnL / 1000|))
        "stat": (net_sr - 0.5)
                * np.maximum(0, np.log(np.abs(net_total / 1000))),
    })

    _save_cumulative_pnl_plot(
        pnl_gross_d,
        pnl_net_d,
        f"XAU EMA(20,100) – {quarter} (Out-of-sample)",
        f"{OUTPUT_DIR}/XAU_EMA20_100_{quarter}_OOS.png",
    )

# Build the summary once from the collected rows (one row per quarter).
summary_group2_final = pd.DataFrame(summary_rows)

# Aggregate across all out-of-sample quarters
pnl_gross_all = pd.concat(gross_daily_parts).sort_index()
pnl_net_all   = pd.concat(net_daily_parts).sort_index()

_save_cumulative_pnl_plot(
    pnl_gross_all,
    pnl_net_all,
    "Aggregated Cumulative PnL – Out-of-sample",
    f"{OUTPUT_DIR}/aggregated_cumulative_pnl_outofsample.png",
)

In [None]:
# Display the per-quarter out-of-sample summary table.
summary_group2_final

Unnamed: 0,quarter,strategy,params,gross_SR,net_SR,gross_PnL,net_PnL,gross_CR,net_CR,av_daily_ntrans,stat
0,2023_Q2,momentum,EMA20-100,-1.428548,-2.532374,-10746.4,-19536.4,-1.925321,-2.472077,7.61039,-9.013063
1,2024_Q1,momentum,EMA20-100,1.357649,-0.239169,7955.5,-1449.5,1.860873,-0.265252,8.038462,-0.274393
2,2024_Q3,momentum,EMA20-100,-0.517676,-1.45933,-5224.2,-15089.5,-0.754301,-1.684601,8.35443,-5.31762
3,2025_Q3,momentum,EMA20-100,0.95335,0.145074,11615.9,1793.9,3.143143,0.344148,8.177215,-0.207416
4,2025_Q4,momentum,EMA20-100,2.717001,2.373147,27936.5,24582.5,18.455727,15.706342,7.62963,5.997881


In [None]:
# Persist the per-quarter summary as CSV, without the row index.
summary_csv_path = "/content/HFD_project/outputs/summary_group2_XAU_EMA20_100_outofsample.csv"
summary_group2_final.to_csv(summary_csv_path, index=False)

In [None]:
import matplotlib.pyplot as plt

# Render the summary (rounded to 3 decimals) as a table-only image.
fig, ax = plt.subplots(figsize=(14, 4))
ax.axis("off")  # no axes, only the table is drawn

rounded_summary = summary_group2_final.round(3)
table = ax.table(
    cellText=rounded_summary.values,
    colLabels=rounded_summary.columns,
    loc="center",
)

# Fixed font size and taller rows for readability.
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1, 1.5)

fig.savefig(
    "/content/HFD_project/outputs/summary_stats_group2_outofsample.png",
    dpi=300,
    bbox_inches="tight",
)
plt.close(fig)
