In [4]:
import pandas as pd
import numpy as np
import os

def compute_metrics_consistent(df):
    metrics = {}
    for session, group in df.groupby("session"):
        group = group.sort_values("exit_time")

        if group.empty:
            continue

        capital = 100_000
        capital_curve = []
        for pnl in group["pnl"]:
            capital += pnl
            capital_curve.append(capital)

        returns = pd.Series(capital_curve).pct_change().dropna()

        if len(returns) >= 2:
            mean_return = returns.mean()
            vol = returns.std()
            sharpe = (mean_return / vol) * np.sqrt(252) if vol > 0 else 0
        else:
            sharpe = 0

        cumulative = pd.Series(capital_curve).cummax()
        drawdown = (cumulative - capital_curve) / cumulative
        max_drawdown = drawdown.max()

        wins = group[group["pnl"] > 0]["pnl"]
        losses = group[group["pnl"] <= 0]["pnl"]
        avg_win = wins.mean() if not wins.empty else 0
        avg_loss = losses.mean() if not losses.empty else 0

        win_count = len(wins)
        loss_count = len(losses)
        win_rate = win_count / (win_count + loss_count) if (win_count + loss_count) > 0 else 0

        metrics[session] = {
            "sharpe": sharpe,
            "max_drawdown": max_drawdown,
            "win_rate": win_rate,
            "total_pnl": group["pnl"].sum(),
            "final_capital": capital_curve[-1],
            "num_trades": len(group),
            "avg_win": avg_win,
            "avg_loss": avg_loss
        }
    return metrics

def compare_all_assets(folder_path, bet_method="optimal_f"):
    results = []

    for file in os.listdir(folder_path):
        if not file.endswith(".csv") or not file.startswith("baseline_"):
            continue

        asset = file.split("_")[1]
        method = file.split("_")[2].replace(".csv", "")
        if method != bet_method:
            continue

        baseline_path = os.path.join(folder_path, f"baseline_{asset}_{method}.csv")
        filtered_path = os.path.join(folder_path, f"filtered_{asset}_{method}.csv")

        if not os.path.exists(filtered_path):
            continue

        baseline_df = pd.read_csv(baseline_path)
        filtered_df = pd.read_csv(filtered_path)

        baseline_metrics = compute_metrics_consistent(baseline_df)
        filtered_metrics = compute_metrics_consistent(filtered_df)

        for session in baseline_metrics:
            if session in filtered_metrics:
                base = baseline_metrics[session]
                filt = filtered_metrics[session]

                results.append({
                    "asset": asset,
                    "session": session,
                    "num_trades_base": base["num_trades"],
                    "num_trades_filtered": filt["num_trades"],
                    "trades_filtered_pct": 1 - (filt["num_trades"] / base["num_trades"] if base["num_trades"] > 0 else 1),

                    "win_rate_base": base["win_rate"],
                    "win_rate_filtered": filt["win_rate"],
                    "win_rate_diff": filt["win_rate"] - base["win_rate"],

                    "avg_win_base": base["avg_win"],
                    "avg_win_filtered": filt["avg_win"],
                    "avg_win_diff": filt["avg_win"] - base["avg_win"],

                    "avg_loss_base": base["avg_loss"],
                    "avg_loss_filtered": filt["avg_loss"],
                    "avg_loss_diff": filt["avg_loss"] - base["avg_loss"],

                    "sharpe_base": base["sharpe"],
                    "sharpe_filtered": filt["sharpe"],
                    "sharpe_diff": filt["sharpe"] - base["sharpe"],

                    "drawdown_base": base["max_drawdown"],
                    "drawdown_filtered": filt["max_drawdown"],
                    "drawdown_diff": filt["max_drawdown"] - base["max_drawdown"],

                    "final_capital_base": base["final_capital"],
                    "final_capital_filtered": filt["final_capital"],
                    "capital_diff": filt["final_capital"] - base["final_capital"],

                    "total_pnl_base": base["total_pnl"],
                    "total_pnl_filtered": filt["total_pnl"],
                    "pnl_diff": filt["total_pnl"] - base["total_pnl"]
                })

    df_results = pd.DataFrame(results)

    # Get list of unique assets used
    included_assets = sorted(df_results["asset"].unique())
    print("\n Assets included in this analysis:")
    print(", ".join(included_assets))

    # Optional: Save to file
    with open(f"assets_included_{bet_method}.txt", "w") as f:
        f.write("\n".join(included_assets))

    session_summary = (
        df_results
        .groupby("session")
        .mean(numeric_only=True)
        .reset_index()
        .round(4)
    )

    return df_results, session_summary


folder_path = "results_metalabel/"
method = "optimal_f"  # or "fixed", "fixed_amount", "kelly"

df_all, df_summary = compare_all_assets(folder_path, bet_method=method)

# View results
print(df_summary)

# Optionally export
df_all.to_csv(f"results_all_{method}.csv", index=False)
df_summary.to_csv(f"summary_by_session_{method}.csv", index=False)


KeyError: 'asset'