In [1621]:
import pandas as pd
import numpy as np
# Load the results table stored at plots/2024/svm_qp_results.csv.
df = pd.read_csv('plots/2024/svm_qp_results.csv')

In [1622]:
# Keep only the rows whose lambda_hinge is exactly 0, then display the result.
df = df.loc[df["lambda_hinge"].eq(0.0)]
df

Unnamed: 0,grid_case,C_svm_init,tau,lambda_hinge,goal_monthly,split,train_feas_pct,val_feas_pct,nn_mean_ret,nn_vol,nn_sharpe,svm_mean_ret,svm_vol,svm_sharpe
0,A,0.01,0.01,0.0,0.005,test,100.0,100.0,0.011188,0.024644,0.453978,0.010675,0.024668,0.432762
1,A,0.01,0.01,0.0,0.005,train,100.0,100.0,0.012192,0.025792,0.472717,0.011546,0.025861,0.446463
2,A,0.01,0.01,0.0,0.005,val,100.0,100.0,0.010570,0.028885,0.365953,0.008838,0.028135,0.314128
3,AC,1.00,0.05,0.0,0.005,test,100.0,100.0,0.016033,0.026269,0.610354,0.010675,0.024668,0.432762
4,AC,1.00,0.05,0.0,0.005,train,100.0,100.0,0.015621,0.026732,0.584343,0.011546,0.025861,0.446463
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319,U,0.10,0.10,0.0,0.020,train,100.0,100.0,0.025633,0.025890,0.990077,0.024876,0.026167,0.950658
320,U,0.10,0.10,0.0,0.020,val,100.0,100.0,0.020162,0.029707,0.678672,0.020331,0.029577,0.687379
321,Y,1.00,0.01,0.0,0.020,test,100.0,100.0,0.034334,0.032253,1.064505,0.027782,0.028674,0.968878
322,Y,1.00,0.01,0.0,0.020,train,100.0,100.0,0.028682,0.029460,0.973594,0.024876,0.026167,0.950658


In [1623]:
import pandas as pd
import numpy as np

def export_insample_frontier(df: pd.DataFrame,
                             goals: list,
                             out_csv: str,
                             select_split: str,
                             select_metric: str,
                             stats_split: str,
                             goal_col: str | None = None) -> pd.DataFrame:
    """
    For each goal in `goals`:
      1) Choose grid_case that maximizes `select_metric` on `select_split`.
      2) Extract NN/SVM mean return & vol from `stats_split` for that grid_case.
      3) Save rows to `out_csv` and return the DataFrame.

    Output columns:
      ['return_goal','grid_case','nn_mean_ret','nn_vol','svm_mean_ret','svm_vol']
    """

    # --- resolve goal column name ---
    if goal_col is None:
        if "goal_monthly" in df.columns:
            goal_col = "goal_monthly"
        elif "return_goal" in df.columns:
            goal_col = "return_goal"
        else:
            raise ValueError("Could not find goal column ('goal_monthly' or 'return_goal').")

    required = {goal_col, "grid_case", "split",
                "nn_mean_ret", "nn_vol", "svm_mean_ret", "svm_vol", select_metric}
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Input df missing required columns: {missing}")

    # clean bad rows
    df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=list(required))

    rows = []
    for g in goals:
        # 1) choose the best grid_case on the selection split
        cand = df[(df[goal_col] == g) & (df["split"] == select_split)]
        if cand.empty:
            print(f"[warn] No rows for goal={g} on split='{select_split}'. Skipping.")
            continue

        best_row = cand.sort_values(select_metric, ascending=True).iloc[0]
        chosen_case = best_row["grid_case"]

        
        # 2) fetch stats from stats_split for that case
        stats = df[(df["split"] == stats_split) &
                   (df[goal_col] == g) &
                   (df["grid_case"] == chosen_case)]

        if stats.empty:
            print(f"[warn] No {stats_split} stats for goal={g}, grid_case={chosen_case}. Skipping.")
            continue

        # If duplicates exist, take the one with highest nn_mean_ret (arbitrary but stable)
        stats = stats.sort_values("nn_mean_ret", ascending=False).iloc[0]

        rows.append({
            "return_goal": float(g),
            "grid_case": chosen_case,
            "nn_mean_ret": float(stats["nn_mean_ret"]),
            "nn_vol": float(stats["nn_vol"]),
            "svm_mean_ret": float(stats["svm_mean_ret"]),
            "svm_vol": float(stats["svm_vol"]),
        })

    out = pd.DataFrame(rows).sort_values("return_goal").reset_index(drop=True)
    out.to_csv(out_csv, index=False)
    print(f"Saved {out_csv} with {len(out)} rows.")
    return out

In [1624]:
# Alternative (lower) goal grid, currently disabled:
#goals = [0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005, 0.0055, 0.006, 0.0065, 0.007, 0.0075]
goals = [0.005, 0.0065, 0.0075, 0.009, 0.0105, 0.012, 0.013, 0.0145, 0.016, 0.017, 0.0185, 0.02]  # 2024 goals
out_csv = "plots/2024/insample.csv"

# Export one row per goal: the grid_case is picked on the validation split and
# its statistics are read from the training split.
insample_frontier = export_insample_frontier(
    df, goals, out_csv,
    select_split="val",      # pick best case by validation
    select_metric="nn_vol",  # the case with the lowest NN vol is selected
    stats_split="train"      # pull stats from training (in-sample)
)

Saved plots/2024/insample.csv with 12 rows.


In [1625]:
# Reload the exported frontier and append the mean-return / volatility ratios
# for both strategies as extra columns.
outputs = pd.read_csv(out_csv).assign(
    nn_sharpe=lambda t: t["nn_mean_ret"] / t["nn_vol"],
    svm_sharpe=lambda t: t["svm_mean_ret"] / t["svm_vol"],
)
outputs

Unnamed: 0,return_goal,grid_case,nn_mean_ret,nn_vol,svm_mean_ret,svm_vol,nn_sharpe,svm_sharpe
0,0.005,I,0.011569,0.025863,0.011546,0.025861,0.447321,0.446463
1,0.0065,E,0.012835,0.025891,0.012666,0.025942,0.495713,0.488256
2,0.0075,E,0.013606,0.025939,0.01347,0.02598,0.524548,0.518481
3,0.009,U,0.01504,0.02609,0.01468,0.02604,0.576477,0.563769
4,0.0105,I,0.01596,0.026092,0.015941,0.026088,0.611701,0.611075
5,0.012,U,0.01768,0.02622,0.017263,0.02611,0.674278,0.661173
6,0.013,U,0.018523,0.026163,0.018169,0.026109,0.708,0.695898
7,0.0145,U,0.019912,0.026188,0.019562,0.026087,0.760362,0.74989
8,0.016,U,0.02138,0.026172,0.020993,0.026079,0.81691,0.804972
9,0.017,I,0.021982,0.026077,0.021955,0.02608,0.842972,0.841831


In [1626]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Defaults for plot_frontier_best_only: input table and output image paths.
CSV_PATH = "plots/2024/insample.csv"
OUTFILE  = "plots/2024/in_sample_frontier.png"

# Plot options (used as the default argument values of plot_frontier_best_only).
ANNUALIZE   = False     # True → annualize return & vol (return x12, vol x sqrt(12))
SMOOTH_CURVE = True     # True → draw a smooth visual curve (PCHIP if available)
N_SMOOTH     = 200      # points on the smooth curve

def _pct(x, _): return f"{x*100:.1f}%"

def _require_cols(df, cols):
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise ValueError(f"CSV missing columns: {missing}")

def _unique_sorted_xy(x, y):
    """ensure strictly increasing x and collapse duplicates by averaging y"""
    order = np.argsort(x)
    x, y = np.asarray(x)[order], np.asarray(y)[order]
    # collapse duplicate x's
    ux, idx = np.unique(x, return_index=True)
    if len(ux) == len(x):
        return x, y
    y_collapsed = np.array([y[x == v].mean() for v in ux])
    return ux, y_collapsed

def _smooth_xy(x, y, n=200):
    """Resample (x, y) onto `n` evenly spaced x values between min(x) and max(x).

    The points are first sorted and de-duplicated with _unique_sorted_xy.
    SciPy's PchipInterpolator is used when it can be imported and evaluated;
    on any exception the curve falls back to linear interpolation (np.interp).
    Returns the tuple (grid, curve).
    """
    x, y = _unique_sorted_xy(x, y)
    grid = np.linspace(x.min(), x.max(), n)
    try:
        from scipy.interpolate import PchipInterpolator
        interpolator = PchipInterpolator(x, y)
        curve = interpolator(grid)
    except Exception:
        # best-effort fallback: SciPy missing or PCHIP rejected the input
        curve = np.interp(grid, x, y)
    return grid, curve

def plot_frontier_best_only(csv_path=CSV_PATH, outfile=OUTFILE,
                            annualize=ANNUALIZE, smooth=SMOOTH_CURVE,
                            n_smooth=N_SMOOTH):
    """Plot the SVM+MVO and NN frontiers (volatility vs. mean return) and save the figure.

    Parameters
    ----------
    csv_path : CSV with columns 'return_goal' (or 'goal_monthly'), 'nn_mean_ret',
        'nn_vol', 'svm_mean_ret', 'svm_vol'.
    outfile : destination passed to fig.savefig. When it is a path, missing
        parent directories are created first.
    annualize : when True, mean returns are multiplied by 12 and vols by sqrt(12).
    smooth : when True and at least 3 rows remain, draw interpolated curves
        (via _smooth_xy); otherwise draw a polyline through the points.
    n_smooth : number of points on each interpolated curve.

    Raises
    ------
    ValueError (from _require_cols) if a required column is missing.
    """
    import os  # local import keeps this cell independent of other cells' imports

    df = pd.read_csv(csv_path)
    # allow either return_goal or goal_monthly
    if "return_goal" not in df.columns and "goal_monthly" in df.columns:
        df = df.rename(columns={"goal_monthly": "return_goal"})

    needed = ["return_goal", "nn_mean_ret", "nn_vol", "svm_mean_ret", "svm_vol"]
    _require_cols(df, needed)

    # drop incomplete rows (+/-inf counts as missing), then order by goal so the
    # labels are produced in a consistent order; lines are ordered by vol later
    df = (df.replace([np.inf, -np.inf], np.nan)
            .dropna(subset=needed)
            .sort_values("return_goal")
            .copy())

    # annualize if requested
    if annualize:
        df["nn_mean_ret"]  *= 12.0
        df["svm_mean_ret"] *= 12.0
        df["nn_vol"]        *= np.sqrt(12.0)
        df["svm_vol"]       *= np.sqrt(12.0)

    # prepare (vol,ret) arrays
    x_svm, y_svm = df["svm_vol"].values, df["svm_mean_ret"].values
    x_nn,  y_nn  = df["nn_vol"].values,  df["nn_mean_ret"].values

    # --- plot ---
    fig, ax = plt.subplots(figsize=(7.5, 5.5))
    try:
        # smooth "guides" (optional, purely visual)
        if smooth and len(df) >= 3:
            xs_svm, ys_svm = _smooth_xy(x_svm, y_svm, n=n_smooth)
            xs_nn,  ys_nn  = _smooth_xy(x_nn,  y_nn,  n=n_smooth)
            ax.plot(xs_svm, ys_svm, lw=2, alpha=0.85, label="SVM+MVO (two-stage)")
            ax.plot(xs_nn,  ys_nn,  lw=2, alpha=0.85, label="End-to-end (NN)")
        else:
            # fallback: polyline through points
            xs, ys = _unique_sorted_xy(x_svm, y_svm)
            ax.plot(xs, ys, marker="o", lw=1.6, label="SVM+MVO (two-stage)")
            xs, ys = _unique_sorted_xy(x_nn, y_nn)
            ax.plot(xs, ys, marker="o", lw=1.6, label="End-to-end (NN)")

        # draw the actual observed best-per-goal points
        ax.scatter(x_svm, y_svm, s=28, zorder=3, label=None)
        ax.scatter(x_nn,  y_nn,  s=28, zorder=3, label=None)

        # arrows from the SVM point to the NN point + goal labels (at NN point)
        for _, r in df.iterrows():
            ax.annotate("", xy=(r["nn_vol"], r["nn_mean_ret"]),
                             xytext=(r["svm_vol"], r["svm_mean_ret"]),
                             arrowprops=dict(arrowstyle="->", lw=1, alpha=0.7))
            ax.annotate(f"{r['return_goal']*100:.2f}%",
                        xy=(r["nn_vol"], r["nn_mean_ret"]),
                        xytext=(5,5), textcoords="offset points", fontsize=9)

        unit = "annualized" if annualize else "monthly"
        ax.set_xlabel(f"Realized volatility ({unit})")
        ax.set_ylabel(f"Realized mean return ({unit})")
        ax.set_title(f"In-sample decision frontier ({unit})")

        ax.xaxis.set_major_formatter(FuncFormatter(_pct))
        ax.yaxis.set_major_formatter(FuncFormatter(_pct))
        ax.grid(True, linewidth=0.6, alpha=0.5)
        ax.legend(frameon=False, loc="lower right")
        fig.tight_layout()

        # Create the output directory when `outfile` is a path (file-like
        # objects are passed straight through to savefig).
        if isinstance(outfile, (str, os.PathLike)):
            out_dir = os.path.dirname(os.fspath(outfile))
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
        fig.savefig(outfile, dpi=300)
    finally:
        # always release the figure, even if drawing or saving failed
        plt.close(fig)
    print(f"Saved {outfile}")

# Run with the default arguments (CSV_PATH, OUTFILE and the plot options defined above).
plot_frontier_best_only()

Saved plots/2024/in_sample_frontier.png


In [1627]:
# Re-read the CSV at CSV_PATH and display it.
output = pd.read_csv(CSV_PATH)
output

Unnamed: 0,return_goal,grid_case,nn_mean_ret,nn_vol,svm_mean_ret,svm_vol
0,0.005,I,0.011569,0.025863,0.011546,0.025861
1,0.0065,E,0.012835,0.025891,0.012666,0.025942
2,0.0075,E,0.013606,0.025939,0.01347,0.02598
3,0.009,U,0.01504,0.02609,0.01468,0.02604
4,0.0105,I,0.01596,0.026092,0.015941,0.026088
5,0.012,U,0.01768,0.02622,0.017263,0.02611
6,0.013,U,0.018523,0.026163,0.018169,0.026109
7,0.0145,U,0.019912,0.026188,0.019562,0.026087
8,0.016,U,0.02138,0.026172,0.020993,0.026079
9,0.017,I,0.021982,0.026077,0.021955,0.02608


In [1628]:
# Reload the full results table; this replaces the `df` used by the earlier cells.
df = pd.read_csv('plots/2024/svm_qp_results.csv')

In [1629]:
# Keep only the rows with lambda_hinge equal to 0.
df = df[df["lambda_hinge"] == 0.0]


In [1630]:
# One frame per value of the 'split' column.
df_train, df_val, df_test = (
    df[df['split'] == split_name] for split_name in ('train', 'val', 'test')
)

In [1631]:
# Inspect a single return goal: list its validation rows ordered by NN volatility, lowest first.
goal = 0.016
# NOTE(review): this overwrites df_val with the goal-filtered rows, so re-running the
# cell with a different `goal` filters an already-filtered frame — rebuild df_val first.
df_val = df_val[df_val['goal_monthly'] == goal]
df_val_sorted = df_val.sort_values(by="nn_vol", ascending=True)
df_val_sorted

Unnamed: 0,grid_case,C_svm_init,tau,lambda_hinge,goal_monthly,split,train_feas_pct,val_feas_pct,nn_mean_ret,nn_vol,nn_sharpe,svm_mean_ret,svm_vol,svm_sharpe
239,U,0.1,0.1,0.0,0.016,val,100.0,100.0,0.016921,0.029266,0.578195,0.016944,0.029274,0.578812
230,I,0.01,0.1,0.0,0.016,val,100.0,100.0,0.016944,0.029274,0.578812,0.016944,0.029274,0.578812
227,E,0.01,0.05,0.0,0.016,val,100.0,100.0,0.016944,0.029274,0.578812,0.016944,0.029274,0.578812
224,AG,1.0,0.1,0.0,0.016,val,100.0,100.0,0.01705,0.029614,0.575737,0.016944,0.029274,0.578812
218,A,0.01,0.01,0.0,0.016,val,100.0,100.0,0.01821,0.029792,0.611217,0.016944,0.029274,0.578812
236,Q,0.1,0.05,0.0,0.016,val,100.0,100.0,0.01856,0.030055,0.617531,0.016944,0.029274,0.578812
221,AC,1.0,0.05,0.0,0.016,val,100.0,100.0,0.01909,0.03114,0.613049,0.016944,0.029274,0.578812
233,M,0.1,0.01,0.0,0.016,val,100.0,100.0,0.020728,0.03237,0.640343,0.016944,0.029274,0.578812
242,Y,1.0,0.01,0.0,0.016,val,100.0,100.0,0.0229,0.032534,0.703903,0.016944,0.029274,0.578812


In [1632]:
# Training-split row(s) for grid_case 'U' at the goal chosen above.
df_train[(df_train['grid_case']== 'U') & (df_train['goal_monthly'] == goal)]

Unnamed: 0,grid_case,C_svm_init,tau,lambda_hinge,goal_monthly,split,train_feas_pct,val_feas_pct,nn_mean_ret,nn_vol,nn_sharpe,svm_mean_ret,svm_vol,svm_sharpe
238,U,0.1,0.1,0.0,0.016,train,100.0,100.0,0.02138,0.026172,0.81691,0.020993,0.026079,0.804972


In [1619]:
# Test-split row(s) for grid_case 'U' at the goal chosen above.
df_test[(df_test['grid_case']== 'U') & (df_test['goal_monthly'] == goal)]

Unnamed: 0,grid_case,C_svm_init,tau,lambda_hinge,goal_monthly,split,train_feas_pct,val_feas_pct,nn_mean_ret,nn_vol,nn_sharpe,svm_mean_ret,svm_vol,svm_sharpe
237,U,0.1,0.1,0.0,0.016,test,100.0,100.0,0.023333,0.027078,0.861693,0.022988,0.027173,0.845981


# Paper figures

In [1633]:
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# ---- Optional: simple white-border trimmer ----------------------------------
def _trim_white(img, thresh=240):
    """
    Trim white margins from an RGB(A) image array [0..255].
    Returns a cropped view. If no white border, returns original.
    """
    if img.ndim == 3 and img.shape[2] == 4:
        rgb = img[..., :3]
    else:
        rgb = img
    gray = rgb.mean(axis=2)
    mask = gray < thresh  # keep anything darker than 'thresh'

    if not mask.any():
        return img  # all white (unlikely) -> skip

    rows = np.where(mask.any(axis=1))[0]
    cols = np.where(mask.any(axis=0))[0]
    r0, r1 = rows[0], rows[-1] + 1
    c0, c1 = cols[0], cols[-1] + 1
    return img[r0:r1, c0:c1, ...]


def make_two_panel_frontier(
    left_png,
    right_png,
    outfile,
    left_title="Crisis (2008–2009) — in-sample frontier",
    right_title="Non-crisis (2013–2024) — in-sample frontier",
    panel_labels=("a", "b"),
    figsize=(10, 4.8),
    dpi=300,
    trim=True,
):
    """Place two saved images side by side in one figure and write it to `outfile`.

    Each image is shown without axes, gets its own title and a bold
    "(label)" tag in the top-left corner taken from `panel_labels`.
    With `trim=True`, images whose maximum is <= 1.0 are scaled to uint8
    [0..255] and every image is cropped with _trim_white before display.
    Missing parent directories of `outfile` are created.
    """
    images = [plt.imread(Path(left_png)), plt.imread(Path(right_png))]

    if trim:
        cropped = []
        for raw in images:
            # images with max <= 1.0 are rescaled to 0..255 before thresholding
            if raw.max() <= 1.0:
                raw = (raw * 255).astype(np.uint8)
            cropped.append(_trim_white(raw))
        images = cropped

    fig, axes = plt.subplots(1, 2, figsize=figsize, dpi=dpi, constrained_layout=True)

    titles = (left_title, right_title)
    for i, ax in enumerate(axes):
        ax.imshow(images[i])
        ax.set_title(titles[i], fontsize=11)
        ax.axis("off")
        ax.text(
            0.02, 0.98, f"({panel_labels[i]})",
            transform=ax.transAxes, ha="left", va="top", fontsize=11, fontweight="bold"
        )

    # No global title is drawn; add fig.suptitle(...) here if the figure is
    # not captioned elsewhere (e.g. in LaTeX).

    outfile = Path(outfile)
    outfile.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(outfile, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved two-panel figure → {outfile}")


# ---------------- Example usage ----------------
# Update the paths to your PNGs and desired output path.
# Here the 2024 image is the left panel and the 2008 image the right one, so both
# titles are passed explicitly instead of relying on the function defaults.
make_two_panel_frontier(
    left_png="plots/2024/in_sample_frontier.png",
    right_png="plots/2008/in_sample_frontier.png",
    outfile="plots/paper/in_sample_frontiers_2panel.png",
    left_title="Non-crisis (2024) — in-sample frontier",
    right_title="Crisis (2008-2009) — in-sample frontier",
)

Saved two-panel figure → plots/paper/in_sample_frontiers_2panel.png


In [1634]:
# Load the 2024 wealth results; swap the two lines below to load the 2008 file instead.
df = pd.read_csv("svm_qp_results_wealth_2024.csv")
#df = pd.read_csv("svm_qp_results_wealth_2008.csv")

In [1436]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates

def cumulative_wealth_plot(dates, r_nn, r_svm, title, outfile):
    """Plot cumulative wealth of the NN and SVM+MVO return series and save it.

    `r_nn` and `r_svm` are per-period returns; wealth is the running product
    of (1 + r). `dates` is converted with pd.to_datetime and used as the
    x-axis. The figure is written to `outfile` at 300 dpi and then closed.
    """
    # running product of gross returns
    growth_nn = 1 + np.asarray(r_nn, dtype=float)
    growth_svm = 1 + np.asarray(r_svm, dtype=float)
    wealth_nn = np.cumprod(growth_nn)
    wealth_svm = np.cumprod(growth_svm)

    # coerce the dates to datetimes for the x-axis
    when = pd.to_datetime(pd.Index(dates))

    fig, ax = plt.subplots(figsize=(7, 4))
    curves = (
        (wealth_svm, "SVM+MVO (two-stage)"),
        (wealth_nn, "End-to-end (NN)"),
    )
    for wealth, name in curves:
        ax.plot(when, wealth, label=name)

    # month-based ticks: the interval grows with the number of dates (len / 6, rounded up)
    months_per_tick = max(1, int(np.ceil(len(when) / 6)))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=months_per_tick))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    # keep the tick labels horizontal and centred
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(0)
        tick_label.set_ha('center')

    ax.margins(x=0.01)  # small padding on the sides
    ax.set_ylabel("Cumulative wealth (start = 1)")
    ax.set_title(title)
    ax.grid(True, alpha=0.4)
    ax.legend(frameon=False)

    fig.tight_layout()
    fig.savefig(outfile, dpi=300)
    plt.close(fig)

In [1437]:
df_test = df[df['split'] == 'test']

# The return series are stored as strings of the form "[r1, r2, ...]". Take the
# first test row of grid_case 'U', strip the brackets and parse every
# comma-separated entry as a float — once for the NN column, once for the SVM column.
nn_r, svm_r = (
    [float(token)
     for token in df_test.loc[df_test['grid_case'] == 'U', column].iloc[0].strip('[]').split(',')]
    for column in ('nn_r_series', 'svm_r_series')
)

# Month-end dates starting in 2024-01, one per return in the series.
cumulative_wealth_plot(
    dates = pd.date_range(start="2024-01", periods=len(nn_r), freq='ME'),
    r_nn = nn_r,
    r_svm = svm_r,
    title = "Cumulative wealth in test period (2024)",
    outfile = "testing_cu.png"
)

## Combine wealth plots

In [1439]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def stack_cum_wealth(top_png_2024,
                     bottom_png_2008,
                     out_png="fig_cum_wealth_panels.png",
                     labels=("(a) Non-crisis", "(b) Crisis")):
    """Stack two saved images vertically in one figure and write it to `out_png`.

    Parameters
    ----------
    top_png_2024 : image file shown in the top panel.
    bottom_png_2008 : image file shown in the bottom panel.
    out_png : destination passed to fig.savefig. When it is a path, missing
        parent directories are created first.
    labels : text tags drawn in the top-left corner of the top and bottom panel.
    """
    import os  # local import keeps this cell independent of other cells' imports

    # Read both images BEFORE creating the figure, so a missing or unreadable
    # file does not leave an open figure behind.
    imgs = [mpimg.imread(top_png_2024), mpimg.imread(bottom_png_2008)]

    fig, axes = plt.subplots(
        nrows=2, ncols=1, figsize=(8, 9), dpi=300, constrained_layout=True
    )
    try:
        for ax, img, lab in zip(axes, imgs, labels):
            ax.imshow(img)
            ax.axis("off")  # don't draw an extra frame over the embedded plot
            # panel tag in the top-left corner
            ax.text(0.02, 0.98, lab, transform=ax.transAxes, va="top", ha="left",
                    fontsize=12, fontweight="bold",
                    bbox=dict(facecolor="white", alpha=0.6, edgecolor="none", pad=2))

        # Create the output directory when `out_png` is a path (file-like
        # objects are passed straight through to savefig).
        if isinstance(out_png, (str, os.PathLike)):
            out_dir = os.path.dirname(os.fspath(out_png))
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
        fig.savefig(out_png, dpi=300, bbox_inches="tight")
    finally:
        # always release the figure, even if drawing or saving failed
        plt.close(fig)
    print(f"Saved {out_png}")

# Example: stack the 2024 wealth plot (top) over the 2008 wealth plot (bottom).
stack_cum_wealth("2024_wealth.png",
                 "2008_wealth.png",
                 out_png="plots/paper/cum_wealth_panels.png")

Saved plots/paper/cum_wealth_panels.png
