In [1]:
from __future__ import annotations
import re
from pathlib import Path
import pandas as pd

In [22]:
def read_futures_xlsx(path: str | Path, sheet: str) -> pd.DataFrame:
    path = Path(path)
    df = pd.read_excel(path, sheet_name=sheet)

    # Find the date column (accepts 'Date' or 'Dates', case-insensitive)
    date_col = next((c for c in df.columns if str(c).strip().lower() in {"date", "dates"}), None)
    if date_col is None:
        raise ValueError("No 'Date' or 'Dates' column found in the sheet.")

    # Normalize date index
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.sort_values(date_col).set_index(date_col)

    # Build a rename map for columns like 'BAP1 Comdty' -> 'F1', 'BAP2 Comdty' -> 'F2', etc.
    name_map: dict[str, str] = {}
    for c in df.columns:
        s = str(c)
        m = re.search(r"(\d+)\s*Comdty$", s, flags=re.IGNORECASE)
        if m:
            name_map[c] = f"F{int(m.group(1))}"
            continue
        # Also accept headers already like 'F1', 'M1', or just '1'
        m2 = re.fullmatch(r"[FfMm]?(\d+)", s)
        if m2:
            name_map[c] = f"F{int(m2.group(1))}"

    df = df.rename(columns=name_map)

    # Keep only tenor columns of the form F<number>
    tenor_cols = [c for c in df.columns if re.fullmatch(r"F\d+", str(c))]
    if not tenor_cols:
        raise ValueError("No tenor columns detected (expected headers like F1..F30 or '...1 Comdty').")

    # Sort by tenor number
    tenor_cols = sorted(tenor_cols, key=lambda x: int(x[1:]))
    df = df[tenor_cols]

    return df



# --- EXAMPLE USAGE (edit the two assignments below to your file/sheet) ---
# NOTE(review): the saved output of this cell is
# "TypeError: 'str' object is not callable". Nothing in this cell rebinds a
# callable, so presumably a builtin (e.g. `str` or `print`) was shadowed in the
# live kernel -- confirm with Restart Kernel -> Run All.
DataPath = "../data/NGLs_series.xlsx"   # workbook location, relative to the notebook
sheet = "DAE"                    # sheet to load, e.g. a ticker sheet name

# Load the sheet as a date-indexed frame with one column per tenor (F1, F2, ...).
wide = read_futures_xlsx(path=DataPath, sheet=sheet)

# Plain-text preview of the first five rows.
print("WIDE (first 5 rows):\n", wide.head(n=5), "\n", sep="")



TypeError: 'str' object is not callable

In [16]:
# constrain and select time series
def drop_full_term_structure_duplicates(df: pd.DataFrame) -> pd.DataFrame:
    """Drop rows where the entire term structure matches the previous row.

    Mirrors your Excel rule: if every F# is exactly equal to the prior row,
    treat it as a holiday duplicate and keep only the first occurrence.

    A value missing in the same column on both rows counts as equal, so a
    repeated row is still dropped when some columns are NaN. Each row is
    compared with the row directly above it in ``df``; the first row has no
    predecessor and is always kept.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose consecutive rows are compared column by column.

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` that differ from their predecessor, in the original
        order. ``df`` itself is not modified.
    """
    prev = df.shift()

    # Plain `eq` treats NaN as unequal to NaN, which would keep a repeated row
    # whenever any column is empty; count "both missing" as a match.
    same = df.eq(prev) | (df.isna() & prev.isna())

    # True when *all* columns match the previous row
    dup_mask = same.all(axis=1)

    # The shifted frame is all-NaN on the first row, so an all-NaN first row
    # would otherwise be flagged as a duplicate of a row that does not exist.
    if len(dup_mask) > 0:
        dup_mask.iloc[0] = False

    # Use a positional boolean array so the selection does not depend on
    # index alignment.
    return df.loc[~dup_mask.to_numpy()]


In [None]:
# Holiday dupe clean

In [None]:
# create roll calendar

In [None]:
# rolling PnL

# Daily PnL

# Cost

# Long Rolling PnL

# Long Rolling EL

# Roll Adjusted Time Series

# Long Drawdown

In [None]:
# momentum

In [None]:
# carry