In [25]:
import os
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import plotnine as p9
import quandl
from scipy.stats import zscore
import seaborn as sns
from matplotlib.dates import MonthLocator, DateFormatter
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.dates as mdates
from statsmodels.tsa.stattools import adfuller
from pandas.plotting import autocorrelation_plot
from datetime import timedelta as td
import statsmodels.api as sm
import random
from functools import reduce

### **Data**

In [26]:
h_df = pd.read_csv(r'/Users/pr/Downloads/UChicago MSFM/Winter Quarter/QTS/Project/buy-rough-sell-smooth/Dummy_H_Data.csv')
h_df

Unnamed: 0,date,ticker,price,h_val
0,2016-10-31,TSLA,105.24,0.32
1,2016-11-30,GOOGL,154.18,0.50
2,2016-12-31,AMZN,286.65,-0.06
3,2017-01-31,MSFT,100.99,0.49
4,2017-02-28,MSFT,53.10,-0.24
...,...,...,...,...
495,2024-09-30,AAPL,157.69,0.05
496,2024-10-31,MSFT,246.91,-0.35
497,2024-11-30,GOOGL,163.50,0.46
498,2024-12-31,AAPL,267.60,0.27


### **Rank H Values**

In [27]:
# Modify the function to rank h_vals into quintiles per date
def rank_H_quintiles(df):
    # Ensure date column is in datetime format
    monthly = df.copy()
    monthly["date"] = pd.to_datetime(monthly["date"])
    
    # Sort values by date and h_val
    monthly = monthly.sort_values(["date", "h_val"])
    
    # Assign quintile ranks (1 to 5) based on h_val per date
    monthly["h_rank"] = (
        monthly.groupby("date")["h_val"]
        .transform(lambda x: pd.qcut(x, q=5, labels=False, duplicates='drop') + 1)
    )
    
    # Sort final result by ticker & date
    monthly = monthly.sort_values(["ticker", "date"]).reset_index(drop=True)
    
    return monthly


In [28]:
h_rank_df = rank_H_quintiles(h_df)
h_rank_df

Unnamed: 0,date,ticker,price,h_val,h_rank
0,2016-10-31,AAPL,92.21,-0.07,2
1,2017-01-31,AAPL,53.30,-0.16,3
2,2017-02-28,AAPL,268.03,-0.48,1
3,2017-02-28,AAPL,280.93,0.35,5
4,2017-03-31,AAPL,281.85,-0.39,1
...,...,...,...,...,...
495,2024-09-30,TSLA,234.35,0.14,4
496,2024-11-30,TSLA,249.64,0.38,4
497,2024-12-31,TSLA,226.43,-0.12,3
498,2024-12-31,TSLA,271.65,0.18,4


### **Trading Strategy**

In [29]:
def generate_signal(rank_val, long=5, short=1, strat="long/short"):
    """
    Example signal generator for demonstration.
    Adjust as needed for your actual logic.
    """
    if strat == "long_only":
        return 1 if rank_val == long else 0
    elif strat == "short_only":
        return -1 if rank_val == short else 0
    else:  # long/short
        if rank_val == long:
            return 1
        elif rank_val == short:
            return -1
        else:
            return 0

In [30]:
def trading_strategy(
    h_df,
    long=5,
    short=1,
    annual_short_borrow=0.02,  # e.g. 2% annual for short positions
    trading_days_per_year=252,
    strat="long/short",
):
    # ------------------------------
    # 1) Generate Signals
    # ------------------------------
    h_df = h_df.copy()
    h_df["date"] = pd.to_datetime(h_df["date"])
    h_df = h_df.sort_values(["ticker", "date"]).reset_index(drop=True)

    # Build signals for each factor at month-end
    h_df["h_signal"] = h_df["h_rank"].apply(
        lambda r: generate_signal(r, long=long, short=short, strat=strat)
    )

    # SHIFT signals forward by +1 day => new positions start on next day
    h_df["signal_date"] = h_df["date"] + pd.Timedelta(days=1)

    # We'll keep a copy of h_df to return
    positions = h_df[["date", "ticker", "signal_date", "h_signal", "price"]].sort_values(
        ["date", "ticker"]
    )

    # ------------------------------
    # 2) Compute daily change
    # ------------------------------
    positions["price_prev"] = positions.groupby("ticker")["price"].shift(1)
    positions["chg"] = positions["price"] - positions["price_prev"]

    # Multiply by signals => daily raw PnL (in dollar terms if each signal = 1 share)
    positions["pnl"] = positions["h_signal"] * positions["chg"]

    # ------------------------------
    # 3) Subtract short borrow cost
    # ------------------------------
    daily_borrow_rate = annual_short_borrow / trading_days_per_year

    # If signal < 0 => subtract daily borrow rate
    positions["borrow"] = np.where(positions["h_signal"] < 0, -daily_borrow_rate, 0)
    positions["pnl"] += positions["borrow"]

    # ------------------------------
    # 3a) Compute notional (capital at risk)
    #      For each row, if we have ±1 share, notional is price_prev.
    #      If signals can be bigger than ±1, adjust accordingly.
    # ------------------------------
    positions["notional"] = (
        positions["price_prev"].abs() * positions["h_signal"].abs()
    )

    # ------------------------------
    # 4) Aggregate across tickers => daily total pnl & total notional
    # ------------------------------
    positions_agg = (
        positions.groupby("date")
        .agg(
            pnl=("pnl", "sum"),
            notional=("notional", "sum")
        )
        .reset_index()
    )
    positions_agg = positions_agg.sort_values("date").reset_index(drop=True)

    # ------------------------------
    # 5) Cumulative daily returns
    #    5a) Compute daily return = total pnl / total notional
    #    5b) Then cumulate
    # ------------------------------
    positions_agg["daily_return"] = positions_agg["pnl"] / positions_agg["notional"]
    positions_agg["cum_pnl"] = positions_agg["pnl"].cumsum()
    
    # If you want a cumulative return, you can do:
    positions_agg["cum_return"] = (1 + positions_agg["daily_return"]).cumprod() - 1

    # Drop days where we have no PnL or no notional
    positions_agg.dropna(subset=["pnl", "cum_pnl", "daily_return"], inplace=True)

    return positions_agg

In [31]:
returns = trading_strategy(
    h_rank_df,
    long=5,
    short=1,
    annual_short_borrow=0.02,      # e.g. 2% annual for short positions
    trading_days_per_year=252,
    strat = "long/short",
)
returns

Unnamed: 0,date,pnl,notional,daily_return,cum_pnl,cum_return
1,2016-11-30,24.220000,129.96,0.186365,24.220000,0.186365
2,2016-12-31,-210.090079,511.19,-0.410982,-185.870079,-0.301210
3,2017-01-31,-335.140079,360.52,-0.929602,-521.010159,-0.950807
4,2017-02-28,-201.830079,321.33,-0.628108,-722.840238,-0.981705
5,2017-03-31,-65.970079,419.60,-0.157221,-788.810317,-0.984582
...,...,...,...,...,...,...
95,2024-09-30,368.799921,378.48,0.974424,1219.402222,-1.000002
96,2024-10-31,-86.890079,287.59,-0.302132,1132.512143,-1.000002
97,2024-11-30,235.789921,375.38,0.628137,1368.302063,-1.000003
98,2024-12-31,201.539921,438.38,0.459738,1569.841984,-1.000004
