In [108]:
import pandas as pd 
import numpy as np
from datetime import date
from dateutil.relativedelta import relativedelta
import yfinance as yf
import os 
import random

Get All Stocks in S&P 500

In [111]:
def get_sp500_tickers():
    """Return the S&P 500 constituent symbols scraped from Wikipedia.

    Reads the first HTML table on the Wikipedia constituents page, takes its
    ``Symbol`` column, and rewrites each symbol so that dots become dashes
    (e.g. ``BRK.B`` -> ``BRK-B``) with surrounding whitespace removed.

    Returns:
        list[str]: one cleaned symbol per table row, in table order.
    """
    wiki_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    # A browser-like User-Agent header is sent with the request.
    request_headers = {"User-Agent":"Mozilla/5.0"}
    constituents = pd.read_html(wiki_url, storage_options=request_headers)[0]
    raw_symbols = constituents["Symbol"].astype(str)
    cleaned = []
    for raw in raw_symbols:
        cleaned.append(raw.replace(".", "-").strip())
    return cleaned

def get_data_cached(tickers,start,end,interval="1d", cache_dir="data_cache"):
    """Load price history per ticker, using one CSV per ticker as a cache.

    For each ticker, ``<cache_dir>/<ticker>.csv`` is used when it exists and
    its date range covers ``start`` through the day before ``end``. Every
    other ticker is downloaded in a single ``yf.download`` call and its frame
    is written back to the cache.

    Args:
        tickers: iterable of ticker symbols.
        start: first date wanted; anything ``pd.Timestamp`` accepts
            (``datetime.date``, ``Timestamp``, ISO string, ...).
        end: end of the range, same accepted types as ``start``.
        interval: bar interval forwarded to ``yf.download``.
        cache_dir: directory holding the per-ticker CSV files (created if
            missing).

    Returns:
        dict: ticker -> DataFrame indexed by date. Tickers that could not be
        fetched, or that came back with no data, are omitted and a ``[WARN]``
        line is printed for each.

    Notes:
        * The cache file name does not include ``interval``; a file written
          for one interval will be reused for another.
        * A cache hit is sliced with ``.loc[start:end]``, which is inclusive
          on both ends.
    """
    start_ts, end_ts = pd.Timestamp(start), pd.Timestamp(end)
    # Compare plain dates so that string / Timestamp arguments work as well
    # as datetime.date ones.
    first_needed = start_ts.date()
    last_needed = (end_ts - pd.Timedelta(days=1)).date()
    os.makedirs(cache_dir,exist_ok=True)
    to_fetch = []
    results = {}
    for t in tickers:
        fn = os.path.join(cache_dir,f"{t}.csv")
        if os.path.exists(fn):
            try:
                df = pd.read_csv(fn,parse_dates=["Date"],index_col="Date").sort_index()
            except Exception as e:
                # An unreadable cache file is treated as a miss and re-downloaded.
                print(f"[WARN] {t} cache unreadable, refetching: {e}")
                df = None
            if df is not None and not df.empty:
                have_lo, have_hi = df.index.min().date(), df.index.max().date()
                if have_lo <= first_needed and have_hi >= last_needed:
                    results[t] = df.loc[start_ts:end_ts]
                    continue
        to_fetch.append(t)

    if to_fetch:
        bulk = yf.download(to_fetch,start=start,end=end,interval=interval,threads=True,group_by='ticker')
        for t in to_fetch:
            try:
                if t in bulk:
                    df = bulk[t]
                elif isinstance(bulk.columns, pd.MultiIndex):
                    # Multi-ticker frame without this ticker: do not fall back
                    # to the whole frame, which would cache other tickers'
                    # prices under this ticker's name.
                    print(f"[WARN] {t} failed to fetch: no data returned")
                    continue
                else:
                    df = bulk
                if isinstance(df.columns, pd.MultiIndex):
                    df = df.droplevel(0, axis=1)
                df = df.sort_index()
                if df.dropna(how="all").empty:
                    # Nothing usable came back; skip so the cache is not
                    # populated with an empty / all-NaN file.
                    print(f"[WARN] {t} failed to fetch: no data returned")
                    continue
                df.to_csv(os.path.join(cache_dir, f"{t}.csv"))
                results[t] = df
            except Exception as e:
                print(f"[WARN] {t} failed to fetch: {e}")
    return results

In [None]:
# Assumption: the signal is generated from the current day's closing price and
# executed at the next day's opening price.
def algorithm(input_df,start=None,end=None,interval='1d'):
    """Build moving-average crossover signals from a price frame.

    Computes 20- and 50-row simple moving averages of ``Close`` and derives a
    raw signal per row: ``"Buy"`` when SM20 > SM50, ``"Sell"`` when
    SM20 < SM50, otherwise ``"Hold"`` (which includes rows where either
    average is still NaN). ``Signal`` is the raw signal shifted down by one
    row, and ``ExecPrice`` is that row's ``Open``.

    Args:
        input_df: DataFrame with at least ``Close`` and ``Open`` columns.
            It is not modified; the work is done on a copy.
        start, end, interval: accepted so existing positional callers keep
            working, but not used by the computation. They are optional so
            that ``algorithm(df)`` is a valid call.

    Returns:
        A new DataFrame with the added columns ``SM20``, ``SM50``, ``Signal``,
        ``RawSignal`` and ``ExecPrice``, with rows lacking a ``Signal`` (the
        first row, after the shift) dropped; or ``None`` when ``input_df`` is
        empty.
    """
    if input_df.empty:
        return None
    # Copy first: the input may be a slice of another frame, and callers
    # should be able to re-run this on the same raw data.
    frame = input_df.copy()
    frame["SM20"] = frame["Close"].rolling(window=20).mean()
    frame["SM50"] = frame["Close"].rolling(window=50).mean()
    raw_signal = np.where(frame["SM20"]>frame["SM50"],"Buy",
                          np.where(frame["SM20"]<frame["SM50"],"Sell","Hold"))
    # Shift by one row so a signal computed on one row is acted on at the next.
    frame["Signal"] = pd.Series(raw_signal, index=frame.index).shift(1)
    frame["RawSignal"] = raw_signal
    frame["ExecPrice"] = frame["Open"]
    return frame.dropna(subset=["Signal"])

In [None]:
class Portfolio:
    """Long-only ledger of cash and whole-share positions.

    Buys and sells are charged a proportional fee (``fee_bps``) and are
    executed at a price moved against the trader by ``slippage_bps``.
    Every executed trade is appended to ``trades`` and every call to
    ``mark_to_market`` appends a row to ``equity``.
    """
    def __init__(self,starting_cash=1_000,fee_bps=5,slippage_bps=1):
        self.cash = float(starting_cash)         # uninvested cash
        self.positions = {}                      # ticker -> shares held
        self.fee_bps = float(fee_bps)            # fee in basis points of notional
        self.slippage_bps = float(slippage_bps)  # slippage in basis points of price
        self.trades = []                         # list of trade dicts
        self.equity = []                         # list of daily valuation dicts
        self._last_close = {}                    # ticker -> last finite close seen

    @staticmethod
    def _is_finite(value):
        """Return True when ``value`` is a finite number."""
        try:
            return bool(np.isfinite(value))
        except TypeError:
            return False

    def _apply_costs(self,notional):
        """Return the fee charged on a trade of the given notional value."""
        fee = abs(notional)*self.fee_bps/10_000.0
        return fee

    def _exec_price(self,raw_price,side):
        """Return ``raw_price`` adjusted for slippage.

        The price is raised for ``side == "buy"`` and lowered for any other
        side.
        """
        mult = 1+ (self.slippage_bps/10_000) if side == "buy" else 1-(self.slippage_bps/10_000)
        return raw_price*mult

    def buy_cash_all(self,ticker,price,date,cash_to_use=None):
        """Buy as many whole shares of ``ticker`` as the budget allows.

        Args:
            ticker: symbol to buy.
            price: quoted price before slippage.
            date: value stored in the trade record.
            cash_to_use: budget for this order; defaults to all cash and is
                never allowed to exceed the cash actually held.

        The share count is sized so that notional plus fee fits the budget.
        Nothing happens when the budget is not positive, the price is not a
        finite positive number, or not even one share is affordable.
        """
        if cash_to_use is None:
            cash_to_use =self.cash
        budget = min(cash_to_use, self.cash)
        if budget <= 0:
            return
        if not self._is_finite(price) or price <= 0:
            return
        px = self._exec_price(price,"buy")
        # Size the order on the all-in cost per share so the fee cannot push
        # the total over budget and void the whole order.
        unit_cost = px * (1 + self.fee_bps/10_000.0)
        if unit_cost <= 0:
            return
        shares = int(budget//unit_cost)
        # Guard against floating-point rounding in the division above.
        while shares > 0 and shares*px + self._apply_costs(shares*px) > budget:
            shares -= 1
        if shares <= 0:
            return
        notional = shares*px
        fee = self._apply_costs(notional)
        total = notional+fee
        self.cash -= total
        self.positions[ticker] = self.positions.get(ticker,0) + shares
        self.trades.append({"Date":date,"Ticker":ticker,"Side":"BUY", "Price":px, "Shares":shares,"Fee":fee})

    def sell_all(self, ticker, price, date):
        """Sell the whole position in ``ticker`` at ``price`` less slippage.

        Does nothing when no shares are held, or when ``price`` is not a
        finite number (the position is then kept rather than turning cash
        into NaN).
        """
        shares = int(self.positions.get(ticker,0))
        if shares <=0:
            return
        if not self._is_finite(price):
            return
        px = self._exec_price(price,"sell")
        notional = shares*px
        fee = self._apply_costs(notional)
        self.cash += (notional-fee)
        self.positions[ticker] = 0
        self.trades.append({"Date": date, "Ticker": ticker, "Side": "SELL", "Price": px, "Shares": shares, "Fee": fee})

    def mark_to_market(self,date,close_prices:dict):
        """Append a valuation row for ``date``.

        Each held position is valued at the close supplied in
        ``close_prices``; when a ticker has no finite close for this date the
        most recent finite close seen by this method is used instead. A
        position that has never had a close contributes nothing.
        """
        for tkr, close in close_prices.items():
            if self._is_finite(close):
                self._last_close[tkr] = float(close)
        pos_value = 0.0
        for tkr, sh in self.positions.items():
            if sh and tkr in self._last_close:
                pos_value += sh * self._last_close[tkr]
        equity = self.cash + pos_value
        self.equity.append({"Date": date, "Equity": equity, "Cash": self.cash, "PosValue": pos_value})

    def total_value(self):
        """Return the latest recorded equity, or cash if nothing is recorded yet."""
        if not self.equity:
            return self.cash
        return self.equity[-1]["Equity"]

In [66]:
def backtest(signals_dict_or_df,starting_cash=1_000,fee_bps=5, slippage_bps=1, allocate_equal_on_buy=True):
    """Replay per-ticker signals through a ``Portfolio`` and summarise the result.

    Args:
        signals_dict_or_df: either one signal DataFrame (stored under the key
            ``"TICKER"``) or a mapping of ticker -> signal DataFrame. Each
            frame must be indexed by date and carry ``Signal``, ``ExecPrice``
            and ``Close`` columns. Entries that are ``None`` or empty are
            ignored.
        starting_cash, fee_bps, slippage_bps: forwarded to ``Portfolio``.
        allocate_equal_on_buy: when True, the cash available after the day's
            sells is split equally across that day's new buys. When False,
            buys are executed in order and each may use all remaining cash.

    Returns:
        dict with keys ``"equity"`` (DataFrame indexed by date), ``"trades"``
        (DataFrame of executed trades), ``"summary"`` (dict of headline
        statistics, empty when there was nothing to replay) and
        ``"portfolio"`` (the ``Portfolio`` instance).

    Per date the order is: collect signals, execute sells, execute buys,
    then mark to market at that date's ``Close``. A ticker is bought only
    while flat and sold only while held, so there is no shorting.
    """
    if isinstance(signals_dict_or_df,pd.DataFrame):
        raw_signals = {"TICKER": signals_dict_or_df}
    else:
        raw_signals = dict(signals_dict_or_df.items())
    # Drop missing / empty frames so they cannot break the replay below.
    signals_dict = {k: v.copy() for k, v in raw_signals.items() if v is not None and not v.empty}

    all_dates = sorted(set().union(*[df.index for df in signals_dict.values()]))
    port = Portfolio(starting_cash=starting_cash,fee_bps=fee_bps, slippage_bps=slippage_bps)
    posture = {t: 0 for t in signals_dict}  # 1 while the ticker is held, else 0
    for dt in all_dates:
        # 1) Decide trades for tickers that have this date
        todays_buys = []
        todays_sells = []

        for tkr, df in signals_dict.items():
            if dt not in df.index:
                continue
            row = df.loc[dt]
            sig = row["Signal"]
            exec_px = row["ExecPrice"]
            if sig == "Buy" and posture[tkr] == 0: # buy only when flat
                todays_buys.append((tkr,exec_px))
            elif sig == "Sell" and posture[tkr] == 1: # sell only when held, i.e. no shorting
                todays_sells.append((tkr,exec_px))

        # 2) Execute sells first (free up cash)
        for tkr, px in todays_sells:
            port.sell_all(tkr, px, dt)
            # Reflect what the portfolio actually holds after the order.
            posture[tkr] = 1 if port.positions.get(tkr, 0) > 0 else 0

        # 3) Execute buys: equal split if requested, otherwise first come
        #    first served on whatever cash remains.
        if todays_buys:
            cash_each = port.cash / len(todays_buys) if allocate_equal_on_buy else None
            for tkr, px in todays_buys:
                port.buy_cash_all(tkr,px,dt,cash_to_use=cash_each)
                if port.positions.get(tkr, 0) > 0:
                    posture[tkr] = 1

        # 4) Mark to market using Close prices of today (if available)
        close_prices = {}
        for tkr, df in signals_dict.items():
            if dt in df.index:
                close_prices[tkr] = df.loc[dt,"Close"]
        port.mark_to_market(dt,close_prices)

    trades_df = pd.DataFrame(port.trades)
    if not port.equity:
        # Nothing was replayed: return an empty, correctly shaped equity frame.
        equity_df = pd.DataFrame(columns=["Equity", "Cash", "PosValue"])
        equity_df.index.name = "Date"
        return {"equity": equity_df, "trades": trades_df, "summary": {}, "portfolio": port}

    equity_df = pd.DataFrame(port.equity).set_index("Date").sort_index()

    ret = equity_df["Equity"].pct_change().fillna(0.0)
    cum_ret = (1 + ret).prod() - 1
    start_equity = equity_df["Equity"].iloc[0]
    end_equity = equity_df["Equity"].iloc[-1]
    years = (equity_df.index[-1] - equity_df.index[0]).days / 365.25
    # CAGR is undefined for a zero-length span or a non-positive starting equity.
    if years > 0 and start_equity > 0:
        cagr = (end_equity / start_equity) ** (1 / years) - 1
    else:
        cagr = np.nan
    dd = (equity_df["Equity"] / equity_df["Equity"].cummax() - 1).min()
    # Annualised with 252 periods per year; the small epsilon avoids division by zero.
    sharpe = np.sqrt(252) * (ret.mean() / (ret.std() + 1e-12))
    summary = {
        "Start": equity_df.index[0],
        "End": equity_df.index[-1],
        "StartEquity": start_equity,
        "EndEquity": end_equity,
        "TotalReturn": cum_ret,
        "CAGR": cagr,
        "MaxDrawdown": dd,
        "Sharpe(naive)": sharpe,
        "Trades": len(trades_df) if not trades_df.empty else 0,
    }

    return {"equity": equity_df, "trades": trades_df, "summary": summary, "portfolio": port}

In [112]:
# Draw a reproducible random subset of the S&P 500 constituents.
sp_list = get_sp500_tickers()
random.seed(42)
sp_sampled = random.sample(sp_list, 50)  # 50 unique tickers

# Load five years of price history ending today (from cache where possible).
end = date.today()
start = end - relativedelta(years=5)
ticker_dict = get_data_cached(sp_sampled, start, end)

  bulk = yf.download(to_fetch,start=start,end=end,interval=interval,threads=True,group_by='ticker')
[*********************100%***********************]  50 of 50 completed


In [115]:
# Window passed to algorithm(): one year ending ten days ago.
# NOTE(review): algorithm() as defined in this notebook accepts start/end but
# does not trim the frame to them, so signals still span all downloaded data.
end = date.today() - relativedelta(days=10)
start = end - relativedelta(years=1)
signals_dict = {k: algorithm(v, start, end) for k, v in ticker_dict.items()}
# algorithm() returns None for an empty price frame; drop those tickers.
signals_dict = {k: sig for k, sig in signals_dict.items() if sig is not None}

In [114]:
# Run the backtest and print the summary, the first trades and the last
# rows of the equity curve.
res = backtest(signals_dict, starting_cash=1_000)
for report in (res["summary"], res["trades"].head(), res["equity"].tail()):
    print(report)

{'Start': Timestamp('2020-10-21 00:00:00'), 'End': Timestamp('2025-10-17 00:00:00'), 'StartEquity': np.float64(1000.0), 'EndEquity': np.float64(1511.5153258242685), 'TotalReturn': np.float64(0.5115153258242691), 'CAGR': np.float64(0.0863410997730476), 'MaxDrawdown': np.float64(-0.1403946322515791), 'Sharpe(naive)': np.float64(0.7965941210513233), 'Trades': 458}
        Date Ticker Side      Price  Shares       Fee
0 2020-12-31    MOS  BUY  20.313188       1  0.010157
1 2020-12-31    BAC  BUY  26.736810       1  0.013368
2 2020-12-31   ANET  BUY  18.126812       1  0.009063
3 2020-12-31      T  BUY  15.354038       1  0.007677
4 2020-12-31   INVH  BUY  25.604936       1  0.012802
                 Equity         Cash    PosValue
Date                                            
2025-10-13  1509.234321  1184.734319  324.500002
2025-10-14  1512.464326  1184.734319  327.730007
2025-10-15  1514.965327  1136.385325  378.580002
2025-10-16  1509.615324  1136.385325  373.230000
2025-10-17  1511.5