In [8]:
%%capture
!pip install backtrader

Collecting backtrader
  Downloading backtrader-1.9.78.123-py2.py3-none-any.whl.metadata (6.8 kB)
Downloading backtrader-1.9.78.123-py2.py3-none-any.whl (419 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m419.5/419.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: backtrader
Successfully installed backtrader-1.9.78.123


In [1]:
import kagglehub
from kagglehub import KaggleDatasetAdapter
import sys
import os
import pandas as pd

In [2]:
# Config of the notebook
random_seed = 585

In [3]:
# Download latest version
stock_data = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "camnugent/sandp500",
    "all_stocks_5yr.csv",
)

# download company info
comp_info = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "paytonfisher/sp-500-companies-with-financial-information",
    "financials.csv",
)


Downloading from https://www.kaggle.com/api/v1/datasets/download/camnugent/sandp500?dataset_version_number=4&file_name=all_stocks_5yr.csv...


100%|██████████| 9.60M/9.60M [00:01<00:00, 8.69MB/s]

Extracting zip of all_stocks_5yr.csv...





Downloading from https://www.kaggle.com/api/v1/datasets/download/paytonfisher/sp-500-companies-with-financial-information?dataset_version_number=1&file_name=financials.csv...


100%|██████████| 91.2k/91.2k [00:00<00:00, 344kB/s]


### Selected Companies

In [4]:
# Calculate the value counts for the 'Sector' column
sector_counts = comp_info['Sector'].value_counts()

In [5]:
# sample 5 top sectors and select 5 random stocks
selected_sectors = list(sector_counts.index[:3])

selected_sector_comp = comp_info[comp_info.apply(lambda row: row["Sector"] in selected_sectors, axis=1)]

# selected_companies
selected_companies = selected_sector_comp.groupby(by="Sector").sample(5, random_state=random_seed)

In [38]:
selected_tickers = list(selected_companies["Symbol"])
sampled_stock_data = stock_data[stock_data.apply(lambda row: row["Name"] in selected_tickers, axis=1)]

# rename Name to Ticker
sampled_stock_data = sampled_stock_data.rename(columns={"Name": "Ticker"})

In [76]:
# create dataframe for the investment
investment_strategy = selected_companies[["Symbol"]].copy()

# set budget as 1 dollar
budget = 100000

investment_strategy["Weight"] = round(1/len(investment_strategy), 3)
investment_strategy["Position_Size"] = budget * investment_strategy["Weight"]

investment_strategy

Unnamed: 0,Symbol,Weight,Position_Size
106,CMG,0.067,6700.0
313,MHK,0.067,6700.0
206,GRMN,0.067,6700.0
279,LEG,0.067,6700.0
345,ORLY,0.067,6700.0
66,BAC,0.067,6700.0
319,MS,0.067,6700.0
499,XL,0.067,6700.0
107,CB,0.067,6700.0
34,AXP,0.067,6700.0


In [77]:
import backtrader as bt
%matplotlib inline

class BuyAndHoldCustom(bt.Strategy):

    # Add a parameters class to accept allocations
    params = (('allocations', {}),)

    def __init__(self):
        self.bought = {}
        # Access the allocations from the parameters
        self.allocations = self.p.allocations

    def next(self):
        for data in self.datas:
            name = data._name
            if not self.getposition(data).size and name not in self.bought:
                cash = self.broker.get_cash()
                allocation = self.params.allocations[name]
                size = int(allocation / data.close[0])
                self.buy(data=data, size=size)
                self.bought[name] = True

    def stop(self):
        # Called at the end of the backtest
        for data in self.datas:
            position = self.getposition(data)
            if position.size > 0:
                self.sell(data=data, size=position.size)
                print(f"SELL at END: {data._name} @ {data.close[0]:.2f}, Size: {position.size}")

def run_custom_backtest(dataframes_dict, allocations_dict, start_date, end_date):
    cerebro = bt.Cerebro()
    cerebro.broker.set_cash(sum(allocations_dict.values()))

    filtered_dataframes = {}
    for stock_name, df in dataframes_dict.items():
        df = df.copy()
        df['Date'] = pd.to_datetime(df['date'])
        df.set_index('Date', inplace=True)
        df = df.loc[start_date:end_date]
        df.columns = df.columns.str.capitalize()  # Ensure 'Close' column exists
        filtered_dataframes[stock_name] = df

        feed = bt.feeds.PandasData(dataname=df)
        cerebro.adddata(feed, name=stock_name)

    # Pass allocations into strategy as parameter
    cerebro.addstrategy(BuyAndHoldCustom, allocations=allocations_dict)

    start_value = cerebro.broker.getvalue()
    cerebro.run()
    end_value = cerebro.broker.getvalue()

    return {
        'start_value': start_value,
        'end_value': end_value,
        'total_gain': end_value - start_value,
        'percent_gain': (end_value - start_value) / start_value * 100,
        'cerebro': cerebro
    }


In [79]:
position_size_dict = {}
for index, row in investment_strategy.iterrows():
    position_size_dict[row['Symbol']] = row['Position_Size']

# create company dicts
dataframes_dict = {}
for index, row in selected_companies.iterrows():
  ticker = row["Symbol"]

  ticker_df = sampled_stock_data[sampled_stock_data["Ticker"] == ticker]
  dataframes_dict[ticker] = ticker_df



result = run_custom_backtest(
    dataframes_dict=dataframes_dict,
    allocations_dict=position_size_dict,
    start_date='2017-08-01',
    end_date='2018-02-07'
)

print("Initial Value:", result['start_value'])
print("Final Value:", result['end_value'])
print("Total Gain:", result['total_gain'])
print("Percent Gain:", result['percent_gain'], "%")


SELL at END: CMG @ 272.21, Size: 19
SELL at END: MHK @ 267.66, Size: 26
SELL at END: GRMN @ 63.08, Size: 134
SELL at END: LEG @ 45.78, Size: 138
SELL at END: ORLY @ 252.08, Size: 32
SELL at END: BAC @ 31.25, Size: 268
SELL at END: MS @ 54.55, Size: 141
SELL at END: XL @ 42.00, Size: 150
SELL at END: CB @ 147.52, Size: 44
SELL at END: AXP @ 93.61, Size: 77
SELL at END: IBM @ 153.85, Size: 46
SELL at END: EBAY @ 42.33, Size: 186
SELL at END: EA @ 123.05, Size: 57
SELL at END: AVGO @ 237.38, Size: 26
SELL at END: IT @ 119.45, Size: 52
Initial Value: 100500.0
Final Value: 106374.05999999998
Total Gain: 5874.059999999983
Percent Gain: 5.844835820895505 %


In [80]:
import matplotlib
matplotlib.use('Agg')  # Use a non-interactive backend for saving

cerebro = result["cerebro"]
# Generate plot
figs = cerebro.plot(style='candlestick')  # You can choose style: line, candlestick, ohlc

# Set large size and save the first figure
fig = figs[0][0]  # First subplot in first figure
fig.set_size_inches(24, 50)  # Width, Height in inches
fig.savefig("large_backtest_plot.png", dpi=300)  # Save with high resolution