In [108]:
import kagglehub
from kagglehub import KaggleDatasetAdapter
import sys
import os
import pandas as pd
import backtrader as bt
import matplotlib

In [109]:
# Download latest version
path = kagglehub.dataset_download("khalilvandian/portfolio-management")

# Keep only the per-ticker CSV files, in a stable order.
# os.listdir() returns entries in arbitrary order and includes every entry in
# the directory, so any non-CSV entry would otherwise be parsed as a ticker.
files = sorted(name for name in os.listdir(path) if name.endswith(".csv"))
print("Files in dataset directory:", files)

Files in dataset directory: ['AVGO.csv', 'AXP.csv', 'BAC.csv', 'CB.csv', 'CMG.csv', 'EA.csv', 'EBAY.csv', 'GRMN.csv', 'IBM.csv', 'IT.csv', 'LEG.csv', 'MHK.csv', 'MS.csv', 'ORLY.csv', 'XL.csv']


In [110]:
def clean_stock_data(filepath):
    """Load one price-history CSV and return it as a date-indexed DataFrame.

    Steps applied to the raw file:
      * headers are stripped, lower-cased, and '/' and ' ' become '_';
      * a 'close_last' header is renamed to 'close';
      * 'close', 'open', 'high' and 'low' have '$' and ',' removed and are
        cast to float;
      * 'date' is parsed to datetimes, becomes the index, and rows are sorted
        by it in ascending order.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame, indexed by 'date'.

    Raises
    ------
    KeyError
        If any of the 'close', 'open', 'high', 'low' or 'date' columns is
        missing after header normalisation.
    """
    prices = pd.read_csv(filepath)

    # Normalise headers so the rest of the function can use fixed names
    prices.columns = [
        header.strip().lower().replace('/', '_').replace(' ', '_')
        for header in prices.columns
    ]

    # rename() ignores keys that are absent, so no membership check is needed
    prices = prices.rename(columns={'close_last': 'close'})

    # Strip currency formatting from every price column and make it numeric
    for price_col in ('close', 'open', 'high', 'low'):
        prices[price_col] = prices[price_col].replace('[\\$,]', '', regex=True).astype(float)

    # Chronological, date-indexed result
    prices['date'] = pd.to_datetime(prices['date'])
    return prices.set_index('date').sort_index()

In [111]:
# Shared business-day calendar that every ticker is aligned to
full_date_index = pd.date_range(start="2015-06-16", end="2025-06-13", freq='B')
stock_data_dict = {}

for file in files:
    ticker_name = file.replace(".csv", "").strip()
    file_path = os.path.join(path, file)

    # Align to the shared calendar, carry the last known row forward over
    # gaps, then zero-fill whatever is still missing (dates before the
    # ticker's first row have nothing to carry forward).
    stock_df = (
        clean_stock_data(file_path)
        .reindex(full_date_index)
        .ffill()
        .fillna(0)
    )

    stock_data_dict[ticker_name] = stock_df


# Add no risk option:
# reuse the AVGO frame's calendar/columns and pin every price to a constant 1
no_risk = stock_data_dict["AVGO"].assign(close=1, open=1, high=1, low=1, volume=0)
stock_data_dict["NoRisk"] = no_risk

tickers = list(stock_data_dict)
print(tickers)

['AVGO', 'AXP', 'BAC', 'CB', 'CMG', 'EA', 'EBAY', 'GRMN', 'IBM', 'IT', 'LEG', 'MHK', 'MS', 'ORLY', 'XL', 'NoRisk']


In [112]:
# Sanity check: report every ticker whose "close" column still contains NaN,
# together with the offending rows
for ticker, df in stock_data_dict.items():
    missing_close = df["close"].isna()
    if not missing_close.any():
        continue
    print(f"{ticker} has NaN values in 'close':")
    print(df[missing_close])

# Backtesting Functions

In [113]:
class BuyAndHoldCustom(bt.Strategy):
    """Buy every data feed once, sized from a per-name dollar allocation, then hold.

    Parameters
    ----------
    allocations : dict
        Maps a data feed's name to the dollar amount to invest in it.
        A feed with no entry is treated as a zero allocation.
    """

    # Add a parameters class to accept allocations
    params = (('allocations', {}),)

    def __init__(self):
        # Names of feeds whose one-off purchase has already been handled
        self.bought = {}
        # Access the allocations from the parameters
        self.allocations = self.p.allocations

    def next(self):
        """Place the initial buy for each feed that has not been handled yet.

        The order size is the whole number of shares the feed's allocation
        can buy at the current bar's close.
        """
        for data in self.datas:
            name = data._name
            if self.getposition(data).size or name in self.bought:
                continue

            price = data.close[0]
            # Zero-filled bars (no quote yet) would divide by zero and NaN
            # cannot be converted to int; `not price > 0` rejects both.
            # The feed is left unmarked so it is retried on the next bar.
            if not price > 0:
                continue

            # A feed without an allocation gets size 0 instead of a KeyError
            allocation = self.allocations.get(name, 0)
            size = int(allocation / price)
            self.buy(data=data, size=size)
            self.bought[name] = True

    def stop(self):
        """Issue a sell for every open long position and log it."""
        # Called at the end of the backtest
        # NOTE(review): orders submitted from stop() are presumably never
        # executed because no further bars are processed -- confirm against
        # the backtrader docs. If so, the message below reports the intent to
        # sell and the position is still open when the run finishes.
        for data in self.datas:
            position = self.getposition(data)
            if position.size > 0:
                self.sell(data=data, size=position.size)
                print(f"SELL at END: {data._name} @ {data.close[0]:.2f}, Size: {position.size}")

def run_custom_backtest(dataframes_dict, allocations_dict, start_date, end_date, budget):
    """Run BuyAndHoldCustom over a date window and summarise the outcome.

    Parameters
    ----------
    dataframes_dict : dict
        Maps a name to a date-indexed price DataFrame. Each frame is sliced
        to the window and added as one data feed under that name. The
        caller's frames are not modified.
    allocations_dict : dict
        Passed to the strategy as its ``allocations`` parameter.
    start_date, end_date
        Bounds used to slice each frame with ``df.loc[start_date:end_date]``.
    budget : number
        Starting cash given to the broker.

    Returns
    -------
    dict
        'start_value', 'end_value', 'total_gain', 'percent_gain', 'end_cash'
        and the 'cerebro' instance used for the run. 'percent_gain' is NaN
        when the starting value is 0, because a relative gain is undefined.
    """
    cerebro = bt.Cerebro()
    cerebro.broker.set_cash(budget)

    for stock_name, df in dataframes_dict.items():
        # Slice first, then copy: only the window is duplicated, and renaming
        # the columns below cannot touch the caller's frame.
        window = df.loc[start_date:end_date].copy()
        window.columns = window.columns.str.capitalize()  # Ensure 'Close' column exists
        cerebro.adddata(bt.feeds.PandasData(dataname=window), name=stock_name)

    # Pass allocations into strategy as parameter
    cerebro.addstrategy(BuyAndHoldCustom, allocations=allocations_dict)

    start_value = cerebro.broker.getvalue()
    cerebro.run()
    end_value = cerebro.broker.getvalue()
    end_cash = cerebro.broker.getcash()

    total_gain = end_value - start_value
    # Guard the relative gain against a zero starting value
    if start_value:
        percent_gain = total_gain / start_value * 100
    else:
        percent_gain = float('nan')

    return {
        'start_value': start_value,
        'end_value': end_value,
        'total_gain': total_gain,
        'percent_gain': percent_gain,
        'end_cash': end_cash,
        'cerebro': cerebro
    }


# Strategy Constant Weights

In [114]:
# Total capital to allocate, in dollars
budget = 100000

# Equal-weight table: one row per ticker, every row gets the same weight.
# The weight is rounded to 3 decimals, so the weights may not sum to exactly 1.
investment_strategy = pd.DataFrame(tickers, columns=["Symbol"]).assign(
    Weight=round(1 / len(tickers), 3),
    Position_Size=lambda frame: budget * frame["Weight"],
)

investment_strategy

Unnamed: 0,Symbol,Weight,Position_Size
0,AVGO,0.062,6200.0
1,AXP,0.062,6200.0
2,BAC,0.062,6200.0
3,CB,0.062,6200.0
4,CMG,0.062,6200.0
5,EA,0.062,6200.0
6,EBAY,0.062,6200.0
7,GRMN,0.062,6200.0
8,IBM,0.062,6200.0
9,IT,0.062,6200.0


In [115]:
# Location of the saved constant-weights portfolio
strategy_one_path = "./Data/First Strategy"
strategy_one_file_name = "constant_weights_portfolio.csv"

# Display the saved file as the cell output
strategy_one_csv = os.path.join(strategy_one_path, strategy_one_file_name)
pd.read_csv(strategy_one_csv)

Unnamed: 0,Symbol,Weight,Position_Size
0,AVGO,0.062,6200.0
1,AXP,0.062,6200.0
2,BAC,0.062,6200.0
3,CB,0.062,6200.0
4,CMG,0.062,6200.0
5,EA,0.062,6200.0
6,EBAY,0.062,6200.0
7,GRMN,0.062,6200.0
8,IBM,0.062,6200.0
9,IT,0.062,6200.0


In [116]:
# Dollar allocation per symbol, passed to the strategy as its allocations
position_size_dict = dict(
    zip(investment_strategy["Symbol"], investment_strategy["Position_Size"])
)

# create company dicts (shallow copy of the ticker -> price-frame mapping)
dataframes_dict = stock_data_dict.copy()

result = run_custom_backtest(
    dataframes_dict,
    position_size_dict,
    '2024-12-01',
    '2025-05-31',
    budget,
)

# Headline figures of the run
for label, key in (
    ("Initial Value:", 'start_value'),
    ("Final Value:", 'end_value'),
    ("Total Gain:", 'total_gain'),
):
    print(label, result[key])
print("Percent Gain:", result['percent_gain'], "%")

SELL at END: AVGO @ 242.07, Size: 37
SELL at END: AXP @ 294.05, Size: 20
SELL at END: BAC @ 44.13, Size: 131
SELL at END: CB @ 297.20, Size: 21
SELL at END: CMG @ 50.08, Size: 102
SELL at END: EA @ 143.78, Size: 37
SELL at END: EBAY @ 73.17, Size: 98
SELL at END: GRMN @ 202.97, Size: 29
SELL at END: IBM @ 259.06, Size: 27
SELL at END: IT @ 436.42, Size: 11
SELL at END: LEG @ 9.06, Size: 486
SELL at END: MHK @ 100.61, Size: 45
SELL at END: MS @ 128.03, Size: 47
SELL at END: ORLY @ 91.17, Size: 74
SELL at END: XL @ 0.10, Size: 41333
SELL at END: NoRisk @ 1.00, Size: 6200
Initial Value: 100000
Final Value: 96057.76670000001
Total Gain: -3942.2332999999926
Percent Gain: -3.9422332999999927 %


# Strategy Markowitz

## Final Portfolio

In [117]:
# Saved weights of the final portfolio
strategy_two_path = "Data/Second Strategy"
final_portfolio_name = "final_portfolio.csv"

# Read the weights and round every numeric column to 3 decimals
final_portfolio_csv = os.path.join(strategy_two_path, final_portfolio_name)
final_portfolio = pd.read_csv(final_portfolio_csv).round(3)
final_portfolio

Unnamed: 0,Ticker,Weight
0,CB,0.134
1,ORLY,0.126
2,IBM,0.124
3,GRMN,0.113
4,EA,0.087
5,AXP,0.071
6,EBAY,0.063
7,MS,0.061
8,BAC,0.061
9,CMG,0.06


In [118]:
# Total capital to allocate, in dollars
budget = 100000

# Allocation table built from the saved weights: dollars = budget * weight
investment_strategy = (
    final_portfolio
    .rename(columns={"Ticker": "Symbol"})
    .assign(Position_Size=lambda frame: budget * frame["Weight"])
)

# The backtest feeds every frame in stock_data_dict, NoRisk included, so list
# it here with an explicit zero allocation
investment_strategy.loc[len(investment_strategy)] = ["NoRisk", 0, 0]

investment_strategy

Unnamed: 0,Symbol,Weight,Position_Size
0,CB,0.134,13400.0
1,ORLY,0.126,12600.0
2,IBM,0.124,12400.0
3,GRMN,0.113,11300.0
4,EA,0.087,8700.0
5,AXP,0.071,7100.0
6,EBAY,0.063,6300.0
7,MS,0.061,6100.0
8,BAC,0.061,6100.0
9,CMG,0.06,6000.0


In [119]:
# Symbol -> dollars to invest, taken from the final-portfolio table
position_size_dict = dict(
    zip(investment_strategy["Symbol"], investment_strategy["Position_Size"])
)

# create company dicts (shallow copy of the ticker -> price-frame mapping)
dataframes_dict = stock_data_dict.copy()

backtest_kwargs = dict(
    dataframes_dict=dataframes_dict,
    allocations_dict=position_size_dict,
    start_date='2024-12-01',
    end_date='2025-05-31',
    budget=budget,
)
result = run_custom_backtest(**backtest_kwargs)

# Headline figures of the run
print("Initial Value:", result['start_value'])
print("Final Value:", result['end_value'])
print("Total Gain:", result['total_gain'])
print("Percent Gain:", result['percent_gain'], "%")

SELL at END: AXP @ 294.05, Size: 23
SELL at END: BAC @ 44.13, Size: 129
SELL at END: CB @ 297.20, Size: 46
SELL at END: CMG @ 50.08, Size: 99
SELL at END: EA @ 143.78, Size: 52
SELL at END: EBAY @ 73.17, Size: 99
SELL at END: GRMN @ 202.97, Size: 52
SELL at END: IBM @ 259.06, Size: 54
SELL at END: IT @ 436.42, Size: 11
SELL at END: LEG @ 9.06, Size: 211
SELL at END: MHK @ 100.61, Size: 4
SELL at END: MS @ 128.03, Size: 46
SELL at END: ORLY @ 91.17, Size: 151
SELL at END: XL @ 0.10, Size: 4000
Initial Value: 100000
Final Value: 98640.60999999999
Total Gain: -1359.390000000014
Percent Gain: -1.3593900000000139 %


## Max Sharpe Portfolio

In [120]:
# Saved weights of the max-Sharpe portfolio
strategy_two_path = "./Data/Second Strategy"
max_sharpe_portfolio_file_name = "max_sharpe_portfolio.csv"

max_sharpe_csv = os.path.join(strategy_two_path, max_sharpe_portfolio_file_name)
max_sharpe_portfolio = pd.read_csv(max_sharpe_csv).round(3)

# The file labels the risk-free asset "RISK_FREE"; the price data calls it "NoRisk"
is_risk_free = max_sharpe_portfolio["Ticker"] == "RISK_FREE"
max_sharpe_portfolio.loc[is_risk_free, "Ticker"] = "NoRisk"

# Total capital to allocate, in dollars
budget = 100000

# Allocation table: dollars = budget * weight
investment_strategy = (
    max_sharpe_portfolio
    .rename(columns={"Ticker": "Symbol"})
    .assign(Position_Size=lambda frame: budget * frame["Weight"])
)

investment_strategy

Unnamed: 0,Symbol,Weight,Position_Size
0,LEG,0.499,49900.0
1,NoRisk,0.315,31500.0
2,XL,0.176,17600.0
3,MHK,0.01,1000.0
4,IT,0.0,0.0
5,MS,0.0,0.0
6,CMG,0.0,0.0
7,GRMN,0.0,0.0
8,ORLY,0.0,0.0
9,BAC,0.0,0.0


In [121]:
# Symbol -> dollars to invest, taken from the max-Sharpe table
position_size_dict = {
    symbol: dollars
    for symbol, dollars in zip(
        investment_strategy["Symbol"], investment_strategy["Position_Size"]
    )
}

# create company dicts (shallow copy of the ticker -> price-frame mapping)
dataframes_dict = stock_data_dict.copy()

result = run_custom_backtest(
    dataframes_dict,
    position_size_dict,
    start_date='2024-12-01',
    end_date='2025-05-31',
    budget=budget,
)

# Headline figures of the run
for label, key in (
    ("Initial Value:", 'start_value'),
    ("Final Value:", 'end_value'),
    ("Total Gain:", 'total_gain'),
):
    print(label, result[key])
print("Percent Gain:", result['percent_gain'], "%")

SELL at END: LEG @ 9.06, Size: 3913
SELL at END: MHK @ 100.61, Size: 7
SELL at END: XL @ 0.10, Size: 117333
SELL at END: NoRisk @ 1.00, Size: 31500
Initial Value: 100000
Final Value: 79624.26670000001
Total Gain: -20375.733299999993
Percent Gain: -20.375733299999993 %


## Min Variance

In [122]:
# Saved weights of the minimum-variance portfolio
strategy_two_path = "./Data/Second Strategy"
min_variance_portfolio_file_name = "min_variance_portfolio.csv"

min_variance_csv = os.path.join(strategy_two_path, min_variance_portfolio_file_name)
min_variance_portfolio = pd.read_csv(min_variance_csv).round(3)

# The file labels the risk-free asset "RISK_FREE"; the price data calls it "NoRisk"
is_risk_free = min_variance_portfolio["Ticker"] == "RISK_FREE"
min_variance_portfolio.loc[is_risk_free, "Ticker"] = "NoRisk"

# Total capital to allocate, in dollars
budget = 100000

# Allocation table: dollars = budget * weight
investment_strategy = (
    min_variance_portfolio
    .rename(columns={"Ticker": "Symbol"})
    .assign(Position_Size=lambda frame: budget * frame["Weight"])
)

investment_strategy

Unnamed: 0,Symbol,Weight,Position_Size
0,NoRisk,0.572,57200.0
1,IBM,0.077,7700.0
2,EA,0.076,7600.0
3,CB,0.065,6500.0
4,LEG,0.05,5000.0
5,EBAY,0.041,4100.0
6,CMG,0.036,3600.0
7,ORLY,0.035,3500.0
8,GRMN,0.025,2500.0
9,XL,0.015,1500.0


In [123]:
# Symbol -> dollars to invest, taken from the minimum-variance table
position_size_dict = dict(
    zip(investment_strategy["Symbol"], investment_strategy["Position_Size"])
)

# create company dicts (shallow copy of the ticker -> price-frame mapping)
dataframes_dict = stock_data_dict.copy()

backtest_kwargs = dict(
    dataframes_dict=dataframes_dict,
    allocations_dict=position_size_dict,
    start_date='2024-12-01',
    end_date='2025-05-31',
    budget=budget,
)
result = run_custom_backtest(**backtest_kwargs)

# Headline figures of the run
print("Initial Value:", result['start_value'])
print("Final Value:", result['end_value'])
print("Total Gain:", result['total_gain'])
print("Percent Gain:", result['percent_gain'], "%")

SELL at END: CB @ 297.20, Size: 22
SELL at END: CMG @ 50.08, Size: 59
SELL at END: EA @ 143.78, Size: 45
SELL at END: EBAY @ 73.17, Size: 64
SELL at END: GRMN @ 202.97, Size: 11
SELL at END: IBM @ 259.06, Size: 33
SELL at END: LEG @ 9.06, Size: 392
SELL at END: MHK @ 100.61, Size: 3
SELL at END: ORLY @ 91.17, Size: 42
SELL at END: XL @ 0.10, Size: 10000
SELL at END: NoRisk @ 1.00, Size: 57199
Initial Value: 100000
Final Value: 98437.02999999997
Total Gain: -1562.9700000000303
Percent Gain: -1.5629700000000302 %


# Strategy LSTM