In [1]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 09
### Team Member Names: Aryan Singh, Jack Smith, and Samyak Jain
### Team Strategy Chosen: Market Meet (Market Beat, Market Meet, Risk-Free)

# Stock Filter:
This cell filters the initial CSV file of tickers. We remove any stocks that don't trade in USD or CAD and exclude stocks with average daily volume below 5,000 shares between October 1, 2024 and September 30, 2025 excluding months with less that 18 trading days. This filter also removes any delisted or fake tickers from the list.

ADD Duplicate ticker remover and any cross listed companies(only keep one of them).

In [2]:
# dates per instructions for thr filtering of the list
start_date = '2024-10-01'
end_date = '2025-09-30'

# min. volume and min. trading days required
min_volume = 5000
min_trading_days=18

# ticker list
#tickers_df = pd.read_csv('Tickers_Example.csv', header=None, names=['Ticker'])
tickers_df = pd.DataFrame({
    'Ticker': [
        'AAPL', 'MSFT', 'NVDA', 'INTC', 'CSCO',
        'JPM', 'BAC', 'WFC', 'GS', 'MS',
        'CAT', 'BA', 'UPS', 'HON', 'GE',
        'XOM', 'CVX', 'COP', 'SLB', 'EOG',
        'AMZN', 'TSLA', 'HD', 'NKE', 'MCD',
        'GOOGL', 'META', 'DIS', 'NFLX', 'VZ',
        'LIN', 'APD', 'NEM', 'FCX', 'DOW',
         'WMT', 'PG', 'KO', 'PEP', 'COST',
        'JNJ', 'UNH', 'PFE', 'ABBV', 'MRK',
        'NEE', 'DUK', 'SO', 'D', 'AEP',
        'AMT', 'PLD', 'EQIX', 'SPG','SPG' ,'INVALIDTIC', 'ICICIBANK', 'GC=F']})

# converts the DataFrame column to a list
tickers = tickers_df['Ticker'].tolist()

# created a function called is_us_or_ca_ticker_by_currency which will check if a ticker trades in USD or CAD
def is_us_or_ca_ticker_by_currency(ticker):
    try:
        # get the currency info. of ticker
        currency = yf.Ticker(ticker).info.get('currency', '')
        # returns True if ticker trades in USD or CAD
        return currency in ['USD', 'CAD']
    except Exception:
        return False

# created a function called average_daily_volume_adjusted to calculate the avg daily volume with month filtering
def average_daily_volume_adjusted(df):
    df.index = pd.to_datetime(df.index)
    # count trading days in each month
    monthly_counts = df.groupby(df.index.to_period('M')).size()

    # keep months that have >= min_trading_days
    valid_months = monthly_counts[monthly_counts >= min_trading_days].index

    # filter out invalid months
    df_filtered = df[df.index.to_period('M').isin(valid_months)]
    
    if not df_filtered.empty:
        # calculates the average volume
        avg = df_filtered['Volume'].mean()
        
        # safety check in case avg becomes a Series
        if isinstance(avg, pd.Series):
            avg = avg.values[0]
            
        return avg
        
    else:
        return 0

# list to store tickers that pass all the filters
filtered_tickers = []

#set to store seen tickers
seen = set()

# created a for loop which will loop through each ticker to check if it meets the required instructions
for ticker in tickers:

    #removes a ticker if its already been seen before
    if ticker in seen:
        print(f"[DUPLICATE] {ticker}: Duplicate ticker skipped.")
        continue

    # removing ticker if it doesn’t trade in USD or CAD
    if not is_us_or_ca_ticker_by_currency(ticker):
        print(f"[REMOVED] {ticker}: Not USD or CAD currency.")
        continue

    # retrieving neccessary info.
    ticker_data = yf.Ticker(ticker)
    hist = ticker_data.history(start=start_date, end=end_date)

    # removing timezone information
    if isinstance(hist.index, pd.DatetimeIndex) and hist.index.tz is not None:
        hist.index = hist.index.tz_convert(None)

    # removing ticker if no historical data was returned
    if hist.empty:
        print(f"[DELISTED] {ticker}: No historical data returned.")
        continue

    # removing ticker if the Volume column is missing
    if 'Volume' not in hist.columns:
        print(f"[REMOVED] {ticker}: No Volume column in stock data.")
        continue

    # calculates the adjusted average daily volume
    avg_vol = average_daily_volume_adjusted(hist)

    # removing ticker if volume is below threshold
    if avg_vol < min_volume:
        print(f"[REMOVED] {ticker}: Avg. Volume {avg_vol:.2f} < {min_volume}.")
        continue

    # adds a ticker to seen if it passes all filters
    seen.add(ticker)
    # add ticker to list if it passses all checks
    filtered_tickers.append(ticker)

# display filtered tickers
display(pd.DataFrame(filtered_tickers, columns=['Filtered Tickers']))

[DUPLICATE] SPG: Duplicate ticker skipped.


HTTP Error 404: 


[REMOVED] INVALIDTIC: Not USD or CAD currency.


HTTP Error 404: 


[REMOVED] ICICIBANK: Not USD or CAD currency.
[REMOVED] GC=F: Avg. Volume 4468.72 < 5000.


Unnamed: 0,Filtered Tickers
0,AAPL
1,MSFT
2,NVDA
3,INTC
4,CSCO
5,JPM
6,BAC
7,WFC
8,GS
9,MS


### Optimal Benchmark Weighting On Different Sectors:

TSX Composite weighting comes from https://investingnews.com/daily/resource-investing/how-is-the-sptsx-composite-index-weighted/

S&P500 Weighting comes from: https://www.schwab.com/learn/story/stock-sector-outlook

Note that taking the equally weighted average of the weightings results in a total weight of 99.95%. To fix this, we simply added 0.05% to the Financial Services sector as it is the most correlated sector to the benchmark.

In the end, we want our portfolio weightings per sector to be similar to the benchmark. When discussing Industry Diversification in Topic 8 (Portfolio Approach) during class, we learned that companies in the same sector or industry tend to move together because they react similarly to market forces. This idea can be applied to reason that, if our final portfolio has similar weightings to the benchmark per sector, our portfolio would likely move in a similar way to the benchmark as well.

In this section, we created a dictionary of sectors and their weightings in the benchmark, so that the proper weightings can be applied to our final portfolio later.

In [3]:
# setting the benchmark weighting on different sectors
bench_weights = {
    'Financial Services': 0.2275,
    'Technology': 0.198,
    'Industrials': 0.1115,
    'Energy': 0.1035,
    'Consumer Cyclical': 0.0705,
    'Communication Services': 0.0635,
    'Basic Materials': 0.072,
    'Consumer Defensive': 0.0515,
    'Healthcare': 0.0495,
    'Utilities': 0.0315,
    'Real Estate': 0.021
}

### DataFrame To Store The Necessary Information (Ticker, Sector, Price, Currency, And Market Cap):

This section creates a DataFrame containing each stock's ticker, sector, current price (in its trading currency), trading currency, and its market cap in CAD. We get this information from yfinance. The currency and market cap data are important for later calculations involving our portfolio construction and weighting, as well as meeting the portfolio constraints outlined in the assignment instructions. We convert all market caps to CAD, as our portfolio constraints are set in CAD, and so we can accurately compare the market cap of both US and Canadian companies by having all market caps in CAD. Stocks with unknown sectors are removed since we cannot properly allocate them according to our benchmark weighting.

In [4]:
# initial budget
initial_capital = 1_000_000

# created a function called get_usd_to_cad_rate which will convert from USD -> CAD using the latest conversion rate
def get_usd_to_cad_rate():

    # necessary info. needed for conversion
    fx = yf.Ticker("CADUSD=X")
    fx_hist = fx.history(period="1d")
    rate_cadusd = fx_hist["Close"].iloc[-1]
    return 1 / rate_cadusd

# storing the USD -> CAD conversion rate
usd_to_cad = get_usd_to_cad_rate()

# created empty lists to store the tickers, sectors, prices, currencies, and market caps.
tickers_list = []
sectors_list = []
prices_list = []
currencies_list = []
market_caps_list = []

# created a for loop which will loop through the filtered tickers list
for ticker in filtered_tickers:
    
    try:

        # extracting necessary info.
        stock = yf.Ticker(ticker)
        info = stock.info or {}
        fast = stock.fast_info or {}

        # sector name
        sector = info.get('sector', 'Unknown')
        
        # if sector is not valid, skip the current ticker
        if sector == 'Unknown':
            print(f"[REMOVED] {ticker}: Unknown Sector '{sector}'")
            continue

        # getting the latest price data
        hist = stock.history(period='1d')

        # skip ticker if no price available
        if hist.empty or 'Close' not in hist.columns:
            print(f"[REMOVED] {ticker}: No Price Data.")
            continue

        # extract closing price
        price = float(hist['Close'].iloc[-1])

        # deciding which currency to use
        currency = fast.get('currency') or info.get('currency') or 'USD'
        if currency not in ['USD', 'CAD']:
            currency = 'USD'

        # getting the market cap
        market_cap = fast.get('market_cap', info.get('marketCap', 0))

        # if there are missing market caps, set default value to 0
        if market_cap is None or pd.isna(market_cap) or market_cap <= 0:
            market_cap = 0

        # convert USD market cap to CAD
        if currency == 'USD':
            market_cap_cad = market_cap * usd_to_cad
        else:
            market_cap_cad = market_cap

        # storing the results
        tickers_list.append(ticker)
        sectors_list.append(sector)
        prices_list.append(price)
        currencies_list.append(currency)
        market_caps_list.append(market_cap_cad)

    # handles any errors and exceptions
    except Exception as e:
        print(f"Skipped {ticker}: {e}")
        continue

# created a DataFrame called df which will store all the valid stocks and its info
df = pd.DataFrame({
    'Ticker': tickers_list,
    'Sector': sectors_list,
    'Price': prices_list,
    'Currency': currencies_list, 
    'Market Cap': market_caps_list
})

# printing results
print(df.to_markdown())
print(f"\nSector By Size:\n{df.groupby('Sector').size()}")


|    | Ticker   | Sector                 |   Price | Currency   |   Market Cap |
|---:|:---------|:-----------------------|--------:|:-----------|-------------:|
|  0 | AAPL     | Technology             |  271.49 | USD        |  5.67737e+12 |
|  1 | MSFT     | Technology             |  472.12 | USD        |  4.94509e+12 |
|  2 | NVDA     | Technology             |  178.88 | USD        |  6.14721e+12 |
|  3 | INTC     | Technology             |   34.5  | USD        |  2.31892e+11 |
|  4 | CSCO     | Technology             |   76.1  | USD        |  4.23692e+11 |
|  5 | JPM      | Financial Services     |  298.02 | USD        |  1.15475e+12 |
|  6 | BAC      | Financial Services     |   51.56 | USD        |  5.38146e+11 |
|  7 | WFC      | Financial Services     |   83.11 | USD        |  3.75161e+11 |
|  8 | GS       | Financial Services     |  774.03 | USD        |  3.30178e+11 |
|  9 | MS       | Financial Services     |  158.17 | USD        |  3.55792e+11 |
| 10 | CAT      | Industrial

### Removing The Lowest Market Cap Stocks:

This cell filters out the low market cap stocks by requiring a minimum market cap of $1B CAD. These stocks are excluded because they tend to have higher volatility and often do not correlate super well with the market. For our market meet strategy, we need stocks that move in correlation with the overall market, and large-cap stocks generally demonstrate stronger correlation with large indices. The code also ensures we maintain at least one small-cap stock (< 2B CAD).

In [5]:
# setting the minimum and maximum thresholds for small-cap and large-cap companies
min_small_cap = 1_000_000_000
limit_small_cap = 2_000_000_000
large_cap_cutoff = 10_000_000_000

# removing any penny stocks (market cap < min_small_cap)
df_filtered = df[df['Market Cap'] >= min_small_cap].copy()

# identifying the remaining small-cap stocks after the filter
small_caps = df_filtered[df_filtered['Market Cap'] < limit_small_cap]

# identifying the remaining large-cap stocks after the filter
large_caps = df_filtered[df_filtered['Market Cap'] > large_cap_cutoff]

# created an if statement which ensures that we still keep at least 1 small-cap stock
if len(small_caps) == 0:

    # find the original small-caps stocks before filtering
    original_small_caps = df[(df['Market Cap'] < limit_small_cap) & (df['Market Cap'] >= min_small_cap)]

    # if any exist, pick the in-between small-cap stock with highest market cap
    if not original_small_caps.empty:
        
        best_small_cap = original_small_caps.sort_values(by='Market Cap', ascending=False).iloc[0]

        # add the small cap stock back into the filtered dataframe
        df_filtered = pd.concat([df_filtered, pd.DataFrame([best_small_cap])], ignore_index=True)

# created an if statement which ensures that we still keep at least 1 large-cap stock
if len(large_caps) == 0:

    # find the original large-caps stocks before filtering
    original_large_caps = df[df['Market Cap'] > large_cap_cutoff]

    # if any exist, pick the in-between large-cap stock with highest market cap
    if not original_large_caps.empty:
        
        best_large_cap = original_large_caps.sort_values(by='Market Cap', ascending=False).iloc[0]

        # add the large cap stock back into the filtered dataframe
        df_filtered = pd.concat([df_filtered, pd.DataFrame([best_large_cap])], ignore_index=True)

# updating the DataFrame to reflect these changes
df = df_filtered.reset_index(drop=True)

# storting the small-cap and large-cap tickers we must keep
small_cap_keep = None
large_cap_keep = None

# looking for current small-caps inside df
original_small_caps = df[(df['Market Cap'] < limit_small_cap) & (df['Market Cap'] >= min_small_cap)]

# choosing the small-cap stock with the highest market cap among the stocks
if not original_small_caps.empty:
    small_cap_keep = original_small_caps.sort_values(by='Market Cap', ascending=False).iloc[0]['Ticker']

# looking for current large-caps inside df
original_large_caps = df[df['Market Cap'] > large_cap_cutoff]

# choosing the large-cap stock with the highest market cap among the stocks
if not original_large_caps.empty:
    large_cap_keep = original_large_caps.sort_values(by='Market Cap', ascending=False).iloc[0]['Ticker']
    
df

Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap
0,AAPL,Technology,271.48999,USD,5677369000000.0
1,MSFT,Technology,472.119995,USD,4945090000000.0
2,NVDA,Technology,178.880005,USD,6147210000000.0
3,INTC,Technology,34.5,USD,231891800000.0
4,CSCO,Technology,76.099998,USD,423691800000.0
5,JPM,Financial Services,298.019989,USD,1154748000000.0
6,BAC,Financial Services,51.560001,USD,538146000000.0
7,WFC,Financial Services,83.110001,USD,375161300000.0
8,GS,Financial Services,774.030029,USD,330178200000.0
9,MS,Financial Services,158.169998,USD,355792100000.0


### Calculating Annual Volatility:

In this section, we will calculate the Annual Volatility. The formula for such a calculation and its importance are outlined here: https://www.fool.com/investing/how-to-calculate/annualized-volatility/

Volatility is computed as the standard deviation of daily returns multiplied by the square root of the number of trading periods. In this section, we assume 251 trading days in the 1-year period. It is important that this would inflate the volatility of stocks that have not been trading for at least 1 year. However, we deemed this okay, as for our strategy, we wish to invest in well-established stocks that have a history of following the benchmark, so inflating the volatility of stocks that have traded for less than 1 year and then removing those stocks is in line with our strategy and goal. We also remove any stocks that have traded less than 60 total days in the past year, as again, we wish to only invest in well-established stocks. We wish to remove high volatility stocks, as high volatility could cause our portfolio to deviate significantly from the benchmark. We want to remove stocks with less than 60 trading days in the last year because with that low number of trading days, the volatility of the stock becomes statistically unreliable. 

In [6]:
# get today's date
end_date = datetime.now()

# get the date exactly 1 year ago
start_date = datetime(end_date.year - 1, end_date.month, end_date.day)

# number of trading days in a typical year
trading_days_per_year = 251

# minimum number of price days required to consider the data usable, in our case.
min_days_required = 60

# created a list to store the volatilities
volatilities = []

# created a for loop which will loop through each ticker in the DataFrame
for ticker in df['Ticker']:

    try:
        # extracting necessary info.
        stock = yf.Ticker(ticker)
        hist = stock.history(start=start_date, end=end_date)

        # remove timezone if present
        if isinstance(hist.index, pd.DatetimeIndex) and hist.index.tz is not None:
            hist.index = hist.index.tz_convert(None)

        # skipping the tickers with missing or too few data points
        if hist is None or hist.empty or len(hist) < min_days_required:
            volatilities.append(np.nan)
            continue

        # calculating daily returns
        returns = hist['Close'].pct_change().dropna()

        # skip the current ticker if returns cannot be calculated
        if returns.empty:
            volatilities.append(np.nan)
            continue
            
        # calculating the annualized volatility, per the website
        annual_vol = returns.std() * np.sqrt(trading_days_per_year)

        # add volatility to the list
        volatilities.append(annual_vol)

    # catch any errors or exceptions that arise
    except Exception:
        volatilities.append(np.nan)
        continue

# add the volatility column to DataFrame
df['Volatility'] = volatilities

# removes any unusable tickers
df = df.dropna(subset=['Volatility']).reset_index(drop=True)

# displaying DataFrame
df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,AAPL,Technology,271.48999,USD,5677369000000.0,0.32602
1,MSFT,Technology,472.119995,USD,4945090000000.0,0.243743
2,NVDA,Technology,178.880005,USD,6147210000000.0,0.498783
3,INTC,Technology,34.5,USD,231891800000.0,0.628142
4,CSCO,Technology,76.099998,USD,423691800000.0,0.235571
5,JPM,Financial Services,298.019989,USD,1154748000000.0,0.241974
6,BAC,Financial Services,51.560001,USD,538146000000.0,0.269122
7,WFC,Financial Services,83.110001,USD,375161300000.0,0.29208
8,GS,Financial Services,774.030029,USD,330178200000.0,0.309949
9,MS,Financial Services,158.169998,USD,355792100000.0,0.308098


### Before Filtering High Volatile Stocks:

In [7]:
# Sanity check to see how much tickers are here before we filter out the high volatile stocks
print("Before filtering:", len(df))
print(df.groupby("Sector").size())

Before filtering: 54
Sector
Basic Materials           5
Communication Services    5
Consumer Cyclical         5
Consumer Defensive        5
Energy                    5
Financial Services        5
Healthcare                5
Industrials               5
Real Estate               4
Technology                5
Utilities                 5
dtype: int64


### Removing Highest Volatility Stocks:

This section removes the top 25% most volatile stocks within each sector, keeping only the 75% with the lowest volatility. High-volatility stocks are more likely to cause our portfolio to deviate significantly from the benchmark. By filtering within each sector rather than across all eligible stocks, we can maintain sector diversification while still being able to weight sectors according to the benchmark weightings. Sectors with fewer than four stocks are left alone to preserve diversification within each sector. This approach balances risk reduction with our need for an adequate number of stocks in each sector.

In [8]:
# cutoff for how many stocks to keep in each sector
cutoff = 0.75

# created lists to store each sector’s filtered tickers and all sectors present in the DataFrame
filtered_rows = []
seen_sectors = []

# created a for loop which will loop through the DataFrame to find and store each unique sector
for i in range(len(df)):
    sector_name = df.iloc[i]['Sector']
    if sector_name not in seen_sectors:
        seen_sectors.append(sector_name)

# created a for loop which will loop through every sector that appears in the DataFrame
for sector in seen_sectors:

    # created a dataframe for just the current sector
    sector_df = df[df['Sector'] == sector].copy()

    # if a sector has fewer than 4 tickers, skip filtering and keep all of them
    if len(sector_df) < 4:
        filtered_rows.append(sector_df)
        continue

    # sort the sector by volatility (ascending order)
    sector_df = sector_df.sort_values(by='Volatility').reset_index(drop=True)

    # calculating how many tickers to keep
    keep_count = int(len(sector_df) * cutoff)

    # making sure at least one stock is kept even if keep_count = 0
    if keep_count < 1:
        keep_count = 1

    # selecting the lowest-volatility stocks
    sector_kept = sector_df.iloc[:keep_count]

    # adding these filtered stocks to the list
    filtered_rows.append(sector_kept)

# combining all the filtered sector dataframes back into the normal DataFrame (df)
df = pd.concat(filtered_rows, ignore_index=True)

# outputting DataFrame
df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,CSCO,Technology,76.099998,USD,423691800000.0,0.235571
1,MSFT,Technology,472.119995,USD,4945090000000.0,0.243743
2,AAPL,Technology,271.48999,USD,5677369000000.0,0.32602
3,JPM,Financial Services,298.019989,USD,1154748000000.0,0.241974
4,BAC,Financial Services,51.560001,USD,538146000000.0,0.269122
5,WFC,Financial Services,83.110001,USD,375161300000.0,0.29208
6,HON,Industrials,190.020004,USD,180436000000.0,0.251582
7,GE,Industrials,287.440002,USD,429517600000.0,0.307153
8,CAT,Industrials,550.429993,USD,364786600000.0,0.313092
9,XOM,Energy,117.080002,USD,703349500000.0,0.232547


### After Filtering High Volatile Stocks:

In [9]:
# Sanity check to see how much tickers are here after we filter out the high volatile stocks
print("After filtering:", len(df))
print(df.groupby("Sector").size())

After filtering: 33
Sector
Basic Materials           3
Communication Services    3
Consumer Cyclical         3
Consumer Defensive        3
Energy                    3
Financial Services        3
Healthcare                3
Industrials               3
Real Estate               3
Technology                3
Utilities                 3
dtype: int64


### Defining the Benchmark:

In this cell, we are constructing our benchmark as an equally-weighted average of daily returns from the S&P500 and TSX Composite indices over the past year, as outlined in the assignment. We calculate daily percentage returns for both indices and proceed with a simple arithemic average them to create a single benchmark return. This benchmark will serve as our target for correlation when selecting stocks to invest in. Aligning the dates between both indices ensures accurate return matching for correlation calculations. 

In [10]:
# getting the neccessary dates (time period is one year ago from current date)
end_date = datetime.now()
start_date = datetime(end_date.year - 1, end_date.month, end_date.day)

# index symbols for TSX & S&P500
tsx_symbol = "^GSPTSE"
spx_symbol = "^GSPC"

# converting the index symbols into tickers
tsx_ticker = yf.Ticker(tsx_symbol)
spx_ticker = yf.Ticker(spx_symbol)

# extracting 1 year of historical price data for both tickers
tsx_hist = tsx_ticker.history(start=start_date, end=end_date)
spx_hist = spx_ticker.history(start=start_date, end=end_date)

# fixing timezone issues in case of any errors that may arise relating to timestamps
if isinstance(tsx_hist.index, pd.DatetimeIndex) and tsx_hist.index.tz is not None:
    tsx_hist.index = tsx_hist.index.tz_convert(None)

# fixing timezone issues in case of any errors that may arise relating to timestamps
if isinstance(spx_hist.index, pd.DatetimeIndex) and spx_hist.index.tz is not None:
    spx_hist.index = spx_hist.index.tz_convert(None)

# calculating the daily percentage returns for each benchmark
tsx_returns = tsx_hist['Close'].pct_change()
spx_returns = spx_hist['Close'].pct_change()

# combining both benchmark returns into one DataFrame and removing any NaNs using .dropna()
benchmark_df = (pd.DataFrame({'TSX Return': tsx_returns,'S&P 500 Return': spx_returns})).dropna()

# calculating the simple average return, which will serve as our benchmark return
benchmark_df['Benchmark Return'] = (benchmark_df['TSX Return'] + benchmark_df['S&P 500 Return']) / 2

# stripping off the time component in DataFrame
benchmark_df.index = benchmark_df.index.strftime('%Y-%m-%d')

# outputting DataFrame
benchmark_df


Unnamed: 0_level_0,TSX Return,S&P 500 Return,Benchmark Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-11-22,0.002111,0.003468,0.002790
2024-11-25,-0.001332,0.003020,0.000844
2024-11-26,-0.000209,0.005722,0.002757
2024-11-27,0.003275,-0.003801,-0.000263
2024-11-29,0.004091,0.005608,0.004849
...,...,...,...
2025-11-17,-0.008254,-0.009162,-0.008708
2025-11-18,-0.001320,-0.008256,-0.004788
2025-11-19,0.008054,0.003754,0.005904
2025-11-20,-0.012279,-0.015567,-0.013923


### Calculating Stock-Benchmark Correlations:

This section calculates the correlation between each stock's daily returns and the benchmark returns over the past year. Correlation measures how closely a stock's price movements track with the benchmark. Stocks with higher correlation to the benchmark are more likely to produce returns similar to the market average. We require at least 20 overlapping trading days to ensure statistical significance of the correlation. This correlation data will be used to rank and select the best stocks for tracking the benchmark.

In [11]:
# created an empty list to store the correlations of each stock
correlations = []

# created a for loop which will loop through each stock in the DataFrame
for i in range(len(df)):
    
    ticker = df.iloc[i]['Ticker']
    
    try:

        # extracting necessary historical data
        stock = yf.Ticker(ticker)
        stock_hist = stock.history(start=start_date, end=end_date)

        # if no such data exists, store it as NaN and continue
        if stock_hist is None or stock_hist.empty:
            correlations.append(np.nan)
            continue

        # calculating the daily returns for the stock
        stock_returns = stock_hist['Close'].pct_change().dropna()

        # if no such returns exist, store as NaN and continue
        if stock_returns.empty:
            correlations.append(np.nan)
            continue

        # strip of the time component to match the benchmark_df index
        stock_returns.index = stock_returns.index.strftime('%Y-%m-%d')

        # list to hold common dates between both DataFrames
        dates = []

        # created a for loop which will loop through the stock dates and keep only the ones in the benchmark_df
        for x in stock_returns.index:
            if x in benchmark_df.index:
                dates.append(x)

        # if there are fewer than 20 overlapping dates, correlation is unreliable, and skip it
        if len(dates) < 20:            
            correlations.append(np.nan)
            continue

        # align the stock and benchmark returns using the common date list
        stock_aligned = stock_returns.loc[dates]
        bench_aligned = benchmark_df.loc[dates, 'Benchmark Return']

        # calculate the actual correlation using .corr
        corr_between_stock_and_bench = stock_aligned.corr(bench_aligned)

        # store the correlation value
        correlations.append(corr_between_stock_and_bench)

    # handle any exceptions or errors that may arise and set the default value to NaN
    except Exception:
        correlations.append(np.nan)

### Selecting Top Stocks by Correlation (Picking the Top 2 Stocks In Each Sector):

This section selects the top 2 stocks in each sector based on their correlation to the benchmark. By choosing highly correlated stocks, we maximize the probability that our portfolio will move with the benchmark. The sector-by-sector selection ensures that we maintain enough stocks in each sector to which we allocate weighting. Stocks with missing correlation data are removed.

In [12]:
# adding the correlation results into the DataFrame and removing any tickers that didn't have a correlation value 
df['Correlation'] = correlations
df = df.dropna(subset=['Correlation']).reset_index(drop=True)

# created a list to store all unique sectors
sector_lst = []

# created a for loop that will loop through the df and collect each new sector once
for i in range(len(df)):
    sector = df.iloc[i]['Sector']
    if sector not in sector_lst:
        sector_lst.append(sector)

# created a list to store the top tickers
stocks_chosen = []

# created a for loop to loop through each sector and choose the best 2 tickers
for sector in sector_lst:

    # extracting the rows of the current sector and removing any rows with missing correlation
    sector_df = df[df['Sector'] == sector].copy()
    sector_df = sector_df.dropna(subset=['Correlation'])

    # if the sector has no valid tickers, skip
    if len(sector_df) == 0:
        continue

    # sorting the tickers in the sector from highest to lowest correlation
    sector_df = sector_df.sort_values(by='Correlation', ascending=False).reset_index(drop=True)

    # if a sector has 2 or fewer valid tickers, keep all of them
    if len(sector_df) <= 2:
        chosen = sector_df
    else:
        # else, # take the top 2 strongest tickers
        chosen = sector_df.iloc[:2]

    # store the chosen tickers
    stocks_chosen.append(chosen)

# combine all the selected tickers into a single DataFrame
stocks_chosen_df = pd.concat(stocks_chosen, ignore_index=True)

# created a list to store and ensure at least one small-cap and one large-cap ticker
force_keep_tickers = []

# ensure the required small-cap stock is included
if small_cap_keep is not None:
    if small_cap_keep not in stocks_chosen_df['Ticker'].values:
        row_small_cap = df[df['Ticker'] == small_cap_keep]
        if not row_small_cap.empty:
            force_keep_tickers.append(row_small_cap.iloc[0])

# ensure the required large-cap stock is included
if large_cap_keep is not None:
    if large_cap_keep not in stocks_chosen_df['Ticker'].values:
        row_large_cap = df[df['Ticker'] == large_cap_keep]
        if not row_large_cap.empty:
            force_keep_tickers.append(row_large_cap.iloc[0])

# if we have any forced-kept tickers, append them to the portfolio DataFrame
if len(force_keep_tickers) > 0:
    stocks_chosen_df = pd.concat([stocks_chosen_df, pd.DataFrame(force_keep_tickers)], ignore_index=True)

# outputting the DataFrame
stocks_chosen_df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility,Correlation
0,AAPL,Technology,271.48999,USD,5677369000000.0,0.32602,0.721702
1,CSCO,Technology,76.099998,USD,423691800000.0,0.235571,0.697324
2,JPM,Financial Services,298.019989,USD,1154748000000.0,0.241974,0.76674
3,BAC,Financial Services,51.560001,USD,538146000000.0,0.269122,0.717973
4,GE,Industrials,287.440002,USD,429517600000.0,0.307153,0.68958
5,CAT,Industrials,550.429993,USD,364786600000.0,0.313092,0.681159
6,EOG,Energy,106.970001,USD,82299530000.0,0.28246,0.551885
7,CVX,Energy,149.979996,USD,425812200000.0,0.245732,0.532973
8,AMZN,Consumer Cyclical,220.690002,USD,3324429000000.0,0.3499,0.694489
9,HD,Consumer Cyclical,343.320007,USD,481547200000.0,0.229134,0.536909


### Limiting to 25 Stocks

This cell caps the portfolio at 25 stocks by selecting those with the highest correlation to the benchmark if there are currently more than 25 eligable stocks. We limit the portfolio to 25 stocks as per the assignment outline. The stocks are sorted by correlation in descending order to prioritize higher correlation stocks(stocks that will likely follow the benchmark better).

In [13]:
# created an if statement to check if there more than 25 stocks were selected across all sectors, per instructions
if len(stocks_chosen_df) > 25:

    # sort all selected stocks by correlation (highest to lowest)
    stocks_chosen_df = stocks_chosen_df.sort_values(by="Correlation", ascending=False).reset_index(drop=True)
    
    # keeping only the top 25 highest-correlation stocks
    stocks_chosen_df = stocks_chosen_df.iloc[:25].reset_index(drop=True)

# x = number of stocks
x = len(stocks_chosen_df)

# outputting the number of stocks in the portfolio and the DataFrame itself
print("Number Of Stocks For Portfolio:", x)

stocks_chosen_df

Number Of Stocks For Portfolio: 22


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility,Correlation
0,AAPL,Technology,271.48999,USD,5677369000000.0,0.32602,0.721702
1,CSCO,Technology,76.099998,USD,423691800000.0,0.235571,0.697324
2,JPM,Financial Services,298.019989,USD,1154748000000.0,0.241974,0.76674
3,BAC,Financial Services,51.560001,USD,538146000000.0,0.269122,0.717973
4,GE,Industrials,287.440002,USD,429517600000.0,0.307153,0.68958
5,CAT,Industrials,550.429993,USD,364786600000.0,0.313092,0.681159
6,EOG,Energy,106.970001,USD,82299530000.0,0.28246,0.551885
7,CVX,Energy,149.979996,USD,425812200000.0,0.245732,0.532973
8,AMZN,Consumer Cyclical,220.690002,USD,3324429000000.0,0.3499,0.694489
9,HD,Consumer Cyclical,343.320007,USD,481547200000.0,0.229134,0.536909


Testing Code:

In [14]:
# THIS CODE WAS PRIMARILY USED FOR TESTING
# In this code, it calculates the sector weights based on the total market cap of the selected stocks. It does not affect the portfolio's 
# actual weighting or share calculations.

# sector_totals = {}
# total = 0

# for i in range(len(stocks_chosen_df)):
#     sector = stocks_chosen_df.iloc[i]["Sector"]
#     marketcap = stocks_chosen_df.iloc[i]["Market Cap"]

#     if pd.isna(marketcap) or marketcap <= 0:
#         continue

#     if sector not in sector_totals:
#         sector_totals[sector] = 0.0

#     sector_totals[sector] += marketcap
#     total += marketcap

# portfolio_sector_weights = {}

# for sector in sector_totals:
#     if total > 0:
#         portfolio_sector_weights[sector] = sector_totals[sector] / total
#     else:
#         portfolio_sector_weights[sector] = 0.0

# print("Portfolio Sector Weights:")

# for sector, weight in portfolio_sector_weights.items():
#     print(f"{sector}: {weight:.4f}")


### Portfolio Weight Calculation Using Benchmark Sector Targets:

This cell calculates initial portfolio weights for each stock based on the benchmark sector targets and market cap within each sector. Our weight formula multiplies the benchmark's sector weight by each stock's proportional share of its sector's total market cap. This approach ensures our portfolio's sector allocation is similar to the benchmark while distributing our investment within sectors based on company size. We also apply a minimum weight of (1/2n) and max weight of 15% for each stock in our portfolio.

In [15]:
# enforcing the min. and max. weight allowed per stock
# x = number of stocks
min_weight = 1 / (2 * x)
max_weight = 0.15

# created a dict to store the total market cap of each sector
sector_market_cap_chosen = {}

# created a list to store the initial calculated weights
initial_weights = [0] * x

# created a for loop which will loop through each sector and then calculate total market cap
for i in range(x):

    # retrieving neccessary info.
    stock_sector = stocks_chosen_df.iloc[i]["Sector"]
    cap = float(stocks_chosen_df.iloc[i]["Market Cap"])

    # create a new entry if needed
    if stock_sector not in sector_market_cap_chosen:
        sector_market_cap_chosen[stock_sector] = 0

    # add market cap to sector total
    sector_market_cap_chosen[stock_sector] += cap

# created a for loop to calculate the initial weights based on benchmark sector weights
for i in range(x):

    # retrieving neccessary info.
    row = stocks_chosen_df.iloc[i]
    stock_sector = row["Sector"]
    cap = row["Market Cap"]

    # benchmark weighting for the current sector
    sector_target = bench_weights.get(stock_sector, 0)

    # total sector market cap
    sector_cap_sum = sector_market_cap_chosen[stock_sector]

    # if a sector missing in benchmark or sector cap sum is zero, use equally-weighted method
    if sector_target == 0 or sector_cap_sum <= 0:
        weight = 1 / x
    else:
        # the weight is based on benchmark sector weight * stock's share of sector
        weight = sector_target * (cap / sector_cap_sum)

    # applying the min weight and max weight rules
    weight = max(min_weight, min(weight, max_weight))

    # store the weight
    initial_weights[i] = weight

# normalizing out the weights to make sure it adds to 1
total_weight = sum(initial_weights)
initial_weights = [weight_of_sector / total_weight for weight_of_sector in initial_weights]

# calculating actual sector totals after weighting
sector_weights_actual = {}

# created a for loop to retrieve the necessary info. and add the stock's weight to its sector's total weight
for i in range(x):
    sector = stocks_chosen_df.iloc[i]["Sector"]
    sector_weights_actual[sector] = sector_weights_actual.get(sector, 0) + initial_weights[i]

# created a for loop to apply the 40% sector cap, where needed
for sector, weight_of_sector in sector_weights_actual.items():

    # if the weighting of the sector is greater than 40%, scale it accordingly
    if weight_of_sector > 0.40:
        scale = 0.40 / weight_of_sector

        # created a for loop to apply the scaling to each stock within that sector
        for i in range(x):
            if stocks_chosen_df.iloc[i]["Sector"] == sector:
                initial_weights[i] *= scale

# another normalization to make sure weights sum to 1 again
total_weight = sum(initial_weights)
initial_weights = [weight_of_sector / total_weight for weight_of_sector in initial_weights]


### Enforcing Weight Constriants:

This cell implements the portfolio weight rules of 40% max sector weight and 15% max individual stock weight within our portfolio. We scale down any sector exceeding 40% weight and redistribute the excess weight across the portfolio. We also cap stocks weighting at 15% and redistrubute any excess weight across the portfolio.

In [16]:
# created a list to store the sectors present in the selected stocks
sectors_present = []

# created a for loop to loop through each selected stock and find the unique sectors
for i in range(len(stocks_chosen_df)):
    sector = stocks_chosen_df.iloc[i]["Sector"]

    if sector not in sectors_present:
        sectors_present.append(sector)

# dict to count how many stocks appear in each sector
sector_counts = {}

# created a for loop to fill the sector_counts dict
for i in range(len(stocks_chosen_df)):
    sector = stocks_chosen_df.iloc[i]["Sector"]

    # add new sector if needed
    if sector not in sector_counts:
        sector_counts[sector] = 0

    # increment for the given sector
    sector_counts[sector] += 1

# dict to store each sector’s benchmark weighting
sector_targets = {}

# created a for loop to assign each sector its benchmark weight. The loop will assign the value 0 if not in bench_weights
for sector in sectors_present:
    if sector in bench_weights:
        sector_targets[sector] = bench_weights[sector]
    else:
        sector_targets[sector] = 0

# calculating the total benchmark weighting for normalization
total_target = sum(sector_targets.values())

# created an if statement to check if total > 0, and the normalize benchmark weights so they add to 1
if total_target > 0:
    for sector in sector_targets:
        sector_targets[sector] = sector_targets[sector] / total_target
else:
    # if something breaks, use equally-weighted method
    equal_weight = 1 / len(sectors_present)
    for sector in sector_targets:
        sector_targets[sector] = equal_weight

# list to store the initial unadjusted weight per stock
initial_weights = []

# created a for loop to loop though each sector and assign each stock the same share of its sector’s total weight
for i in range(len(stocks_chosen_df)):
    sector = stocks_chosen_df.iloc[i]["Sector"]
    target_sector_weight = sector_targets.get(sector, 0)

    # divide sector weight equally among the stocks
    per_stock_weight = target_sector_weight / sector_counts[sector]
    initial_weights.append(per_stock_weight)

# normalize weights to make sure they sum to 1
total = sum(initial_weights)

if total > 0:
    initial_weights = [w / total for w in initial_weights]
else:
    # backup
    initial_weights = [1 / len(stocks_chosen_df)] * len(stocks_chosen_df)

# copy initial weights to start applying sector caps
weights = initial_weights[:]
x = len(stocks_chosen_df)

# created a function called apply_sector_cap which will enforce the 40% maximum sector weight rule
def apply_sector_cap(weights):

    # dict to store each sector’s current weight
    sector_weight_current = {}

    # created a for loop to sum the weights for each sector
    for i in range(x):
        sector = stocks_chosen_df.iloc[i]["Sector"]
        if sector not in sector_weight_current:
            sector_weight_current[sector] = 0
        sector_weight_current[sector] += weights[i]

    # created a for loop to loop through each sector and apply scaling if a sector goes above 40%
    for sector in sector_weight_current:
        if sector_weight_current[sector] > 0.40:
            factor = 0.40 / sector_weight_current[sector]

            # scale all stocks inside the sector
            for i in range(x):
                if stocks_chosen_df.iloc[i]["Sector"] == sector:
                    weights[i] *= factor
                    
    # normalizing the weights         
    total = sum(weights)
    return [w / total for w in weights]

# apply the sector cap once
weights = apply_sector_cap(weights)

# max weight per stock
max_stock_cap = 0.15

# created a while loop so it can loop until no stocks violate the 15% stock cap
while True:

    # list to store indices of stocks above the cap
    stocks_above_cap = []

    # created a for loop to loop through every stock weight and record any stock that exceeds the max allowed weight
    for i, w in enumerate(weights):
        if w > max_stock_cap:
            stocks_above_cap.append(i)

    # break out of loop if no violations occurred
    if len(stocks_above_cap) == 0:
        break

    # capping the weight of each violating stock to max by setting its weight equal to the max allowed limit
    for i in stocks_above_cap:
        weights[i] = max_stock_cap

    # calculates the remaining weight after capping
    total_capped = sum(weights[i] for i in stocks_above_cap)
    remaining = 1 - total_capped

    # uncapped stocks
    uncapped = []

    for z in range(x):
        if z not in stocks_above_cap:
            uncapped.append(z)

    # if no weight left or no uncapped stocks, normalize and break
    if remaining <= 0 or len(uncapped) == 0:
        total = sum(weights)
        weights = [w / total for w in weights]
        break

    # calculating the total weight of all stocks that are not capped
    sum_uncapped = sum(weights[z] for z in uncapped)

    # if uncapped stocks had no weight, use equally-weighted method
    if sum_uncapped == 0:
        equal_weight2 = remaining / len(uncapped)
        for z in uncapped:
            weights[z] = equal_weight2
    else:
        # if it hits the else case, scale weights proportionally
        for z in uncapped:
            weights[z] = (weights[z] / sum_uncapped) * remaining

# apply sector cap again after stock-level cap
weights = apply_sector_cap(weights)

# created a for loop to enforce the minimum weight
for i in range(x):
    if weights[i] < min_weight:
        weights[i] = min_weight

# normalize the weights
total = sum(weights)
weights = [w / total for w in weights]

# add weights to the dataframe
stocks_chosen_df["Weight"] = weights

# printing out the max stock weight and the max sector weight
print(f"Max. Stock Weight: {stocks_chosen_df['Weight'].max():.3f}")
print(f"Max. Sector Weight: {stocks_chosen_df.groupby('Sector')['Weight'].sum().max():.3f}")


Max. Stock Weight: 0.110
Max. Sector Weight: 0.219


#### Final Sector Weight Summary and Portfolio Weight Checks:


In [17]:
# dict to store the total weight of each sector
sector_weight_final = {}

# created a for loop to loop through every stock in the final portfolio
for i in range(len(stocks_chosen_df)):

    # sector of current stock
    stock_sector = stocks_chosen_df.iloc[i]["Sector"]

    # weight of current stock
    weight = stocks_chosen_df.iloc[i]["Weight"]
    
    # if this sector has not been added yet, set the value to 0
    if stock_sector not in sector_weight_final:
        
        sector_weight_final[stock_sector] = 0

    # adds the stock's weight to its sector total
    sector_weight_final[stock_sector] += weight

# printing sector-level final weights with the help of a for loop which will loop through the sector totals and print each one
print("Final Sector Weights:")
for stock_sector in sector_weight_final:
    print(f"{stock_sector} {sector_weight_final[stock_sector]:.4f}")

# printing the min. and max. weight in the portfolio as a sanity check
print(f"Minimum Weight In Portfolio: {stocks_chosen_df['Weight'].min():.4f}")
print(f"Maximum Weight In Portfolio: {stocks_chosen_df['Weight'].max():.4f}")


Final Sector Weights:
Technology 0.1907
Financial Services 0.2191
Industrials 0.1074
Energy 0.0997
Consumer Cyclical 0.0679
Communication Services 0.0612
Basic Materials 0.0693
Consumer Defensive 0.0496
Healthcare 0.0477
Utilities 0.0438
Real Estate 0.0438
Minimum Weight In Portfolio: 0.0219
Maximum Weight In Portfolio: 0.1095


# Building the Final Portfolio (Taken Into Account Currency Conversions & Fees):

This cell converts our portfolio weights into actual shares considering stock price, currency conversions, and trading fees. For USD stocks, we convert the prices to CAD using the current CADUSD exchange rate. Trading fees are calculated as the minimum of $2.15 USD flat or 0.001 USD per share as per assignment rules. The final output shows each stock's ticker, price, currency, share count, CAD value, and portfolio weight.

In [20]:
# setting the initial capital and fees
initial_capital = 1_000_000
fee_flat_usd = 2.15
fee_per_share_usd = 0.001

# setting the total budget to the initial capital
total_budget_cad = initial_capital

# extracting the conversion info. of "CADUSD=X"
cad_to_usd = yf.Ticker("CADUSD=X")
cad_to_usd_hist = cad_to_usd.history(period="1d")
cadusd = cad_to_usd_hist["Close"].iloc[-1]

# convers the flat trading fee from USD to CAD and subtract it from the total budget
fee_usd_flat = fee_flat_usd
fee_cad_flat = fee_usd_flat / cadusd
budget_for_stocks = total_budget_cad - fee_cad_flat

# x = number of selected stocks
x = len(stocks_chosen_df)

# created lists to store values before budget scaling
initial_shares_without_budget = []
cad_value_without_budget = []

# created a for loop to calculate the number of shares using raw weights
for i in range(x):

    # retrieving necessary info.
    row = stocks_chosen_df.iloc[i]
    price = row["Price"]
    currency = row["Currency"]
    weight = row["Weight"]

    # convert price into CAD if stock is in USD
    if currency == "USD":
        price_cad = price / cadusd
    else:
        price_cad = price

    # calculates the CAD dollars allocated based on weight
    value_in_cad = budget_for_stocks * weight

    # calculates the number of shares
    shares = value_in_cad / price_cad

    # storing the initial values (without scaling to budget)
    initial_shares_without_budget.append(shares)
    cad_value_without_budget.append(shares * price_cad)

# calculating the total value before adjusting for overspending
total_value_without_budget = sum(cad_value_without_budget)

# used an if statement to see if a stock was overspent on, scale the number of shares downward
if total_value_without_budget > 0:
    scaling_of_cost = budget_for_stocks / total_value_without_budget
else:
    scaling_of_cost = 1

# final shares and value list
shares_final = []
cad_value_final = []

# created a for loop to apply the scaling factor and recalculate everything
for i in range(x):
    
    row = stocks_chosen_df.iloc[i]
    price = row["Price"]
    currency  = row["Currency"]

    # convert price to CAD if needed
    if currency == "USD":
        price_cad = price / cadusd
    else:
        price_cad = price

    # final number of shares
    shares = initial_shares_without_budget[i] * scaling_of_cost

    # convert value of fees to CAD
    value_in_cad = shares * price_cad

    # storing the final results
    shares_final.append(shares)
    cad_value_final.append(value_in_cad)

# calculates the total portfolio value in CAD
total_value_cad = sum(cad_value_final)

# total number of shares purchased across the entire portfolio
total_shares = sum(shares_final)

# calculates the USD fee (whichever is cheaper) and converts it to CAD
fee_usd = min(fee_flat_usd, fee_per_share_usd * total_shares)
total_fees_cad = fee_usd / cadusd

# calculating the total money spent in CAD (should be 1 million)
total_spent_cad = total_value_cad + total_fees_cad

# printing those total values out
print(f"Total Invested (CAD): ${total_value_cad:.2f}")
print(f"Total Fees (CAD): ${total_fees_cad:.2f}")
print(f"Total Spent (CAD): ${total_spent_cad:.2f}")

# adding the numbers of shares and the portfolio value of each stock to the main DataFrame
stocks_chosen_df["Shares"] = shares_final
stocks_chosen_df["Value"] = cad_value_final

# constructing the final portfolio
Portfolio_Final = stocks_chosen_df[["Ticker", "Price", "Currency", "Shares", "Value", "Weight"]].copy()
Portfolio_Final.index = range(1, len(Portfolio_Final) + 1)

# outputting the total weight of portfolio to ensure it adds to 1
print(f"Total Weight Of Portfolio: {Portfolio_Final['Weight'].sum():.3f}")

# Required For TA
# Stocks_Final = Portfolio_Final[["Ticker", "Shares"]].copy()
# Stocks_Final.to_csv("Stocks_Group_09.csv", index=False)

# outputting the final portfolio
Portfolio_Final

Total Invested (CAD): $999996.97
Total Fees (CAD): $3.03
Total Spent (CAD): $1000000.00
Total Weight Of Portfolio: 1.000


Unnamed: 0,Ticker,Price,Currency,Shares,Value,Weight
1,AAPL,271.48999,USD,249.208937,95337.859554,0.095338
2,CSCO,76.099998,USD,889.063512,95337.859554,0.095338
3,JPM,298.019989,USD,260.848444,109542.237619,0.109543
4,BAC,51.560001,USD,1507.720102,109542.237619,0.109543
5,GE,287.440002,USD,132.550052,53687.734042,0.053688
6,CAT,550.429993,USD,69.218952,53687.734042,0.053688
7,EOG,106.970001,USD,330.621121,49835.699312,0.049836
8,CVX,149.979996,USD,235.808392,49835.699312,0.049836
9,AMZN,220.690002,USD,109.158787,33946.056053,0.033946
10,HD,343.320007,USD,70.168509,33946.056053,0.033946


### Performance Tests (NOT THE FINAL PORTFOLIO)

In [19]:
# edit these dates accordingly for the contest dates
contest_start = "2025-11-12"
contest_end = "2025-11-20"

# getting the latest data for the "CADUSD=X" ticker
cadusd_hist = yf.Ticker("CADUSD=X").history(period="1d")
cadusd_end = cadusd_hist["Close"].iloc[-1]

# created a list to store the final CAD value of each value at the end date
final_values_cad = []

# created a for loop to loop through each stock in the final portfolio
for i in range(len(Portfolio_Final)):

    # retrieving necessary info.
    row = Portfolio_Final.iloc[i]
    ticker = row["Ticker"]
    shares = row["Shares"]
    currency = row["Currency"]

    # extracting the historical prices for the test window
    hist = yf.Ticker(ticker).history(start=contest_start, end=contest_end)

    # skip if no data is returned
    if hist.empty or "Close" not in hist.columns:
        continue

    # closing price on the last day
    end_price = hist["Close"].iloc[-1]

    # converting the price to CAD
    if currency == "USD":
        price_cad_end = end_price / cadusd_end
    else:
        price_cad_end = end_price

    # add the CAD value to the list
    final_values_cad.append(shares * price_cad_end)

# total portfolio value at the end of the contest
final_value = sum(final_values_cad)

# portfolio total return over the contest window
portfolio_return = (final_value - initial_capital) / initial_capital

# tickers
tsx_symbol = "^GSPTSE"
spx_symbol = "^GSPC"

# extracting ticker data of the S&P and TSX
tsx_hist = yf.Ticker(tsx_symbol).history(start=contest_start, end=contest_end)
spx_hist = yf.Ticker(spx_symbol).history(start=contest_start, end=contest_end)

# calculate TSX total return over the period
tsx_return = tsx_hist["Close"].iloc[-1] / tsx_hist["Close"].iloc[0] - 1

# calculate S&P total return over the period
spx_return = spx_hist["Close"].iloc[-1] / spx_hist["Close"].iloc[0] - 1

# benchmark return (average between the 2)
benchmark_return = (tsx_return + spx_return) / 2

# printing out the results
print("===== MARKET MEET PERFORMANCE CHECKS =====")
print(f"Initial Capital (CAD): {initial_capital:,.2f}")
print(f"Final Value (CAD): {final_value:,.2f}")
print(f"Portfolio Return: {portfolio_return:.4%}")
print(f"Benchmark Return: {benchmark_return:.4%}")

# difference between our portfolio and the benchmark
difference = portfolio_return - benchmark_return
print(f"Difference vs. Benchmark: {difference:.4%}")

===== MARKET MEET PERFORMANCE CHECKS =====
Initial Capital (CAD): 1,000,000.00
Final Value (CAD): 1,005,149.19
Portfolio Return: 0.5149%
Benchmark Return: -2.4143%
Difference vs. Benchmark: 2.9293%


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Jack Smith, Aryan Singh, Samyak Jain