In [1]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 09
### Team Member Names: Aryan Singh, Jack Smith, and Samyak Jain
### Team Strategy Chosen: Market Meet (Market Beat, Market Meet, Risk-Free)

### Ticker Data:

In [2]:
# sample DataFrame of tickers for testing

# tickers_df = pd.DataFrame({
#      'Tickers': ['AAPL', 'MSFT', 'GOOGL', 'JPM', 'BAC', 'C', 'WFC', 'GS',
#                  'XOM', 'CVX', 'SLB', 'COP', 'AMZN', 'TSLA', 'HD', 'NKE',
#                  'JNJ', 'PFE', 'UNH', 'ABBV', 'KO', 'PEP', 'WMT', 'COST',
#                  'NEE', 'DUK', 'SO', 'D', 'AMT', 'PLD', 'EQIX', 'SPG',
#                  'FCX', 'NEM', 'LIN', 'APD', 'TD.TO', 'AZI', 'DML.TO', 'ORANY']
#  })

# tickers_df = pd.read_csv('ADD CSV FILE FOR WHENEVER WE GET IT HERE')

# AZI, ORANY, and DML.TO are stocks for testing


# tickers_df = pd.read_csv('Tickers_Example.csv', header=None, names=['Ticker'])
# tickers = tickers_df['Ticker'].tolist()

# tickers_df

In [3]:
start_date = '2024-10-01'
end_date = '2025-09-30'
min_volume = 5000
min_trading_days=18

tickers_df = pd.read_csv('Tickers_Example.csv', header=None, names=['Ticker'])
tickers = tickers_df['Ticker'].tolist()

def is_us_or_ca_ticker_by_currency(ticker):
    try:
        currency = yf.Ticker(ticker).info.get('currency', '')
        return currency in ['USD', 'CAD']
    except Exception:
        return False

def average_daily_volume_adjusted(df):
    df.index = pd.to_datetime(df.index)
    monthly_counts = df.groupby(df.index.to_period('M')).size()
    valid_months = monthly_counts[monthly_counts >= min_trading_days].index
    df_filtered = df[df.index.to_period('M').isin(valid_months)]
    if not df_filtered.empty:
        avg = df_filtered['Volume'].mean()
        # If avg is a series for some reason (it happeded idk how) extract just the number
        if isinstance(avg, pd.Series):
            avg = avg.values[0]
        return avg
    else:
        return 0

filtered_tickers = []

for ticker in tickers:

    if not is_us_or_ca_ticker_by_currency(ticker):
        print(f"[REMOVED] {ticker}: Not USD or CAD currency.")
        continue

    ticker_data = yf.Ticker(ticker)
    hist = ticker_data.history(start=start_date, end=end_date)

    if isinstance(hist.index, pd.DatetimeIndex) and hist.index.tz is not None:
        hist.index = hist.index.tz_convert(None)

    if hist.empty:
        print(f"[DELISTED] {ticker}: No historical data returned.")
        continue

    if 'Volume' not in hist.columns:
        print(f"[REMOVED] {ticker}: No Volume column in stock data.")
        continue

    avg_vol = average_daily_volume_adjusted(hist)

    if avg_vol < min_volume:
        print(f"[REMOVED] {ticker}: Avg volume {avg_vol:.2f} < {min_volume}.")
        continue

    filtered_tickers.append(ticker)

# Display filtered tickers
display(pd.DataFrame(filtered_tickers, columns=['Filtered Tickers']))

[REMOVED] AGN: Not USD or CAD currency.
[REMOVED] CELG: Not USD or CAD currency.
[REMOVED] MON: Not USD or CAD currency.
[REMOVED] RTN: Not USD or CAD currency.


Unnamed: 0,Filtered Tickers
0,AAPL
1,ABBV
2,ABT
3,ACN
4,AIG
5,AMZN
6,AXP
7,BA
8,BAC
9,BB.TO


### Optimal Benchmark Weighting On Different Sectors:

In [4]:
# what the benchmark weighting on different sectors should be

bench_weights = {
    'Financials': 0.229,
    'Information Technology': 0.198,
    'Industrials':0.1115,
    'Energy': 0.0875,
    'Consumer Discretionary': 0.0705,
    'Communication Services': 0.0635,
    'Materials': 0.0625,
    'Consumer Staples': 0.0515,
    #'Health Care': 0.0495,
    'Healthcare': 0.0495,  # Handle both naming conventions, if needed.
    'Utilities': 0.019,
    'Real Estate': 0.0105}
    
sector_mapping = {
    'Financial Services': 'Financials',
    'Technology': 'Technology',
    'Industrials': 'Industrials',
    'Energy': 'Energy',
    'Consumer Cyclical': 'Consumer Discretionary',
    'Communication Services': 'Communication Services',
    'Basic Materials': 'Materials',
    'Consumer Defensive': 'Consumer Staples',
    'Healthcare': 'Health Care',
    'Utilities': 'Utilities',
    'Real Estate': 'Real Estate'}


### DataFrame To Store The Neccesary Infomation (Ticker, Sector, Price, Currency, And Market Cap):

In [5]:
initial_capital = 1_000_000

def get_usd_to_cad_rate():
    
        fx = yf.Ticker("CADUSD=X")
        fx_hist = fx.history(period="1d")
        rate_cadusd = fx_hist["Close"].iloc[-1]
        return 1 / rate_cadusd

usd_to_cad = get_usd_to_cad_rate()

tickers_list = []
sectors_list = []
prices_list = []
currencies_list = []
market_caps_list = []

for ticker in filtered_tickers:
    
    try:
        stock = yf.Ticker(ticker)
        
        info = stock.info or {}
        fast = stock.fast_info or {}
        yf_sector = info.get('sector')
        sector = sector_mapping.get(yf_sector, 'Unknown')
        
        if sector == 'Health Care':
            sector = 'Healthcare'
        
        if sector == 'Unknown':
            print(f"[REMOVED] {ticker}: Unknown Sector '{yf_sector}'")
            continue
        
        hist = stock.history(period='1d')
        
        if hist.empty or 'Close' not in hist.columns:
            print(f"[REMOVED] {ticker}: No Price Data.")
            continue

        price = float(hist['Close'].iloc[-1])
        
        currency = fast.get('currency') or info.get('currency') or 'USD'
        if currency not in ['USD', 'CAD']:
            currency = 'USD'
        
        market_cap = fast.get('market_cap', info.get('marketCap', 0))
        
        if market_cap is None or pd.isna(market_cap) or market_cap <= 0:
            market_cap = 0
        
        if currency == 'USD':
            market_cap_cad = market_cap * usd_to_cad
        else:
            market_cap_cad = market_cap
        tickers_list.append(ticker)
        sectors_list.append(sector)
        prices_list.append(price)
        currencies_list.append(currency)
        market_caps_list.append(market_cap_cad)
    
    except Exception as e:
        print(f"Skipped {ticker}: {e}")
        continue

df = pd.DataFrame({
    'Ticker': tickers_list,
    'Sector': sectors_list,
    'Price': prices_list,
    'Currency': currencies_list, 
    'Market Cap': market_caps_list
})

print(df.to_markdown())
print(f"\nSector By Size:\n{df.groupby('Sector').size()}")


|    | Ticker   | Sector                 |    Price | Currency   |   Market Cap |
|---:|:---------|:-----------------------|---------:|:-----------|-------------:|
|  0 | AAPL     | Technology             |  266.25  | USD        |  5.57016e+12 |
|  1 | ABBV     | Healthcare             |  229.45  | USD        |  5.71679e+11 |
|  2 | ABT      | Healthcare             |  123.97  | USD        |  3.04168e+11 |
|  3 | ACN      | Technology             |  240.79  | USD        |  2.11425e+11 |
|  4 | AIG      | Financials             |   75.69  | USD        |  5.91132e+10 |
|  5 | AMZN     | Consumer Discretionary |  217.14  | USD        |  3.27235e+12 |
|  6 | AXP      | Financials             |  343.8   | USD        |  3.37267e+11 |
|  7 | BA       | Industrials            |  179.38  | USD        |  1.9221e+11  |
|  8 | BAC      | Financials             |   51     | USD        |  5.32528e+11 |
|  9 | BB.TO    | Technology             |    5.84  | CAD        |  3.47206e+09 |
| 10 | BIIB     

### Calculating Annual Volatility:

In [6]:
# calculating annual volatility

end_date = datetime.now()
start_date = datetime(end_date.year - 1, end_date.month, end_date.day)

trading_days_per_year = 251
min_days_required = 60

volatilities = []

for ticker in df['Ticker']:

    try:
        stock = yf.Ticker(ticker)
        hist = stock.history(start=start_date, end=end_date)

        if isinstance(hist.index, pd.DatetimeIndex) and hist.index.tz is not None:
            hist.index = hist.index.tz_convert(None)

        if hist is None or hist.empty or len(hist) < min_days_required:
            volatilities.append(np.nan)
            continue

        returns = hist['Close'].pct_change().dropna()

        if returns.empty:
            volatilities.append(np.nan)
            continue

        annual_vol = returns.std() * np.sqrt(trading_days_per_year)
        volatilities.append(annual_vol)

    except Exception:
        volatilities.append(np.nan)
        continue

df['Volatility'] = volatilities

# Remove unusable tickers
df = df.dropna(subset=['Volatility']).reset_index(drop=True)

df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,AAPL,Technology,266.25,USD,5570161000000.0,0.325486
1,ABBV,Healthcare,229.449997,USD,571678600000.0,0.258683
2,ABT,Healthcare,123.970001,USD,304167800000.0,0.216239
3,ACN,Technology,240.789993,USD,211425100000.0,0.281143
4,AIG,Financials,75.690002,USD,59113150000.0,0.238536
5,AMZN,Consumer Discretionary,217.139999,USD,3272346000000.0,0.350289
6,AXP,Financials,343.799988,USD,337267500000.0,0.315998
7,BA,Industrials,179.380005,USD,192209600000.0,0.368149
8,BAC,Financials,51.0,USD,532527800000.0,0.269042
9,BB.TO,Technology,5.84,CAD,3472060000.0,0.608878


### Before Filtering High Volatile Stocks:

In [7]:
print("Before filtering:", len(df))
print(df.groupby("Sector").size())

Before filtering: 37
Sector
Communication Services     1
Consumer Discretionary     1
Consumer Staples           6
Financials                10
Healthcare                 8
Industrials                5
Technology                 6
dtype: int64


In [8]:

# I have already created the dataframe with each stock, its secor, its currency, market_cap, and annual volatility
#use this dataframe throughout the rest of the code. It is okay to manupulate the dataframe to remove stocks
#in which are not deemed fit to invest in.

# get rid of stocks with high volatility. i.e. get rid of the stocks with the highest std within each sector. I'm thinking maybe the top 25%

cutoff = 0.75

filtered_rows = []
seen_sectors = []

for i in range(len(df)):
    sector_name = df.iloc[i]['Sector']
    if sector_name not in seen_sectors:
        seen_sectors.append(sector_name)

for sector in seen_sectors:
    
    sector_df = df[df['Sector'] == sector].copy()
    if len(sector_df) < 4:
        filtered_rows.append(sector_df)
        continue
    
    sector_df = sector_df.sort_values(by='Volatility').reset_index(drop=True)
        
    keep_count = int(len(sector_df) * cutoff)
    
    if keep_count < 1:
        keep_count = 1
    
    sector_kept = sector_df.iloc[:keep_count]
    filtered_rows.append(sector_kept)

df = pd.concat(filtered_rows, ignore_index=True)

df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,ACN,Technology,240.789993,USD,211425100000.0,0.281143
1,AAPL,Technology,266.25,USD,5570161000000.0,0.325486
2,QCOM,Technology,159.589996,USD,242750400000.0,0.384188
3,TXN,Technology,153.330002,USD,196512100000.0,0.394256
4,ABT,Healthcare,123.970001,USD,304167800000.0,0.216239
5,ABBV,Healthcare,229.449997,USD,571678600000.0,0.258683
6,PFE,Healthcare,24.4,USD,195572200000.0,0.263565
7,BMY,Healthcare,45.939999,USD,131840500000.0,0.279708
8,MRK,Healthcare,94.970001,USD,334406000000.0,0.295656
9,BIIB,Healthcare,168.179993,USD,34781150000.0,0.319822


### After Filtering High Volatile Stocks:

In [9]:
print("After filtering:", len(df))
print(df.groupby("Sector").size())

After filtering: 26
Sector
Communication Services    1
Consumer Discretionary    1
Consumer Staples          4
Financials                7
Healthcare                6
Industrials               3
Technology                4
dtype: int64


In [10]:
# create the benchmark. Basically, create a dataframe that consists of the closing returns of the benchmark within the past year(1 year because we are going to 
#track the correlation between the benchmark and the stocks back by one year later. The benchmark is just an equally weighted average of the
#TSX composite and the S&P500 returns.

end_date = datetime.now()
start_date = datetime(end_date.year - 1, end_date.month, end_date.day)

tsx_symbol = "^GSPTSE"
spx_symbol = "^GSPC"

tsx_ticker = yf.Ticker(tsx_symbol)
spx_ticker = yf.Ticker(spx_symbol)

tsx_hist = tsx_ticker.history(start=start_date, end=end_date)
spx_hist = spx_ticker.history(start=start_date, end=end_date)

if isinstance(tsx_hist.index, pd.DatetimeIndex) and tsx_hist.index.tz is not None:
    tsx_hist.index = tsx_hist.index.tz_convert(None)
    
if isinstance(spx_hist.index, pd.DatetimeIndex) and spx_hist.index.tz is not None:
    spx_hist.index = spx_hist.index.tz_convert(None)
    
tsx_returns = tsx_hist['Close'].pct_change()
spx_returns = spx_hist['Close'].pct_change()

benchmark_df = (pd.DataFrame({'TSX Return': tsx_returns,'S&P 500 Return': spx_returns})).dropna()

benchmark_df['Benchmark Return'] = (benchmark_df['TSX Return'] + benchmark_df['S&P 500 Return']) / 2
benchmark_df.index = benchmark_df.index.strftime('%Y-%m-%d')

benchmark_df


Unnamed: 0_level_0,TSX Return,S&P 500 Return,Benchmark Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-11-21,0.014147,0.005340,0.009744
2024-11-22,0.002111,0.003468,0.002790
2024-11-25,-0.001332,0.003020,0.000844
2024-11-26,-0.000209,0.005722,0.002757
2024-11-27,0.003275,-0.003801,-0.000263
...,...,...,...
2025-11-14,0.002410,-0.000502,0.000954
2025-11-17,-0.008254,-0.009162,-0.008708
2025-11-18,-0.001320,-0.008256,-0.004788
2025-11-19,0.008054,0.003754,0.005904


In [11]:
# implement a market_cap check. Basically get rid of any pennystocks as they don't tend to always follow the market very well.

min_small_cap = 1_000_000_000
limit_small_cap = 2_000_000_000
large_cap_cutoff = 10_000_000_000

df_filtered = df[df['Market Cap'] >= min_small_cap].copy()

small_caps = df_filtered[df_filtered['Market Cap'] < limit_small_cap]

large_caps = df_filtered[df_filtered['Market Cap'] > large_cap_cutoff]

if len(small_caps) == 0:
    
    original_small_caps = df[(df['Market Cap'] < limit_small_cap) & (df['Market Cap'] >= min_small_cap)]
    
    if not original_small_caps.empty:
        
        best_small_cap = original_small_caps.sort_values(by='Market Cap', ascending=False).iloc[0]

        df_filtered = pd.concat([df_filtered, pd.DataFrame([best_small_cap])], ignore_index=True)

if len(large_caps) == 0:
    
    original_large_caps = df[df['Market Cap'] > large_cap_cutoff]
    
    if not original_large_caps.empty:
        
        best_large_cap = original_large_caps.sort_values(by='Market Cap', ascending=False).iloc[0]
        
        df_filtered = pd.concat([df_filtered, pd.DataFrame([best_large_cap])], ignore_index=True)

df = df_filtered.reset_index(drop=True)

df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,ACN,Technology,240.789993,USD,211425100000.0,0.281143
1,AAPL,Technology,266.25,USD,5570161000000.0,0.325486
2,QCOM,Technology,159.589996,USD,242750400000.0,0.384188
3,TXN,Technology,153.330002,USD,196512100000.0,0.394256
4,ABT,Healthcare,123.970001,USD,304167800000.0,0.216239
5,ABBV,Healthcare,229.449997,USD,571678600000.0,0.258683
6,PFE,Healthcare,24.4,USD,195572200000.0,0.263565
7,BMY,Healthcare,45.939999,USD,131840500000.0,0.279708
8,MRK,Healthcare,94.970001,USD,334406000000.0,0.295656
9,BIIB,Healthcare,168.179993,USD,34781150000.0,0.319822


In [12]:
# Pick the top 1-3 stocks in each sector with the best correlation to the benchmark over the past year.

correlations = []

for i in range(len(df)):
    
    ticker = df.iloc[i]['Ticker']
    
    try:
        
        stock = yf.Ticker(ticker)
        stock_hist = stock.history(start=start_date, end=end_date)

        if stock_hist is None or stock_hist.empty:
            
            correlations.append(np.nan)
            
            continue

        stock_returns = stock_hist['Close'].pct_change().dropna()

        if stock_returns.empty:
            
            correlations.append(np.nan)
            
            continue

        stock_returns.index = stock_returns.index.strftime('%Y-%m-%d')
        
        dates = []
        
        for x in stock_returns.index:
            
            if x in benchmark_df.index:
                
                dates.append(x)

        if len(dates) < 20:
            
            correlations.append(np.nan)
            
            continue

        stock_aligned = stock_returns.loc[dates]
        bench_aligned = benchmark_df.loc[dates, 'Benchmark Return']

        corr_between_stock_and_bench = stock_aligned.corr(bench_aligned)
        
        correlations.append(corr_between_stock_and_bench)

    except Exception:
        
        correlations.append(np.nan)

In [13]:

df['Correlation'] = correlations
df = df.dropna(subset=['Correlation']).reset_index(drop=True)

sector_lst = []

for i in range(len(df)):
    
    sector = df.iloc[i]['Sector']
    
    if sector not in sector_lst:
        
        sector_lst.append(sector)

stocks_chosen = []

for sector in sector_lst:
    
    sector_df = df[df['Sector'] == sector].copy()
    sector_df = sector_df.dropna(subset=['Correlation'])
    
    if len(sector_df) == 0:
        
        continue
    
    sector_df = sector_df.sort_values(by='Correlation', ascending=False).reset_index(drop=True)
    
    if len(sector_df) <= 3:
        
        chosen = sector_df
        
    else:
        
        chosen = sector_df.iloc[:3]
    
    stocks_chosen.append(chosen)

stocks_chosen_df = pd.concat(stocks_chosen, ignore_index=True)

stocks_chosen_df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility,Correlation
0,QCOM,Technology,159.589996,USD,242750400000.0,0.384188,0.732896
1,AAPL,Technology,266.25,USD,5570161000000.0,0.325486,0.71902
2,TXN,Technology,153.330002,USD,196512100000.0,0.394256,0.604149
3,PFE,Healthcare,24.4,USD,195572200000.0,0.263565,0.356598
4,BIIB,Healthcare,168.179993,USD,34781150000.0,0.319822,0.354713
5,ABBV,Healthcare,229.449997,USD,571678600000.0,0.258683,0.323199
6,BLK,Financials,992.52002,USD,217083300000.0,0.273432,0.794809
7,BK,Financials,106.459999,USD,105841700000.0,0.237916,0.741939
8,USB,Financials,46.490002,USD,101989400000.0,0.272268,0.7369
9,AMZN,Consumer Discretionary,217.139999,USD,3272346000000.0,0.350289,0.687248


In [14]:

if len(stocks_chosen_df) > 25:
    
    stocks_chosen_df = stocks_chosen_df.sort_values(by="Correlation", ascending=False).reset_index(drop=True)
    stocks_chosen_df = stocks_chosen_df.iloc[:25].reset_index(drop=True)

x = len(stocks_chosen_df)

print("Number Of Stocks For Portfolio:", x)

Number Of Stocks For Portfolio: 17


In [15]:
sector_totals = {}
total = 0

for i in range(len(stocks_chosen_df)):
    sector = stocks_chosen_df.iloc[i]["Sector"]
    marketcap = stocks_chosen_df.iloc[i]["Market Cap"]

    if pd.isna(marketcap) or marketcap <= 0:
        continue

    if sector not in sector_totals:
        sector_totals[sector] = 0.0

    sector_totals[sector] += marketcap
    total += marketcap

portfolio_sector_weights = {}

for sector in sector_totals:
    if total > 0:
        portfolio_sector_weights[sector] = sector_totals[sector] / total
    else:
        portfolio_sector_weights[sector] = 0.0

print("Portfolio Sector Weights:")

for sector, weight in portfolio_sector_weights.items():
    print(f"{sector}: {weight:.4f}")


Portfolio Sector Weights:
Technology: 0.4906
Healthcare: 0.0655
Financials: 0.0347
Consumer Discretionary: 0.2671
Industrials: 0.0572
Consumer Staples: 0.0825
Communication Services: 0.0024


In [16]:

stocks_chosen_df['Sector'] = stocks_chosen_df['Sector'].replace({
    'Technology': 'Information Technology',
    'Health Care': 'Healthcare'})

min_weight = 1 / (2 * x)
max_weight = 0.15

sector_market_cap_chosen = {}
initial_weights = [0] * x

for i in range(x):
    
    stock_sector = stocks_chosen_df.iloc[i]["Sector"]
    cap = float(stocks_chosen_df.iloc[i]["Market Cap"])

    if stock_sector not in sector_market_cap_chosen:
        sector_market_cap_chosen[stock_sector] = 0.0
        
    sector_market_cap_chosen[stock_sector] += cap

for i in range(x):
    row = stocks_chosen_df.iloc[i]
    stock_sector = row["Sector"]
    cap = row["Market Cap"]

    sector_target = bench_weights.get(stock_sector, 0)
    sector_cap_sum = sector_market_cap_chosen[stock_sector]

    if sector_target == 0 or sector_cap_sum <= 0:
        
        weight = 1 / x
        
    else:
        weight = sector_target * (cap / sector_cap_sum)

    weight = max(min_weight, min(weight, max_weight))

    initial_weights[i] = weight

total_weight = sum(initial_weights)
initial_weights = [weight_of_sector / total_weight for weight_of_sector in initial_weights]

sector_weights_actual = {}

for i in range(x):
    sector = stocks_chosen_df.iloc[i]["Sector"]
    sector_weights_actual[sector] = sector_weights_actual.get(sector, 0) + initial_weights[i]

for sector, weight_of_sector in sector_weights_actual.items():
    if weight_of_sector > 0.40:
        scale = 0.40 / weight_of_sector
        
        for i in range(x):
            if stocks_chosen_df.iloc[i]["Sector"] == sector:
                initial_weights[i] *= scale

total_weight = sum(initial_weights)
initial_weights = [weight_of_sector / total_weight for weight_of_sector in initial_weights]


In [17]:
sectors_present = []

for i in range(len(stocks_chosen_df)):
    sector = stocks_chosen_df.iloc[i]["Sector"]
    if sector not in sectors_present:
        sectors_present.append(sector)

sector_counts = {}

for i in range(len(stocks_chosen_df)):
    sector = stocks_chosen_df.iloc[i]["Sector"]
    
    if sector not in sector_counts:
        sector_counts[sector] = 0
        
    sector_counts[sector] += 1

sector_targets = {}

for sector in sectors_present:
    if sector in bench_weights:
        sector_targets[sector] = bench_weights[sector]
    else:
        sector_targets[sector] = 0

total_target = sum(sector_targets.values())

if total_target > 0:
    for sector in sector_targets:
        sector_targets[sector] = sector_targets[sector] / total_target
else:
    equal_weight = 1 / len(sectors_present)
    for sector in sector_targets:
        sector_targets[sector] = equal_weight

initial_weights = []

for i in range(len(stocks_chosen_df)):
    sector = stocks_chosen_df.iloc[i]["Sector"]
    target_sector_weight = sector_targets.get(sector, 0)
    per_stock_weight = target_sector_weight / sector_counts[sector]
    initial_weights.append(per_stock_weight)

total = sum(initial_weights)

if total > 0:
    initial_weights = [w / total for w in initial_weights]
else:
    initial_weights = [1 / len(stocks_chosen_df)] * len(stocks_chosen_df)

weights = initial_weights[:]
x = len(stocks_chosen_df)

def apply_sector_cap(weights):

    sector_weight_current = {}
    
    for i in range(x):
        sector = stocks_chosen_df.iloc[i]["Sector"]
        if sector not in sector_weight_current:
            sector_weight_current[sector] = 0
        sector_weight_current[sector] += weights[i]

    for sector in sector_weight_current:
        if sector_weight_current[sector] > 0.40:
            factor = 0.40 / sector_weight_current[sector]
            
            for i in range(x):
                if stocks_chosen_df.iloc[i]["Sector"] == sector:
                    weights[i] *= factor
                    
    total = sum(weights)
    return [w / total for w in weights]

weights = apply_sector_cap(weights)

max_stock_cap = 0.15

while True:
    
    stocks_above_cap = []
    for i, w in enumerate(weights):
        if w > max_stock_cap:
            stocks_above_cap.append(i)

    if len(stocks_above_cap) == 0:
        break

    for i in stocks_above_cap:
        weights[i] = max_stock_cap

    total_capped = sum(weights[i] for i in stocks_above_cap)
    remaining = 1 - total_capped

    uncapped = []
    for z in range(x):
        if z not in stocks_above_cap:
            uncapped.append(z)

    if remaining <= 0 or len(uncapped) == 0:
        total = sum(weights)
        weights = [w / total for w in weights]
        break

    # FIXED: use z consistently here
    sum_uncapped = sum(weights[z] for z in uncapped)

    if sum_uncapped == 0:
        equal_weight2 = remaining / len(uncapped)
        for z in uncapped:
            weights[z] = equal_weight2
    else:
        for z in uncapped:
            weights[z] = (weights[z] / sum_uncapped) * remaining

weights = apply_sector_cap(weights)

min_weight = 1 / (2 * x)

for i in range(x):
    if weights[i] < min_weight:
        weights[i] = min_weight

total = sum(weights)
weights = [w / total for w in weights]

stocks_chosen_df["Weight"] = weights

print(f"Max Stock Weight: {stocks_chosen_df['Weight'].max():.4f}")
print(f"Max Sector Weight: {stocks_chosen_df.groupby('Sector')['Weight'].sum().max():.4f}")


Max Stock Weight: 0.0944
Max Sector Weight: 0.2831


In [18]:

sector_weight_final = {}

for i in range(len(stocks_chosen_df)):
    
    stock_sector = stocks_chosen_df.iloc[i]["Sector"]
    
    weight = stocks_chosen_df.iloc[i]["Weight"]
    
    if stock_sector not in sector_weight_final:
        
        sector_weight_final[stock_sector] = 0
        
    sector_weight_final[stock_sector] += weight

print("Final Sector Weights:")

for stock_sector in sector_weight_final:
    print(f"{stock_sector} {sector_weight_final[stock_sector]:.4f}")
    
print("Minimum Weight In Portfolio:", stocks_chosen_df["Weight"].min())
print("Maximum Weight In Portfolio:", stocks_chosen_df["Weight"].max())


Final Sector Weights:
Information Technology 0.2447
Healthcare 0.0844
Financials 0.2831
Consumer Discretionary 0.0871
Industrials 0.1378
Consumer Staples 0.0844
Communication Services 0.0785
Minimum Weight In Portfolio: 0.0281211372064277
Maximum Weight In Portfolio: 0.09435517099299547


In [19]:
initial_capital = 1_000_000
fee_flat_usd = 2.15
fee_per_share_usd = 0.001

total_budget_cad = initial_capital

cad_to_usd = yf.Ticker("CADUSD=X")
cad_to_usd_hist = cad_to_usd.history(period="1d")
cadusd = cad_to_usd_hist["Close"].iloc[-1]

x = len(stocks_chosen_df)

initial_shares_without_budget = []
cad_value_without_budget = []
cad_fees_without_budget = []

for i in range(x):
    
    row = stocks_chosen_df.iloc[i]
    price = row["Price"]
    currency = row["Currency"]
    weight = row["Weight"]

    if currency == "USD":
        
        price_cad = price / cadusd
        
    else:
        
        price_cad = price

    value_in_cad = total_budget_cad * weight
    shares = value_in_cad / price_cad   # <-- fixed (was value_cad)

    fees_in_usd = min(fee_flat_usd, fee_per_share_usd * shares)
    fees_in_cad = fees_in_usd / cadusd

    initial_shares_without_budget.append(shares)
    cad_value_without_budget.append(shares * price_cad)
    cad_fees_without_budget.append(fees_in_cad)

total_value_without_budget = sum(cad_value_without_budget)
total_fees_without_budget = sum(cad_fees_without_budget)
total_spent_without_budget = total_value_without_budget + total_fees_without_budget

if total_spent_without_budget > 0:
    
    scaling_of_cost = total_budget_cad / total_spent_without_budget
    
else:
    
    scaling_of_cost = 1

shares_final = []
cad_value_final = []
cad_fees_final = []

for i in range(x):
    
    row = stocks_chosen_df.iloc[i]
    price = row["Price"]
    currency  = row["Currency"]
    
    if currency == "USD":
        price_cad = price / cadusd
    else:
        price_cad = price

    shares = initial_shares_without_budget[i] * scaling_of_cost

    fees_in_usd = min(fee_flat_usd, fee_per_share_usd * shares)
    fees_in_cad = fees_in_usd / cadusd

    value_in_cad = shares * price_cad

    shares_final.append(shares)
    cad_value_final.append(value_in_cad)
    cad_fees_final.append(fees_in_cad)

total_value_cad = sum(cad_value_final)
total_fees_cad  = sum(cad_fees_final)
total_spent_cad = total_value_cad + total_fees_cad

print(f"Total Invested (CAD): ${total_value_cad:.2f}")
print(f"Total Fees (CAD): ${total_fees_cad:.2f}")
print(f"Total Spent (CAD): ${total_spent_cad:.2f}")

stocks_chosen_df["Shares"] = shares_final
stocks_chosen_df["Value"]  = cad_value_final

final_portfolio = stocks_chosen_df[["Ticker", "Price", "Currency", "Shares", "Value", "Weight"]].copy()

final_portfolio


Total Invested (CAD): $999989.43
Total Fees (CAD): $10.57
Total Spent (CAD): $1000000.00


Unnamed: 0,Ticker,Price,Currency,Shares,Value,Weight
0,QCOM,159.589996,USD,362.628162,81581.338239,0.081582
1,AAPL,266.25,USD,217.358975,81581.338239,0.081582
2,TXN,153.330002,USD,377.433159,81581.338239,0.081582
3,PFE,24.4,USD,817.550971,28120.840075,0.028121
4,BIIB,168.179993,USD,118.612464,28120.840075,0.028121
5,ABBV,229.449997,USD,86.939393,28120.840075,0.028121
6,BLK,992.52002,USD,67.436995,94354.174024,0.094355
7,BK,106.459999,USD,628.710954,94354.174024,0.094355
8,USB,46.490002,USD,1439.71962,94354.174024,0.094355
9,AMZN,217.139999,USD,284.690216,87143.70221,0.087145


In [22]:

# EDIT THESE DATES ACCCORDINGLY
contest_start = "2025-11-12"
contest_end   = "2025-11-20"

cadusd_hist = yf.Ticker("CADUSD=X").history(period="1d")
cadusd_end = cadusd_hist["Close"].iloc[-1]

final_values_cad = []

for i in range(len(final_portfolio)):
    
    row = final_portfolio.iloc[i]
    ticker = row["Ticker"]
    shares = row["Shares"]
    currency = row["Currency"]
    
    hist = yf.Ticker(ticker).history(start=contest_start, end=contest_end)
    
    if hist.empty or "Close" not in hist.columns:
        continue
    
    end_price = hist["Close"].iloc[-1]
    
    if currency == "USD":
        price_cad_end = end_price / cadusd_end
    else:
        price_cad_end = end_price
    
    final_values_cad.append(shares * price_cad_end)

final_value = sum(final_values_cad)

portfolio_return = (final_value - initial_capital) / initial_capital

tsx_symbol = "^GSPTSE"
spx_symbol = "^GSPC"

tsx_hist = yf.Ticker(tsx_symbol).history(start=contest_start, end=contest_end)
spx_hist = yf.Ticker(spx_symbol).history(start=contest_start, end=contest_end)

tsx_return = tsx_hist["Close"].iloc[-1] / tsx_hist["Close"].iloc[0] - 1
spx_return = spx_hist["Close"].iloc[-1] / spx_hist["Close"].iloc[0] - 1

benchmark_return = (tsx_return + spx_return) / 2

print("===== MARKET MEET PERFORMANCE CHECKS =====")
print(f"Initial Capital (CAD): {initial_capital:,.2f}")
print(f"Final Value (CAD): {final_value:,.2f}")
print(f"Portfolio Return: {portfolio_return:.4%}")
print(f"Benchmark Return: {benchmark_return:.4%}")

difference = portfolio_return - benchmark_return
print(f"Difference vs Benchmark: {difference:.4%}")


===== MARKET MEET PERFORMANCE CHECKS =====
Initial Capital (CAD): 1,000,000.00
Final Value (CAD): 1,013,970.83
Portfolio Return: 1.3971%
Benchmark Return: -2.4143%
Difference vs Benchmark: 3.8114%


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.