In [27]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 09
### Team Member Names: Aryan Singh, Jack Smith, and Samyak Jain
### Team Strategy Chosen: Market Meet (Market Beat, Market Meet, Risk-Free)

### Ticker Data:

In [28]:
# sample DataFrame of tickers for testing

# tickers_df = pd.DataFrame({
#      'Tickers': ['AAPL', 'MSFT', 'GOOGL', 'JPM', 'BAC', 'C', 'WFC', 'GS',
#                  'XOM', 'CVX', 'SLB', 'COP', 'AMZN', 'TSLA', 'HD', 'NKE',
#                  'JNJ', 'PFE', 'UNH', 'ABBV', 'KO', 'PEP', 'WMT', 'COST',
#                  'NEE', 'DUK', 'SO', 'D', 'AMT', 'PLD', 'EQIX', 'SPG',
#                  'FCX', 'NEM', 'LIN', 'APD', 'TD.TO', 'AZI', 'DML.TO', 'ORANY']
#  })

# tickers_df = pd.read_csv('ADD CSV FILE FOR WHENEVER WE GET IT HERE')

# AZI, ORANY, and DML.TO are stocks for testing


# tickers_df = pd.read_csv('Tickers_Example.csv', header=None, names=['Ticker'])
# tickers = tickers_df['Ticker'].tolist()

# tickers_df

In [41]:
start_date = '2024-10-01'
end_date = '2025-09-30'
min_volume = 5000
min_trading_days=18

tickers_df = pd.read_csv('Tickers_Example.csv', header=None, names=['Ticker'])
tickers = tickers_df['Ticker'].tolist()

def is_us_or_ca_ticker_by_currency(ticker):
    try:
        currency = yf.Ticker(ticker).info.get('currency', '')
        return currency in ['USD', 'CAD']
    except Exception:
        return False

def average_daily_volume_adjusted(df):
    df.index = pd.to_datetime(df.index)
    monthly_counts = df.groupby(df.index.to_period('M')).size()
    valid_months = monthly_counts[monthly_counts >= min_trading_days].index
    df_filtered = df[df.index.to_period('M').isin(valid_months)]
    if not df_filtered.empty:
        avg = df_filtered['Volume'].mean()
        # If avg is a series for some reason (it happeded idk how) extract just the number
        if isinstance(avg, pd.Series):
            avg = avg.values[0]
        return avg
    else:
        return 0

filtered_tickers = []

for ticker in tickers:

    if not is_us_or_ca_ticker_by_currency(ticker):
        print(f"[REMOVED] {ticker}: Not USD or CAD currency.")
        continue

    ticker_data = yf.Ticker(ticker)
    hist = ticker_data.history(start=start_date, end=end_date)

    if isinstance(hist.index, pd.DatetimeIndex) and hist.index.tz is not None:
        hist.index = hist.index.tz_convert(None)

    if hist.empty:
        print(f"[DELISTED] {ticker}: No historical data returned.")
        continue

    if 'Volume' not in hist.columns:
        print(f"[REMOVED] {ticker}: No Volume column in stock data.")
        continue

    avg_vol = average_daily_volume_adjusted(hist)

    if avg_vol < min_volume:
        print(f"[REMOVED] {ticker}: Avg volume {avg_vol:.2f} < {min_volume}.")
        continue

    filtered_tickers.append(ticker)

# Display filtered tickers
display(pd.DataFrame(filtered_tickers, columns=['Filtered Tickers']))

[REMOVED] AGN: Not USD or CAD currency.
[REMOVED] CELG: Not USD or CAD currency.
[REMOVED] MON: Not USD or CAD currency.
[REMOVED] RTN: Not USD or CAD currency.


Unnamed: 0,Filtered Tickers
0,AAPL
1,ABBV
2,ABT
3,ACN
4,AIG
5,AMZN
6,AXP
7,BA
8,BAC
9,BB.TO


### Optimal Benchmark Weighting On Different Sectors:

In [42]:
# what the benchmark weighting on different sectors should be

bench_weights = {
    'Financials': 0.229,
    'Information Technology': 0.198,
    'Industrials':0.1115,
    'Energy': 0.0875,
    'Consumer Discretionary': 0.00705,
    'Communication Services': 0.0635,
    'Materials': 0.0625,
    'Consumer Staples': 0.0515,
    #'Health Care': 0.0495,
    'Healthcare': 0.0495,  # Handle both naming conventions, if needed.
    'Utilities': 0.019,
    'Real Estate': 0.0105}
    
sector_mapping = {
    'Financial Services': 'Financials',
    'Technology': 'Technology',
    'Industrials': 'Industrials',
    'Energy': 'Energy',
    'Consumer Cyclical': 'Consumer Discretionary',
    'Communication Services': 'Communication Services',
    'Basic Materials': 'Materials',
    'Consumer Defensive': 'Consumer Staples',
    'Healthcare': 'Health Care',
    'Utilities': 'Utilities',
    'Real Estate': 'Real Estate'}


### DataFrame To Store The Neccesary Infomation (Ticker, Sector, Price, Currency, And Market Cap):

In [43]:
initial_capital = 1_000_000

tickers_list = []
sectors_list = []
prices_list = []
currencies_list = []
market_caps_list = []

for ticker in filtered_tickers:
    
    try:
        stock = yf.Ticker(ticker)
        info = stock.info or {}
        yf_sector = info.get('sector')
        sector = sector_mapping.get(yf_sector, 'Unknown')

        if sector == 'Unknown':
            continue
            
        hist = stock.history(period='1d')
        
        if hist.empty or 'Close' not in hist.columns:
            continue

        price = float(hist['Close'].iloc[-1])

        fast = stock.fast_info or {}

        currency = fast.get('currency', 'USD')
        market_cap = fast.get('market_cap', info.get('marketCap', 0))
        
        if market_cap is None or pd.isna(market_cap):
            market_cap = 0

        tickers_list.append(ticker)
        sectors_list.append(sector)
        prices_list.append(price)
        currencies_list.append(currency)
        market_caps_list.append(market_cap)
    
    except Exception as e:
        print(f"Skipped {ticker}: {e}")
        continue

df = pd.DataFrame({'Ticker': tickers_list, 'Sector': sectors_list, 'Price': prices_list, 'Currency': currencies_list, 
                   'Market Cap': market_caps_list})

print(df.to_markdown())
print(f"\nSector By Size:\n{df.groupby('Sector').size()}")

|    | Ticker   | Sector                 |     Price | Currency   |    Market Cap |
|---:|:---------|:-----------------------|----------:|:-----------|--------------:|
|  0 | AAPL     | Technology             |  266.74   | USD        | 3958525394944 |
|  1 | ABBV     | Health Care            |  229.54   | USD        |  405685436416 |
|  2 | ABT      | Health Care            |  124.21   | USD        |  216182407168 |
|  3 | ACN      | Technology             |  239.69   | USD        |  149291565056 |
|  4 | AIG      | Financials             |   75.46   | USD        |   41805127680 |
|  5 | AMZN     | Consumer Discretionary |  218.53   | USD        | 2336132956160 |
|  6 | AXP      | Financials             |  342.44   | USD        |  238297923584 |
|  7 | BA       | Industrials            |  178.685  | USD        |  135817658368 |
|  8 | BAC      | Financials             |   51.325  | USD        |  380193406976 |
|  9 | BB.TO    | Technology             |    5.86   | CAD        |    34839

### Calculating Annual Volatility:

In [5]:
# calculating annual volatility

# using data from one year of the current date
end_date = datetime.now()
start_date = datetime(end_date.year - 1, end_date.month, end_date.day) # 1 year ago

trading_days_per_year = 251 # around 251 trading days per year.
min_days_required = 60 # uses traded stocks

# list to store the volatilities
volatilities = []

for ticker in df['Ticker']:

    try:
        
        stock = yf.Ticker(ticker)
        hist = stock.history(start=start_date, end=end_date)
        
        if hist is None or hist.empty or len(hist) < min_days_required:
            volatilities.append(np.nan)
            continue
            
        returns = hist['Close'].pct_change().dropna()
            
        if returns.empty:
            volatilities.append(np.nan)
            continue
                
        annual_vol = returns.std() * np.sqrt(trading_days_per_year)
        volatilities.append(annual_vol)
    
    except Exception as e:
        volatilities.append(np.nan)
        continue
    
df['Volatility'] = volatilities

df = df.dropna(subset=['Volatility']).reset_index(drop=True) # dropping all stocks with nan volatility.

df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,AAPL,Technology,268.559998,USD,3985534877696,0.325356
1,MSFT,Technology,487.119995,USD,3620844077056,0.242896
2,GOOGL,Communication Services,292.809998,USD,3546550370304,0.329572
3,JPM,Financials,303.269989,USD,833917812736,0.242082
4,BAC,Financials,52.02,USD,385309409280,0.268398
5,C,Financials,99.830002,USD,183776837632,0.311693
6,WFC,Financials,84.160004,USD,269601619968,0.291623
7,GS,Financials,785.98999,USD,237935738880,0.310334
8,XOM,Energy,117.349998,USD,500292026368,0.233337
9,CVX,Energy,151.699997,USD,305648566272,0.245793


### Before Filtering High Volatile Stocks:

In [6]:
print("Before filtering:", len(df))
print(df.groupby("Sector").size())

Before filtering: 40
Sector
Communication Services    2
Consumer Discretionary    5
Consumer Staples          4
Energy                    5
Financials                6
Health Care               4
Materials                 4
Real Estate               4
Technology                2
Utilities                 4
dtype: int64


In [7]:
# I have already created the dataframe with each stock, its secor, its currency, market_cap, and annual volatility
#use this dataframe throughout the rest of the code. It is okay to manupulate the dataframe to remove stocks
#in which are not deemed fit to invest in.

# get rid of stocks with high volatility. i.e. get rid of the stocks with the highest std within each sector. I'm thinking maybe the top 25%

cutoff = 0.75

filtered_rows = []
seen_sectors = []

for i in range(len(df)):
    sector_name = df.iloc[i]['Sector']
    if sector_name not in seen_sectors:
        seen_sectors.append(sector_name)

for sector in seen_sectors:
    
    sector_df = df[df['Sector'] == sector].copy()
    if len(sector_df) < 4:
        filtered_rows.append(sector_df)
        continue
    
    sector_df = sector_df.sort_values(by='Volatility').reset_index(drop=True)
        
    keep_count = int(len(sector_df) * cutoff)
    
    if keep_count < 1:
        keep_count = 1
    
    sector_kept = sector_df.iloc[:keep_count]
    filtered_rows.append(sector_kept)

df = pd.concat(filtered_rows, ignore_index=True)

df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,AAPL,Technology,268.559998,USD,3985534877696,0.325356
1,MSFT,Technology,487.119995,USD,3620844077056,0.242896
2,GOOGL,Communication Services,292.809998,USD,3546550370304,0.329572
3,ORANY,Communication Services,15.85,USD,42138013696,0.235354
4,TD.TO,Financials,114.669998,CAD,197516541952,0.166901
5,JPM,Financials,303.269989,USD,833917812736,0.242082
6,BAC,Financials,52.02,USD,385309409280,0.268398
7,WFC,Financials,84.160004,USD,269601619968,0.291623
8,XOM,Energy,117.349998,USD,500292026368,0.233337
9,CVX,Energy,151.699997,USD,305648566272,0.245793


### After Filtering High Volatile Stocks:

In [8]:
print("After filtering:", len(df))
print(df.groupby("Sector").size())

After filtering: 29
Sector
Communication Services    2
Consumer Discretionary    3
Consumer Staples          3
Energy                    3
Financials                4
Health Care               3
Materials                 3
Real Estate               3
Technology                2
Utilities                 3
dtype: int64


In [9]:
# create the benchmark. Basically, create a dataframe that consists of the closing returns of the benchmark within the past year(1 year because we are going to 
#track the correlation between the benchmark and the stocks back by one year later. The benchmark is just an equally weighted average of the
#TSX composite and the S&P500 returns.

end_date = datetime.now()
start_date = datetime(end_date.year - 1, end_date.month, end_date.day)

tsx_symbol = "^GSPTSE"
spx_symbol = "^GSPC"

tsx_ticker = yf.Ticker(tsx_symbol)
spx_ticker = yf.Ticker(spx_symbol)

tsx_hist = tsx_ticker.history(start=start_date, end=end_date)
spx_hist = spx_ticker.history(start=start_date, end=end_date)

tsx_returns = tsx_hist['Close'].pct_change()
spx_returns = spx_hist['Close'].pct_change()

benchmark_df = (pd.DataFrame({'TSX Return': tsx_returns,'S&P 500 Return': spx_returns})).dropna()

benchmark_df['Benchmark Return'] = (benchmark_df['TSX Return'] + benchmark_df['S&P 500 Return']) / 2
benchmark_df.index = benchmark_df.index.strftime('%Y-%m-%d')

benchmark_df


Unnamed: 0_level_0,TSX Return,S&P 500 Return,Benchmark Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-11-20,0.001028,0.000022,0.000525
2024-11-21,0.014147,0.005340,0.009744
2024-11-22,0.002111,0.003468,0.002790
2024-11-25,-0.001332,0.003020,0.000844
2024-11-26,-0.000209,0.005722,0.002757
...,...,...,...
2025-11-13,-0.018620,-0.016557,-0.017588
2025-11-14,0.002410,-0.000502,0.000954
2025-11-17,-0.008254,-0.009162,-0.008708
2025-11-18,-0.001320,-0.008256,-0.004788


In [10]:
# implement a market_cap check. Basically get rid of any pennystocks as they don't tend to always follow the market very well.

min_small_cap = 1_000_000_000
limit_small_cap = 2_000_000_000

small_cap_count = 0

for i in range(len(df)):
    
    cap = df.iloc[i]['Market Cap']

    if cap is None:
        
        continue
        
    if cap <= 0:
        
        continue

    if cap < limit_small_cap:
        
        small_cap_count += 1

penny_stocks = []
small_cap_stocks = []

for i in range(len(df)):
    
    row = df.iloc[i]
    cap = row['Market Cap']

    if cap is None or cap <= 0:
        
        continue

    if cap < min_small_cap:
        
        penny_stocks.append(row)
        
    if cap < limit_small_cap:
        
        small_cap_stocks.append(row)

final_cap_rows = []

if small_cap_count == 0:
    
    for i in range(len(df)):
        row = df.iloc[i]
        cap = row['Market Cap']
        
        if cap is None or cap <= 0:
            
            continue
            
        if cap < min_small_cap:
            
            continue
            
        final_cap_rows.append(row)

else:
    
    small_stock_penny_all = True

    for i in small_cap_stocks:
        
        if i['Market Cap'] >= min_small_cap:
            
            small_stock_penny_all = False
            
            break

    if small_stock_penny_all:
        
        best_small_stock = small_cap_stocks[0]
        
        for i in small_cap_stocks:
            
            if i['Market Cap'] > best_small_stock['Market Cap']:
                
                best_small_stock = i

        for i in range(len(df)):
            
            row = df.iloc[i]
            cap = row['Market Cap']
            
            if cap is None or cap <= 0:
                
                continue

            if cap < min_small_cap and row['Ticker'] != best_small_stock['Ticker']:
                
                continue

            final_cap_rows.append(row)

    else:
        
        for i in range(len(df)):
            row = df.iloc[i]
            cap = row['Market Cap']
            
            if cap is None or cap <= 0:
                
                continue
                
            if cap < min_small_cap:
                
                continue
                
            final_cap_rows.append(row)

keep_df = pd.DataFrame(final_cap_rows)

tickers_keep = keep_df['Ticker']

df = df[df['Ticker'].isin(tickers_keep)].reset_index(drop=True)

df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility
0,AAPL,Technology,268.559998,USD,3985534877696,0.325356
1,MSFT,Technology,487.119995,USD,3620844077056,0.242896
2,GOOGL,Communication Services,292.809998,USD,3546550370304,0.329572
3,ORANY,Communication Services,15.85,USD,42138013696,0.235354
4,TD.TO,Financials,114.669998,CAD,197516541952,0.166901
5,JPM,Financials,303.269989,USD,833917812736,0.242082
6,BAC,Financials,52.02,USD,385309409280,0.268398
7,WFC,Financials,84.160004,USD,269601619968,0.291623
8,XOM,Energy,117.349998,USD,500292026368,0.233337
9,CVX,Energy,151.699997,USD,305648566272,0.245793


In [11]:
# Pick the top 1-3 stocks in each sector with the best correlation to the benchmark over the past year.

correlations = []

for i in range(len(df)):
    
    ticker = df.iloc[i]['Ticker']
    
    try:
        
        stock = yf.Ticker(ticker)
        stock_hist = stock.history(start=start_date, end=end_date)

        if stock_hist is None or stock_hist.empty:
            
            correlations.append(np.nan)
            
            continue

        stock_returns = stock_hist['Close'].pct_change().dropna()

        if stock_returns.empty:
            
            correlations.append(np.nan)
            
            continue

        stock_returns.index = stock_returns.index.strftime('%Y-%m-%d')
        
        dates = []
        
        for x in stock_returns.index:
            
            if x in benchmark_df.index:
                
                dates.append(x)

        if len(dates) < 20:
            
            correlations.append(np.nan)
            
            continue

        stock_aligned = stock_returns.loc[dates]
        bench_aligned = benchmark_df.loc[dates, 'Benchmark Return']

        corr_between_stock_and_bench = stock_aligned.corr(bench_aligned)
        
        correlations.append(corr_between_stock_and_bench)

    except Exception:
        
        correlations.append(np.nan)

In [12]:

df['Correlation'] = correlations

sector_lst = []

for i in range(len(df)):
    
    sector = df.iloc[i]['Sector']
    
    if sector not in sector_lst:
        
        sector_lst.append(sector)

stocks_chosen = []

for sector in sector_lst:
    
    sector_df = df[df['Sector'] == sector].copy()
    sector_df = sector_df.dropna(subset=['Correlation'])
    
    if len(sector_df) == 0:
        
        continue
    
    sector_df = sector_df.sort_values(by='Correlation', ascending=False).reset_index(drop=True)
    
    if len(sector_df) <= 3:
        
        chosen = sector_df
        
    else:
        
        chosen = sector_df.iloc[:3]
    
    stocks_chosen.append(chosen)

stocks_chosen_df = pd.concat(stocks_chosen, ignore_index=True)

stocks_chosen_df


Unnamed: 0,Ticker,Sector,Price,Currency,Market Cap,Volatility,Correlation
0,AAPL,Technology,268.559998,USD,3985534877696,0.325356,0.71969
1,MSFT,Technology,487.119995,USD,3620844077056,0.242896,0.657577
2,GOOGL,Communication Services,292.809998,USD,3546550370304,0.329572,0.547507
3,ORANY,Communication Services,15.85,USD,42138013696,0.235354,0.217443
4,JPM,Financials,303.269989,USD,833917812736,0.242082,0.767203
5,BAC,Financials,52.02,USD,385309409280,0.268398,0.715481
6,WFC,Financials,84.160004,USD,269601619968,0.291623,0.695585
7,COP,Energy,87.980003,USD,109881950208,0.33245,0.579982
8,CVX,Energy,151.699997,USD,305648566272,0.245793,0.532443
9,XOM,Energy,117.349998,USD,500292026368,0.233337,0.45746


In [13]:

if len(stocks_chosen_df) > 25:
    
    stocks_chosen_df = stocks_chosen_df.sort_values(by="Correlation", ascending=False).reset_index(drop=True)
    stocks_chosen_df = stocks_chosen_df.iloc[:25].reset_index(drop=True)

x = len(stocks_chosen_df)

print("Number Of Stocks For Portfolio:", x)

Number Of Stocks For Portfolio: 25


In [14]:
# Create the weighting of the stocks baised on the benchmark weighting as define above. If there aren't any
#good stocks in a particular low-weighted sector within the benchmark, you can just re-distribute the weight
#into the higher weighted sectors and stocks. Also during this process you need to make sure that 
#the weightings follow the portfolio rules in terms of weightings and restrictions.

sector_totals = {}
total = 0

for i in range(len(stocks_chosen_df)):
    
    sector = stocks_chosen_df.iloc[i]["Sector"]
    marketcap = stocks_chosen_df.iloc[i]["Market Cap"]

    if sector not in sector_totals:
        
        sector_totals[sector] = 0.0

    sector_totals[sector] += marketcap
    total += marketcap

benchmark_sector_weights = {}

for sector in sector_totals:
    
    if total > 0:
        
        benchmark_sector_weights[sector] = sector_totals[sector] / total
        
    else:
        
        benchmark_sector_weights[sector] = 0

print("Benchmark Weighting:")

for sector in benchmark_sector_weights:
    print(f"{sector} {benchmark_sector_weights[sector]:.4f}")


Benchmark Weighting:
Financials 0.0805
Technology 0.4115
Consumer Discretionary 0.1518
Real Estate 0.0123
Materials 0.0187
Energy 0.0495
Communication Services 0.1941
Consumer Staples 0.0488
Health Care 0.0299
Utilities 0.0028


In [15]:

min_weight = 1 / (2 * x)
max_weight = 0.15 

sector_market_cap_chosen = {}
initial_weights = [0] * x

for i in range(x):
    
    stock_sector = stocks_chosen_df.iloc[i]["Sector"]
    cap = float(stocks_chosen_df.iloc[i]["Market Cap"])
    
    if stock_sector not in sector_market_cap_chosen:
        
        sector_market_cap_chosen[stock_sector] = 0
        
    sector_market_cap_chosen[stock_sector] += cap

for i in range(x):
    
    row = stocks_chosen_df.iloc[i]
    stock_sector = row["Sector"]
    cap = row["Market Cap"]

    sector_target = 0
    
    if stock_sector in benchmark_sector_weights:
        
        sector_target = benchmark_sector_weights[stock_sector]

    sector_cap_sum = sector_market_cap_chosen[stock_sector]

    if sector_target == 0 or sector_cap_sum <= 0:
        
        weight = 1 / x
        
    else:
        
        weight = sector_target * (cap / sector_cap_sum)

    if weight < min_weight:
        
        weight = min_weight
        
    if weight > max_weight:
        
        weight = max_weight

    initial_weights[i] = weight
    

In [16]:

weights = initial_weights[:]

sector_weight_current = {}

for i in range(x):
    
    stock_sector = stocks_chosen_df.iloc[i]["Sector"]
    weight = weights[i]
    
    if stock_sector not in sector_weight_current:
        
        sector_weight_current[stock_sector] = 0
    
    sector_weight_current[stock_sector] += weight

for stock_sector in sector_weight_current:
    
    if sector_weight_current[stock_sector] > 0.40:
        
        factor_portfolio_value = 0.40 / sector_weight_current[stock_sector]
        
        for i in range(x):
            
            if stocks_chosen_df.iloc[i]["Sector"] == stock_sector:
                
                weights[i] = weights[i] * factor_portfolio_value

total_weights = sum(weights)

if total_weights > 0:
    
    for i in range(x):
        
        weights[i] = weights[i] / total_weights
else:
    
    for i in range(x):
        
        weights[i] = 1 / x


stocks_chosen_df["Weight"] = weights

# stocks_chosen_df

# print("Total Weight:", stocks_chosen_df["Weight"].sum())


In [17]:

sector_weight_final = {}

for i in range(len(stocks_chosen_df)):
    
    stock_sector = stocks_chosen_df.iloc[i]["Sector"]
    
    weight = stocks_chosen_df.iloc[i]["Weight"]
    
    if stock_sector not in sector_weight_final:
        
        sector_weight_final[stock_sector] = 0
        
    sector_weight_final[stock_sector] += weight

print("Final Sector Weights:")

for stock_sector in sector_weight_final:
    print(f"{stock_sector} {sector_weight_final[stock_sector]:.4f}")
    
print("Minimum Weight In Portfolio:", stocks_chosen_df["Weight"].min())
print("Maximum Weight In Portfolio:", stocks_chosen_df["Weight"].max())


Final Sector Weights:
Financials 0.0830
Technology 0.2897
Consumer Discretionary 0.1630
Real Estate 0.0579
Materials 0.0579
Energy 0.0648
Communication Services 0.1642
Consumer Staples 0.0593
Health Care 0.0408
Utilities 0.0193
Minimum Weight In Portfolio: 0.019315815502938583
Maximum Weight In Portfolio: 0.14486861627203937


In [19]:
initial_capital = 1_000_000
fee_flat_usd = 2.15
fee_per_share_usd = 0.001

total_budget_cad = initial_capital

cad_to_usd = yf.Ticker("CADUSD=X")
cad_to_usd_hist = cad_to_usd.history(period="1d")
cadusd = cad_to_usd_hist["Close"].iloc[-1]

x = len(stocks_chosen_df)

initial_shares_without_budget = []
cad_value_without_budget = []
cad_fees_without_budget = []

for i in range(x):
    
    row = stocks_chosen_df.iloc[i]
    price = row["Price"]
    currency = row["Currency"]
    weight = row["Weight"]

    if currency == "USD":
        
        price_cad = price / cadusd
        
    else:
        
        price_cad = price

    value_in_cad = total_budget_cad * weight
    shares = value_in_cad / price_cad   # <-- fixed (was value_cad)

    fees_in_usd = min(fee_flat_usd, fee_per_share_usd * shares)
    fees_in_cad = fees_in_usd / cadusd

    initial_shares_without_budget.append(shares)
    cad_value_without_budget.append(shares * price_cad)
    cad_fees_without_budget.append(fees_in_cad)

total_value_without_budget = sum(cad_value_without_budget)
total_fees_without_budget = sum(cad_fees_without_budget)
total_spent_without_budget = total_value_without_budget + total_fees_without_budget

if total_spent_without_budget > 0:
    
    scaling_of_cost = total_budget_cad / total_spent_without_budget
    
else:
    
    scaling_of_cost = 1

shares_final = []
cad_value_final = []
cad_fees_final = []

for i in range(x):
    
    row = stocks_chosen_df.iloc[i]
    price = row["Price"]
    currency  = row["Currency"]
    
    if currency == "USD":
        price_cad = price / cadusd
    else:
        price_cad = price

    shares = initial_shares_without_budget[i] * scaling_of_cost

    fees_in_usd = min(fee_flat_usd, fee_per_share_usd * shares)
    fees_in_cad = fees_in_usd / cadusd

    value_in_cad = shares * price_cad

    shares_final.append(shares)
    cad_value_final.append(value_in_cad)
    cad_fees_final.append(fees_in_cad)

total_value_cad = sum(cad_value_final)
total_fees_cad  = sum(cad_fees_final)
total_spent_cad = total_value_cad + total_fees_cad

print("\nTotal Invested (CAD):", total_value_cad)
print("Total Fees (CAD):", total_fees_cad)
print("Total Spent (CAD):", total_spent_cad)

stocks_chosen_df["Shares"] = shares_final
stocks_chosen_df["Value"]  = cad_value_final

final_portfolio = stocks_chosen_df[["Ticker", "Price", "Currency", "Shares", "Value", "Weight"]].copy()

final_portfolio



Total Invested (CAD): 999992.9965368239
Total Fees (CAD): 7.003463176065905
Total Spent (CAD): 1000000.0


Unnamed: 0,Ticker,Price,Currency,Shares,Value,Weight
0,JPM,303.269989,USD,102.274577,43565.899568,0.043566
1,AAPL,268.559998,USD,384.043473,144867.60169,0.144869
2,BAC,52.02,USD,275.494632,20129.502897,0.02013
3,WFC,84.160004,USD,163.400998,19315.680225,0.019316
4,AMZN,222.690002,USD,397.612782,124368.562596,0.124369
5,SPG,180.130005,USD,76.343908,19315.680225,0.019316
6,MSFT,487.119995,USD,211.731637,144867.60169,0.144869
7,LIN,413.200012,USD,33.281288,19315.680225,0.019316
8,APD,253.25,USD,54.301396,19315.680225,0.019316
9,COP,87.980003,USD,156.306297,19315.680225,0.019316


In [None]:
# ADD BENCHMARK RETURN DIFFERENCE POSSIBLY

# AND CHECK EVERYTHING OVER

# ADD COMMENTS WHERE NEEDED


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.