In [9]:
#These are the libraries you can use.  You may add any libraries directly related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime, timedelta

## Group Assignment
### Team Number: 07
### Team Member Names: Wendi Xue, 
### Team Strategy Chosen: Risk-Free (Market Beat, Market Meet, Risk-Free)

### Loading and filtering valid tickers
In the secret list of tickers that will be used to generate our portfolio, we will take the following factors into consideration:
- If there are any duplicate tickers in the list, we will drop them.
- If a ticker in the list does not exist, looking up its fields with `info.get()` returns `None` instead of raising a `KeyError`, so the ticker is simply skipped.
- We only want valid US and Canadian companies with listed stocks, so we keep only stocks whose country is United States or Canada and whose currency is CAD or USD.

In [68]:
# Load the tickers
df = pd.read_csv('Tickers_Example.csv', header=None) # test with Tickers_Example.csv?
# If there are any duplicate tickers, we will drop them
df = df.drop_duplicates()

# Normalise the first column before building the list: blank rows are removed and
# surrounding whitespace is stripped, so 'AAPL' and ' AAPL ' count as the same ticker
# and a missing value is never passed on to yfinance.
tickers = df.iloc[:, 0].dropna().astype(str).str.strip()
tickers = tickers[tickers != '']
ticker_lst = list(tickers.drop_duplicates())

# Filter out tickers that don't exist; include only valid tickers
valid_countries = {'Canada', 'United States'}
valid_currencies = {'CAD', 'USD'}
valid_tickers_lst = []
for ticker in ticker_lst:
    try:
        ticker_data = yf.Ticker(ticker) # call up data
        # Look the info dictionary up once and fall back to an empty dict if nothing came back,
        # so the .get() calls below cannot fail on a missing result.
        info = ticker_data.info or {}
        country = info.get('country')
        currency = info.get('currency')
        # keep only US / Canadian companies quoted in CAD or USD
        if country in valid_countries and currency in valid_currencies:
            valid_tickers_lst.append(ticker)
    except Exception as err:
        # best effort: report the problem and move on to the next ticker
        print(f"Error: {err}")

print(valid_tickers_lst)

['AAPL', 'ABBV', 'ABT', 'AIG', 'AMZN', 'AXP', 'BA', 'BAC', 'BB.TO', 'BIIB', 'BK', 'BLK', 'BMY', 'C', 'CAT', 'CL', 'KO', 'LLY', 'LMT', 'MO', 'MRK', 'PEP', 'PFE', 'PG', 'PM', 'PYPL', 'QCOM', 'RY.TO', 'SHOP.TO', 'T.TO', 'TD.TO', 'TXN', 'UNH', 'UNP', 'UPS', 'USB', 'PRL.TO']


## Average daily volume constraint
- If a stock has no volume information for a specific day, we drop that day.
- Between October 1, 2024 and September 30, 2025, we group the volume data by month to check for months with fewer than 18 trading days. Any such month is dropped when calculating the average daily volume.
- If a stock has an average daily volume below 5,000 shares, we exclude that stock from the list of tickers.

In [69]:
# Apply a filter to remove all stocks whose average daily volume is below 5,000 shares
# in the measurement window (October 1, 2024 through September 30, 2025, both inclusive).
volume_start_date = '2024-10-01'
volume_end_date = '2025-09-30'
min_trading_days = 18   # months with fewer trading days are left out of the average
min_avg_volume = 5000   # minimum average daily volume (in shares) a stock needs to be kept

# yfinance treats `end` as exclusive, so ask for one extra day to include volume_end_date itself.
volume_download_end = (pd.Timestamp(volume_end_date) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')

filtered_lst = []

for ticker in valid_tickers_lst: # goes through every ticker to filter
    data = yf.download(
        tickers=ticker,
        start=volume_start_date,
        end=volume_download_end
    )
    if data.empty or 'Volume' not in data.columns:
        continue # nothing usable was downloaded, so the stock cannot pass the volume test

    volume = data['Volume']
    if isinstance(volume, pd.DataFrame):
        # The columns carry an extra level; take this ticker's column when it is labelled,
        # otherwise the only column available.
        volume = volume[ticker] if ticker in volume.columns else volume.iloc[:, 0]
    volume = volume.dropna() # days without volume information are dropped

    # Count the trading days of the month each row belongs to (YYYY-MM) and keep
    # only rows from months with at least `min_trading_days` trading days.
    month_of_day = volume.index.to_period('M')
    days_in_month = volume.groupby(month_of_day).transform('count')
    eligible_volume = volume[days_in_month >= min_trading_days]
    if eligible_volume.empty:
        continue # no month had enough trading days, so there is no average to test

    average_daily_volume = eligible_volume.mean() # calculate average daily volume
    if average_daily_volume >= min_avg_volume: # determine if above or below 5000 shares
        filtered_lst.append(ticker) # add to filtered list if greater or equal to 5000 shares

print(filtered_lst)

# Daily data for every stock that passed the volume filter.
# Testing window for now (2025-10-24 to 2025-10-31?); change to Nov 21 2025 / Nov 28 2025 later.
# NOTE(review): yfinance documents `end` as exclusive, so the end_date day itself is
# presumably not part of the download -- confirm this is the intended window.
start_date, end_date = '2025-10-24', '2025-10-31'

daily_data = yf.download(filtered_lst, start=start_date, end=end_date)

  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed
  data = yf.download(
[*********************100%***********************]  1 of 1 completed

['AAPL', 'ABBV', 'ABT', 'AIG', 'AMZN', 'AXP', 'BA', 'BAC', 'BB.TO', 'BIIB', 'BK', 'BLK', 'BMY', 'C', 'CAT', 'CL', 'KO', 'LLY', 'LMT', 'MO', 'MRK', 'PEP', 'PFE', 'PG', 'PM', 'PYPL', 'QCOM', 'RY.TO', 'SHOP.TO', 'T.TO', 'TD.TO', 'TXN', 'UNH', 'UNP', 'UPS', 'USB', 'PRL.TO']


[*********************100%***********************]  37 of 37 completed


In [70]:
# Get/calculate volatility (std), beta, market cap, and sectors

# ---- Market index for beta ----
market_index = "^GSPC"
market_hist = yf.download(market_index, start=start_date, end=end_date)
market_prices = market_hist["Close"].dropna()
market_returns = market_prices.pct_change().dropna()

# ---- Loop through filtered tickers ----
# Rows are collected in a list and turned into a DataFrame once at the end; this keeps
# numeric columns numeric instead of growing an object-typed frame one row at a time.
metric_rows = []
for ticker in filtered_lst:

    # Use adjusted closes when the download has them, otherwise plain closes.
    try:
        prices = daily_data['Adj Close'][ticker]
    except KeyError:
        try:
            prices = daily_data['Close'][ticker]
        except KeyError:
            continue  # the download holds no price column for this ticker

    prices = prices.dropna()
    if prices.empty:
        continue

    returns = prices.pct_change().dropna()
    if returns.empty:
        continue

    volatility = returns.std(ddof=0)     # daily std (population)

    # Keep only the dates on which both the stock and the index have a return.
    aligned = pd.concat([returns, market_returns], axis=1, join="inner")
    aligned.columns = ["Stock", "Market"]

    # covariance(stock, market)
    cov_sm = aligned.cov(ddof=0).iloc[0, 1]

    # variance(market)
    var_market = aligned["Market"].var(ddof=0)

    # beta = cov(stock, market) / var(market); undefined when the market did not move
    if var_market == 0 or pd.isna(cov_sm):
        beta = np.nan
    else:
        beta = cov_sm / var_market

    # A failed lookup for one ticker must not abort the whole loop: report it and
    # continue with empty info (the row is then dropped below for lacking a market cap).
    try:
        info = yf.Ticker(ticker).info or {}
    except Exception as err:
        print(f"Error: {err}")
        info = {}

    mcap = info.get("marketCap")
    if mcap is None:
        mcap = np.nan  # key missing or present without a value
    sector = info.get("sector") or "Unknown"

    metric_rows.append([ticker, volatility, beta, mcap, sector])

metrics_df = pd.DataFrame(
    metric_rows,
    columns=['Ticker', 'Volatility', 'Beta', 'MarketCap', 'Sector']
)

# Stocks without a volatility or market cap cannot be scored, so they are removed.
metrics_df = metrics_df.dropna(subset=['Volatility', 'MarketCap']).reset_index(drop=True)

print(metrics_df)

# Score every stock out of 100 with the weighted scoring algorithm from the doc (taken from google docs)
# Ammar

# Scoring columns are added to a separate copy of the metrics table
scored_df = metrics_df.copy(deep=True)

# Volatility scoring function
def vol_points(v):
    """Return the volatility points (5 to 45) for a daily volatility ``v``.

    ``v`` is expressed as a fraction (e.g. 0.02 = 2%). Lower volatility earns
    more points; a value equal to a band's upper bound falls in the next band.
    """
    # (exclusive upper bound, points), checked from the lowest band upwards
    bands = ((0.02, 45), (0.03, 40), (0.04, 35), (0.05, 25), (0.06, 15))
    for upper_bound, points in bands:
        if v < upper_bound:
            return points
    # 0.06 or above; also reached when no comparison succeeds (e.g. NaN)
    return 5

# Beta scoring function
def beta_points(b):
    """Return the beta points (0 to 35) for a beta ``b``.

    A missing beta gets the neutral 20 points. Otherwise lower betas earn more
    points (negative betas land in the lowest band), and a value equal to a
    band's upper bound falls in the next band.
    """
    if pd.isna(b):
        return 20  # neutral if missing

    # (exclusive upper bound, points), checked from the lowest band upwards
    bands = ((0.6, 35), (0.9, 25), (1.1, 20), (1.3, 10))
    for upper_bound, points in bands:
        if b < upper_bound:
            return points
    return 0  # beta of 1.3 or more

# Market cap scoring function
def cap_points(m):
    """Return the market-cap points (4 to 20) for a market cap ``m`` in dollars.

    Larger companies earn more points. The top band requires strictly more
    than 200e9; the remaining bands include their lower bound.
    """
    if m > 200e9:
        return 20

    # (inclusive lower bound, points), checked from the largest band downwards
    bands = ((50e9, 16), (10e9, 12), (2e9, 8))
    for lower_bound, points in bands:
        if m >= lower_bound:
            return points
    # below 2e9; also reached when no comparison succeeds (e.g. NaN)
    return 4

# Apply scoring table: each points column is produced by running one scoring
# function over one metric column (points column -> (metric column, scorer))
point_rules = {
    'VolPts': ('Volatility', vol_points),
    'BetaPts': ('Beta', beta_points),
    'CapPts': ('MarketCap', cap_points),
}
for points_col, (metric_col, scorer) in point_rules.items():
    scored_df[points_col] = scored_df[metric_col].apply(scorer)

# Final score out of 100 is the sum of the three points columns
scored_df['Score'] = scored_df[list(point_rules)].sum(axis=1)

print(scored_df[['Ticker','Volatility','Beta','MarketCap','Sector','Score']])

# After scoring, put all stocks in lists based on sector
# Ammar

sector_dict = {}   # dictionary: sector -> list of tickers

# Walk the Sector and Ticker columns together in row order; setdefault creates the
# list the first time a sector is seen and returns the existing one afterwards
for sector, ticker in zip(scored_df['Sector'], scored_df['Ticker']):
    sector_dict.setdefault(sector, []).append(ticker)

# Preview the grouping
for sec, tics in sector_dict.items():
    print(f"{sec}: {tics}")

  market_hist = yf.download(market_index, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


     Ticker  Volatility      Beta      MarketCap                  Sector
0      AAPL    0.008717  0.729520  3985534877696              Technology
1      ABBV    0.008665 -0.554801   411641544704              Healthcare
2       ABT    0.007958  0.093310   219558903808              Healthcare
3       AIG    0.006295  0.028356    41965789184      Financial Services
4      AMZN    0.018096  2.008883  2380604112896       Consumer Cyclical
5       AXP    0.006994  0.461516   239828860928      Financial Services
6        BA    0.029839  3.358293   141149732864             Industrials
7       BAC    0.006427  0.016106   385309409280      Financial Services
8     BB.TO    0.006029  0.600508     3531513344              Technology
9      BIIB    0.008653 -0.362861    24635711488              Healthcare
10       BK    0.008580 -0.212121    75820441600      Financial Services
11      BLK    0.009149 -0.121249   157428531200      Financial Services
12      BMY    0.034901 -3.632762    93217136640   

## Forming the top 25 stocks dataframe
- To maximize our diversification, we will select the top 25 stocks according to the scores allocated by our scoring algorithm; 25 is the maximum number of stocks we are allowed to hold. This protects our portfolio from single-company risk.
- Using the concept of diversification, we will also spread our selection of stocks across all sectors so that our portfolio is not entirely dependent on how well a specific sector performs. Thus, we will pick the 5 best-scoring stocks from each sector (or all of its stocks if the sector has fewer than 5), and from that pool we will pick the 25 stocks with the best scores. This way we will not have more than 5 stocks per sector.


In [80]:
# Take the top 5 from each sector (based on their score /100) and put them in a new dataframe
def sector_top5(df, sector, n=5):
    """Return the ``n`` best-scoring rows of ``df`` that belong to ``sector``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least a 'Sector' and a 'Score' column.
    sector : str
        Sector name to select; rows whose 'Sector' equals this value are considered.
    n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        At most ``n`` rows of ``df`` (all of them if the sector has fewer),
        ordered from highest to lowest 'Score'. Every row keeps its own index
        label from ``df``. The result is empty if no row matches ``sector``.
    """
    in_sector = df[df['Sector'] == sector]
    # A stable sort keeps rows with equal scores in their original relative order,
    # so which rows make the cut does not depend on the sort implementation.
    ranked = in_sector.sort_values('Score', ascending=False, kind='stable')
    return ranked.head(n)

sector_lst = [*sector_dict] # every sector name, in the order the sectors were recorded

# Collect each sector's best stocks, then stack them into one table
sector_frames = [sector_top5(scored_df, sector_name) for sector_name in sector_lst]
all_sectors_top5 = pd.concat(sector_frames)

# Then return the top 25
# rank the pooled stocks from highest score to lowest
all_sectors_top5 = all_sectors_top5.sort_values('Score', ascending=False)
# keep the 25 best and renumber the rows from 0
top_25_stocks = all_sectors_top5.head(25).reset_index(drop=True)
print(top_25_stocks)

     Ticker  Volatility      Beta      MarketCap                  Sector  \
0       LLY    0.018934 -1.635101   940920995840              Healthcare   
1       BAC    0.006427  0.016106   385309409280      Financial Services   
2        PG    0.008120 -0.389964   344027955200      Consumer Defensive   
3       PEP    0.015298 -0.051315   201404956672      Consumer Defensive   
4      ABBV    0.008665 -0.554801   411641544704              Healthcare   
5       ABT    0.007958  0.093310   219558903808              Healthcare   
6        KO    0.013712 -0.054101   306033786880      Consumer Defensive   
7       MRK    0.006050  0.389409   237414318080              Healthcare   
8        PM    0.007728 -0.947957   242228559872      Consumer Defensive   
9       AXP    0.006994  0.461516   239828860928      Financial Services   
10       CL    0.010405 -0.507057    63421087744      Consumer Defensive   
11      BLK    0.009149 -0.121249   157428531200      Financial Services   
12       BK 

## Market Cap Mix: Check for a small-cap
To meet the requirement of at least one small-cap:
- Check if the 25th stock in our dataframe is a small-cap (we need the last row of the dataframe to be a small-cap for the minimum variance portfolio optimization). If it is, we will proceed with the minimum variance portfolio optimization.
- If the last row is not a small-cap, check if there is already a small-cap anywhere in the top 25 stocks. If there is, we will move the first small-cap we find in the dataframe to the last row to fulfill the requirement for our minimum variance optimization.
- If our top 25 stocks dataframe does not contain a small-cap, we will take the best-scoring small-cap from the entire list of tickers and replace the last row of our top 25 stocks with it.

In [82]:
# Make sure the last row of the top 25 stocks is a small cap, because the code that
# determines the portfolio weights relies on it.
small_cap = 2e9 # market caps below this value count as small cap
last_row_market_cap = top_25_stocks['MarketCap'].iloc[-1]
market_cap = top_25_stocks['MarketCap']
print(last_row_market_cap)
if last_row_market_cap < small_cap:
    print("Small cap in the 25th row found")
# move small cap to the last row if there is a small cap in the top 25 stocks
elif (market_cap < small_cap).any():
    # position of the first small cap; argmax returns the first True of the boolean mask
    first_small_cap_pos = int((market_cap < small_cap).to_numpy().argmax())
    # every other row keeps its relative order and the small cap goes to the end
    new_order = [pos for pos in range(len(top_25_stocks)) if pos != first_small_cap_pos]
    new_order.append(first_small_cap_pos)
    top_25_stocks = top_25_stocks.iloc[new_order].reset_index(drop=True)
# get the best scoring small cap from all scored stocks and replace the stock in the last row of the top 25 stocks with it
else:
    small_cap_df = scored_df[scored_df['MarketCap'] < small_cap].copy()
    small_cap_df = small_cap_df.sort_values('Score', ascending=False)
    if small_cap_df.empty:
        # nothing to swap in; leave the top 25 untouched and say so instead of crashing
        print("No small cap found among the scored stocks")
    else:
        top_score_small_cap = small_cap_df.iloc[0]
        # replace the last stock of the top 25 with the small cap with the best score
        # (the one-row frame is aligned on column names by concat and keeps its dtypes)
        top_25_stocks = pd.concat(
            [top_25_stocks.iloc[:-1], small_cap_df.iloc[[0]]],
            ignore_index=True
        )
print(top_25_stocks)

807942144
Small cap in the 25th row found
     Ticker  Volatility      Beta      MarketCap                  Sector  \
0       LLY    0.018934 -1.635101   940920995840              Healthcare   
1       BAC    0.006427  0.016106   385309409280      Financial Services   
2        PG    0.008120 -0.389964   344027955200      Consumer Defensive   
3       PEP    0.015298 -0.051315   201404956672      Consumer Defensive   
4      ABBV    0.008665 -0.554801   411641544704              Healthcare   
5       ABT    0.007958  0.093310   219558903808              Healthcare   
6        KO    0.013712 -0.054101   306033786880      Consumer Defensive   
7       MRK    0.006050  0.389409   237414318080              Healthcare   
8        PM    0.007728 -0.947957   242228559872      Consumer Defensive   
9       AXP    0.006994  0.461516   239828860928      Financial Services   
10       CL    0.010405 -0.507057    63421087744      Consumer Defensive   
11      BLK    0.009149 -0.121249   1574285312

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.