In [None]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex
import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime
import scipy as sp 
from scipy.optimize import minimize
import itertools as itr
from threading import Thread

## Group Assignment
### Team Number: 15
### Team Member Names: Neil Zhang, Rahim Rehan, Krish Patel
### Team Strategy Chosen: Risk-Free 

In [79]:
def valid_tickers(csv_file_path):
   """
   valid_tickers is the prelimary checker, it reads a csv and then removes any tickers that trade less than 5000 shares a month

   :param csv_file_path: A string that represents the csv file name that will be read
   :return: A list of names of the valid tickers
   """
   tickers_df = pd.read_csv(csv_file_path, header=None)
   tickers_df.columns = ["Ticker"]
   ticker_list = tickers_df["Ticker"].tolist()

   # Bulk download the ticker information
   hist = yf.download(
       tickers=ticker_list,
       start="2024-10-01",
       end="2025-10-01",
       group_by="ticker",
       auto_adjust=False,
       threads=True
    )
   
   # Count rows per month per ticker
   counts = hist.groupby(hist.index.to_period("M")).transform("count")

   # Keep only rows where all tickers in that month have >= 18 trading days
   valid_days = counts >= 18

   # Remove the months that do not meet threshold and any tickers that don't have a large enough trading volume
   hist_filtered = hist[valid_days]
   vol_filtered = hist_filtered.xs("Volume", axis=1, level=1)
   avg_vol_filtered = vol_filtered.mean()
   valid_tickers = avg_vol_filtered[avg_vol_filtered >= 5000].index.tolist()

   return valid_tickers

#Begin threading set up
def get_format_info(ticker, ticker_data):
   """
   get_format_info is the threading worker function, it is what will be done in parallel. It finds wanted ticker info
   and adds it to a list

   :param ticker: The ticker whose info is wanted
   :param ticker_data: A list that keeps track of the ticker info thats been extracted
   """
   ticker_info = yf.Ticker(ticker).info

   # Get the currency, recall we only want USD and CAD traded stocks
   currency = ticker_info.get("currency")
   if (currency not in ["USD", "CAD"]):
      currency = "NaN"

   ticker_data.append((ticker, ticker_info.get("sector"), currency, ticker_info.get("marketCap")))

#Threading occurs
def format_tickers(csv_file_path):
   """
   format_tickers is the main function that performs threading onto the worker function and gets the dataframe of tickers and wanted info

   :param csv_file_path: A string that represents the csv file name that will be read
   :return: A Dataframe that contains all the info we want for every valid ticker
   """
   ticker_list = valid_tickers(csv_file_path)
   
   # Threading set up, ticker_data is the shared list where threads store their results, threads is the list of the thread objects
   ticker_data = []
   threads = []

   # Loop and create threads of each ticker in the ticker_list
   for t in ticker_list:
      thread = Thread(target=get_format_info, args=(t, ticker_data))
      threads.append(thread)
      thread.start()

   # Once all threads complete, join them together 
   for th in threads:
      th.join()

   # Create and clean up the Dataframe
   ticker_df = pd.DataFrame(ticker_data, columns=["Ticker","Sector","Currency","MarketCap"]) 
   ticker_df = ticker_df[ticker_df["Currency"] != "NaN"]
   ticker_df = ticker_df.reset_index(drop=True)
   return ticker_df

In [80]:
# Krish, Info Extraction 

returns_start = "2024-11-14"
returns_end = "2025-11-14"

# Function to return a list of all tickers (first column elements)
def get_ticker_list (tickers_df):
     """
     get_ticker_list returns a Python list of tickers

     :param tickers_df: A Dataframe of tickers
     :return: A list of tickers
     """
     return tickers_df.iloc[:, 0].tolist()

# Gets weekly closes of all the stocks in a list of tickers
def get_weekly_closes (ticker_lst, start_date, end_date):
    """     
    get_weekly_closes performs data extraction to get weekly closing price info of stocks

    :param ticker_lst: List that holds all tickers
    :param start_date: start date for calculations
    :param end_date: end date for calculations
    :return: returns a Dataframe that holds the weekly closing price of each ticker on every week
    """
    #Define a dataframe to hold weekly close prices (checks every friday)
    cols = [] # list of Series to concat
    #Extract the weekly close prices and store them in the dataframe
    for i in ticker_lst:
        ticker = yf.Ticker(i)
        data = ticker.history(start=start_date, end=end_date)
        data.index = pd.to_datetime(data.index) # ensure datetime index
        #last() takes the last trading price of the week
        series = data['Close'].resample('W-FRI').last()
        series.name = f"Close {i}" # name each column
        cols.append(series) # store weekly closes for each ticker
        
    # Concatenate all ticker series at once to avoid fragmentation
    weekly_closes = pd.concat(cols, axis=1)
    # Strip time
    weekly_closes.index = weekly_closes.index.strftime('%Y-%m-%d')
    return weekly_closes

# Creates a df with the (weekly) %change for each column
def get_percent_change (closes, start_date, end_date):
    """     
    get_percent_change performs calculations that determines the percent change of closing prices

    :param closes: Dataframe that holds closing prices
    :param start_date: start date for calculations
    :param end_date: end date for calculations
    :return: returns a Dataframe that holds percent changes of closing prices
    """
    cols = [] # list of Series to concat
    
    for i in closes:
        col_name = i[6:] # name each column by the ticker from "Close ---"
        # calculate %change
        series = closes[i].pct_change(fill_method=None) # Fill_method=None to hande delisted stocks
        series.name = f"% Change {col_name}" # name each column
        cols.append(series) # store %change for each ticker

    # Concatenate all ticker series at once to avoid fragmentation
    percent_change = pd.concat(cols, axis=1)
    return percent_change

# Calculate covariance, correlation, variance, standard deviation
def get_calculations(ticker_list, start_date, end_date):
    """     
    get_calculations performs calculations for covariance, correlation, variance, standard deviation

    :param ticker_list: list of all tickers
    :param start_date: start date for calculations
    :param end_date: end date for calculations
    :return: returns a dictionary that holds covariance, correlation, standard deviation, and variance 
    """
    weekly_closes = get_weekly_closes(ticker_list, start_date, end_date)
    weekly_percent_change = get_percent_change(weekly_closes, start_date, end_date)
    covariance_matrix = {
        'Covariance': weekly_percent_change.cov(),
        'Correlation': weekly_percent_change.corr(),
        'Variance': weekly_percent_change.var(),
        'Std_Dev': weekly_percent_change.std()}
    return covariance_matrix

info_df = format_tickers("Extended_Tickers_Example.csv") #NOTE: CHANGE FILE NAME BEFORE SUBMITTING
ticker_list = (get_ticker_list(info_df)) # List of all tickers
primary_calculations = get_calculations(ticker_list, returns_start, returns_end) # Covariance matrix
"""
# Access each piece like:
display(primary_calculations['Covariance'])
display(primary_calculations['Std_Dev'])
"""

[*********************100%***********************]  143 of 143 completed

10 Failed downloads:
['DFS', 'GIB.A.TO', 'ATVI', 'AGN', 'CELG', 'BRK.B', 'PTR', 'MON', 'ZZZ.TO', 'RTN']: YFTzMissingError('possibly delisted; no timezone found')


"\n# Access each piece like:\ndisplay(primary_calculations['Covariance'])\ndisplay(primary_calculations['Std_Dev'])\n"

In [81]:
# Neil opimization models 
# This is the function we want to minimize, aka the minimum variance function
def port_variance(weights, cov_matrix):
    """     
    port_variance is the function that calculates the variance of a portfolio. It performs 
    dot product/matrix multiplication on the weights and covariance matrixes. 

    :param weights: an array that represents the weight of each asset
    :param cov_matrix: a 2D matrix that represents the covariance between each asset 
    :return: variance of the portfolio
    """
    weights_col = weights.reshape(-1, 1) # Turns into column vector
    port_var = np.dot(weights_col.transpose(), (np.dot(cov_matrix, weights_col))) # Doing dot product 
    return port_var[0][0]

In [82]:
# This code is what runs the primary function

# Primary minimization, there is bounds in this 
def primary_minimization(cov_matrix):
    """     
    primary_minimization is the function that finds the weightings that result in the mimimum variance. 
    It performs this using scipy optimization. This perimary version does not consider bounds. 

    :param cov_matrix: a 2D matrix that represents the covariance between each asset 
    :return: returns the minimum variance and the weightings associated with that
    """
    num_assets = cov_matrix.shape[0]
    initial_weight = [1/num_assets] * num_assets # The initial guess of the weights

    constraint = {
        'type':'eq', # Constraint type is equality
        'fun': lambda w: sum(w) - 1 # The function's weight's must sum to 1
        }
    
    weight_bounds = [(0, 1)] * num_assets # Does not allow short selling
    
    # Finds the resilt of the minimization of the port_variance function, using the initial guess, keeping the cov_matrix constant using the SLSQP method, and with the above listed constraint
    result = minimize(fun=port_variance, x0=initial_weight, args=(cov_matrix,), method='SLSQP', bounds=weight_bounds, constraints=constraint)
    return result.fun, result.x

pd_cov_matrix = primary_calculations['Covariance']
numpy_cov_matrix = pd_cov_matrix.to_numpy() # Matrix of covariances of assets
primary_var, primary_weights = primary_minimization(numpy_cov_matrix)

Secondary Optimization logic: Let n be the size of the tickers, we now have an optimized weighting for those n stocks
We want to find the most optimal set up of 10-25 stocks out of those n. To do so we will try every combination, however if we wanted to brute force from those n stocks it'd take an absurd amount of compute. Instead we will take into the fact that we have the weightings of the (unconstrained) optimization. The higher the weighting of an asset, the more important it is to the minimizing the  variance, thus we will sort the n-optimized assets from highest to lowest weighting. Starting from the top we will then build a 10-25 asset size portfolio and calculate the variance of each portfolio, finding the one with the least variance. 

We must also consider the fact that each portfolio has restraints, aka the min/max weighting of one stock, the max amount of sectors, and the mkt caps
In regards to the weighting rules, those can be implemented via scipy's minimization constraints, making all weightings are in a certain range
In regards to the max amount of sectors, when building the portfolios we will keep count of the sectors, if any sector exceeds a certain amount such that are over represented, we skip an asset and move on
In regards to the small and large mkt cap, we can check after building the portfolio, if one of them is missing we can delete the lowest weighted (least important) asset and then add in a new asset from the list that satisfies the missing mkt cap

In [83]:
# The secondary optimization that includes the bounds 
def secondary_minimization(cov_matrix):
    """     
    secondary_minimization is the function that finds the weightings that result in the mimimum variance while considering the bounds.
    That is, it ensures the weightings 

    :param cov_matrix: a 2D matrix that represents the covariance between each asset 
    :return: returns the minimum variance and the weightings associated with that
    """
    num_assets = len(cov_matrix[0]) 
    initial_weight = [1/num_assets] * num_assets # The initial guess of the weights

    min_weight = (100/(2*num_assets))/100 # Do not need to include portfolio value, because 1 is the portfolio value (and sum of weights)
    max_weight = 0.15 # Same as above

    constraint = {
        'type':'eq', # Constraint type is equality
        'fun': lambda w: sum(w) - 1 # The function's weight's must sum to 1
        }
    
    weight_bounds = [(min_weight, max_weight)] * num_assets
    
    # Finds the resilt of the minimization of the port_variance function, using the initial guess, keeping the cov_matrix constant using the SLSQP method, and with the above listed constraint
    result = minimize(fun=port_variance, x0=initial_weight, args=(cov_matrix,), method='SLSQP', bounds = weight_bounds, constraints=constraint)
    return result.fun, result.x

In [84]:
info_df['weight'] = primary_weights #Assume info_df holds the tickers, sector, market cap, etc and not the dates etc. We now add the weights.
ordered_info_df = info_df.copy().sort_values('weight', ascending = False).reset_index(drop=True)

ordered_info_df

Unnamed: 0,Ticker,Sector,Currency,MarketCap,weight
0,DG,Consumer Defensive,USD,22384783360,0.049195
1,FTS.TO,Utilities,CAD,36722364416,0.037503
2,CME,Financial Services,USD,98476187648,0.037267
3,DUK,Utilities,USD,95496806400,0.036014
4,AEP,Utilities,USD,64681492480,0.036006
...,...,...,...,...,...
127,CSCO,Technology,USD,300678283264,0.000000
128,CTAS,Industrials,USD,74868211712,0.000000
129,C,Financial Services,USD,181696626688,0.000000
130,BABA,Consumer Cyclical,USD,364890390528,0.000000


In [85]:
# Code that creates a portfolio that matches the requirements

# Determines all the indexes in a portfolio that are large cap stocks
def is_lg_cap(portfolio):
    """
     is_lg_cap determines all the indexes in a portfolio that are large cap stocks

     :param portfolio: A list of Series objects that contain a stock's info 
     :return: A list of indexes of ticker's that are large cap
     """
    lg_cap = []
    for i in range(len(portfolio)):
        if portfolio[i]['MarketCap'] > 10_000_000_000:
            lg_cap.append(i)
    return lg_cap

# Determines all the indexes in a portfolio that are small cap stocks
def is_sm_cap(portfolio):
    """
     is_sm_cap determines all the indexes in a portfolio that are small cap stocks

     :param portfolio: A list of Series objects that contain a stock's info 
     :return: A list of indexes of ticker's that are small cap
     """
    sm_cap = []
    for i in range(len(portfolio)):
        if portfolio[i]['MarketCap'] < 2_000_000_000:
            sm_cap.append(i)
    return sm_cap

# Determines if an individual stock is a large market cap
def is_lg(row):
    return row['MarketCap'] > 10_000_000_000

# Determines if an individual stock is a small market cap
def is_sm(row):
    return row['MarketCap'] < 2_000_000_000

# Determines the index of the least important stock. Least important in this case is the one with the lowest weighting in the ordered list, but
# if the least important is either the ONLY SMALL or LARGE cap stock then the second least important stock is now designated the least important
def find_least_imp(portfolio, lg_indxs, sm_indxs):
    """ 
    find_least_imp Determines the index of the least important stock. Least important in this case is the one with the lowest weighting in the ordered list, but 
    if the least important is either the ONLY SMALL or LARGE cap stock then the second least important stock is now designated the least important

    :param portfolio: A list of stock data in Series format
    :param lg_indxs: A list of all indexes that hold large cap stocks
    :param sm_indxs: A list of all indexes that hold small cap stocks
    :return: integer that represents the index that is desginated least important
    """

    only_large_idx = None
    only_small_idx = None

    # Determines the protected indexes
    if len(lg_indxs) == 1:
        only_large_idx = lg_indxs[0]
    if len(sm_indxs) == 1:
        only_small_idx = sm_indxs[0]

    for i in range(len(portfolio) - 1, -1, -1):
        if i != only_large_idx and i != only_small_idx:
            return i

def valid_port_check(ticker_list):
    """
    valid_port_check checks if a portfolio fits all sector and weighting constraints

    :param ticker_list: A list of ticker names
    :return: Boolean value depending on if its true or not
    """
    portfolio = [] # Will be a list of Series
    port_sectors = []
    max_sector_num = int(len(ticker_list) * 0.4)

    # Creates preliminary portfolio 
    for i in range(len(ticker_list)):
        cur = ordered_info_df[ordered_info_df["Ticker"] == ticker_list[i]].iloc[0]
        cur_sector = cur['Sector']
        portfolio.append(cur)
        port_sectors.append(cur_sector)
    
    for s in port_sectors:
        if ((port_sectors.count(s)) > max_sector_num):
            return False

    num_lg = len(is_lg_cap(portfolio))
    num_sm = len(is_sm_cap(portfolio))
    if (num_lg == 0 or num_sm == 0):  
        return False
    
    return True


# Creates a portfolio that is valid 
def create_portfolio(size):
    """ 
    create_portfolio creates a valid portfolio of a certain size

    :param size: the size of the portfolio
    :return: a list of tickers that are in the portfolio, or none if it cannot create a portfolio that satisfies all requirements
    """

    portfolio = [] # Will be a list of Series
    port_sectors = []
    i = 0
    max_sector_num = int(size * 0.4)
    ticker_only = []
    
    # Creates preliminary portfolio 
    while len(portfolio) < size:
        if i >= len(ordered_info_df):
            return None
        cur = ordered_info_df.iloc[i]
        cur_sector = cur['Sector']

        # Ensures that no sector is above max weight
        if (port_sectors.count(cur_sector) < max_sector_num):
            portfolio.append(cur)
            port_sectors.append(cur_sector)
        i += 1

    lg_idxs = is_lg_cap(portfolio)
    sm_idxs = is_sm_cap(portfolio)

    # Fixes the no large market cap issue
    while len(lg_idxs) == 0: 
        replaced = find_least_imp(portfolio, lg_idxs, sm_idxs)
        if i >= len(ordered_info_df):
            return None
        cur = ordered_info_df.iloc[i]
        cur_sector = cur['Sector']

        temp_sectors = port_sectors.copy()
        temp_sectors.pop(replaced)
        
        if (is_lg(cur)) and (temp_sectors.count(cur_sector) < max_sector_num):
            portfolio[replaced] = cur
            port_sectors[replaced] = cur_sector

            lg_idxs = is_lg_cap(portfolio)
            sm_idxs = is_sm_cap(portfolio)
        i += 1

    # Fixes the no small market cap issues
    while len(sm_idxs) == 0: 
        replaced = find_least_imp(portfolio, lg_idxs, sm_idxs)
        if i >= len(ordered_info_df):
            return None
        cur = ordered_info_df.iloc[i]
        cur_sector = cur['Sector']

        temp_sectors = port_sectors.copy()
        temp_sectors.pop(replaced)
        
        if (is_sm(cur)) and (temp_sectors.count(cur_sector) < max_sector_num):
            portfolio[replaced] = cur
            port_sectors[replaced] = cur_sector

            lg_idxs = is_lg_cap(portfolio)
            sm_idxs = is_sm_cap(portfolio)
        i += 1
    
    for stock in portfolio:
        ticker_name = stock["Ticker"]
        ticker_only.append(ticker_name)
    
    return ticker_only

In [86]:
# Code that creates the portfolio of 10-25, and then sees which one is the most optimal 

all_ports = []
all_variance = []
all_weights = []
count = 0
while (count + 10) < 26:
    port = create_portfolio(count+10)
    if port is None:
        print(f"Failed to build portfolio of size {count+10}, skipping.")
        count += 1
        continue

    all_ports.append(port)

    temp_cov = get_calculations(all_ports[count], returns_start, returns_end)
    cov_np = temp_cov['Covariance'].to_numpy()
    temp_var, temp_weights = secondary_minimization(cov_np)

    all_variance.append(temp_var)
    all_weights.append(temp_weights)
    count += 1

if not all_variance:
    print("No valid portfolios were generated for some reason. Please check ticker csv.")
else:
    smallest_var = min(all_variance)
    index = all_variance.index(smallest_var)
    target = [smallest_var, all_ports[index], all_weights[index]]
    print(f"The smallest variance found is {target[0]} which is determined from the following portfolio: {target[1]}, at the following weights {target[2]}.")

The smallest variance found is 7.889821209706994e-05 which is determined from the following portfolio: ['DG', 'FTS.TO', 'CME', 'DUK', 'AEP', 'EXC', 'ENB.TO', 'WN.TO', 'BNS.TO', 'UL', 'BB.TO', 'BTI', 'LMT', 'KO', 'T.TO', 'SLF.TO', 'BCE.TO', 'COST', 'ATD.TO', 'TRP.TO', 'GOOG', 'WCN.TO', 'RY.TO', 'NA.TO', 'KITS.TO'], at the following weights [0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04].


In [87]:
def get_close_prices_and_rate(tickers, target_date, end_date):
    """
    get_close_prices_and_rate finds the closing price of tickers on a date, and the exchange rate on a date

    :param tickers: list of tickers
    :param target_data: the day of price we want, normally most recent business day
    :param end_date: the day after, as yfinance is not inclusive
    :return: a Series that contains the target days close price
    :return: the USD to CAD exchange rate
    """
    data = yf.download(tickers, start=target_date, end=end_date)["Close"]
    close_prices = data.iloc[0]

    exchange_rate = yf.download("CAD=X", start=target_date, end=end_date)["Close"]
    exchange_rate = exchange_rate.iloc[0]

    return close_prices, exchange_rate.item()

def purchase_flat_fee(df, budget, exchange_rate):
    """
    purchase_flat_fee transforms a Dataframe to include the amount of shares bought with a flat fee, and the value of that

    :param df: The Dataframe that will be transformed
    :param budget: An Integer representing the budget
    :param exchange_rate: A series of the USD-CAD exchange rate
    :return: Transformed Dataframe
    """

    df["Shares Bought Flat Fee"] = (df["Weight"] * (budget - (2.5*exchange_rate))) / df["Price"]
    df["Flat Fee Worth"] = df["Shares Bought Flat Fee"] * df["Price"]

    return df 

def purchase_variable_fee(df, budget, exchange_rate):
    """
    purchase_variable_fee transforms a Dataframe to include the amount of shares bought with the share-dependent rate

    :param df: The Dataframe that will be transformed
    :param budget: An Integer representing the budget
    :param exchange_rate: A series of the USD-CAD exchange rate
    :return: Transformed Dataframe
    """
    df["Shares w/o Fee"] = (df["Weight"] * budget) / df["Price"]
    total_shares = df["Shares w/o Fee"].sum()
    
    variable_fee_usd = total_shares * 0.001
    variable_fee_cad = variable_fee_usd * exchange_rate

    adjusted_budget = budget - variable_fee_cad
    df["Shares Bought Variable Fee"] = (df["Weight"] * adjusted_budget) / df["Price"]
    df["Variable Fee Worth"] = df["Shares Bought Variable Fee"] * df["Price"]

    return df

def ideal_shares(df):
    """ 
    ideal_shares determines which fee strategy, flat or variable, is the most optimal and transforms Dataframe accordingly

    :param df: The Dataframe that will be transformed
    :return: Transformed Dataframe
    """
    sum_flat_fee = df["Flat Fee Worth"].sum()
    sum_variable_fee = df["Variable Fee Worth"].sum()

    if (sum_flat_fee < sum_variable_fee):
        df.drop(["Shares Bought Flat Fee", "Flat Fee Worth", "Shares w/o Fee"], axis=1, inplace=True)
        df.rename(columns={"Shares Bought Variable Fee":"Shares", "Variable Fee Worth":"Value"}, inplace=True)
    else:
        df.drop(["Shares Bought Variable Fee", "Variable Fee Worth", "Shares w/o Fee"], axis=1, inplace=True)
        df.rename(columns={"Shares Bought Flat Fee":"Shares", "Flat Fee Worth":"Value"}, inplace=True)
    
    return df 

def add_currency(df_small, df_large):
    """ 
    add_currency adds the currency type onto the Dataframe

    :param df_small: The Dataframe that holds the shares bought
    :param_df_large: The Dataframe the holds the Currency's
    :return: Transformed Dataframe
    """

    df_with_currency = df_small.merge(df_large[["Ticker", "Currency"]], on="Ticker", how="left")
    return df_with_currency

def convert_closing(df, exchange_rate):
    """
    convert_closing converts the price of any stock thats in USD to CAD

    :param df: Dataframe to be transformed
    :param exchange_rate: The USD-CAD rate
    :return: Dataframe with converted prices
    """

    df.loc[df["Currency"] == "USD", "Price"] = df["Price"] * exchange_rate
    return df

In [88]:
temp_df = pd.DataFrame({
    "Ticker": target[1],
    "Weight": target[2]
})

target_date = "2025-11-18" #Example
end_date = "2025-11-19" #Example
closing, usd_cad_rate = get_close_prices_and_rate(target[1], target_date, end_date)
temp_df["Price"] = closing.to_numpy()
temp_df = add_currency(temp_df,ordered_info_df)
temp_df_cad = convert_closing(temp_df, usd_cad_rate)
temp_df_cad = purchase_flat_fee(temp_df_cad, 1_000_000, usd_cad_rate)
temp_df_cad = purchase_variable_fee(temp_df_cad, 1_000_000, usd_cad_rate)
Portfolio_Final = ideal_shares(temp_df_cad)

Stocks_Final = Portfolio_Final.copy()
Stocks_Final = Stocks_Final.drop(columns=["Currency", "Weight", "Price"], errors="ignore")
#Stocks_Final.to_csv("Stocks_Group_15.csv", index=False)

sum_weight = Portfolio_Final["Weight"].sum() # Doesn't return 1 properly because floats can't sum to exact integers
sum_shares = Portfolio_Final["Shares"].sum()
sum_value = Portfolio_Final["Value"].sum()
print(f"The sum of the weights is {sum_weight}, the sum of the shares is {sum_shares}, the sum of the values is {sum_value}.")

  data = yf.download(tickers, start=target_date, end=end_date)["Close"]
[*********************100%***********************]  25 of 25 completed
  exchange_rate = yf.download("CAD=X", start=target_date, end=end_date)["Close"]
[*********************100%***********************]  1 of 1 completed

The sum of the weights is 1.0, the sum of the shares is 17500.606131598044, the sum of the values is 999996.4873749018.





In [None]:
Portfolio_Final

Unnamed: 0,Ticker,Weight,Price,Currency,Shares,Value
0,DG,0.04,173.537733,USD,230.496611,39999.859495
1,FTS.TO,0.04,69.510002,CAD,575.45473,39999.859495
2,CME,0.04,8.458401,USD,4729.009479,39999.859495
3,DUK,0.04,45.25666,USD,883.8447,39999.859495
4,AEP,0.04,133.198748,USD,300.302068,39999.859495
5,EXC,0.04,77.081046,USD,518.932495,39999.859495
6,ENB.TO,0.04,279.279999,CAD,143.224934,39999.859495
7,WN.TO,0.04,895.080017,CAD,44.688585,39999.859495
8,BNS.TO,0.04,103.330002,CAD,387.107895,39999.859495
9,UL,0.04,173.945199,USD,229.956674,39999.859495


In [90]:
# --- Third Optimization --- #

# Make a list of the top 30 most important stocks
ordered_ticker_list = get_ticker_list(ordered_info_df)
ordered_ticker_list = ordered_ticker_list[0:30]


def create_portfolio_combs(ticker_list):
    if not valid_port_check(ticker_list):
        return None, None, None
    else:
        temp_cov2 = get_calculations(ticker_list, returns_start, returns_end)
        cov_np2 = temp_cov2['Covariance'].to_numpy()
        temp_var2, temp_weights2 = secondary_minimization(cov_np2)

        # Returns the inputted list, the variance, and a list of the weights
        return ticker_list, temp_var2, temp_weights2


def create_optimal_portfolio(ordered_ticker_list):
    # Create list of every possible portfolio
    potential_portfolios = list(itr.combinations(ordered_ticker_list, 25))

    # Initialize the optimal portfolio data
    optimal_port = None
    optimal_var = float("inf")       # Infinitely high float
    optimal_weights = None

    for i in potential_portfolios:
        current_port, current_var, current_weights = create_portfolio_combs(i)

        # Current portfolio failed the requirements
        if current_port is None:
            continue # skip
        
        # If the current portfolio has a lower variance than any previous ones
        if current_var <= optimal_var:
            # Set the optimal portfolio data to the current
            optimal_port = current_port
            optimal_var = current_var
            optimal_weights = current_weights

    return optimal_port, optimal_var, optimal_weights


optimal_portfolio, optimal_variance, optimal_weights = create_optimal_portfolio(ordered_ticker_list)


KeyboardInterrupt: 

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here. 