In [60]:
from IPython.display import display, Math, Latex
import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime, timedelta
import scipy as sp 
from scipy.optimize import minimize
from threading import Thread

## Portfolio Optimizer

In [61]:
# CSV Ticker file path
csv_file_path = "Tickers.csv"

# Date ranges used to analyze the percent returns of stocks
returns_start = "2024-11-14" 
returns_end = "2025-11-14" 

# Date ranges used to determine volume 
vol_start = "2024-10-01"
vol_end = "2025-10-01"

# Volume filtering
min_avg_vol = 5000
min_trading_days_per_month = 18

# Portfolio size constraints
min_port_size = 15
max_port_size = 25

# Market cap constraints (in USD)
small_cap_threshold = 2_000_000_000
large_cap_threshold = 10_000_000_000

# Weight constraints (per stock)
min_weight = None # Later will be set to the formula: (100/(2*num_assets))/100
max_weight = 0.15

# Sector constraints
max_sector_weight = 0.40

# Budget
total_budget = 1_000_000

# Fee structure
flat_fee_cad = 2.5
variable_fee_per_share = 0.001

# Max iterations when trying to create a succesful portfolio
max_iterations = 20

In [62]:
def valid_tickers(csv_file_path):
   """
   valid_tickers is the prelimary checker, it reads a csv and then removes any tickers that trade less than the minimum volume

   :param csv_file_path: A string that represents the csv file name that will be read
   :return: A list of names of the valid tickers
   """
   
   print(f"Now loading tickers from {csv_file_path}.")

   # Scrape for unique tickers
   try:
      tickers_df = pd.read_csv(csv_file_path, header=None)
      tickers_df.columns = ["Ticker"]
      tickers_df["Ticker"] = tickers_df["Ticker"].str.strip()
      tickers_df = tickers_df.drop_duplicates()
      ticker_list = tickers_df["Ticker"].tolist()

      print(f"{len(ticker_list)} unique tickers have been found in the csv. Now downloading tickers.")
   except FileNotFoundError:
      raise FileNotFoundError(f"Could not find file path to {csv_file_path}.")

   # Bulk download the ticker information
   hist = yf.download(
       tickers=ticker_list,
       start=vol_start,
       end=vol_end,
       group_by="ticker",
       auto_adjust=False,
       threads=True,
       progress=False
    )
   
   if hist.empty:
      raise ValueError("No data downloaded, check ticker symbols and date range.")

   # Count the rows per month per ticker
   trading_days_per_month = hist.groupby(hist.index.to_period("M")).transform("count")

   # Keep only rows where all tickers in that month exceed configured minimum
   valid_days_mask = trading_days_per_month >= min_trading_days_per_month
   hist_filtered = hist[valid_days_mask]

   # Remove the months that do not meet threshold and any tickers that don't have a large enough trading volume
   vol_filtered = hist_filtered.xs("Volume", axis=1, level=1)
   avg_vol_filtered = vol_filtered.mean()
   valid_tickers = avg_vol_filtered[avg_vol_filtered >= min_avg_vol].index.tolist()

   return valid_tickers

In [63]:
#Begin threading set up
def get_format_info(ticker, ticker_data):
   """
   get_format_info is the threading worker function, it is what will be done in parallel. It finds wanted ticker info
   and adds it to a list

   :param ticker: The ticker whose info is wanted
   :param ticker_data: A list that keeps track of the ticker info thats been extracted
   """
   try:
      ticker_info = yf.Ticker(ticker).info

      # Check currency
      currency = ticker_info.get("currency")
      if (currency not in ["CAD", "USD"]):
         return
      
      # Check if its an equity
      if ticker_info.get("quoteType") != "EQUITY": 
         return
      
      ticker_data.append((
            ticker,
            ticker_info.get("sector"),
            currency,
            ticker_info.get("marketCap")))
      
   except:
      pass

#Threading occurs
def format_tickers(csv_file_path):
   """
   format_tickers is the main function that performs threading onto the worker function and gets the dataframe of tickers and wanted info

   :param csv_file_path: A string that represents the csv file name that will be read
   :return: A Dataframe that contains all the info we want for every valid ticker
   """
   ticker_list = valid_tickers(csv_file_path)

   print(f"Fetching info for {len(ticker_list)} tickers.")
   
   # Threading for parallel API calls
   ticker_data = []
   threads = []

   # Loop and create threads of each ticker in the ticker_list
   for t in ticker_list:
      thread = Thread(target=get_format_info, args=(t, ticker_data))
      threads.append(thread)
      thread.start()

   # Once all threads complete, join them together 
   for th in threads:
      th.join()

   # Create and clean up the Dataframe
   ticker_df = pd.DataFrame(ticker_data, columns=["Ticker","Sector","Currency","MarketCap"]) 
   ticker_df = ticker_df.dropna(subset=["Sector", "MarketCap"])
   ticker_df = ticker_df.reset_index(drop=True)

   print(f"{len(ticker_df)} tickers with complete information found.")

   return ticker_df

In [64]:
# Gets weekly closes of all the stocks in a list of tickers
def get_weekly_closes (ticker_list):
    """     
    get_weekly_closes performs data extraction to get weekly closing price info of stocks

    :param ticker_lst: List that holds all tickers
    :return: returns a Dataframe that holds the weekly closing price of each ticker on every week
    """
    print(f"Downloading price data for {len(ticker_list)} tickers.")

    price_hist = yf.download(
        tickers=ticker_list,
        start=returns_start,
        end=returns_end,
        group_by="ticker",
        auto_adjust=True,
        threads=True,
        progress=False,
        multi_level_index=False
    )

    if price_hist.empty:
        raise ValueError("No data downloaded, check date range and tickers.")
    
    # yf.download handles single ticker and multiple ticker download cases differently
    if len(ticker_list) == 1:
        close_data = price_hist[["Close"]]
        close_data.columns = ticker_list
    else:
        close_data = price_hist.xs('Close', axis=1, level=1)

    # Get the weekly closing price, and then clean the index
    weekly_closes = close_data.resample("W-FRI").last()
    weekly_closes.index = weekly_closes.index.strftime('%Y-%m-%d')

    print(f"Completed download of {len(weekly_closes)} tickers.")

    return weekly_closes

# Creates a df with the (weekly) %change for each column
def get_percent_change (closes):
    """     
    get_percent_change performs calculations that determines the percent change of closing prices

    :param closes: Dataframe that holds closing prices
    :return: returns a Dataframe that holds percent changes of closing prices
    """

    percent_change = closes.pct_change(fill_method=None)

    return percent_change

# Calculate covariance, correlation, variance, standard deviation
def get_calculations(ticker_list, start_date, end_date):
    """     
    get_calculations performs calculations for covariance, correlation, variance, standard deviation

    :param ticker_list: list of all tickers
    :param start_date: start date for calculations
    :param end_date: end date for calculations
    :return: returns a dictionary that holds covariance, correlation, standard deviation, and variance 
    """
    weekly_closes = get_weekly_closes(ticker_list)
    weekly_percent_change = get_percent_change(weekly_closes)

    covariance_matrix = {
        'Covariance': weekly_percent_change.cov(),
        'Correlation': weekly_percent_change.corr(),
        'Variance': weekly_percent_change.var(),
        'Std_Dev': weekly_percent_change.std(),
        'Returns': weekly_percent_change}
    
    return covariance_matrix

In [65]:
# This is the function we want to minimize, aka the minimum variance function
def port_variance(weights, cov_matrix):
    """     
    port_variance is the function that calculates the variance of a portfolio. It performs 
    dot product/matrix multiplication on the weights and covariance matrixes. 

    :param weights: an array that represents the weight of each asset
    :param cov_matrix: a 2D matrix that represents the covariance between each asset 
    :return: variance of the portfolio
    """
    weights_col = weights.reshape(-1, 1) # Turns into column vector
    port_var = np.dot(weights_col.transpose(), (np.dot(cov_matrix, weights_col))) # Doing dot product 
    return port_var[0][0]

In [66]:
# Primary minimization, there are no bounds in this 
def unbounded_optimization(cov_matrix):
    """     
    unbounded_optimization finds the weightings that result in the mimimum variance without considering any constraints. 
    This is the first-stage optimization. Used to create df that is used to create portfolios. 

    :param cov_matrix: a 2D matrix that represents the covariance between each asset 
    :return: returns the minimum variance and the weightings associated with that
    """
    num_assets = cov_matrix.shape[0]
    initial_weight = [1/num_assets] * num_assets # The initial guess of the weights

    constraint = {
        'type':'eq', # Constraint type is equality
        'fun': lambda w: sum(w) - 1 # The function's weight's must sum to 1
        }
    
    weight_bounds = [(0, 1)] * num_assets # Does not allow short selling
    
    # Finds the resilt of the minimization of the port_variance function, using the initial guess, keeping the cov_matrix constant using the SLSQP method, and with the above listed constraint
    result = minimize(fun=port_variance, x0=initial_weight, args=(cov_matrix,), method='SLSQP', bounds=weight_bounds, constraints=constraint)
    
    return result.fun, result.x

In [67]:
# The secondary optimization that includes the bounds 
def bounded_optimization(cov_matrix, min_weight, max_weight):
    """     
    bounded_optimization finds the weightings that result in the mimimum variance while considering the bounds.
    This is the second-stage optimization.

    :param cov_matrix: a 2D matrix that represents the covariance between each asset 
    :return: returns the minimum variance and the weightings associated with that
    """
    num_assets = len(cov_matrix[0]) 
    initial_weight = [1/num_assets] * num_assets # The initial guess of the weights

    constraint = {
        'type':'eq', # Constraint type is equality
        'fun': lambda w: sum(w) - 1 # The function's weight's must sum to 1
        }
    
    weight_bounds = [(min_weight, max_weight)] * num_assets
    
    # Finds the resilt of the minimization of the port_variance function, using the initial guess, keeping the cov_matrix constant using the SLSQP method, and with the above listed constraint
    result = minimize(fun=port_variance, x0=initial_weight, args=(cov_matrix,), method='SLSQP', bounds = weight_bounds, constraints=constraint)
    
    return result.fun, result.x

# Checks for small-cap and large-cap stock in portfolio

In [68]:
def ensure_cap_diversity(portfolio_df, all_tickers_df):
    """
    Checks if a portfolio has both small and large cap stocks

    :param portfolio_df: A dataframe of the current portfolio
    :param all_tickers_df: Full ticker dataframe to find replacements, has info regarding market cap, sector, currency, etc, pre-sorted
    :return: Fixed portfolio (or original if its already diversified)
    """

    num_small = (portfolio_df['MarketCap'] < small_cap_threshold).sum()
    num_large = (portfolio_df['MarketCap'] > large_cap_threshold).sum()

    small_cap_stocks = all_tickers_df[(all_tickers_df["MarketCap"] < small_cap_threshold) & (~all_tickers_df["Ticker"].isin(portfolio_df["Ticker"]))]
    large_cap_stocks = all_tickers_df[(all_tickers_df["MarketCap"] > large_cap_threshold) & (~all_tickers_df["Ticker"].isin(portfolio_df["Ticker"]))]
    
    # Passes cap diversity
    if num_small >=1 and num_large >= 1:
        print("Portfolio passes cap diversity.")
        return portfolio_df
    
    # Must add small cap
    if num_small == 0:
        print("Adding small cap stock.")

        if len(small_cap_stocks) == 0:
            print("No small cap stocks availaible. Cannot ensure diversity, returning nothing.")
            return None
        
        if num_large > 1:
            mid_large_mask = portfolio_df["MarketCap"] > small_cap_threshold
            replace_idx = portfolio_df[mid_large_mask].index[-1]
        else:
            mid_mask = (portfolio_df["MarketCap"] > small_cap_threshold) & (portfolio_df["MarketCap"] < large_cap_threshold)
            replace_idx = portfolio_df[mid_mask].index[-1]

        portfolio_df = portfolio_df.drop(replace_idx)
        portfolio_df = pd.concat([portfolio_df, small_cap_stocks.head(1)], ignore_index=True)
    
    # Must add large cap
    if num_large == 0:
        print("Adding large cap stock")

        if len(large_cap_stocks) == 0:
            print("No large cap stocks availaible. Cannot ensure diversity, returning nothing.")
            return None
        
        if num_small > 1:
            mid_small_mask = portfolio_df["MarketCap"] < large_cap_threshold
            replace_idx = portfolio_df[mid_small_mask].index[-1]
        else:
            mid_mask = (portfolio_df["MarketCap"] > small_cap_threshold) & (portfolio_df["MarketCap"] < large_cap_threshold)
            replace_idx = portfolio_df[mid_mask].index[-1]

        portfolio_df = portfolio_df.drop(replace_idx)
        portfolio_df = pd.concat([portfolio_df, large_cap_stocks.head(1)], ignore_index=True)

    num_small_after = (portfolio_df['MarketCap'] < small_cap_threshold).sum()
    num_large_after = (portfolio_df['MarketCap'] > large_cap_threshold).sum()
    print(f"There are now {num_small_after} small-cap and {num_large_after} large-cap stocks")

    return portfolio_df

In [69]:
def compute_sector_weights(tickers, weights, all_tickers_df):
    """
    Calculate how much of the portfolio, by weight, belongs to each sector.

    :param tickers: list of ticker names 
    :param weights: list of weights from the optimizer 
    :param all_tickers_df: dataframe that has info regarding market cap, sector, currency, etc, pre-sorted
    :return: the sector weight total
    """

    weight_df = pd.DataFrame({
        "Ticker": tickers,
        "Weight": weights
    })

    weight_df = weight_df.merge(all_tickers_df[["Ticker", "Sector"]], on="Ticker", how="left")
    sector_weights = (weight_df.groupby("Sector"))["Weight"].sum()

    return sector_weights

def get_overweight_sectors(tickers, weights, all_tickers_df):
    """
    Finds any sector that exceeds the weight limit.

    :param tickers: list of ticker names 
    :param weights: list of weights from the optimizer 
    :param all_tickers_df: dataframe that has info regarding market cap, sector, currency, etc, pre-sorted
    :return: dictionary of overweight sectors and their weights, or an empty dictionary if all within limit
    """
    sector_weights = compute_sector_weights(tickers, weights, all_tickers_df)

    overweight = sector_weights[sector_weights > max_sector_weight]

    return overweight.to_dict()

def find_replacement_ticker(current_tickers, overweight_sectors, used_tickers, all_tickers_df):
    """
    Finds a replacement ticker

    :param current_tickers: current portfolio tickers
    :param overweight_sectors: overweight sectors that should be avoided in our replacement
    :param used_tickers: set of tickers that have been tried already
    :param all_tickers_df: Full ticker dataframe to find replacements, has info regarding market cap, sector, currency, etc, pre-sorted
    :return: Ticker symbol, or none if theres no valid replacements
    """

    available_tickers = all_tickers_df[(~all_tickers_df["Ticker"].isin(used_tickers)) & 
                                       (~all_tickers_df["Sector"].isin(overweight_sectors)) &
                                       (~all_tickers_df["Ticker"].isin(current_tickers))]

    if len(available_tickers) == 0:
        return None
    
    return available_tickers.iloc[0]['Ticker']

def optimize_with_sector_limits(initial_tickers, all_tickers_df):
    """
    Optimize portfolio while respecting sector weight constraints and other constraints.

    Algorithm:
    1. Optimize weights for current portfolio
    2. Check sector constraints of the portfolio
    3. If violated, replace smallest-weight stock from overweight sector (make sure to protect small/large cap stocks)
    4. Repeat until constraints satisfied or max iterations reached

    :param initial_tickers: the initial tickers make up the first iteration of the current portfolio
    :param all_tickers_df: dataframe that has info regarding market cap, sector, currency, etc, pre-sorted
    :return: the list of tickers of the succesful portfolio, the variance of said portfolio, and its associated weights
    """

    current_tickers = initial_tickers
    used_tickers = initial_tickers

    for iteration in range(max_iterations):
        cov_matrix = get_calculations(current_tickers, returns_start, returns_end)['Covariance'].to_numpy()
        num_assets = len(current_tickers)
        min_weight = (100 / (2 * num_assets)) / 100
        
        # Conduct optimization and then create dataframe of the tickers with the weights, sector, and market caps aligned
        variance, weights = bounded_optimization(cov_matrix, min_weight, max_weight)
        portfolio_df = pd.DataFrame({
            'Ticker': current_tickers,
            'Weights': weights
        })
        portfolio_df = portfolio_df.merge(all_tickers_df[["Ticker", "Sector", "MarketCap"]], on="Ticker")

        # Begin checking for sector constraints
        overweight_info = get_overweight_sectors(current_tickers, weights, all_tickers_df)

        # Optimization succesful case, all constraints have been satisfied. 
        if len(overweight_info) == 0:
            print(f"All constraints satisfied on iteration {iteration+1}. Final variance: {variance:.6f}")
            sector_weights = compute_sector_weights(current_tickers, weights, all_tickers_df)

            print("Final sector distribution:")
            sector_df = pd.DataFrame({
                'Sector': sector_weights.index,
                'Weight': sector_weights.values
            }).sort_values("Weight", ascending=False)

            display(sector_df)
            
            return current_tickers, variance, weights

        # Constraints not satisfied yet, begin fixing them.

        # Find worst sector and fix it.
        worst_sector = max(overweight_info, overweight_info.get)

        # Set up protected large/small caps. 
        current_portfolio = all_tickers_df[all_tickers_df['Ticker'].isin(current_tickers)].copy()
        num_small = (current_portfolio['MarketCap'] < small_cap_threshold).sum()
        num_large = (current_portfolio['MarketCap'] > large_cap_threshold).sum()

        protected_tickers = []

        if num_small == 1:
            protected_tickers.append(current_portfolio[current_portfolio["MarketCap"] < small_cap_threshold]["Ticker"].iloc[0])
        
        if num_large == 1:
            protected_tickers.append(current_portfolio[current_portfolio["MarketCap"] > large_cap_threshold]["Ticker"].iloc[0])
        
        # Find stocks in worst overweight sector that can be replaced
        replaceable_df = portfolio_df[(portfolio_df["Sector"] == worst_sector) & (~portfolio_df["Ticker"].isin(protected_tickers))]

        # Find least important ticker and its corresponding information
        min_weight_row = replaceable_df.loc[replaceable_df['Weight'].idxmin()]
        removed_ticker = min_weight_row["Ticker"]
        removed_idx = current_tickers.index(removed_ticker)

        # Begin replacement algorithm
        all_overweight_sectors = list(overweight_info)
        replacement = find_replacement_ticker(current_tickers, all_overweight_sectors, used_tickers, all_tickers_df)

        if replacement is None:
            print("No replacement found outside overweight sectors")
            return None, None, None
        
        print(f"Iteration {iteration + 1}: Replacing {removed_ticker} with {replacement}")
        current_portfolio[removed_idx] = replacement
        used_tickers.add(replacement)

        # Continue loop
    
    print(f"{max_iterations} iterations reached. Terminated to prevent memory usage.")
    return None, None, None

In [70]:
# Creates a portfolio that follows the small/large cap constraints
def create_portfolio(size, all_tickers_df):
    """ 
    create_portfolio creates a portfolio of a given size that satisfies market cap diversity

    :param size: the size of the portfolio
    :param all_tickers_df: dataframe pre-sorted through first-stage optimization that has info regarding market cap, sector, currency, etc
    :return: a list of tickers that are in the portfolio, or none if it cannot create a portfolio that satisfies all requirements
    """

    if size > len(all_tickers_df):
        print(f"Cannot create portfolio of size {size} with only {len(all_tickers_df)} tickers")
        return None
    
    initial_portfolio_df = all_tickers_df.head(size).copy()

    initial_portfolio_df = ensure_cap_diversity(initial_portfolio_df, all_tickers_df)
    
    return initial_portfolio_df['Ticker'].tolist()

def find_optimized_portfolio(all_tickers_df):
    """
    Loops through all possible portfolio sizes and returns the optimized one. 

    :param all_tickers_df: dataframe pre-sorted through first-stage optimization that has info regarding market cap, sector, currency, etc
    :return: a list of the most optimal variance, its tickers, and the weightings associated
    """
    all_ports = []
    all_variance = []
    all_weights = []
    count = 0

    # Go through all possible portfolio sizes and then find the one that results in minimum variance 
    for i in range(min_port_size, max_port_size + 1):
        base_port_lst = create_portfolio(i, all_tickers_df)

        if base_port_lst is None:
            print(f"Failed to build portfolio of size {i}, skipping.") # portfolio requirements failed
            count += 1
            continue

        final_port, final_var, final_w = optimize_with_sector_limits(base_port_lst, all_tickers_df)
        if final_port is None:
            print(f"Sector constraint failed for portfolio size {count+10}, skipping.") # portfolio requirements failed
            count += 1
            continue

        all_ports.append(final_port)
        all_variance.append(final_var)
        all_weights.append(final_w)
        count += 1
    
    if not all_variance:
        print("No valid portfolios were generated. Please check ticker csv.") 
    else:
        smallest_var = min(all_variance)
        index = all_variance.index(smallest_var)
        target = [smallest_var, all_ports[index], all_weights[index]]
        return target

## Main Execution

In [71]:
# Execute filtering after reading in csv. 
valid_tickers_df = format_tickers(csv_file_path) 

covariance_matrix = ((get_calculations(valid_tickers_df["Ticker"].tolist(), returns_start, returns_end))['Covariance']).to_numpy()
unbound_var, unbound_weights = unbounded_optimization(covariance_matrix)

valid_tickers_df["weight"] = unbound_weights
all_tickers_df = valid_tickers_df.copy().sort_values('weight', ascending=False).reset_index(drop=True)

all_tickers_df

Now loading tickers from Tickers.csv.
39 unique tickers have been found in the csv. Now downloading tickers.



6 Failed downloads:
['SQ', 'HDFC.NS', 'INVALIDTIC', 'ASDFA.TO', 'CELG']: YFTzMissingError('possibly delisted; no timezone found')
['XZO']: YFPricesMissingError('possibly delisted; no price data found  (1d 2024-10-01 -> 2025-10-01) (Yahoo error = "Data doesn\'t exist for startDate = 1727755200, endDate = 1759291200")')


Fetching info for 32 tickers.
30 tickers with complete information found.
Downloading price data for 30 tickers.
Completed download of 53 tickers.


Unnamed: 0,Ticker,Sector,Currency,MarketCap,weight
0,FTG.TO,Industrials,CAD,287480128,0.1070169
1,ABBV,Healthcare,USD,396442042368,0.08379742
2,PEP,Consumer Defensive,USD,205471121408,0.08284512
3,SHOP.TO,Technology,CAD,290337030144,0.07174771
4,RY.TO,Financial Services,CAD,320141066240,0.06603676
5,COST,Consumer Defensive,USD,382946738176,0.06536393
6,BK,Financial Services,USD,80087146496,0.06515467
7,AUST,Basic Materials,USD,24784332,0.0647451
8,AXP,Financial Services,USD,261380325376,0.0576485
9,GM,Consumer Cyclical,USD,76651790336,0.0492432


In [72]:
target_values = find_optimized_portfolio(all_tickers_df)

Portfolio passes cap diversity.
Downloading price data for 15 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000289
Final sector distribution:


Unnamed: 0,Sector,Weight
5,Financial Services,0.2747
3,Consumer Defensive,0.240241
2,Consumer Cyclical,0.158841
7,Industrials,0.101406
4,Energy,0.077625
0,Basic Materials,0.046588
8,Technology,0.033932
6,Healthcare,0.033333
1,Communication Services,0.033333


Portfolio passes cap diversity.
Downloading price data for 16 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000290
Final sector distribution:


Unnamed: 0,Sector,Weight
5,Financial Services,0.284515
2,Consumer Cyclical,0.207554
3,Consumer Defensive,0.135645
6,Healthcare,0.099952
4,Energy,0.072422
1,Communication Services,0.061658
0,Basic Materials,0.044504
7,Industrials,0.03125
8,Real Estate,0.03125
9,Technology,0.03125


Portfolio passes cap diversity.
Downloading price data for 17 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000309
Final sector distribution:


Unnamed: 0,Sector,Weight
9,Technology,0.201893
5,Financial Services,0.186395
3,Consumer Defensive,0.157878
2,Consumer Cyclical,0.109434
7,Industrials,0.0962
4,Energy,0.072704
1,Communication Services,0.063188
0,Basic Materials,0.053484
6,Healthcare,0.029412
8,Real Estate,0.029412


Portfolio passes cap diversity.
Downloading price data for 18 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000288
Final sector distribution:


Unnamed: 0,Sector,Weight
5,Financial Services,0.246364
3,Consumer Defensive,0.154082
9,Technology,0.115783
6,Healthcare,0.113962
2,Consumer Cyclical,0.101015
7,Industrials,0.090852
4,Energy,0.055556
1,Communication Services,0.054589
8,Real Estate,0.04002
0,Basic Materials,0.027778


Portfolio passes cap diversity.
Downloading price data for 19 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000278
Final sector distribution:


Unnamed: 0,Sector,Weight
5,Financial Services,0.205532
1,Communication Services,0.193287
3,Consumer Defensive,0.173278
2,Consumer Cyclical,0.113624
9,Technology,0.092215
6,Healthcare,0.060411
0,Basic Materials,0.056391
4,Energy,0.052632
7,Industrials,0.026316
8,Real Estate,0.026316


Portfolio passes cap diversity.
Downloading price data for 20 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000271
Final sector distribution:


Unnamed: 0,Sector,Weight
5,Financial Services,0.276913
2,Consumer Cyclical,0.137175
1,Communication Services,0.118851
6,Healthcare,0.100737
3,Consumer Defensive,0.099458
9,Technology,0.087293
0,Basic Materials,0.07642
4,Energy,0.053153
8,Real Estate,0.025
7,Industrials,0.025


Portfolio passes cap diversity.
Downloading price data for 21 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000296
Final sector distribution:


Unnamed: 0,Sector,Weight
9,Technology,0.22426
5,Financial Services,0.184822
2,Consumer Cyclical,0.101305
6,Healthcare,0.09671
1,Communication Services,0.083544
8,Real Estate,0.082749
7,Industrials,0.077848
3,Consumer Defensive,0.056856
4,Energy,0.047619
0,Basic Materials,0.044287


Portfolio passes cap diversity.
Downloading price data for 22 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000304
Final sector distribution:


Unnamed: 0,Sector,Weight
9,Technology,0.275378
2,Consumer Cyclical,0.2283
5,Financial Services,0.112412
7,Industrials,0.074738
6,Healthcare,0.074668
0,Basic Materials,0.060659
3,Consumer Defensive,0.056613
1,Communication Services,0.049005
4,Energy,0.045501
8,Real Estate,0.022727


Portfolio passes cap diversity.
Downloading price data for 23 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000293
Final sector distribution:


Unnamed: 0,Sector,Weight
5,Financial Services,0.211551
9,Technology,0.20837
2,Consumer Cyclical,0.168782
6,Healthcare,0.102095
4,Energy,0.08258
7,Industrials,0.071188
3,Consumer Defensive,0.05059
1,Communication Services,0.043478
0,Basic Materials,0.039626
8,Real Estate,0.021739


Portfolio passes cap diversity.
Downloading price data for 24 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000286
Final sector distribution:


Unnamed: 0,Sector,Weight
9,Technology,0.214061
5,Financial Services,0.15647
3,Consumer Defensive,0.131914
4,Energy,0.128827
2,Consumer Cyclical,0.121637
6,Healthcare,0.066834
7,Industrials,0.066626
0,Basic Materials,0.051131
1,Communication Services,0.041667
8,Real Estate,0.020833


Portfolio passes cap diversity.
Downloading price data for 25 tickers.
Completed download of 53 tickers.
All constraints satisfied on iteration 1. Final variance: 0.000288
Final sector distribution:


Unnamed: 0,Sector,Weight
9,Technology,0.236198
5,Financial Services,0.189733
2,Consumer Cyclical,0.102729
0,Basic Materials,0.097717
4,Energy,0.080275
6,Healthcare,0.07624
3,Consumer Defensive,0.071236
1,Communication Services,0.064175
7,Industrials,0.061697
8,Real Estate,0.02


In [73]:
print("================================================================================================================================================")
print(f"The smallest variance found is {target_values[0]} which is determined from the following portfolio:'\n{target_values[1]},\nat the following weights:\n{target_values[2]}.")
print("================================================================================================================================================")

The smallest variance found is 0.000271329936518362 which is determined from the following portfolio:'
['FTG.TO', 'ABBV', 'PEP', 'SHOP.TO', 'RY.TO', 'COST', 'BK', 'AUST', 'AXP', 'GM', 'T.TO', 'TD', 'AMZN', 'SLB', 'SU.TO', 'SPG', 'DUOL', 'EXE.TO', 'GOOG', 'AW.TO'],
at the following weights:
[0.025      0.07573698 0.03764893 0.025      0.0990097  0.0618093
 0.05181846 0.07641962 0.05251138 0.03075053 0.025      0.07357392
 0.025      0.02815331 0.025      0.025      0.06229318 0.025
 0.09385058 0.08142411].


In [74]:
def get_close_prices_and_rate(tickers, date=None):
    """
    get_close_prices_and_rate finds the closing price and exchange rate on a day. Defaults to last trading day
    
    :param tickers: list of tickers
    :param date: date to get prices for (defaults to last trading day)
    :return: a Series that contains the target days close price and also the exchange rate
    """

    if date is None:
        date = datetime.today()
    # Step back one day at a time until it's a weekday, aka Monday or Friday
    while date.weekday() >= 5: # Saturday = 5, Sunday = 6
        date -= timedelta(days=1)
    
    end_date = date + timedelta(days=1)
    date_str = date.strftime('%Y-%m-%d')
    end_str = end_date.strftime('%Y-%m-%d')

    try:
        price_data = yf.download(tickers, start=date_str, end=end_str)["Close"] # Download data for tickers
        close_prices = price_data.iloc[0]

        exchange_rate = float(yf.download("CAD=X", start=date_str, end=end_str)["Close"].iloc[0]) # Download data for exchange rate

        return close_prices, exchange_rate
    except:
        print("There was an error getting prices and exchange rate. Please check ticker list")
        raise

def purchase_flat_fee(df, budget, exchange_rate):
    """
    purchase_flat_fee transforms a Dataframe to include the amount of shares bought with a flat fee, and the value of that

    :param df: The Dataframe that will be transformed
    :param budget: An Integer representing the budget
    :param exchange_rate: A series of the USD-CAD exchange rate
    :return: Transformed Dataframe
    """

    df["Shares Bought Flat Fee"] = (df["Weight"] * (budget - (2.5*exchange_rate))) / df["Price"]
    df["Flat Fee Worth"] = df["Shares Bought Flat Fee"] * df["Price"]

    return df 

def purchase_variable_fee(df, budget, exchange_rate):
    """
    purchase_variable_fee transforms a Dataframe to include the amount of shares bought with the share-dependent rate

    :param df: The Dataframe that will be transformed
    :param budget: An Integer representing the budget
    :param exchange_rate: A series of the USD-CAD exchange rate
    :return: Transformed Dataframe
    """
    df["Shares w/o Fee"] = (df["Weight"] * budget) / df["Price"]
    total_shares = df["Shares w/o Fee"].sum()
    
    # Calulate the variable fee
    variable_fee_usd = total_shares * 0.001
    variable_fee_cad = variable_fee_usd * exchange_rate

    adjusted_budget = budget - variable_fee_cad # adjust the budget to how much we would be able to after the trading fee
    df["Shares Bought Variable Fee"] = (df["Weight"] * adjusted_budget) / df["Price"] # buy stocks using the adjusted budget
    df["Variable Fee Worth"] = df["Shares Bought Variable Fee"] * df["Price"]

    return df

def ideal_shares(df):
    """ 
    ideal_shares determines which fee strategy, flat or variable, is the most optimal and transforms Dataframe accordingly

    :param df: The Dataframe that will be transformed
    :return: Transformed Dataframe
    """
    sum_flat_fee = df["Flat Fee Worth"].sum()
    sum_variable_fee = df["Variable Fee Worth"].sum()

    if (sum_flat_fee < sum_variable_fee):
        df.drop(["Shares Bought Flat Fee", "Flat Fee Worth", "Shares w/o Fee"], axis=1, inplace=True)
        df.rename(columns={"Shares Bought Variable Fee":"Shares", "Variable Fee Worth":"Value"}, inplace=True)
    else:
        df.drop(["Shares Bought Variable Fee", "Variable Fee Worth", "Shares w/o Fee"], axis=1, inplace=True)
        df.rename(columns={"Shares Bought Flat Fee":"Shares", "Flat Fee Worth":"Value"}, inplace=True)
    
    return df 

def add_currency(df_small, df_large):
    """ 
    add_currency adds the currency type onto the Dataframe

    :param df_small: The Dataframe that holds the shares bought
    :param_df_large: The Dataframe the holds the Currency's
    :return: Transformed Dataframe
    """

    df_with_currency = df_small.merge(df_large[["Ticker", "Currency"]], on="Ticker", how="left")
    return df_with_currency

def convert_closing(df, exchange_rate):
    """
    convert_closing converts the price of any stock thats in USD to CAD

    :param df: Dataframe to be transformed
    :param exchange_rate: The USD-CAD rate
    :return: Dataframe with converted prices
    """

    df.loc[df["Currency"] == "USD", "Price"] = df["Price"] * exchange_rate
    return df

In [75]:
purchase_df = pd.DataFrame({
    "Ticker": target_values[1],
    "Weight": target_values[2]
})
closing, usd_cad_rate = get_close_prices_and_rate(target_values[1])

# Merge the closing prices and the currency information onto the dataframe
purchase_df = purchase_df.merge(closing.rename("Price"), left_on="Ticker", right_index=True, how="left")
purchase_df = purchase_df.merge(all_tickers_df[["Ticker", "Currency"]], on="Ticker", how="left")

# Convert USD currencies into CAD
usd_mask = purchase_df["Currency"] == "USD"
purchase_df.loc[usd_mask, "Price"] = purchase_df.loc[usd_mask, "Price"] * usd_cad_rate

# Calculate fee options
purchase_df = purchase_flat_fee(purchase_df, total_budget, usd_cad_rate)
purchase_df = purchase_variable_fee(purchase_df, total_budget, usd_cad_rate)

Portfolio_Final = ideal_shares(purchase_df)
Portfolio_Final = Portfolio_Final[["Ticker", "Price", "Currency", "Shares", "Value", "Weight"]]

sum_weight = Portfolio_Final["Weight"].sum()
sum_shares = Portfolio_Final["Shares"].sum()
sum_value = Portfolio_Final["Value"].sum()

print("===========================================================================")
print("This is the final portfolio with basic info for each stock: ")
print("===========================================================================")
print("")
Portfolio_Final

print("=======================================================================================================================================================")
print(f"The sum of the weights is {sum_weight}, the total value of the portfolio minus the fees is ${sum_value}, and the sum of the shares is {sum_shares}.")
print("=======================================================================================================================================================")

  price_data = yf.download(tickers, start=date_str, end=end_str)["Close"] # Download data for tickers
[*********************100%***********************]  20 of 20 completed
  exchange_rate = float(yf.download("CAD=X", start=date_str, end=end_str)["Close"].iloc[0]) # Download data for exchange rate
[*********************100%***********************]  1 of 1 completed
  exchange_rate = float(yf.download("CAD=X", start=date_str, end=end_str)["Close"].iloc[0]) # Download data for exchange rate


This is the final portfolio with basic info for each stock: 

The sum of the weights is 1.0, the total value of the portfolio minus the fees is $999996.5538749099, and the sum of the shares is 41496.48989429401.
