In [12]:
from IPython.display import display, display_html, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import threading
from datetime import datetime

In [13]:
# Global Variables
start_date = '2023-01-01'
end_date = '2023-11-25'

## Overview

Our team has chosen to target the safest portfolio. This means that our algorithm will choose the optimal weighting for the portfolio so that the final portfolio value is as close as possible to the initial investment, which in this case is $750,000. 

There are three main concepts our algorithm utilizes to ensure the least risk: beta, volatility, and gradient descent. We will filter out any stocks with a large beta, calculate the volatility of each individual stock, and use gradient descent to arrive upon the optimal portfolio weighting for each of the stocks. 

### Reading and Cleaning CSV Data

In [14]:
# The code below filters out invalid stocks based on the criteria of the assignment and uses multithreading for efficiency

# Initializing a dataframe for 'raw' data extracted from the .csv file
tickers_raw = pd.read_csv("Tickers.csv", header=None)[0].tolist() 

# Empty data structures to store ticker data in
tickers = []
tickers_hist = {}

# Dictionary of all the tickers' closing prices between January 01, 2023 and October 31, 2023
stock_prices = {}

# Exchange rate to convert stock prices from USD to CAD. This is so calculations on the historical data can be consistent with
# the initial investment of $750,000 CAD 
exchange_ticker = yf.Ticker('USDCAD=x')
exchange_hist = exchange_ticker.history(start=start_date, end=end_date)
exchange_hist.index = pd.DatetimeIndex(exchange_hist.index).tz_localize(None)

# Function which consumes a ticker and determines the validation based on prerequisites
def validate_ticker(ticker):

    # Extracting ticker info from yFinance
    ticker_info = yf.Ticker(ticker).history_metadata

    # Trying every stock and excepting those that throw an error
    try:
        # If the stock is valid, we check for each prerequisite:
        # Checking for USD/CAD currency and ensuring it's on the US/Canadian market
        # Check if the ticker is a stock 
        # Convert the stock price of the data from USD to CAD 
        if (ticker_info['currency'] == 'USD' or ticker_info['currency'] == 'CAD') and ticker_info['instrumentType'] == 'EQUITY':
            
            ticker_hist = yf.Ticker(ticker).history(start=start_date, end=end_date).dropna()
            ticker_hist.index = pd.DatetimeIndex(ticker_hist.index).tz_localize(None)

            # Checking monthly volume
            ticker_monthly_trading_days = ticker_hist['Volume'].groupby(pd.Grouper(freq='MS')).count()
            ticker_monthly_volume = ticker_hist['Volume'].groupby(pd.Grouper(freq='MS')).sum()

            # Checking if the month has at least 18 trading days
            for month in ticker_monthly_trading_days.index:
                if ticker_monthly_trading_days.loc[month] < 18:
                    ticker_monthly_volume.drop(month, inplace=True)

             #Checking if the average monthly volume is greater than or equal to 150,000 USD
            if ticker_monthly_volume.mean() >= 150000:
                tickers.append(ticker)
                tickers_hist[ticker] = ticker_hist

                tickers_closing = yf.Ticker(ticker).history(start=start_date, end=end_date)['Close']
                tickers_closing.index = pd.DatetimeIndex(tickers_closing.index).tz_localize(None)
                
                #Adjusted for exchange rates
                if ticker_info['currency'] == 'USD':
                    stock_prices[ticker] = tickers_closing.mul(exchange_hist['Close']).dropna()
                elif ticker_info['currency'] == 'CAD':
                    stock_prices[ticker] = tickers_closing
            else:
                print(f'{ticker} Ticker does not meet average monthly volume requirements')
        else:
            print(f'{ticker} Ticker does not reference stock denominated in USD or is an ETF or index stock')
    except:
        print(f'Error: {ticker} Ticker does not reference a valid stock')

# Empty data structure for threading
threads = []

# Checking validity of each ticker in list of tickers given from threading
for ticker in tickers_raw:
    thread = threading.Thread(target=validate_ticker, args=[ticker])
    thread.start()
    threads.append(thread)

# Using threading
for thread in threads:
    thread.join()

# Creates a DataFrame that contains all the stocks and their closing prices between January and October 2023 

stock_prices = pd.DataFrame(stock_prices)
stock_prices.index = pd.to_datetime(stock_prices.index).date
stock_prices = stock_prices.sort_index(axis=1)
stock_prices = stock_prices.dropna() 
stock_prices

  if not _np.isnan(quotes["High"][n - 1]):
  quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
  if not _np.isnan(quotes["Low"][n - 1]):
  quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
  quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
  quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
  quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
  if not _np.isnan(quotes["High"][n - 1]):
  quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
  if not _np.isnan(quotes["Low"][n - 1]):
  quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
  quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
  quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
  quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
  if not _np.isnan(quotes["High"][n - 1]):
  quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
  if not _np.

Error: oiewrpoiwerpoi Ticker does not reference a valid stock


WEROJWOIEW: No data found, symbol may be delisted


Error: werojwoiew Ticker does not reference a valid stock


OWEIJR32R: No data found, symbol may be delisted


Error: oweijr32r Ticker does not reference a valid stock


GGEWOEOE: No data found, symbol may be delisted


Error: GGEWOEOE Ticker does not reference a valid stock


Unnamed: 0,AAPL,ABBV,ABT,ACN,AIG,AMZN,AXP,BA,BAC,BIIB,...,QCOM,RY.TO,SHOP.TO,T.TO,TD.TO,TXN,UNH,UNP,UPS,USB
2023-01-03,168.639789,211.621951,145.665501,360.714599,83.711187,116.363342,196.572714,264.929310,44.064212,369.659037,...,141.564543,122.739288,48.790001,25.214359,84.689217,214.563569,692.793878,276.074065,228.572688,57.969780
2023-01-04,171.825505,215.140097,149.087171,362.538234,85.669409,116.421282,202.849765,278.459360,45.273718,370.308284,...,148.532840,123.832169,50.610001,25.520916,85.790451,224.286737,679.626403,280.644095,232.919618,60.282251
2023-01-05,167.653608,211.907272,146.484505,349.084957,84.022266,112.088157,195.258906,276.431078,44.556414,366.241833,...,143.682400,123.467857,48.830002,25.492176,84.384216,218.257961,650.916300,268.617205,225.459288,58.984098
2023-01-06,174.786447,217.070925,149.331001,359.337033,85.871868,116.723626,201.356784,288.825881,45.250680,378.660222,...,152.324055,125.040092,49.560001,25.884954,84.345154,230.288869,654.580279,281.984521,233.373966,60.142445
2023-01-09,173.887177,208.759964,147.720630,362.036667,84.185868,117.369912,199.810619,280.217986,44.157003,369.091828,...,149.968037,125.970016,49.810001,25.827473,84.081490,230.151077,648.640063,278.666550,234.768279,59.975385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-17,260.863597,190.191554,136.902164,450.835091,88.673548,199.652985,223.554142,286.098689,40.902340,313.547891,...,176.923240,120.489998,93.870003,24.219999,84.779999,212.634971,735.003941,301.459804,203.503577,51.762905
2023-11-20,262.650244,189.747480,138.836272,453.961689,88.295482,200.475748,224.785817,298.676350,40.844720,315.687901,...,176.551924,120.529999,95.449997,24.200001,85.099998,213.385311,731.607773,303.327090,205.195087,51.624597
2023-11-21,261.636253,190.394442,139.862314,453.252159,89.165483,197.489799,222.728419,299.555949,40.383366,315.064187,...,173.221861,119.779999,95.540001,24.100000,84.220001,210.568865,738.021048,304.675033,205.545860,50.792894
2023-11-22,262.113832,189.717752,140.709268,456.421427,89.015400,201.007383,224.408673,301.298701,40.274616,316.835630,...,173.583787,118.970001,97.129997,24.049999,83.360001,210.899486,742.472706,306.203661,206.542578,50.460786


### Filtering Stocks Based on Beta

$$
\beta_i = \frac {\mathrm{Covariance} (r_i,r_m)}{\mathrm{Variance} (r_m)}
$$

The formula above is for $ \beta_i $, the beta of a particular stock, where $r_i$ is the return of the stock and $r_m$ is the return of the market index. In this case, the market index will use the S&P 500 since it includes a wide breadth of large-cap companies in America. This is also ideal since our algorithm also finds the optimal number of stocks to diversify over and needs a market index that is representative of a wide range of stocks.

Since our algorithm is trying to design a portfolio with the least volatility, our portfolio will consist of stocks with the lowest beta. 

In [15]:
# The code below calculates the beta for each of the stocks in the DataFrame of valid stocks

def get_beta(ticker_list):
    market_index = yf.Ticker('^GSPC').history(start=start_date, end=end_date)['Close'].dropna()
    market_index.index = pd.DatetimeIndex(market_index.index).tz_localize(None)
    market_return = market_index.pct_change()
    market_variance = market_return.var()
    beta_list = []
    beta_df = pd.DataFrame()
    for i in range(len(ticker_list.columns)):
        stock_return = ticker_list.iloc[:, i].pct_change()
        covariance = stock_return.cov(market_return)
        beta_list.append(covariance/market_variance)
    beta_df['Ticker'] = ticker_list.columns
    beta_df['Beta'] = beta_list
    return beta_df

# Calls the function get_beta() and sorts the betas in descending order
beta_df = get_beta(stock_prices)
beta_df.sort_values(by=['Beta'], ascending=False, ignore_index=True, inplace=True)
beta_df

# Filters the resulting DataFrame to discard any stocks with an unreasonably high beta. Since the beta list has been sorted from
# greatest to least, the stocks with the greatest beta will always be filtered out first. There will always be at least 10 
# stocks in the resulting list of stocks.
#minimum = len(beta_df.index)
#for i in range(len(beta_df.index)):
#    if beta_df.iloc[i].iloc[1] > 1.5 and minimum > 10:
#        stock_prices = stock_prices.drop([beta_df.iloc[i].iloc[0]], axis=1)
#        minimum -= 1

stock_prices

Unnamed: 0,AAPL,ABBV,ABT,ACN,AIG,AMZN,AXP,BA,BAC,BIIB,...,QCOM,RY.TO,SHOP.TO,T.TO,TD.TO,TXN,UNH,UNP,UPS,USB
2023-01-03,168.639789,211.621951,145.665501,360.714599,83.711187,116.363342,196.572714,264.929310,44.064212,369.659037,...,141.564543,122.739288,48.790001,25.214359,84.689217,214.563569,692.793878,276.074065,228.572688,57.969780
2023-01-04,171.825505,215.140097,149.087171,362.538234,85.669409,116.421282,202.849765,278.459360,45.273718,370.308284,...,148.532840,123.832169,50.610001,25.520916,85.790451,224.286737,679.626403,280.644095,232.919618,60.282251
2023-01-05,167.653608,211.907272,146.484505,349.084957,84.022266,112.088157,195.258906,276.431078,44.556414,366.241833,...,143.682400,123.467857,48.830002,25.492176,84.384216,218.257961,650.916300,268.617205,225.459288,58.984098
2023-01-06,174.786447,217.070925,149.331001,359.337033,85.871868,116.723626,201.356784,288.825881,45.250680,378.660222,...,152.324055,125.040092,49.560001,25.884954,84.345154,230.288869,654.580279,281.984521,233.373966,60.142445
2023-01-09,173.887177,208.759964,147.720630,362.036667,84.185868,117.369912,199.810619,280.217986,44.157003,369.091828,...,149.968037,125.970016,49.810001,25.827473,84.081490,230.151077,648.640063,278.666550,234.768279,59.975385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-17,260.863597,190.191554,136.902164,450.835091,88.673548,199.652985,223.554142,286.098689,40.902340,313.547891,...,176.923240,120.489998,93.870003,24.219999,84.779999,212.634971,735.003941,301.459804,203.503577,51.762905
2023-11-20,262.650244,189.747480,138.836272,453.961689,88.295482,200.475748,224.785817,298.676350,40.844720,315.687901,...,176.551924,120.529999,95.449997,24.200001,85.099998,213.385311,731.607773,303.327090,205.195087,51.624597
2023-11-21,261.636253,190.394442,139.862314,453.252159,89.165483,197.489799,222.728419,299.555949,40.383366,315.064187,...,173.221861,119.779999,95.540001,24.100000,84.220001,210.568865,738.021048,304.675033,205.545860,50.792894
2023-11-22,262.113832,189.717752,140.709268,456.421427,89.015400,201.007383,224.408673,301.298701,40.274616,316.835630,...,173.583787,118.970001,97.129997,24.049999,83.360001,210.899486,742.472706,306.203661,206.542578,50.460786


### Volatility/Gradient Descent Algorithm to Determine Optimal Weightings

**How does our algorithm work?**

Our algorithm works as follows. First, we construct a custom definition of volatility derived by modifying the standard deviation formula as follows:

$\sqrt{\sum_{t=1}^n \frac{(\bar{x}-x_i)^2}{n}}$

In the safe strategy, an ideal portfolio is one with zero growth (ignoring trading fees, which are negligible). Therefore, we replace average change $\bar{x}$ with 0:

$\sqrt{\sum_{t=1}^n \frac{(0-x_i)^2}{n}}$

Next, we recognize that we are not concerned with volatility over a daily range, but instead a weekly range. Therefore, we consider the squared distance from zero not of the daily change but the change over 5 days (which we use as a reasonable estimate of the number of trading days occurring over a week):

$\text{Volatility}=\sqrt{\sum_{i=1}^n \frac{\bigl(\prod_{j=0}^5 (1+x_{i+j})-1 \bigr)^2}{n}}$

The principal goal of our algorithm is to minimize this equation by changing the weightings of each stock.

**How we determine optimal stock weightings?**

To minimize volatility without having to use brute force, we repeatedly iterate through the list of stocks, changing the weightings of each one at a time. What we do next depends on the feedback received: If an increase in the weighting of stock results in an increase in the volatility of the portfolio, we know to reverse such a decision, and instead decrease the weightings. Through this process of constant trial and feedback (called gradient descent), we can gradually find the optimal weighting for each stock

**Cleaning the data and abiding by assignment rules:**

If we were simply to let the algorithm in its current description run without change, we could end up with weightings beyond the upper limit of 20%, below lower limits, or perhaps even negative. Therefore, every cycle we go through the results, and make a few corrections to it. First of all, if our algorithm produces a stock with a negative weighting, it is automatically corrected to zero. If a stock exceeds the maximum allowed weighting, it is also reduced to that limit. Lastly, if the weighting of a stock is positive but below the minimum requirement, we compare the volatility of two hypothetical portfolios: one where the weightings are abruptly set to the minimum requirement, and another where it is set to zero. Which ever one results in a lower volatility, we keep.

**Excluding stocks from our list:**

If we are given a list of stocks greater than the maximum 22, we need a process by which to eliminate some. We do so by dropping the stock with the least weighting every few iterations of our algorithm. The reason we do not drop several at a time is to allow the portfolio to adjust to the changes made.

**Finalizing the weights:**

Once the algorithm has been iterated a sufficient number of times, we are ready to construct the final portfolio. First, we renorm the weightings such that they add up to 100%. Over time, when we add and subtract weights, this results in changes to the sum. While this is not an issue during the process (for we can simply keep track of the sum), for purposes of presenting the data and calculating the correct investment amounts, we go through this renorming. Second, we go through the weightings again, this time more rigorously; to make sure all rules are being abided.

In [16]:
# Daniel Chung
# The code below is our main volatility/gradient descent algorithm

def volatility(changes):
    vol = 0
    length = len(changes.index)
    for n in range(1, length - 5):
        vol += pow((1 + changes.iloc[n]) * (1 + changes.iloc[n+1]) * (1 + changes.iloc[n+2]) 
        * (1 + changes.iloc[n+3]) * (1 + changes.iloc[n+4]) - 1, 2) / length       
    #vol = pow(vol, 1/2)
    return vol

def cleanup(old_volatility_data, can_drop):
    # as a result of changing weightings, sum will not neccesarily add up to 100%. This is okay as long as relative weightings
    # are preserved -i.e, if final weightings add up to 200%, we can just divide every weight by 2 to correct for this.
    weight_sum = 0
    for n in range(0, len(old_volatility_data.columns)):
        weight_sum = weight_sum + old_volatility_data.iloc[2,n]
    # number of stocks we actually invest into
    num_stocks = 0
    for n in range(0, len(old_volatility_data.columns)):
        if old_volatility_data.iloc[2,n] != 0:
            num_stocks = num_stocks + 1

    #finds stock with least weighting
    min_stock_index = n
    min_weighting = old_volatility_data.iloc[2,0]
    for n in range(0, len(old_volatility_data.columns)):
        if old_volatility_data.iloc[2,n] != 0 and old_volatility_data.iloc[2,n] < min_weighting:
            min_stock_index = n
            min_weighting = old_volatility_data.iloc[2,n]
    #drops stocks if it is permitted, and either there are stocks with negative weightings or we do not meet cap on number
    # of stocks
    if (can_drop and (num_stocks > 22 or min_weighting <= 0) and num_stocks > 10):
        old_volatility_data.iloc[1, min_stock_index] = 0 
        old_volatility_data.iloc[2, min_stock_index] = 0

    # makes sure upper limit on weightings is not breached
    for n in range(0, len(old_volatility_data.columns)):
        if (old_volatility_data.iloc[2,n] > weight_sum * 0.2):
            weight_sum = weight_sum - (volatility_data.iloc[2,n] - weight_sum * 0.2)
            old_volatility_data.iloc[2,n] = weight_sum * 0.2
    
    # number of stocks with positive investment
    pos_stocks = 0
    for n in range(0, len(old_volatility_data.columns)):
        if old_volatility_data.iloc[2,n] > 0:
            pos_stocks = pos_stocks + 1
    
    #makes sure lower limit on weightings is not breached
    for n in range(0, len(old_volatility_data.columns)):
        if (old_volatility_data.iloc[2,n] < weight_sum/(2*num_stocks)
            and old_volatility_data.iloc[2,n] != 0):
            min_weighting = 1/(2*num_stocks) * (weight_sum - old_volatility_data.iloc[2,n]) / (1 - 1/(2*num_stocks))            
            weight_diff = old_volatility_data.iloc[2,n] - min_weighting
            portfolio[old_volatility_data.columns[n]] = (investment * min_weighting 
                                / stock_prices.iloc[0,n] * stock_prices[columns[n]])
            vol_with = volatility(portfolio.sum(axis=1).pct_change())
            portfolio[old_volatility_data.columns[n]] = portfolio[old_volatility_data.columns[n]] * 0
            vol_without = volatility(portfolio.sum(axis=1).pct_change())
            portfolio[old_volatility_data.columns[n]] = (investment * old_volatility_data.iloc[2,n] 
                                / stock_prices.iloc[0,n] * stock_prices[columns[n]])
            if pos_stocks < 12:
                weight_sum = weight_sum + min_weighting - old_volatility_data.iloc[2,n]  
                old_volatility_data.iloc[2,n] = min_weighting
            elif vol_with < vol_without:
                weight_sum = weight_sum + min_weighting - old_volatility_data.iloc[2,n]
                old_volatility_data.iloc[2,n] = min_weighting
            else:
                weight_sum = weight_sum - 0.001 - old_volatility_data.iloc[2,n]
                old_volatility_data.iloc[2,n] = -0.001
                pos_stocks = pos_stocks - 1
                                                                                                                                           
    return old_volatility_data

# Gradient descent function that minimizes volatility
# Independent variable: Volatility
# Dependent variable: Weighting of stock
def min_search(stock, volatility_1, weight_1, weight_2):
    # Stock is not invested into at all
    if weight_1 == 0 and weight_2 == 0:
        portfolio[stock] = portfolio[stock] * 0
        volatility_2 = volatility(portfolio.sum(axis=1).pct_change())
        return [volatility_2, 0, 0]
    else:
        portfolio[stock] = (investment * weight_2 / stock_prices.iloc[0][stock]) * stock_prices[stock]
        volatility_2 = volatility(portfolio.sum(axis=1).pct_change())
        # average change in volatility as a result of changing weightings - approx. for derivative
        if weight_1 != weight_2:
            # if volatility increases as result of changing weighting in certain direction, we change weighting in opposite
            # direction. If volatility decreases, we continue
            change = (volatility_2 - volatility_1)/(weight_2 - weight_1)
            if change < 0:
                new_weight = weight_2 - max(change * 50, -0.01)
            else:
                new_weight = weight_2 - min(change * 50, 0.01)
        #extreme case where algo suggest to keep weightings unchanged (implies optimal weighting is precisley found)
        else:
            new_weight = weight_2
        if new_weight < -0.001:
            new_weight = -0.001
        return [volatility_2, weight_2, new_weight]

# Creation of initial portfolio
portfolio = pd.DataFrame()
portfolio.index = stock_prices.index

# At first, the portfolio is just an equal weighting of all valid stocks
columns = stock_prices.columns
investment = 750000
for n in range(0, len(columns)):
    portfolio[columns[n]] = ((investment/len(columns)) / stock_prices.iloc[0, n]) * stock_prices[columns[n]]
portfolio
    
# Dataframe used to keep track of optimization
volatility_data = pd.DataFrame()
volatility_data.index = ['Vol1', 'Weight1', 'Weight2']
volatility_data

# Creation of initial values to get optimization started
vol1 = volatility(portfolio.sum(axis=1).pct_change())
for j in range(0, len(portfolio.columns)):
    volatility_data[portfolio.columns[j]] = [vol1, 1/len(portfolio.columns), 1/len(portfolio.columns) + 0.005]

iterations = ((len(portfolio.columns) - 10) * 3 + 30)
    
for i in range(0, iterations):
    # Updates volatility to reflect changes made during cleanup
    volatility_data.iloc[0,0] = volatility(portfolio.sum(axis=1).pct_change())
    # Goes through every stock in portfolio
    for j in range (0, len(volatility_data.columns)-1):
        # Calls gradient descent on given stock
        results = min_search(volatility_data.columns[j],
                             volatility_data.iloc[0,j],
                             volatility_data.iloc[1,j],
                             volatility_data.iloc[2,j])
        #updates data for next stock so that it has volatility data to compare with
        if j < len(volatility_data.columns)-1:
            volatility_data.iloc[0,j+1] = results[0]
        else:
            volatility_data.iloc[0,0] = results[0] 
        #updates weightings
        volatility_data.iloc[1,j] = results[1]
        volatility_data.iloc[2,j] = results[2]
        
    # If portfolio optimization has only been through a few iterations, we avoid eliminating stocks unless their weightings
    # go negative. Otherwise, we unleash the Hunger Games. Also, we only drop stocks every other iteration to allow for
    # adjustment of rest of stocks.
    if i > 11 and i % 3 == 0:
        volatility_data = cleanup(volatility_data, True)
    else:
        volatility_data = cleanup(volatility_data, False)
    print(volatility_data.iloc[0,0], portfolio.sum(axis=1).pct_change().std())

# For data presentation, we drop stocks we do not invest at all in
dropped_stocks = [stocks for stocks in volatility_data.columns if volatility_data[stocks][2] <= 0]
volatility_data.drop(dropped_stocks, axis=1, inplace=True)
volatility_data

#renorms weightings for portfolio
weight_sum = 0
for n in range(0, len(volatility_data.columns)):
    weight_sum = weight_sum + volatility_data.iloc[2,n]
    
adjustment_factor = 1/weight_sum

for n in range(0, len(volatility_data.columns)):
    volatility_data.iloc[2,n] = volatility_data.iloc[2,n] * adjustment_factor
    
#Here, we go through the list of stocks one more time to make sure none of the requirements are violated. While we
# attempt to do that continously using cleanup(), the processes in cleanup sacrifice mathematical precision in exchange for
# faster computing, and thus may result in stocks barely not meeting requirements.
min_threshold = 1/(2*len(volatility_data.columns))

def set_lower_bound(target_value, old_volatility_data, index):
    #amount by which to increase weighting
    diff = target_value - old_volatility_data.iloc[2, index]
    old_volatility_data.iloc[2, index] = target_value
    #to prevent rest of weights from being messed up, we must decrease them -if possible -to account for change in weighting
    available = 0
    for n in range(0, len(old_volatility_data.columns)):
        if old_volatility_data.iloc[2,n] > target_value:
            available += old_volatility_data.iloc[2,n] - target_value
    #how much to subtract
    subtract = diff/available
    for n in range(0, len(old_volatility_data.columns)):
        if old_volatility_data.iloc[2,n] > min_threshold:
            old_volatility_data.iloc[2,n] = (1 - subtract) * (old_volatility_data.iloc[2,n] - target_value) + target_value
            
def set_upper_bound(old_volatility_data, index):
    #amount by which to increase weighting
    diff = 0.20 - old_volatility_data.iloc[2, index]
    old_volatility_data.iloc[2, index] = 0.20
    #to prevent rest of weights from being messed up, we must decrease them -if possible -to account for change in weighting
    available = 0
    for n in range(0, len(old_volatility_data.columns)):
        if old_volatility_data.iloc[2,n] < 0.20:
            available += 0.20 - old_volatility_data.iloc[2,n] - 0.20
    #how much to add
    add = diff/available
    for n in range(0, len(old_volatility_data.columns)):
        if old_volatility_data.iloc[2,n] > min_threshold:
            old_volatility_data.iloc[2,n] = 0.20 - (1 - add) * (0.20 - old_volatility_data.iloc[2,n])

        
for n in range(0, len(volatility_data.columns)):
    if volatility_data.iloc[2,n] < min_threshold:
        set_lower_bound(min_threshold, volatility_data, n)
    elif volatility_data.iloc[2,n] > 0.2:
        set_upper_bound(volatility_data, n)

#prints out list of stocks to invest in and their weightings
investment = 750000
investment = investment - 4.95 * len(volatility_data.columns)

0.00025512042071520406 0.008246863680736068
0.0002527137970314248 0.007330243884057864
0.00016410963552884942 0.007009159559137849
0.00013252435894759156 0.006929927328534156
0.0001252825940158909 0.006899864505511751
0.00012115664109759973 0.006850937960850791
0.0001162186721280825 0.006842331749623623
0.00011328305740400322 0.006840036961958657
0.00011105629977437125 0.006842506857282256
0.00010927632878986362 0.006867287654751489
0.00010876161176619209 0.006910286026603627
0.00010837124688156703 0.006894982508154713
0.0001081769933623134 0.0069489209670185815
0.00010767489015197868 0.006961507334698316
0.00010721951153992388 0.0069727906748025086
0.00010672510355732151 0.006984493118470369
0.00010612667076800144 0.007053316113919564
0.00010540928731729202 0.007029971227915549
0.00010576156319378189 0.007045941353444237
0.00010466203908652283 0.007045924929012061
0.00010493630336411909 0.007043452663356403
0.00010417791403545415 0.007044292748388611
0.00010390612993838739 0.007112550

  dropped_stocks = [stocks for stocks in volatility_data.columns if volatility_data[stocks][2] <= 0]


### Final Portfolio 

In [1]:
# Daniel Chung
# The code below collects, reorganizes, and assembles all the data into a final DataFrame 
# Retrieves the closing price for each stock on November 25, 2023
def get_price(volatility_data, date):
    price_list = []
    for i in range (len(volatility_data.columns)):
        ticker_hist = yf.Ticker(volatility_data.columns[i]).history(start=date, interval='1d').dropna()
        ticker_hist.index = pd.DatetimeIndex(ticker_hist.index).tz_localize(None)
        price_list.append(ticker_hist['Close'].loc[date])
    return price_list

# Retrieves the currency of each stock
def get_currency(volatility_data):
    currency_list = []
    for i in range(len(volatility_data.columns)):
        ticker_info = yf.Ticker(volatility_data.columns[i]).history_metadata
        if ticker_info['currency'] == 'USD':
            currency_list.append('USD')
        elif ticker_info['currency'] == 'CAD':
            currency_list.append('CAD')
    return currency_list

final_close_price = get_price(volatility_data, '2023-11-24') 
final_currency = get_currency(volatility_data)
final_weight = volatility_data.loc['Weight2']*100
final_value = final_weight/100*investment

# Organizes all the data into a DataFrame
Portfolio_Final_Dict = {"Ticker": volatility_data.columns,
                        "Price": final_close_price, 
                        "Currency": final_currency,
                        "Value (CAD)": final_value,
                        "Weight": final_weight} 
    
Portfolio_Final = pd.DataFrame(Portfolio_Final_Dict)
Portfolio_Final = Portfolio_Final.reset_index(drop=True)
Portfolio_Final.index += 1
Portfolio_Final['Shares'] = Portfolio_Final['Value (CAD)']/(Portfolio_Final['Price'])

Portfolio_Final

NameError: name 'volatility_data' is not defined

In [23]:
#Exports results to csv file
Stocks_Final = Portfolio_Final['Shares']
Stocks_Final.index = Portfolio_Final['Ticker']
Stocks_Final.to_csv('Portfolio.csv')

In [22]:
invested_amount = Portfolio_Final['Value (CAD)'].sum()
trading_fee = 4.95 * len(Stocks_Final.index)
print("Total CAD Spent:", invested_amount + trading_fee)
print("Weighting Sum: ", Portfolio_Final['Weight'].sum())

Total CAD Spent: 749999.9999999998
Weighting Sum:  99.99999999999999
