In [2]:
#These are the libraries you can use.  You may add any libraries directly related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime
import scipy as sp 
from scipy.optimize import minimize

## Group Assignment
### Team Number: 15
### Team Member Names: Neil Zhang, Rahim Rehan, Krish Patel
### Team Strategy Chosen: Risk-Free 

In [None]:
# Krish, Info Extraction 

# Function to return a list of all tickers (first column elements)
def get_ticker_list (tickers_df):
    """
    Collect the ticker symbols held in the first column of a DataFrame.

    :param tickers_df: DataFrame whose first column holds the ticker symbols
    :return: list of the first-column values, excluding the value in the first row
    """
    # NOTE(review): row 0 is skipped — presumably it holds a header/label rather
    # than a ticker; confirm against how the tickers file is loaded.
    first_column = tickers_df.iloc[:, 0]
    return first_column.iloc[1:].tolist()

# NOTE(review): `tickers` is not defined in any visible cell — presumably a DataFrame
# loaded from the tickers file; confirm it is created before this cell on a fresh kernel.
ticker_list = (get_ticker_list(tickers)) # List of all tickers


# Gets weekly closes of all the stocks in a list of tickers
def get_weekly_closes (ticker_lst, start_date, end_date):
    """
    Download price history for each ticker and reduce it to weekly closing prices.

    Each ticker's 'Close' series is resampled to weeks ending on Friday, keeping the
    last available close in each week. The per-ticker series are then combined in a
    single outer join, so a week is kept if ANY ticker traded in it (tickers with no
    data for that week get NaN) instead of every column being forced onto the first
    ticker's weeks.

    :param ticker_lst: iterable of ticker symbols
    :param start_date: start of the history window, passed to yfinance's history()
    :param end_date: end of the history window, passed to yfinance's history()
    :return: DataFrame with one 'Close <ticker>' column per ticker, indexed by
             'YYYY-MM-DD' strings of each week-ending Friday; an empty DataFrame
             if ticker_lst is empty
    """
    closes_by_column = {}
    for symbol in ticker_lst:
        data = yf.Ticker(symbol).history(start=start_date, end=end_date)

        if not data.empty and 'Close' in data.columns:
            close = data['Close'].copy()
            week_index = pd.to_datetime(close.index) # ensure datetime index
            # Drop the exchange time zone (keeping local wall time) so that series
            # from different exchanges share the same Friday labels when joined
            if week_index.tz is not None:
                week_index = week_index.tz_localize(None)
            close.index = week_index
            # last() takes the last trading price of the week
            prices = close.resample('W-FRI').last()
        else:
            # No data returned (e.g. delisted ticker): keep the column, filled with NaN
            prices = pd.Series(dtype='float64', index=pd.DatetimeIndex([]))

        closes_by_column[f'Close {symbol}'] = prices

    if not closes_by_column:
        return pd.DataFrame()

    # Build the frame once; repeated column insertion aligns to the first column only
    weekly_closes = pd.concat(closes_by_column, axis=1).sort_index()
    # Strip time
    weekly_closes.index = pd.DatetimeIndex(weekly_closes.index).strftime('%Y-%m-%d')
    return weekly_closes

# NOTE(review): `start_date` and `end_date` are not defined in any visible cell —
# confirm they are set (e.g. in a configuration cell) before this cell runs.
weekly_closes = get_weekly_closes (ticker_list, start_date, end_date)

# Creates a df with the (weekly) %change for each column
def get_percent_change (closes, start_date, end_date):
    """
    Convert a frame of closing prices into period-over-period percentage changes.

    :param closes: DataFrame whose column names start with a 6-character prefix
                   (e.g. 'Close ') followed by the ticker
    :param start_date: unused by this function
    :param end_date: unused by this function
    :return: DataFrame with one '% Change <ticker>' column per input column,
             expressed in percent (first row is NaN)
    """
    prefix_len = 6 # number of leading characters dropped from each column name
    percent_change = pd.DataFrame()
    for close_col in closes:
        ticker_name = close_col[prefix_len:]
        # fill_method=None leaves gaps (e.g. delisted stocks) as NaN instead of forward-filling
        period_returns = closes[close_col].pct_change(fill_method=None)
        percent_change[f'% Change {ticker_name}'] = period_returns * 100
    return percent_change

# Weekly % change of every ticker's close.
# NOTE(review): `start_date` and `end_date` are not defined in any visible cell — confirm
# they exist before this cell runs.
weekly_percent_change = get_percent_change (weekly_closes, start_date, end_date)


# Calculate covariance, correlation, variance, standard deviation
# Summary statistics of the weekly % changes, keyed by the name of the statistic
covariance_matrix = dict(
    Covariance=weekly_percent_change.cov(),
    Correlation=weekly_percent_change.corr(),
    Variance=weekly_percent_change.var(),
    Std_Dev=weekly_percent_change.std(),
)
"""
# Access each piece like:
display(covariance_matrix['Covariance'])
display(covariance_matrix['Std_Dev'])
"""

In [None]:
# Neil: optimization models
# This is the function we want to minimize, aka the minimum variance function
def port_variance(weights, cov_matrix):
    """
    port_variance is the function that calculates the variance of a portfolio,
    i.e. the quadratic form w^T * C * w of the weights and the covariance matrix.

    Both arguments may be any array-like (list, NumPy array, pandas object); they
    are converted to NumPy arrays first, so a plain Python list of weights works
    the same as an array.

    :param weights: array-like with the weight of each asset (any shape; it is flattened)
    :param cov_matrix: 2D array-like with the covariance between each pair of assets
    :return: variance of the portfolio as a scalar
    """
    weight_vec = np.asarray(weights).reshape(-1) # Flatten to a 1-D vector
    cov = np.asarray(cov_matrix)
    # w . (C . w): same product order as multiplying C by the column vector first
    return weight_vec.dot(cov.dot(weight_vec))

In [None]:
# This code is what runs the primary function

# Primary minimization: no per-asset weight bounds are applied here (weights only have to sum to 1)
def primary_minimization(cov_matrix):
    """
    primary_minimization finds the asset weightings that give the minimum portfolio
    variance using scipy's SLSQP optimizer. The only restriction is that the weights
    sum to 1; no per-asset bounds are applied in this primary version.

    :param cov_matrix: a 2D matrix (with a .shape) that represents the covariance between each asset
    :return: a tuple of (minimum variance found, weightings associated with it)
    """
    asset_count = cov_matrix.shape[0]
    equal_weights = [1/asset_count] * asset_count # Starting guess: equally weighted portfolio

    def weights_sum_to_one(w):
        # Equality constraint: evaluates to 0 exactly when the weights sum to 1
        return sum(w) - 1

    budget_constraint = {'type': 'eq', 'fun': weights_sum_to_one}

    # Minimize port_variance over the weights, holding cov_matrix fixed
    optimum = minimize(
        fun=port_variance,
        x0=equal_weights,
        args=(cov_matrix,),
        method='SLSQP',
        constraints=budget_constraint,
    )
    return optimum.fun, optimum.x

# Run the primary (unbounded) optimization on the covariance of the weekly % changes.
# primary_weights has one entry per row/column of the covariance matrix, in that order.
pd_cov_matrix = covariance_matrix['Covariance']
numpy_cov_matrix = pd_cov_matrix.to_numpy() # Matrix of covariances of assets
primary_var, primary_weights = primary_minimization(numpy_cov_matrix)

NameError: name 'daily_returns_df' is not defined

Secondary optimization logic: Let n be the number of tickers; we now have an optimized weighting for those n stocks.
We want to find the best set of 10-25 stocks out of those n. Ideally we would try every combination, but brute-forcing every combination of those n stocks would take an absurd amount of compute. Instead, we take advantage of the fact that we already have the weightings from the (unconstrained) optimization. The higher the weighting of an asset, the more important it is to minimizing the variance, so we sort the n optimized assets from highest to lowest weighting. Starting from the top, we then build a portfolio for each size from 10 to 25 assets and calculate the variance of each portfolio, keeping the one with the least variance.

We must also consider the fact that each portfolio has constraints, namely the min/max weighting of any one stock, the maximum sector concentration, and the market-cap requirements.
For the weighting rules, these can be implemented via scipy's minimization constraints (bounds), which keep all weightings within a certain range.
For the sector limit, we keep count of the sectors while building each portfolio; if adding an asset would make its sector over-represented, we skip that asset and move on.
For the small and large market caps, we check after building the portfolio; if one of them is missing, we delete the lowest-weighted (least important) asset and then add a new asset from the list that satisfies the missing market cap.

In [None]:
# Attach the primary-optimization weights to the per-stock info table, then rank the
# stocks from highest to lowest weight.
# NOTE(review): `info_df` is not defined in any visible cell — presumably one row per
# ticker (sector, market cap, ...) in the same order as the covariance matrix; confirm.
info_df['weight'] = primary_weights
ordered_info_df = (
    info_df
    .sort_values('weight', ascending=False)
    .reset_index(drop=True)
)

# The secondary optimization that includes the bounds 
def secondary_minimization(cov_matrix):
    """
    secondary_minimization is the function that finds the weightings that result in the
    minimum variance while considering the bounds. That is, it ensures every weighting
    lies between 1/(2n) (i.e. (100/(2n))% of the portfolio) and 0.15, where n is the
    number of assets, and that the weightings sum to 1.

    :param cov_matrix: a square 2D array-like (NumPy array, DataFrame, nested list) that
                       represents the covariance between each asset
    :return: a tuple of (minimum variance found, weightings associated with it)
    :raises ValueError: if cov_matrix is not a non-empty square matrix, or if there are
                        too few assets for weights capped at 0.15 to sum to 1
    """
    # Convert up front so DataFrames are indexed by position rather than by label
    cov_matrix = np.asarray(cov_matrix, dtype=float)
    if cov_matrix.ndim != 2 or cov_matrix.shape[0] != cov_matrix.shape[1] or cov_matrix.shape[0] == 0:
        raise ValueError("cov_matrix must be a non-empty square 2D matrix")

    num_assets = cov_matrix.shape[0]
    initial_weight = [1/num_assets] * num_assets # The initial guess of the weights

    # Weights are fractions of a portfolio worth 1, so the portfolio value is not needed.
    # Parenthesised explicitly: (100/(2n))% == 1/(2n), NOT 100/2*n.
    min_weight = 1 / (2 * num_assets)
    max_weight = 0.15

    if num_assets * max_weight < 1:
        raise ValueError(
            f"{num_assets} assets capped at {max_weight} each cannot sum to a weight of 1"
        )

    constraint = {
        'type':'eq', # Constraint type is equality
        'fun': lambda w: np.sum(w) - 1 # The weights must sum to 1
        }

    weight_bounds = [(min_weight, max_weight)] * num_assets

    # Minimize port_variance from the initial guess with SLSQP, holding cov_matrix fixed,
    # subject to the per-asset bounds and the sum-to-1 constraint
    result = minimize(fun=port_variance, x0=initial_weight, args=(cov_matrix,), method='SLSQP', bounds=weight_bounds, constraints=constraint)
    return result.fun, result.x

KeyError: 1

In [None]:
# Code that creates a portfolio that matches the requirements

# Determines all the indexes in a portfolio that are large cap stocks
def is_lg_cap(portfolio):
    """
    :param portfolio: A list of stock records, each exposing a 'market_cap' entry
    :return: list of the positions in portfolio whose market cap is above 10 billion
    """
    return [
        position
        for position, stock in enumerate(portfolio)
        if stock['market_cap'] > 10_000_000_000
    ]

# Determines all the indexes in a portfolio that are small cap stocks
def is_sm_cap(portfolio):
    """
    :param portfolio: A list of stock records, each exposing a 'market_cap' entry
    :return: list of the positions in portfolio whose market cap is below 2 billion
    """
    return [
        position
        for position, stock in enumerate(portfolio)
        if stock['market_cap'] < 2_000_000_000
    ]

# Determines if an individual stock is a large market cap
def is_lg(row):
    """
    :param row: a stock record exposing a 'market_cap' entry
    :return: True when the market cap is strictly above 10 billion
    """
    large_cap_floor = 10_000_000_000
    market_cap = row['market_cap']
    return market_cap > large_cap_floor

# Determines if an individual stock is a small market cap
def is_sm(row):
    """
    :param row: a stock record exposing a 'market_cap' entry
    :return: True when the market cap is strictly below 2 billion
    """
    small_cap_ceiling = 2_000_000_000
    market_cap = row['market_cap']
    return market_cap < small_cap_ceiling

# Determines the index of the least important stock. Least important in this case is the one with the lowest weighting in the ordered list, but
# if the least important is either the ONLY SMALL or LARGE cap stock then the second least important stock is now designated the least important
def find_least_imp(portfolio, lg_indxs, sm_indxs):
    """
    Pick the position of the stock that may be replaced: the last position in the
    portfolio that is not the sole large cap or the sole small cap holding.

    :param portfolio: A list of stock data in Series format
    :param lg_indxs: A list of all indexes that hold large cap stocks
    :param sm_indxs: A list of all indexes that hold small cap stocks
    :return: integer that represents the index that is designated least important,
             or None when every position is protected
    """
    # A cap category is protected only when exactly one holding provides it
    protected = set()
    if len(lg_indxs) == 1:
        protected.add(lg_indxs[0])
    if len(sm_indxs) == 1:
        protected.add(sm_indxs[0])

    # Walk from the end of the portfolio towards the front
    for position in reversed(range(len(portfolio))):
        if position not in protected:
            return position
    return None

# Creates a portfolio that is valid 
def create_portfolio(size):
    """
    Builds a portfolio of `size` stocks from ordered_info_df.

    Stocks are taken from the top of ordered_info_df downwards, skipping any stock
    whose sector already has int(size * 0.4) holdings. If the result holds no large
    cap (or no small cap) stock, the least important holding (see find_least_imp) is
    replaced by the highest-ranked stock not already selected that has the missing
    cap size and still respects the sector limit. Each repair searches the whole
    ranking, so a small cap passed over while looking for a large cap is still
    considered.

    :param size: the size of the portfolio
    :return: a list of Series of stock data that represents the portfolio
    :raises IndexError: if ordered_info_df does not contain enough eligible stocks
                        to fill the portfolio or to supply a missing cap size
    """

    portfolio = []
    port_sectors = []
    used_rows = set() # positions in ordered_info_df that have been put in the portfolio
    max_sector_num = int(size * 0.4)
    num_candidates = len(ordered_info_df)

    # Creates preliminary portfolio
    i = 0
    while len(portfolio) < size:
        if i >= num_candidates:
            raise IndexError(
                f"Only {len(portfolio)} of {size} stocks could be selected from "
                f"{num_candidates} candidates with at most {max_sector_num} per sector"
            )
        cur = ordered_info_df.iloc[i]
        cur_sector = cur['sector']

        # Ensures that no sector is above max weight
        if port_sectors.count(cur_sector) < max_sector_num:
            portfolio.append(cur)
            port_sectors.append(cur_sector)
            used_rows.add(i)
        i += 1

    def swap_in(qualifies, label):
        # Replaces the least important holding with the best-ranked unused stock that qualifies
        replaced = find_least_imp(portfolio, is_lg_cap(portfolio), is_sm_cap(portfolio))
        if replaced is None:
            raise IndexError(f"No holding can be replaced to add a {label} cap stock")

        # Sector counts as they would be once the replaced holding is removed
        remaining_sectors = port_sectors[:replaced] + port_sectors[replaced + 1:]

        for row in range(num_candidates):
            if row in used_rows:
                continue
            candidate = ordered_info_df.iloc[row]
            candidate_sector = candidate['sector']
            if qualifies(candidate) and remaining_sectors.count(candidate_sector) < max_sector_num:
                portfolio[replaced] = candidate
                port_sectors[replaced] = candidate_sector
                used_rows.add(row)
                return
        raise IndexError(f"No eligible {label} cap stock is available to add to the portfolio")

    # Fixes the no large market cap issue
    if len(is_lg_cap(portfolio)) == 0:
        swap_in(is_lg, 'large')

    # Fixes the no small market cap issue (find_least_imp protects a sole large cap holding)
    if len(is_sm_cap(portfolio)) == 0:
        swap_in(is_sm, 'small')

    return portfolio

In [7]:

# Scratch check: print the currency yfinance reports for AAPL via fast_info (needs network access).
ticker = yf.Ticker("AAPL")
print(ticker.fast_info.currency)

USD


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here. 