In [1]:
## Authors: Tong Liu

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
startdate = '2023-01-01'
enddate = '2023-10-31'

ticker_file = 'Tickers_Example.csv' 
tickers = pd.read_csv(ticker_file, header=None).values.flatten().tolist() 

#currency conversion
CAD_transfer = yf.Ticker('CADUSD=x')
transferRate = CAD_transfer.history(start=startdate, end=enddate)
CAD_to_USD = transferRate[['Close']]
CAD_to_USD.index = CAD_to_USD.index.strftime('%Y-%m-%d')
    
def currency_exchange(base, currency):
    base.index = base.index.strftime('%Y-%m-%d')
    if currency == 'USD':
        exchange_rates = pd.DataFrame()
        exchange_rates['Close'] = base['Close']/CAD_to_USD['Close']
        exchange_rates = exchange_rates.dropna()
        return exchange_rates
    else:
        return base

msft = yf.Ticker('MSFT')
hist = msft.history(start=startdate, end=enddate)
currency_exchange(hist[['Close']], 'USD')

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-01-03,321.991489
2023-01-04,310.520276
2023-01-05,297.152436
2023-01-06,302.322177
2023-01-09,302.458393
...,...
2023-10-24,451.529563
2023-10-25,467.186900
2023-10-26,451.666606
2023-10-27,454.676707


In [3]:
def is_valid_currency(ticker):
    stock_info = yf.Ticker(ticker).info
    currency = stock_info.get('currency', '').upper() # retrieving the stock currency
    return currency == 'USD' or currency == 'CAD' # returns T/F based on currency

In [4]:
def get_stock_data(ticker):
    # get stock info
    stock_info = yf.Ticker(ticker).info
    
    # check if the currency is valid
    if is_valid_currency(ticker):
        # download historical data
        data = yf.download(ticker, start=startdate, end=enddate)
        
        # returning daily volume
        monthly_volume = data['Volume'].resample('D').sum()
        
        #turning American stocks to CAD$
        stock_price = currency_exchange(data[['Close']], stock_info['currency'])
        
        # create a dictionary for the ticker
        result = {
            'ticker': ticker,
            'info': stock_info,
            'close': stock_price,
            'volume': pd.DataFrame(monthly_volume)
        }
        return result
    else:
        print(f"Skipping {ticker}: Invalid currency.")
        return None

# accumulates a list of dictionaries through get_stock_data    
def process_tickers(ticker_list):
    result_list = []
    for ticker in ticker_list:
        stock_data = get_stock_data(ticker)
        if stock_data is not None:
            result_list.append(stock_data)
    return result_list

result_list = process_tickers(tickers)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
Skipping AGN: Invalid currency.
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*

In [5]:
def filter_stocks(result_list, min_avg_volume=150000, min_trading_days=18):
    filtered_list = []

    for stock_data in result_list:
        ticker = stock_data['ticker']
        volume_data = stock_data['volume']

        # calculate average monthly volume
        monthly_avg_volume = {}
        for date, volume in volume_data.iterrows():
            month = date.strftime('%Y-%m')
            if month not in monthly_avg_volume:
                monthly_avg_volume[month] = {'total_volume': 0, 'days_counted': 0}
            if volume['Volume'] > 0 or monthly_avg_volume[month]['days_counted'] < min_trading_days:
                monthly_avg_volume[month]['total_volume'] += volume['Volume']
                monthly_avg_volume[month]['days_counted'] += 1

        # remove stocks with less than min_trading_days in any month or average volume less than min_avg_volume
        valid_months = [month for month, data in monthly_avg_volume.items() if data['days_counted'] >= min_trading_days]
        avg_monthly_volume = sum(monthly_avg_volume[month]['total_volume'] / data['days_counted'] for month, data in monthly_avg_volume.items() if data['days_counted'] > 0) / len(valid_months) if valid_months else 0

        if avg_monthly_volume >= min_avg_volume:
            filtered_list.append(stock_data)

    return filtered_list

filtered_list = filter_stocks(result_list)
filtered_list

[{'ticker': 'AAPL',
  'info': {'address1': 'One Apple Park Way',
   'city': 'Cupertino',
   'state': 'CA',
   'zip': '95014',
   'country': 'United States',
   'phone': '408 996 1010',
   'website': 'https://www.apple.com',
   'industry': 'Consumer Electronics',
   'industryKey': 'consumer-electronics',
   'industryDisp': 'Consumer Electronics',
   'sector': 'Technology',
   'sectorKey': 'technology',
   'sectorDisp': 'Technology',
   'longBusinessSummary': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digi

In [6]:
#returns a list of industries 
def return_industry(tickers):
    industries = []
    for i in range (len (tickers)):
        stock = tickers[i]
        sector = stock["info"]['sector']
        industries.append(sector)
    return industries

list_of_industries = return_industry(filtered_list)

#returns a dictionary of sectors and how many companies are in the sector from a list of industries
def industry_count(industry_list):
    industry_dict={}
    while len(industry_list) > 0:
        industry_dict[industry_list[0]] = industry_list.count(industry_list[0])
        industry_list = remove_items(industry_list, industry_list[0])
    return industry_dict

#Used to remove existing industries
def remove_items(lst, item): 
    resulting_list = [i for i in lst if i != item] 
    return resulting_list

count_of_industries = industry_count(list_of_industries)

#Sorts the dictionary of industries by descending number of stocks in the industry
sorted_dict_of_industries = sorted(count_of_industries.items(),key=lambda x:x[1], reverse = True)

In [7]:
#creating a dictionary of stocks within the given industry
def stocks_in_industry (industry_dict):
    industry_portfolio = {}
    for i in range (len (industry_dict)):
        industry_portfolio[(industry_dict[i][0])] = stock_grouper(filtered_list, (industry_dict[i][0]))
    return industry_portfolio

#finds all stocks in the given industry and returns it in a list
def stock_grouper (stock_list, industry):
    stocks = []
    for i in range (len (stock_list)):
        stock = stock_list[i]
        sector = stock["info"]['sector']
        if sector == industry:
            stocks.append(stock)
    return stocks
            
stocks_in_industry_list = stocks_in_industry(sorted_dict_of_industries)      

stocks_in_industry_list

{'Financial Services': [{'ticker': 'AIG',
   'info': {'address1': '1271 Avenue of the Americas',
    'city': 'New York',
    'state': 'NY',
    'zip': '10020',
    'country': 'United States',
    'phone': '212 770 7000',
    'website': 'https://www.aig.com',
    'industry': 'Insurance - Diversified',
    'industryKey': 'insurance-diversified',
    'industryDisp': 'Insurance - Diversified',
    'sector': 'Financial Services',
    'sectorKey': 'financial-services',
    'sectorDisp': 'Financial Services',
    'longBusinessSummary': "American International Group, Inc. offers insurance products for commercial, institutional, and individual customers in North America and internationally. It operates through General Insurance, and Life and Retirement segments. The General Insurance segment provides commercial and industrial property insurance, including business interruption and package insurance that cover exposure to made and natural disasters; general liability, environmental, commercial a

For each sector, we calculate its beta with the "market"

In [8]:
# calculates the beta of a list of tickers (relative to a market index)
# assumes that the stocks are all denominated in the same currency as the market index
# For our purposes, we will use daily data, since we want to assess how stocks perform relative to the market in
# the short term (we are only investing for 5 days).
# market data and stock data should have the same period and interval for consistency
def calculate_beta(market_data, stock_dict):
    # calculate market index returns
    returns = market_data
    returns = returns.rename(columns={'Close': 'Market'})
    returns = returns.pct_change()
    returns.drop(index=returns.index[0], inplace=True)
    betas = []
    
    # calculate returns for the stock and adds it to the overall dataframe
    stockReturns = stock_dict['close']
    stockReturns = stockReturns.rename(columns={'Close': stock_dict['ticker']})
    stockReturns = stockReturns.pct_change()
    stockReturns.drop(index=stockReturns.index[0], inplace=True)
    returns = returns.merge(stockReturns,left_index=True,right_index=True)
    
    # calculates covariance and market variance
    returnsCov = returns.cov()
    var = returns['Market'].var()
    beta = returnsCov/var
    
    return beta.iat[1,0]
    

In [30]:
# aggregate score for each industry is initially zero
industry_scores = {}
for sector in stocks_in_industry_list:
    industry_scores[sector] = 0

In [31]:
# get market data (we are using s&p 500)
market_data = yf.Ticker('^GSPC').history(start=startdate, end=enddate)[['Close']]#,period='1y',interval='1d'
market_data.index = market_data.index.strftime('%Y-%m-%d')

# consider beta for each industry (relative to s&p 500)
for sector in stocks_in_industry_list:
    stock_lst = stocks_in_industry_list[sector]
    beta_avg = 0
    for stock in stock_lst:
        ourbeta = calculate_beta(market_data,stock)
        beta_avg += ourbeta
    beta_avg /= len(stock_lst)
    industry_scores[sector] += beta_avg*0.3
print(industry_scores)

{'Financial Services': 0.3097982817831833, 'Healthcare': 0.10364006639539348, 'Consumer Defensive': 0.10043489741338855, 'Technology': 0.39960448181256747, 'Industrials': 0.24659159472199643, 'Consumer Cyclical': 0.46417170218430726, 'Communication Services': 0.11725599976322461}


In [32]:
# consider standard deviations for each industry
for sector in stocks_in_industry_list:
    stock_lst = stocks_in_industry_list[sector]
    std_avg = 0
    for stock in stock_lst:
        close_pct = stock['close'].pct_change()
        close_pct.drop(index=close_pct.index[0], inplace=True)
        std_avg += close_pct['Close'].std()
    std_avg /= len(stock_lst)
    industry_scores[sector] += std_avg*20
print(industry_scores)

{'Financial Services': 0.6416282269993216, 'Healthcare': 0.3814143955100109, 'Consumer Defensive': 0.31290079587900316, 'Technology': 0.7983445365110385, 'Industrials': 0.564918083947961, 'Consumer Cyclical': 0.9028671143852839, 'Communication Services': 0.32757425876332313}


In [33]:
# consider 52 week range (high divided by low) for each industry
for sector in stocks_in_industry_list:
    stock_lst = stocks_in_industry_list[sector]
    hilo_avg = 0
    for stock in stock_lst:
        hilo_avg += stock['info']['fiftyTwoWeekHigh']/stock['info']['fiftyTwoWeekLow']
    hilo_avg /= len(stock_lst)
    industry_scores[sector] += hilo_avg*0.1
print(industry_scores)

{'Financial Services': 0.7856951923987333, 'Healthcare': 0.5325265866863294, 'Consumer Defensive': 0.435340023750353, 'Technology': 0.9570715569757586, 'Industrials': 0.7036242767754374, 'Consumer Cyclical': 1.08429901908871, 'Communication Services': 0.4653341831489564}


In [34]:
# Adjust industry score by number of stocks within
for sector in stocks_in_industry_list:
    stock_lst = stocks_in_industry_list[sector]
    industry_scores[sector] *= 1-2**(-len(stock_lst))
print(industry_scores)

{'Financial Services': 0.7849279119374064, 'Healthcare': 0.5304464047070859, 'Consumer Defensive': 0.42853783587925376, 'Technology': 0.9271630708202662, 'Industrials': 0.681636018126205, 'Consumer Cyclical': 0.542149509544355, 'Communication Services': 0.2326670915744782}


In [35]:
# Format and print final result

# convert industry_scores to a list so we can sort it
industry_scores_temp = []
for sector in industry_scores:
    industry_scores_temp.append((sector,industry_scores[sector]))
industry_scores = industry_scores_temp

from functools import cmp_to_key
# comparison function that sorts tuples by their second value in descending order
def sort_by_second_desc(a, b):
    if a[1] < b[1]:
        return 1
    elif a[1] > b[1]:
        return -1
    else:
        return 0
    
#sort industries by aggregate score in descending order
industry_scores.sort(key=cmp_to_key(sort_by_second_desc))
print(industry_scores)

[('Technology', 0.9271630708202662), ('Financial Services', 0.7849279119374064), ('Industrials', 0.681636018126205), ('Consumer Cyclical', 0.542149509544355), ('Healthcare', 0.5304464047070859), ('Consumer Defensive', 0.42853783587925376), ('Communication Services', 0.2326670915744782)]


In [40]:
# Plan: consider the most volatile industries first and assign each stock in those industries the highest weighting allowed
N_stocks = 10
principal = 750000

#stock_weightings[i] is the weight of the ith stock considered
stock_weightings = [0.2,0.2,0.2,0.1,0.05,0.05,0.05,0.05,0.05,0.05]
# a list of tuples, each tuple stores a ticker string and its corresponding portfolio weighting
stock_percentages = []
for industry in industry_scores:
    
    if len(stock_percentages) >= N_stocks:
        break

    industry_name = industry[0]
    print(industry_name)
    industry_score = industry[1]
    stock_lst = stocks_in_industry_list[industry_name]
    
    # as a tie breaker, we first consider the stocks within the current industry with the highest std
    stock_stds = []
    for stock in stock_lst:
        stock_returns = stock['close'].pct_change()
        stock_returns.drop(index=stock_returns.index[0], inplace=True)
        stock_stds.append((stock['ticker'],stock_returns['Close'].std()))
    stock_stds.sort(key=cmp_to_key(sort_by_second_desc))
    print(stock_stds)
    for stock in stock_stds:
        if len(stock_percentages) >= N_stocks:
            break
        stock_percentages.append((stock[0],stock_weightings[len(stock_percentages)]))

print(stock_percentages)

Portfolio_Final = {'Ticker': [],
                   'Price': [],
                   'Currency': [],
                   'Shares': [],
                   'Value': [],
                   'Weight': []
                  }

curr_cad_usd = yf.Ticker('CADUSD=x').info['previousClose']
for stock in stock_percentages:
    stock_info = yf.Ticker(stock[0]).info
    curr_price = stock_info['currentPrice']
    Portfolio_Final['Ticker'].append(stock[0])
    Portfolio_Final['Price'].append(curr_price)
    Portfolio_Final['Currency'].append(stock_info['currency'])
    num_shares = 0
    if stock_info['currency'] == 'USD':
        num_shares = principal*stock[1]/(curr_price/curr_cad_usd)
    else:
        num_shares = principal*stock[1]/curr_price
    Portfolio_Final['Shares'].append(num_shares)
    Portfolio_Final['Value'].append(principal*stock[1])
    Portfolio_Final['Weight'].append(stock[1])
    
Portfolio_Final = pd.DataFrame(data=Portfolio_Final,index=[(i+1) for i in range(N_stocks)])
Portfolio_Final

Technology
[('SHOP.TO', 0.03525100825025024), ('QCOM', 0.020696286868656356), ('TXN', 0.015591193442479334), ('ACN', 0.014876924598013269), ('AAPL', 0.013269600515218569)]
Financial Services
[('USB', 0.02556325465929087), ('PYPL', 0.02270225473418308), ('AIG', 0.017686861508759193), ('AXP', 0.017153095145726868), ('BAC', 0.016648149352155615), ('BK', 0.016242535803549337), ('C', 0.016224530521288704), ('BLK', 0.013482523539544603), ('TD.TO', 0.010949856846754842), ('RY.TO', 0.00926191049681605)]
[('SHOP.TO', 0.2), ('QCOM', 0.2), ('TXN', 0.2), ('ACN', 0.1), ('AAPL', 0.05), ('USB', 0.05), ('PYPL', 0.05), ('AIG', 0.05), ('AXP', 0.05), ('BAC', 0.05)]


Unnamed: 0,Ticker,Price,Currency,Shares,Value,Weight
1,SHOP.TO,96.26,CAD,1558.279659,150000.0,0.2
2,QCOM,127.75,USD,857.476497,150000.0,0.2
3,TXN,153.59,USD,713.214548,150000.0,0.2
4,ACN,334.04,USD,163.966325,75000.0,0.1
5,AAPL,189.97,USD,144.157791,37500.0,0.05
6,USB,37.2,USD,736.173538,37500.0,0.05
7,PYPL,55.76,USD,491.134427,37500.0,0.05
8,AIG,65.21,USD,419.960982,37500.0,0.05
9,AXP,164.42,USD,166.559151,37500.0,0.05
10,BAC,29.73,USD,921.145497,37500.0,0.05


In [None]:
#(TSX:SHOP)*1558.279659+(NASDAQ:QCOM)*857.476497*(FX_IDC:CADUSD)+(NASDAQ:TXN)*713.214548*(FX_IDC:CADUSD)+(NYSE:ACN)*163.966325*(FX_IDC:CADUSD)+(NASDAQ:AAPL)*144.157791*(FX_IDC:CADUSD)+(NYSE:USB)*736.173538*(FX_IDC:CADUSD)+(NASDAQ:PYPL)*491.134427+(NYSE:AIG)*419.960982*(FX_IDC:CADUSD)+(NYSE:AXP)*166.559151+(NYSE:BAC)*921.145497*(FX_IDC:CADUSD)

In [None]:
TSX:SHOP*1558.279659*(FX:USDCAD)+NASDAQ:QCOM*857.476497+NASDAQ:TXN*713.214548+NYSE:ACN*163.966325+NASDAQ:AAPL*144.157791+NYSE:USB*736.173538+NASDAQ:PYPL*491.134427+NYSE:AIG*419.960982+NYSE:AXP*166.559151+NYSE:BAC*921.145497