# Portfolio Project: Safety Option

**Names:** Deniz Makul, Richard Sha, Mitchel Shen

**Due:** 11/25/2022 at 11:59pm ET on Dropbox

**What to submit:** your .ipynb (python code in jupyter notebooks) file as well as a pdf with all the output from your code.  




In [2]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from threading import Thread
import time

In [3]:
#Getting tickers from csv file
#The file is read without a header row (header = None), so the ticker symbols are in column 0
csv_tickers = pd.read_csv('Tickers_Example.csv', header = None)

In [4]:
#Setting start date and end date for volume filtering, uses 11-01 to get October as the last month
#(the end date itself is treated as not being part of the window)
volume_start_date = '2022-01-01'
volume_end_date = '2022-11-01'

#Minimum number of trading days a month needs in order to count towards the average monthly volume
MIN_TRADING_DAYS_PER_MONTH = 20

def remove_month(ticker):
    """Return the months of the volume window in which the stock traded on fewer than 20 days.

    ticker: object exposing a yfinance-style history(start=..., end=...) method whose result
            is indexed by trading date (one row per trading day).

    Returns a list of 'YYYY-MM' strings in chronological order; the list is empty when every
    month of the window has at least 20 trading days.
    """
    hist = ticker.history(start = volume_start_date, end = volume_end_date)

    #The months to check are derived from the window itself instead of being hard-coded, so changing
    #volume_start_date/volume_end_date cannot leave this function checking the wrong months.
    #One day is subtracted because the end date is not part of the window.
    last_day = pd.Timestamp(volume_end_date) - pd.Timedelta(days = 1)
    months = pd.period_range(start = volume_start_date, end = last_day, freq = 'M')

    months_to_remove = []
    for month in months:
        label = month.strftime('%Y-%m')
        #Rows whose date (as text) contains 'YYYY-MM', i.e. the trading days of that month
        days_in_month = hist.filter(like = label, axis = 0)

        #Fewer rows than the minimum means fewer trading days than the minimum for that stock
        if len(days_in_month.index) < MIN_TRADING_DAYS_PER_MONTH:
            months_to_remove.append(label)
    return months_to_remove

In [5]:
#list to store which stocks to remove (based on indicies, i.e. row labels of csv_tickers)
volume_index_drop = []

#Filter function, run once per ticker in its own thread to speed up the process
def filter_stocks(ticker, i):
    """Append i to volume_index_drop when the stock at row i of csv_tickers must be removed.

    ticker: ticker symbol to look up with yfinance.
    i:      row label of that ticker in csv_tickers.

    A stock is flagged when
      * yfinance returns no monthly history for the volume window (treated as delisted),
      * its average monthly volume, taken over the months with at least 20 trading days,
        is below 200000 or cannot be computed because no month qualifies, or
      * its info['market'] is not 'us_market'.
    Returns None; the result is the side effect on volume_index_drop.
    """
    stock_data = yf.Ticker(ticker)
    #The former `Threads = True` argument was removed: it is not a documented Ticker.history option
    #(the concurrency here comes from the Thread objects that call this function)
    stock_hist = stock_data.history(start = volume_start_date, end = volume_end_date, interval = '1mo')

    #If no data exists for the window, then the stock has been delisted, so we remove that stock
    if stock_hist.empty:
        volume_index_drop.append(i)
        return

    stock_volume = stock_hist['Volume'].dropna()

    #Leaves out the months with less than 20 trading days. The rows are matched on their 'YYYY-MM'
    #label instead of being dropped by label, so a short month that has no monthly row at all
    #can no longer raise a KeyError inside the thread and leave the stock unfiltered
    short_months = remove_month(stock_data)
    in_short_month = stock_volume.index.strftime('%Y-%m').isin(short_months)
    stock_volume = stock_volume[~in_short_month]

    mean = stock_volume.mean()

    #mean is NaN when no month qualifies; NaN < 200000 is False, so it is tested explicitly:
    #a stock with no qualifying month has not shown the required volume and is removed as well
    if pd.isna(mean) or mean < 200000:
        volume_index_drop.append(i)
    #stocks that are not listed on the US market are removed too
    elif stock_data.info['market'] != 'us_market':
        volume_index_drop.append(i)

In [6]:
#Screens every ticker with filter_stocks, one thread per row of csv_tickers.
#The rows that fail the screen are collected in volume_index_drop.

# thread_list contains all the threads
thread_list = [
    Thread(target = filter_stocks, args = (csv_tickers[0][row], row))
    for row in range(len(csv_tickers))
]

#starts the threading
for worker in thread_list:
    worker.start()

#waits until every thread is done, so that volume_index_drop is complete
for worker in thread_list:
    worker.join()


- AGN: No data found, symbol may be delisted
- CELG: No data found, symbol may be delisted
- PCLN: No data found for this date range, symbol may be delisted
- RTN: No data found, symbol may be delisted
- TWX: No data found for this date range, symbol may be delisted


In [7]:
#Removes the rows listed in volume_index_drop and renumbers the remaining tickers from 0.
#The listed indices refer to the rows as they were when the filter ran, so this cell is meant
#to be run once after the filter cell.
csv_tickers = csv_tickers.drop(index = volume_index_drop).reset_index(drop = True)
csv_tickers.head()

Unnamed: 0,0
0,AAPL
1,ABBV
2,ABT
3,ACN
4,AIG


In [8]:
#Daily close prices, one column per ticker; the columns are added by stock_history
close_all = pd.DataFrame()
#Date on which the notebook is run, as 'YYYY-MM-DD'; used as the end of every price request
close_date = datetime.today().strftime('%Y-%m-%d')

def stock_history(ticker):
    """Store the daily close prices of `ticker` in close_all[ticker].

    The prices are requested from yfinance with start '2022-01-01', end close_date and a
    daily interval; missing closes are dropped before the column is stored.
    """
    daily = yf.Ticker(ticker).history(start = '2022-01-01', end = close_date, interval = '1d')
    close_all[ticker] = daily['Close'].dropna()

In [9]:
#Fills close_all with the close prices of every ticker that passed the filter
for ticker in csv_tickers[0]:
    stock_history(ticker)

close_all.head()

Unnamed: 0_level_0,AAPL,ABBV,ABT,ACN,AIG,AMZN,AXP,BA,BAC,BIIB,...,SLB,SO,SPG,T,TGT,TXN,UNH,UNP,UPS,USB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-03,180.959732,130.341797,136.745361,401.729431,56.390324,170.404495,166.056244,207.860001,45.393047,244.139999,...,31.361927,65.57843,153.253525,17.874916,227.143723,185.358856,497.45755,243.742813,206.66069,55.597622
2022-01-04,178.663071,130.091568,133.529343,398.858582,57.972286,167.522003,171.3871,213.630005,47.172207,241.729996,...,32.884541,65.57843,155.815704,18.022528,229.317719,185.55336,486.186798,247.933548,210.329269,57.053822
2022-01-05,173.91066,130.774918,132.929413,391.834412,57.176388,164.356995,169.560791,213.070007,46.376007,239.270004,...,32.884541,65.65538,153.951431,18.423185,224.117737,181.6828,484.988434,248.681198,207.686722,56.752872
2022-01-06,171.007507,130.15892,132.909729,372.912567,58.158974,163.253998,171.114563,211.339996,47.309818,237.300003,...,33.665623,65.57843,155.987823,18.352894,226.556168,181.624435,465.140808,249.369827,209.700089,58.29644
2022-01-07,171.176514,129.822067,133.3228,365.760132,59.681973,162.554001,172.579315,215.5,48.341923,232.600006,...,34.63456,66.030563,154.180893,18.979887,225.997971,174.505737,454.19693,250.638855,211.171417,59.2187


In [10]:
#Daily change of each stock in percent (the first row has no previous close, so it is NaN)
close_all_pct = close_all.pct_change().mul(100)
close_all_pct.head()

Unnamed: 0_level_0,AAPL,ABBV,ABT,ACN,AIG,AMZN,AXP,BA,BAC,BIIB,...,SLB,SO,SPG,T,TGT,TXN,UNH,UNP,UPS,USB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-03,,,,,,,,,,,...,,,,,,,,,,
2022-01-04,-1.269156,-0.191979,-2.35183,-0.714623,2.80538,-1.691559,3.210272,2.775909,3.919454,-0.98714,...,4.854974,0.0,1.671857,0.825803,0.957101,0.104934,-2.265671,1.719327,1.77517,2.619176
2022-01-05,-2.659985,0.525284,-0.449287,-1.761068,-1.372895,-1.889309,-1.065605,-0.262134,-1.687858,-1.017661,...,0.0,0.117341,-1.19646,2.223094,-2.267588,-2.085955,-0.246482,0.301553,-1.256386,-0.527483
2022-01-06,-1.669336,-0.471036,-0.014808,-4.829041,1.718517,-0.671098,0.916351,-0.811945,2.013565,-0.823338,...,2.375226,-0.117203,1.32275,-0.381538,1.088013,-0.032125,-4.092392,0.276912,0.969425,2.719805
2022-01-07,0.09883,-0.258801,0.31079,-1.917993,2.618682,-0.428778,0.856007,1.968394,2.181586,-1.980614,...,2.87812,0.689454,-1.158379,3.416318,-0.246384,-3.919461,-2.35281,0.508894,0.701635,1.582018


In [11]:
#Gets the expected return of each stock: the mean of its daily percentage changes over the
#downloaded price history (from 2022-01-01 up to the date the notebook is run, see stock_history)
expected_return = close_all_pct.mean()
expected_return.head()

AAPL   -0.054789
ABBV    0.100161
ABT    -0.098412
ACN    -0.116549
AIG     0.057415
dtype: float64

In [12]:
#Distance of each expected return from 0 (its absolute value)
expected_return_from_0 = expected_return.abs()

#(ticker, distance) pairs ordered from the smallest distance to the largest
expected_return_sorted = sorted(expected_return_from_0.items(), key=lambda pair: pair[1])

In [13]:
#Standard deviation of the daily percentage changes of each stock, as (ticker, std) pairs
#ordered from the least volatile stock to the most volatile one
std_lst = sorted(close_all_pct.std().items(), key=lambda pair: pair[1])

In [14]:
#Correlation between the daily percentage changes of every pair of stocks.
#DataFrame.corr() returns the whole ticker-by-ticker matrix in one call (rows and columns are the
#columns of close_all_pct, the diagonal is each stock against itself), so there is no need to
#build a temporary two-column dataframe for every pair of tickers.
correlation_pct_stocks = close_all_pct.corr()
correlation_pct_stocks.head()

Unnamed: 0,AAPL,ABBV,ABT,ACN,AIG,AMZN,AXP,BA,BAC,BIIB,...,SLB,SO,SPG,T,TGT,TXN,UNH,UNP,UPS,USB
AAPL,1.0,0.222581,0.61935,0.798654,0.554765,0.691117,0.653674,0.537584,0.570125,0.29734,...,0.206058,0.394055,0.596477,0.396791,0.521212,0.734527,0.481119,0.482695,0.563516,0.484424
ABBV,0.222581,1.0,0.457485,0.328319,0.313107,0.208196,0.280862,0.115592,0.302678,0.237922,...,0.178286,0.377696,0.275195,0.271573,0.266804,0.254632,0.498843,0.251951,0.234951,0.392162
ABT,0.61935,0.457485,1.0,0.696369,0.536109,0.504969,0.598784,0.430043,0.580337,0.325455,...,0.117437,0.459859,0.535696,0.381756,0.406746,0.545282,0.558016,0.497194,0.491663,0.566946
ACN,0.798654,0.328319,0.696369,1.0,0.591425,0.694861,0.668331,0.50678,0.592195,0.374402,...,0.197518,0.416222,0.633159,0.375827,0.525819,0.738584,0.51851,0.555304,0.595139,0.539502
AIG,0.554765,0.313107,0.536109,0.591425,1.0,0.458671,0.730973,0.542381,0.761023,0.247939,...,0.389725,0.292853,0.605094,0.377248,0.337922,0.494066,0.385524,0.503319,0.519038,0.728714


In [15]:
#Average correlation of each stock with the other stocks.
#The column sum includes the stock's correlation with itself, which is 1 (such as AAPL's
#correlation with AAPL), so 1 is subtracted from the sum and the result is divided by the
#number of stocks minus one.
correlation_pct_mean = (correlation_pct_stocks.sum() - 1)/(len(correlation_pct_stocks) - 1)

correlation_pct_mean.head()

AAPL    0.500083
ABBV    0.297329
ABT     0.474111
ACN     0.521697
AIG     0.471928
dtype: float64

In [16]:
#(ticker, |average correlation|) pairs ordered from the value closest to 0 (least correlation)
#to the value furthest from 0
correlation_from_0 = sorted(correlation_pct_mean.abs().items(), key=lambda pair: pair[1])

In [17]:
#Creates points system: one column per category, each listing the tickers in ranked order
#(first entry = smallest value in that category)
expected_lst = [ticker for ticker, _ in expected_return_sorted]
std_deviation_lst = [ticker for ticker, _ in std_lst]
correlation_lst = [ticker for ticker, _ in correlation_from_0]

points_system = pd.DataFrame({
    'Expected Returns': expected_lst,
    'Standard Deviation': std_deviation_lst,
    'Correlation': correlation_lst,
})

#Row labels become the points: they are shifted to start at 1 and then reversed, so the first
#row gets the highest label (the number of stocks) and the last row gets 1
points_system.index += 1
points_system = points_system.set_index([points_system.index[::-1]])

points_system.head()

Unnamed: 0,Expected Returns,Standard Deviation,Correlation
55,ORCL,MON,MON
54,COST,BMY,LMT
53,NEE,PEP,OXY
52,TXN,KO,SLB
51,CVS,MRK,BMY


In [18]:
#Score dataframe keeps track of the points accumulated by a stock in all three categories
#(expected returns, standard deviation, correlation)
score = pd.DataFrame({'Tickers': expected_lst})

#For each category, the points of a ticker are the row label of points_system at which the ticker
#appears in that category's column. A ticker -> points lookup is built per category and applied
#to the whole 'Tickers' column at once; this replaces the element-wise chained assignment
#score['Points'][i] = ..., which does not write into score when pandas Copy-on-Write is active,
#and it keeps 'Points' an integer column.
score['Points'] = sum(
    score['Tickers'].map(
        pd.Series(points_system.index.to_numpy(), index = points_system[category].to_numpy())
    )
    for category in ['Expected Returns', 'Standard Deviation', 'Correlation']
)

#Highest score first; the stable sort makes the order of equal scores deterministic
#(they keep their relative order from expected_lst)
score = score.sort_values('Points', ascending = False, kind = 'stable').reset_index(drop = True)
#Row labels 1..n are the final ranking
score.index += 1

score.head()

Unnamed: 0,Tickers,Points
1,MON,157
2,MO,139
3,SO,136
4,CL,134
5,PG,130


In [19]:
number_of_stocks = 25

#Dataframe of the top stocks to pick. score is ordered from the highest score to the lowest and
#labelled 1..n, so the first number_of_stocks rows are the ones to keep; when there are fewer
#valid tickers than that, head() returns all of them and score_dropped is a copy of score
score_dropped = score.head(number_of_stocks).copy()
score_dropped.head()

Unnamed: 0,Tickers,Points
1,MON,157
2,MO,139
3,SO,136
4,CL,134
5,PG,130


In [20]:
#Number of stocks actually held. This can be smaller than number_of_stocks when there are not
#enough valid tickers, so the weightings are based on it; using number_of_stocks in that case
#would make the total weightings add up to less than 100%
stocks_held = len(score_dropped)

#Minimum weighting possible for any given stock, 100/(2n) percent for n stocks held, so that there
#is no case where the weighting of one stock drops below the minimum weighting requirement
min_weighting = 100/(2*stocks_held) if stocks_held else 0
#Part of the portfolio (in percent) that is spread evenly over the stocks
base_weighting_total = min_weighting * stocks_held

#Gets the finalized weightings of each stock
score_dropped['Base Weighting'] = min_weighting
weighting_sum = score_dropped['Points'].sum()

#The rest of the portfolio is split between the stocks in proportion to their points
score_dropped['Points-based Weighting'] = (score_dropped['Points']/weighting_sum) * (100 - base_weighting_total)
score_dropped['Total Weighting'] = score_dropped['Base Weighting'] + score_dropped['Points-based Weighting']
score_dropped.head()

Unnamed: 0,Tickers,Points,Base Weighting,Points-based Weighting,Total Weighting
1,MON,157,2.0,2.745715,4.745715
2,MO,139,2.0,2.43092,4.43092
3,SO,136,2.0,2.378454,4.378454
4,CL,134,2.0,2.343477,4.343477
5,PG,130,2.0,2.273522,4.273522


In [21]:
tickers_list = score_dropped['Tickers']

#Close prices are requested for the last five days. Five days incase the program is run on the
#weekend and Friday is a holiday
date_difference = 5
start_date = datetime.today() - timedelta(days=date_difference)

#One column of close prices per selected ticker, from start_date with end = close_date
price_last_five_days = pd.DataFrame()
for symbol in tickers_list:
    recent = yf.Ticker(symbol).history(start = start_date, end = close_date)
    price_last_five_days[symbol] = recent['Close']

#After transposing there is one row per ticker and one column per date, so the last column
#holds the most recent close in the window
price_last_five_days = price_last_five_days.transpose()
Portfolio_Final = price_last_five_days.iloc[:,-1].reset_index()
#Row labels start at 1, like the ranking in score_dropped
Portfolio_Final.index += 1

Portfolio_Final.head()

Unnamed: 0,index,2022-11-23 00:00:00
1,MON,10.01
2,MO,45.02
3,SO,66.360001
4,CL,76.93
5,PG,146.449997


In [22]:
total_amount_to_spend = 500000

#To reset column names (ticker symbol and latest close price)
column_names = ['Ticker', 'Price']
Portfolio_Final.columns = column_names

#Weighting of each stock in percent; its row labels (1..n) line up with those of Portfolio_Final
weight_pct = score_dropped['Total Weighting']

#Getting the rest of the columns for the Portfolio_Final dataframe:
#shares bought with each stock's part of the money, their value, and the weighting itself
Portfolio_Final['Shares'] = (total_amount_to_spend*(weight_pct/100))/Portfolio_Final['Price']
Portfolio_Final['Value'] = Portfolio_Final['Shares'] * Portfolio_Final['Price']
Portfolio_Final['Weight'] = weight_pct
Portfolio_Final


Unnamed: 0,Ticker,Price,Shares,Value,Weight
1,MON,10.01,2370.487101,23728.576425,4.745715
2,MO,45.02,492.105715,22154.59951,4.43092
3,SO,66.360001,329.901595,21892.270024,4.378454
4,CL,76.93,282.30058,21717.383701,4.343477
5,PG,146.449997,145.9038,21367.611053,4.273522
6,PM,98.110001,215.118555,21105.281567,4.221056
7,CVS,100.059998,209.17845,20930.395243,4.186079
8,PFE,48.849998,424.882489,20755.508919,4.151102
9,NEE,84.919998,242.353074,20580.622595,4.116125
10,BMY,79.139999,256.738606,20318.293109,4.063659


In [23]:
#Generating Stocks_Final portfolio: only the ticker and the number of shares of each stock
Stocks_Final = Portfolio_Final[['Ticker', 'Shares']].copy()

Stocks_Final

Unnamed: 0,Ticker,Shares
1,MON,2370.487101
2,MO,492.105715
3,SO,329.901595
4,CL,282.30058
5,PG,145.9038
6,PM,215.118555
7,CVS,209.17845
8,PFE,424.882489
9,NEE,242.353074
10,BMY,256.738606


In [26]:
#Writing to csv file
#(to_csv writes the row labels by default, so the file starts with an unnamed column holding 1..n)
Stocks_Final.to_csv("Stocks_Group_08.csv")