## Portfolio Development
March 28,2023
Shyam Parikh

In [7]:
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import scipy as stats
from datetime import datetime,timedelta
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from matplotlib.ticker import FuncFormatter
from pandas_datareader import data as pdr
from scipy.stats import t, laplace, expon, uniform, norm
import statsmodels.api as sm
import random
import bs4 as bs
import requests
from itertools import chain
import yahoo_fin.stock_info as si
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.metrics import silhouette_score
from statsmodels.tsa.stattools import coint
from scipy.stats import t
from scipy.stats import norm
import yfinance as yf
import pandas_datareader.data as web
from datetime import datetime
from functools import reduce


In [8]:

def rrsp500Portfolio(TAU):
    #These list of stocks represent the top 10 stocks in each of the sector ETFs.
    yf.pdr_override()
    #START_DATE = (datetime.today() - timedelta(days = TAU * 365)).strftime('%Y-%m-%d') # data start date
    #END_DATE = datetime.today().strftime('%Y-%m-%d') # data end date
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    counter = 0
    for row in table.findAll('tr')[1:]:
        ticker = str(row.findAll('td')[0].text[:-1])
        if("." in ticker):
            ticker = ticker.replace(".","-")
        tickers.append(ticker)
    START_DATE = (datetime.today() - timedelta(days = TAU * 365)).strftime('%Y-%m-%d') # data start date
    END_DATE = datetime.today().strftime('%Y-%m-%d') # data end date
    sp500List = pdr.get_data_yahoo(tickers, START_DATE, END_DATE)['Adj Close'] # retreiving asset price data from yahoo finance
    sp500LogRetList = np.log(sp500List).diff().dropna()
    sp500sum_stats = pd.DataFrame({'Mean':sp500LogRetList.mean(), 'SD': sp500LogRetList.std(), 'Skewness':sp500LogRetList.skew(), 'Excess Kurtosis':sp500LogRetList.kurtosis() , 'RR ratio':(sp500LogRetList.mean()/sp500LogRetList.std())})
    sp500sum_stats = sp500sum_stats.sort_values(by='RR ratio', ascending=False)
    listOfStocks = list(sp500sum_stats.index.values)
    return sp500sum_stats     
def get_stock(ticker,TAU):
    START_DATE = (datetime.today() - timedelta(days = TAU * 365)).strftime('%Y-%m-%d') # data start date
    END_DATE = datetime.today().strftime('%Y-%m-%d') # data end date
    data = pdr.get_data_yahoo(f"{ticker}",START_DATE,END_DATE)
    data[f'{ticker}'] = data["Close"]
    data = data[[f'{ticker}']] 
    print(data.head())
    return data 

def combine_stocks(tickers):
    data_frames = []
    for i in tickers:
        data_frames.append(get_stock(i))
        
    df_merged = reduce(lambda  left,right: pd.merge(left,right,on=['Date'], how='outer'), data_frames)
    print(df_merged.head())
    return df_merged

In [9]:
newPortfolio = rrsp500Portfolio(3)
finalList = list(newPortfolio[:10].index)
portfolio = combine_stocks(finalList,5)

[*********************100%***********************]  503 of 503 completed


  result = func(self.values, **kwargs)


TypeError: combine_stocks() takes 1 positional argument but 2 were given

In [None]:
#Using Mean Variance Optimization to calculate the Ideal diversiification of Assets
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices


mu = mean_historical_return(portfolio)
S = CovarianceShrinkage(portfolio).ledoit_wolf()

ef = EfficientFrontier(mu, S)
weights = ef.max_sharpe()

cleaned_weights = ef.clean_weights()
print(dict(cleaned_weights))
ef.portfolio_performance(verbose=True)
##Final Stocks, LLY, COP,ANET, AND ENGH

{'GE': 0.0, 'BKNG': 0.0, 'CRM': 0.0, 'META': 0.0, 'WST': 0.36118, 'ULTA': 0.00954, 'CPRT': 0.00287, 'MKTX': 0.0, 'CDNS': 0.62641, 'NVDA': 0.0}
Expected annual return: 38.1%
Annual volatility: 30.8%
Sharpe Ratio: 1.17


(0.38073081080170323, 0.30846178711535344, 1.1694505636343313)

In [None]:
from pypfopt import HRPOpt
returns = portfolio.pct_change().dropna()
hrp = HRPOpt(returns)
hrp_weights = hrp.optimize()
hrp.portfolio_performance(verbose=True)
dictHRP = dict(hrp_weights)
print(dict(hrp_weights))

Expected annual return: 25.9%
Annual volatility: 26.2%
Sharpe Ratio: 0.91
{'BKNG': 0.08309223987009184, 'CDNS': 0.11169539395510798, 'CPRT': 0.11355964805391419, 'CRM': 0.0929912834352311, 'GE': 0.06053399956620375, 'META': 0.0571320079945535, 'MKTX': 0.15726468986339373, 'NVDA': 0.06233928073655871, 'ULTA': 0.10839270378540025, 'WST': 0.15299875273954494}


In [None]:
new = pd.DataFrame.from_dict(dict(hrp_weights),orient ='index')
new.to_csv('basePortfolioStocks.csv')
