In [1]:
from io import StringIO
from os import path
from pathlib import Path
from urllib.request import Request, urlopen

import bs4 as bs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import talib
import yfinance as yf
from matplotlib.pyplot import figure

### Load or Scrape S&P 500 Data

In [6]:
def reformat_ticker_dataframe(ticker_history):
    """Keep only the ``*_close`` columns and strip the ``_close`` marker.

    Args:
        ticker_history: DataFrame whose column labels are strings; columns
            whose label contains ``"_close"`` are kept, all others dropped.

    Returns:
        A new DataFrame with the kept columns, in their original order,
        renamed so that ``"_close"`` is removed from each label. The input
        frame is not modified.
    """
    rename_map = {}
    for column in ticker_history.columns:
        if "_close" in column:
            rename_map[column] = column.replace("_close", "")

    # Select first, then rename, so unrelated columns never reach the output.
    close_only = ticker_history[list(rename_map)]
    return close_only.rename(columns=rename_map)

def get_sp_index():
    """Return the S&P 500 constituents table, backed by a local Excel cache.

    If ``data/sp_index.xlsx`` exists it is read and returned. Otherwise the
    table is scraped from https://www.slickcharts.com/sp500, its ``#`` column
    is dropped, and the result is written to the cache before being returned.

    Returns:
        pandas.DataFrame: the constituents table. The same columns are
        returned whether the data came from the cache or from a fresh scrape.

    Raises:
        ValueError: if the expected table is not present in the scraped page.
    """
    sp_index_path = Path('data/sp_index.xlsx')

    if sp_index_path.is_file():
        df = pd.read_excel(sp_index_path)
        # Cache files written with the row index included come back with an
        # extra "Unnamed: 0" column; drop it so cached and fresh results match.
        if "Unnamed: 0" in df.columns:
            df = df.drop(columns=["Unnamed: 0"])
        return df

    req = Request('https://www.slickcharts.com/sp500', headers={'User-Agent': 'Mozilla/5.0'})
    # Context manager guarantees the HTTP response is closed.
    with urlopen(req) as response:
        webpage = response.read()

    soup = bs.BeautifulSoup(webpage, 'lxml')
    table = soup.find('table', attrs={'class': 'table table-hover table-borderless table-sm'})
    if table is None:
        # Fail with a clear message instead of handing the string "None" to read_html.
        raise ValueError("S&P 500 table not found on https://www.slickcharts.com/sp500")

    # Wrap the markup in a file-like object: passing literal HTML strings to
    # read_html is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(str(table)))[0]
    df = df.drop(columns=['#'])

    # Make sure the cache directory exists, and do not persist the row index
    # so that the cached file round-trips to the same columns.
    sp_index_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_excel(sp_index_path, index=False)
    return df


def get_ticker_historical(symbol_list):
    """Return closing prices for the given symbols, one column per symbol.

    If ``data/ticker_history.xlsx`` exists it is loaded (``symbol_list`` is
    then ignored). Otherwise each symbol is downloaded with ``yf.download``
    for 2015-03-30 .. 2022-03-31, symbols with 250 rows or fewer are skipped,
    and the combined table is written to the cache file.

    In both cases rows containing any missing value are dropped, so the
    result only covers dates on which every kept symbol has a close.

    Args:
        symbol_list: iterable of ticker symbols to download on a cache miss.
            Duplicates are ignored.

    Returns:
        pandas.DataFrame indexed by ``Date`` with one column per symbol
        (the ``_close`` suffix removed by ``reformat_ticker_dataframe``).
    """
    ticker_history_file = Path('data/ticker_history.xlsx')

    if ticker_history_file.is_file():
        print("found")
        ticker_history = pd.read_excel(ticker_history_file)
        ticker_history = ticker_history.set_index("Date")
        return reformat_ticker_dataframe(ticker_history.dropna())

    print("not found")
    close_columns = []
    # dict.fromkeys de-duplicates while keeping the caller's order.
    # NOTE(review): symbols are passed to Yahoo unchanged; share-class tickers
    # written with a dot (e.g. "BRK.B") presumably need a dash there - confirm.
    for symbol in dict.fromkeys(symbol_list):
        downloaded = yf.download(symbol, start="2015-03-30", end="2022-03-31")
        # Check the length before touching "Close": a failed download may be
        # empty (or None) and would otherwise raise instead of being skipped.
        if downloaded is None or len(downloaded) <= 250:
            continue

        close = downloaded["Close"]
        # Some yfinance releases return MultiIndex columns even for a single
        # symbol, in which case "Close" selects a one-column frame.
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]
        # Column-wise concat needs unique index labels.
        close = close[~close.index.duplicated(keep="last")]
        close_columns.append(close.rename(f"{symbol}_close"))

    if close_columns:
        # One concat instead of a join per symbol (which re-copies the whole
        # frame each iteration). Reversed so the most recently downloaded
        # symbol comes first, matching the column order join() produced.
        ticker_history = pd.concat(close_columns[::-1], axis=1).sort_index()
    else:
        ticker_history = pd.DataFrame()

    # The cached branch re-reads the file and indexes on "Date".
    ticker_history = ticker_history.rename_axis("Date")

    # Create the cache directory so the download is not lost on a fresh checkout.
    ticker_history_file.parent.mkdir(parents=True, exist_ok=True)
    ticker_history.to_excel(ticker_history_file)
    return reformat_ticker_dataframe(ticker_history.dropna())
    


### Modern Portfolio Theory

In [7]:
# Constituents table: read from the local cache, or scraped on first run.
df = get_sp_index()

# Sort the unique symbols so the download order (and therefore the column
# order of a rebuilt cache) is the same in every kernel session; iterating a
# plain set of strings gives an order that can change between sessions.
df_history = get_ticker_historical(sorted(set(df["Symbol"].dropna().astype(str))))
df_history

found


Unnamed: 0_level_0,KLAC,WMT,JNJ,CL,SHW,PWR,EQIX,HLT,FDX,ROP,...,GPN,NEM,ANET,ANSS,LEN,GM,MTCH,HII,WELL,SBUX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-19,127.360001,119.449997,127.050003,66.650002,148.373337,26.980000,550.000000,58.029999,111.680000,300.010010,...,130.520004,41.740002,44.470001,221.089996,31.610001,17.709999,50.759998,169.470001,40.919998,61.410000
2020-03-20,114.019997,113.970001,119.889999,62.080002,137.566666,25.129999,506.519989,61.599998,111.059998,283.149994,...,119.000000,40.560001,41.119999,213.110001,34.080002,18.139999,47.349998,159.320007,42.419998,58.029999
2020-03-23,116.360001,114.279999,111.139999,60.470001,132.233337,23.799999,489.140015,63.330002,111.760002,254.300003,...,116.070000,41.660000,40.470001,208.350006,29.350000,17.600000,48.709999,153.970001,43.490002,56.549999
2020-03-24,136.360001,115.029999,119.180000,61.180000,151.339996,27.600000,545.890015,69.389999,120.040001,282.880005,...,141.229996,47.500000,46.639999,217.429993,35.599998,21.110001,57.860001,165.380005,45.369999,64.879997
2020-03-25,137.470001,109.400002,119.400002,59.889999,150.553329,30.410000,553.919983,72.250000,118.309998,285.179993,...,146.699997,47.540001,47.082500,212.309998,40.709999,21.490000,65.599998,177.440002,46.160000,65.809998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-24,365.890015,142.830002,175.240005,73.650002,249.929993,132.570007,711.630005,151.889999,225.119995,461.450012,...,134.929993,78.230003,139.669998,310.959991,83.879997,44.349998,107.059998,203.589996,94.660004,87.120003
2022-03-25,366.440002,143.449997,176.919998,74.779999,245.210007,134.300003,710.140015,151.500000,226.559998,465.859985,...,136.190002,78.949997,138.399994,308.970001,82.779999,43.650002,106.730003,205.970001,96.220001,87.449997
2022-03-28,375.350006,146.000000,177.830002,75.389999,250.580002,135.539993,730.919983,150.679993,230.050003,472.260010,...,135.509995,78.139999,139.389999,318.399994,82.510002,44.220001,107.809998,200.350006,96.260002,88.379997
2022-03-29,384.329987,147.229996,177.740005,76.199997,259.350006,134.460007,761.380005,155.309998,238.570007,476.630005,...,140.389999,78.550003,140.800003,327.649994,85.739998,46.259998,111.160004,202.259995,98.309998,91.230003
