## Import Packages

In [3]:
# Import packages
import os
import sys
import warnings
import inspect
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.patches import Patch
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from scipy.interpolate import make_interp_spline
from scipy.interpolate import UnivariateSpline
import bs4 as bs
import requests
import yfinance as yf
import json


# Project layout, resolved relative to the folder the notebook is run from
notebook_dir = os.getcwd()
base_dir = os.path.join(notebook_dir, '..')

# Data folders: raw inputs, processed outputs and the processed sub-folders
raw_data_dir = os.path.join(base_dir, 'data', 'raw')
data_dir = os.path.join(base_dir, 'data', 'processed')
factor_dir = os.path.join(data_dir, 'factors')
sample_dir = os.path.join(data_dir, 'sample_stock')

# Output folder for figures
graph_dir = os.path.join(base_dir, 'results', 'graphs')

# Make the project's src folder importable from this notebook
src_dir = os.path.join(base_dir, 'src')
sys.path.append(src_dir)

# Import optimal portfolio
from optimal_portfolios import OptimalPortfolios

# Silence FutureWarning messages for the rest of the session
warnings.filterwarnings('ignore', category=FutureWarning)

## Get S&P 1500 Stock Tickers and Sectors

In [36]:
def get_sp_tickers(num):
    """Scrape the tickers and sectors of an S&P index from Wikipedia.

    Downloads the ``List_of_S%26P_<num>_companies`` page, locates the first
    sortable wikitable on it and, for every data row, reads the first cell
    as the ticker and the third cell as the sector.

    Parameters
    ----------
    num : int or str
        Index size used to build the page name (e.g. 500, 400 or 600).

    Returns
    -------
    pandas.DataFrame
        One row per parsed table row, with columns ``'tickers'`` and
        ``'sector'``.

    Raises
    ------
    requests.HTTPError
        If the page request returns an error status.
    ValueError
        If no sortable wikitable is found on the page.
    """
    wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_' + str(num) + '_companies'

    # A timeout keeps the notebook from hanging on a stalled connection, and
    # a descriptive User-Agent is what Wikimedia asks automated clients to send.
    resp = requests.get(
        wiki_url,
        headers={'User-Agent': 'sp1500-ticker-scraper/1.0 (python-requests)'},
        timeout=30,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    soup = bs.BeautifulSoup(resp.text, 'lxml')

    # The dict form only matches a class attribute that is exactly
    # "wikitable sortable"; fall back to a CSS selector so a table that
    # carries additional classes is still found.
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        table = soup.select_one('table.wikitable.sortable')
    if table is None:
        raise ValueError('No sortable wikitable found at ' + wiki_url)

    tickers = []
    sectors = []
    # Skip the first <tr>, which is the header row.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Extra header/separator rows have no (or too few) <td> cells.
        if len(cells) < 3:
            continue
        # NOTE(review): assumes the third column holds the sector on every
        # index page - confirm against the current table layouts.
        tickers.append(cells[0].text.replace('\n', '').strip())
        sectors.append(cells[2].text.replace('\n', '').strip())

    # Create a DataFrame
    return pd.DataFrame({'tickers': tickers, 'sector': sectors})

# Stack the S&P 500, 400 and 600 constituents (together the S&P 1500),
# drop fully identical ticker/sector rows and renumber the index.
stock_sector = (
    pd.concat([get_sp_tickers(index_size) for index_size in (500, 400, 600)])
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
)

In [38]:
# Render the ticker/sector table, then list the distinct sector labels
display(stock_sector)
print(stock_sector.loc[:, 'sector'].unique())

# Optional: persist the table to the processed-data folder
# stock_sector.to_csv(os.path.join(data_dir, 'stock_sector.csv'), index=False)

Unnamed: 0,tickers,sector
0,MMM,Industrials
1,AOS,Industrials
2,ABT,Health Care
3,ABBV,Health Care
4,ACN,Information Technology
...,...,...
1501,XPEL,Consumer Discretionary
1502,XPER,Information Technology
1503,XRX,Information Technology
1504,YELP,Communication Services


['Industrials' 'Health Care' 'Information Technology' 'Utilities'
 'Financials' 'Materials' 'Consumer Discretionary' 'Real Estate'
 'Communication Services' 'Consumer Staples' 'Energy']
