In [94]:
import pandas as pd
import numpy as np

In [95]:
def read_weeklys(
    url: str = "http://www.cboe.com/products/weeklys-options/available-weeklys",
) -> pd.DataFrame:
    """Download the CBOE "available weeklys" table.

    Parameters
    ----------
    url : str, optional
        Page to scrape. Defaults to the CBOE available-weeklys page; pass a
        different address if the page has moved.

    Returns
    -------
    pd.DataFrame
        The first HTML table found on the page, exactly as parsed by
        ``pd.read_html`` (no renaming or filtering happens here).

    Raises
    ------
    ValueError
        Propagated from ``pd.read_html`` when the page contains no tables.
    """
    tables = pd.read_html(url)

    # The symbol listing is taken to be the first table on the page.
    return tables[0]

In [96]:
def rename_weekly_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose two columns are named ``name`` and ``symbol``.

    The input frame is left untouched, so re-running the cell that produced
    it (or inspecting it afterwards) still shows the original headers.

    Parameters
    ----------
    df : pd.DataFrame
        Two-column frame; the first column becomes ``name`` and the second
        ``symbol``.

    Returns
    -------
    pd.DataFrame
        A new frame with the standardized column labels.

    Raises
    ------
    ValueError
        Raised by pandas when ``df`` does not have exactly two columns.
    """
    # Work on a copy: assigning to ``.columns`` would otherwise relabel the
    # caller's frame in place.
    renamed = df.copy()
    renamed.columns = ['name', 'symbol']

    return renamed



In [97]:
def remove_non_char_symbols(df: pd.DataFrame) -> pd.DataFrame:
    """Keep only rows whose ``symbol`` is made up purely of ASCII letters.

    Symbols containing anything else - dots (``BRK.B``), digits, spaces,
    slashes - are dropped. Rows with a missing ``symbol`` are dropped as
    well, since they cannot be used as tickers.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a string ``symbol`` column.

    Returns
    -------
    pd.DataFrame
        The surviving rows, with their original index labels.
    """
    # ``na=True`` marks missing symbols as "bad" so they are filtered out
    # together with symbols that contain a non-letter character.
    has_non_letter = (
        df["symbol"].str.contains(r"[^a-zA-Z]", regex=True, na=True).astype(bool)
    )

    return df[~has_non_letter]



In [166]:
def split_stocks_and_index(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``secType`` column tagging each row as index or stock.

    A row is tagged ``'IND'`` when its ``name`` contains the text
    ``'Index'`` (case-sensitive) and ``'STK'`` otherwise. Rows with a
    missing ``name`` are tagged ``'STK'``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a string ``name`` column.

    Returns
    -------
    pd.DataFrame
        A new frame with the extra ``secType`` column; ``df`` is not modified.
    """
    # ``na=False`` is required: without it a missing name yields NaN, which
    # ``np.where`` treats as truthy and would wrongly label the row 'IND'.
    is_index = df["name"].str.contains('Index', na=False)

    return df.assign(secType=np.where(is_index, 'IND', 'STK'))

In [167]:
def create_weekly_symbols() -> pd.DataFrame:
    """Build the weekly-options symbol table.

    Runs the four preparation steps in order: download the table, rename
    its columns, drop symbols that contain non-letter characters, and tag
    each row with a ``secType``.
    """
    downloaded = read_weeklys()
    renamed = rename_weekly_columns(downloaded)
    letters_only = remove_non_char_symbols(renamed)
    tagged = split_stocks_and_index(letters_only)

    return tagged

In [168]:
# Build the weekly-options symbol table once; later cells filter this frame.
df_weeklies = create_weekly_symbols()

In [169]:
def read_snps(
    url: str = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
) -> pd.Series:
    """Download the S&P 500 ticker symbols.

    Parameters
    ----------
    url : str, optional
        Page to scrape. Defaults to Wikipedia's list of S&P 500 companies.

    Returns
    -------
    pd.Series
        The ``Symbol`` column of the first HTML table on the page.

    Raises
    ------
    ValueError
        Propagated from ``pd.read_html`` when the page contains no tables.
    KeyError
        If the first table has no ``Symbol`` column.
    """
    # The constituents listing is taken to be the first table on the page.
    constituents = pd.read_html(url)[0]

    return constituents['Symbol']

In [170]:
# Fetch the S&P 500 symbols (scraped from Wikipedia by read_snps).
snp = read_snps()

In [171]:
def filt_weekly_snps(df_weeklies: pd.DataFrame, snp_df: "pd.Series | pd.DataFrame") -> pd.DataFrame:
    """Keep the weekly-options rows whose symbol is an S&P 500 constituent.

    Parameters
    ----------
    df_weeklies : pd.DataFrame
        Weekly-options table with a ``symbol`` column.
    snp_df : pd.Series or pd.DataFrame
        S&P 500 symbols. Either a Series (or other list-like) of symbols, or
        a DataFrame holding them in a ``Symbol`` column.

    Returns
    -------
    pd.DataFrame
        Matching rows of ``df_weeklies`` with a fresh ``0..n-1`` index.

    Raises
    ------
    KeyError
        If ``snp_df`` is a DataFrame without a ``Symbol`` column.
    """
    # ``Series.isin`` on a DataFrame would compare against its column
    # labels, not its values, so pull the symbol column out explicitly.
    if isinstance(snp_df, pd.DataFrame):
        snp_symbols = snp_df["Symbol"]
    else:
        snp_symbols = snp_df

    is_snp = df_weeklies["symbol"].isin(snp_symbols)

    return df_weeklies[is_snp].reset_index(drop=True)

In [172]:
# Keep only the weekly-options symbols that also appear in the S&P 500 list.
df_snp_weeklies = filt_weekly_snps(df_weeklies, snp)

In [173]:
def weekly_indexes(df_weeklies: pd.DataFrame) -> pd.DataFrame:
    """Return the rows tagged as indexes (``secType == 'IND'``).

    The result carries a fresh ``0..n-1`` index; the input is not modified.
    """
    is_index_row = df_weeklies["secType"].eq('IND')
    index_rows = df_weeklies.loc[is_index_row]
    return index_rows.reset_index(drop=True)

In [175]:
# Pull out the rows of the weekly table that were tagged secType == 'IND'.
df_ind_weeklies = weekly_indexes(df_weeklies)

In [176]:
# Stack the S&P 500 weeklies and the index weeklies into one table;
# ignore_index=True renumbers the rows 0..n-1 across both parts.
df_symbols = pd.concat([df_snp_weeklies, df_ind_weeklies], ignore_index=True)

In [183]:
# Display every combined symbol as a plain Python list.
df_symbols.symbol.to_list()

['MMM',
 'ABT',
 'ABBV',
 'ACN',
 'ADBE',
 'AMD',
 'AFL',
 'ABNB',
 'ALB',
 'ALGN',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AAL',
 'AXP',
 'AIG',
 'AMGN',
 'ADI',
 'APA',
 'AAPL',
 'AMAT',
 'ADM',
 'ANET',
 'T',
 'ADSK',
 'ADP',
 'BAC',
 'BBWI',
 'BAX',
 'BBY',
 'BIIB',
 'BLK',
 'BX',
 'BA',
 'BKNG',
 'BSX',
 'BMY',
 'AVGO',
 'CZR',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CAT',
 'CNC',
 'CF',
 'CHTR',
 'CVX',
 'CMG',
 'CSCO',
 'C',
 'CLX',
 'CME',
 'KO',
 'CL',
 'CMCSA',
 'CMA',
 'CAG',
 'COP',
 'STZ',
 'GLW',
 'COST',
 'CTRA',
 'CSX',
 'CVS',
 'CBOE',
 'DHI',
 'DHR',
 'DE',
 'DAL',
 'DVN',
 'FANG',
 'DLR',
 'DFS',
 'DIS',
 'DG',
 'DLTR',
 'DPZ',
 'DOW',
 'DD',
 'EBAY',
 'EA',
 'ELV',
 'LLY',
 'EMR',
 'ENPH',
 'EOG',
 'EQT',
 'ETSY',
 'EXPE',
 'XOM',
 'FDX',
 'FITB',
 'FSLR',
 'FI',
 'F',
 'FTNT',
 'FOXA',
 'FCX',
 'GEHC',
 'GNRC',
 'GD',
 'GE',
 'GIS',
 'GM',
 'GILD',
 'GS',
 'HAL',
 'HSY',
 'HES',
 'HD',
 'HON',
 'HRL',
 'HPQ',
 'HUM',
 'ILMN',
 'INTC',
 'IBM',
 'IP',
 'INTU',
 'ISRG'

In [182]:
# Sanity check: show any rows of the combined table whose symbol contains 'SPY'.
df_symbols[df_symbols.symbol.str.contains('SPY')]

Unnamed: 0,name,symbol,secType
