In [1]:
import pandas as pd
import numpy as np
import datetime as dt

from pandas_datareader import data as pdr
import yfinance as yfin
from IPython.display import clear_output

# Per the yfinance documentation this patches pandas_datareader so that
# pdr.get_data_yahoo is served by yfinance — NOTE(review): confirm the installed
# yfinance version still provides pdr_override.
yfin.pdr_override()

In [2]:
def historic_close(ticker, start_date, end_date):
    """Download price history for ``ticker`` and append daily-change columns.

    The frame returned by ``pdr.get_data_yahoo`` is extended in place with:

    * ``'Diff'`` - row-over-row change in ``'Adj Close'``; rows where that
      change is undefined (such as the first row) are set to 0.
    * ``'Percent Diff'`` - ``'Diff'`` expressed as a percentage of
      ``'Adj Close' - 'Diff'`` (the previous row's adjusted close).

    Parameters
    ----------
    ticker
        Symbol forwarded to ``pdr.get_data_yahoo``.
    start_date, end_date
        Forwarded as the ``start`` / ``end`` keyword arguments.

    Returns
    -------
    The downloaded frame with the two extra columns.
    """
    prices = pdr.get_data_yahoo(ticker, start=start_date, end=end_date)

    adj_close = prices['Adj Close']
    change = adj_close.diff().fillna(0)
    # Subtracting the change from the current close recovers the prior close.
    previous_close = adj_close - change

    prices['Diff'] = change
    prices['Percent Diff'] = 100 * change / previous_close
    return prices

In [3]:
def get_sp500_list():
    """Return the first HTML table parsed from Wikipedia's S&P 500 list page.

    ``pd.read_html`` returns one DataFrame per table it finds at the URL; only
    the first of them is handed back to the caller.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    parsed_tables = pd.read_html(url)
    first_table = parsed_tables[0]
    return first_table

In [37]:
def get_sp500_volatilities(sp500list, start, end):
    """Compute a volatility figure and the total return for each ticker.

    Parameters
    ----------
    sp500list : iterable
        Ticker symbols; each one is passed to ``historic_close``.
    start, end
        Date bounds forwarded unchanged to ``historic_close``.

    Returns
    -------
    dict
        Maps ticker -> ``(volatility, old_price, new_price, ret)`` where
        ``volatility`` is ``np.std`` of the frame's ``'Percent Diff'`` column,
        ``old_price`` / ``new_price`` are the first and last ``'Adj Close'``
        values, and ``ret`` is the percentage change between those two prices.

    Notes
    -----
    A ticker whose price frame has no rows is left out of the result instead
    of raising ``IndexError`` and aborting the whole run. All such tickers are
    reported together in a single ``UserWarning``.
    """
    import warnings

    vol_dic = {}
    skipped = []
    for ticker in sp500list:
        df = historic_close(ticker, start, end)
        if df.empty:
            # No rows means there is no first/last price to read.
            skipped.append(ticker)
            continue
        old_price = df['Adj Close'].iloc[0]
        new_price = df['Adj Close'].iloc[-1]
        ret = 100*(new_price - old_price)/old_price
        vol_dic[ticker] = (np.std(df['Percent Diff']), old_price, new_price, ret)

    if skipped:
        warnings.warn(
            "No price data returned for: " + ", ".join(str(t) for t in skipped),
            stacklevel=2,
        )
    return vol_dic

In [46]:
# Fetch the constituents table and build the ticker universe; the two
# dot-suffixed symbols 'BRK.B' and 'BF.B' are left out.
sp500 = get_sp500_list()
sp500tickers = set(sp500['Symbol'].tolist()).difference({'BRK.B', 'BF.B'})

# Date window handed to the price download.
start_date = "2000-01-01"
end_date = "2023-05-28"

vol_dic = get_sp500_volatilities(sp500tickers, start_date, end_date)
clear_output(wait=True)  # drop the output produced while downloading

# (ticker, metrics) pairs ordered by metrics[0], the volatility, lowest first.
vol_sorted = sorted(vol_dic.items(), key=lambda pair: pair[1][0])
vol_sorted

[('JNJ',
  (1.2173742197541562,
   25.267574310302734,
   154.35000610351562,
   510.8619854363311)),
 ('GIS',
  (1.2330574826935905,
   8.552310943603516,
   84.18000030517578,
   884.2953660160834)),
 ('ED',
  (1.2414459994832914,
   11.414536476135254,
   92.69000244140625,
   712.034747404735)),
 ('PEP',
  (1.2663613996777916,
   20.817401885986328,
   183.5800018310547,
   781.8583742413862)),
 ('WEC',
  (1.289375707277801,
   4.507284164428711,
   86.9000015258789,
   1827.9903009375337)),
 ('SO',
  (1.303488719349973,
   4.663519382476807,
   69.47000122070312,
   1389.647528468241)),
 ('KO',
  (1.314814256755698,
   14.894366264343262,
   60.2599983215332,
   304.5824928167244)),
 ('KMB',
  (1.3152652669593865,
   28.92879867553711,
   136.3000030517578,
   371.15680322742327)),
 ('PG',
  (1.3338612278595277,
   28.78843879699707,
   145.39999389648438,
   405.0638380281014)),
 ('MDLZ',
  (1.345309488857653,
   12.123147010803223,
   75.12999725341797,
   519.7235518670841)),
 

In [48]:
N = 100  # how many stocks go into each volatility bucket

# Every vol_sorted entry is (ticker, (volatility, old_price, new_price, ret)),
# ascending by volatility, so [1][3] picks out the percentage return.
low_vol_avg_return = np.array([vol_sorted[rank][1][3] for rank in range(N)]).mean()
# ~rank equals -rank-1: walk backwards from the most volatile entry.
high_vol_avg_return = np.array([vol_sorted[~rank][1][3] for rank in range(N)]).mean()

print(low_vol_avg_return, high_vol_avg_return)

1027.9897162604875 3587.6570953671985
