In [1]:
import numpy as np  
import pandas as pd  
from pandas_datareader import data as wb  
import matplotlib.pyplot as plt  
from scipy.stats import norm
%matplotlib inline
pd.options.plotting.backend = "plotly"


In [50]:
tickers = pd.read_csv('NYSE.txt', sep='\t')
tickers = tickers[~(tickers['Symbol'].str.contains('-')) & 
                  ~(tickers['Symbol'].str.contains('.', regex=False))]
print(tickers.shape)

ticker_list = tickers['Symbol'].sample(200).tolist()

print(len(ticker_list))

(2552, 2)
200


In [51]:

data = pd.DataFrame()

i = 0
data = wb.DataReader(ticker_list, data_source='yahoo', start='2020-1-1')['Adj Close']

In [56]:
data.shape

(273, 65)

In [98]:
daily_returns = data.pct_change()
annual_returns = daily_returns.mean() * 250

daily_covariance = daily_returns.cov()
annual_covariance = daily_covariance * 250

print(annual_returns.shape)

(200,)


In [99]:
portfolio_returns = []
portfolio_risk = []
stock_weights = []

n = 200

In [125]:
for port in range(n):
    weights = np.random.random(len(ticker_list))
    weights /= np.sum(weights)
        
    returns = np.nansum(weights * annual_returns.values)
    
    temp_risk = np.nansum(annual_covariance.values * weights)
    
    print(returns)

0.32097059922197685
0.40720345457355134
0.16606272645133818
0.21652480671003838
0.4801683533383154
0.37761746596475954
0.28649383940183915
0.36731121705708625
0.20186571545582688
0.5616758691688544
0.5102849131056628
0.32381976174550947
0.45619437139681746
0.34971086704845594
0.4031391018393247
0.5325501434860678
0.44295045596312754
0.3666971354162157
0.49334097534162386
0.43592932044030575
0.37434686108574655
0.3291768311829389
0.26644702387988917
0.5953119883388015
0.27795646294653176
0.3036408598303416
0.46858783132050735
0.5512171717163764
0.21276751388134058
0.2655146042559264
0.494275391020206
0.3618749527585444
0.5576068056936834
0.290099523636224
0.45144905185218176
0.2805376498828206
0.4167539813451932
0.5223648465929458
0.2728953485915462
0.41807946542101704
0.22600425561481183
0.5031608316148741
0.5476553730383983
0.23127336964230785
0.4286026555929745
0.3436897405536155
0.23729274390633484
0.5372238327835502
0.5006352768602813
0.2126465511513925
0.5785458372276843
0.5693378

In [142]:
temp_risk = np.sqrt(weights * annual_covariance.values * weights.T)

print(temp_risk.shape)

(200, 200)


  temp_risk = np.sqrt(weights * annual_covariance.values * weights.T)


In [None]:
np.dot

In [124]:
temp = weights * annual_returns.values

print(temp)

[ 9.32146844e-03  2.25834312e-04  7.06062275e-05  4.49700853e-04
  8.16368722e-04 -6.40800487e-03  3.63021041e-03 -4.50800834e-04
  1.51695733e-03 -2.52176161e-04  8.39151031e-05 -8.80998850e-04
  2.77550481e-03  9.29606826e-04  1.95960693e-03 -5.50687709e-04
  1.48183267e-03  2.04189042e-03 -6.33818756e-04  1.15213169e-03
  3.35456166e-03  1.91901807e-03 -5.57561166e-05  2.37340632e-04
 -6.81530234e-04 -5.14682508e-04  3.27910455e-03  6.52998205e-04
 -3.75954223e-04  7.91738019e-04  2.28848665e-03 -8.22700814e-05
  2.52330241e-04  5.93202891e-04 -2.15215943e-03 -3.15902880e-04
  5.80943012e-03  3.02838879e-03  7.90274539e-04 -2.01114818e-04
  7.15882974e-03  2.29018675e-03  2.54211990e-03  7.25296837e-04
  2.11278710e-03  1.20293565e-03  9.93383500e-04  1.01824514e-03
  2.51132964e-04  4.87442931e-04  1.19771123e-02  3.95738428e-04
  2.81130020e-04 -1.50141828e-03  9.74365388e-04  6.82909526e-04
 -9.60408695e-04 -1.15065175e-02  9.02322219e-04 -5.55177638e-03
 -4.66258970e-04 -5.94058

In [121]:
temp = annual_covariance.values * weights

print(temp.shape)

(200, 200)


In [28]:
high_yield = pd.read_csv('high_yield.txt')
hy_tickers = high_yield['TICKER'].tolist()

stuff_to_remove = ['SHYD','SJB','HYD','KCCB']
hy_tickers = [x for x in hy_tickers if x not in stuff_to_remove]

hy_tickers.append('SPY')

In [57]:
data = wb.DataReader(hy_tickers, data_source='yahoo', start='2019-11-01')['Adj Close']
data.to_pickle('data/hy_etf_ticker_data.pkl')

In [None]:
data = pd.read_pickle('data/hy_etf_ticker_data.pkl')

In [58]:
hyg = data['HYG'].pct_change()[1:]
spy = data['SPY'].pct_change()[1:]

pct_chg = data.pct_change()
pct_chg = pct_chg.iloc[1:]

In [59]:
import plotly.express as px

fig = px.line(pct_chg)
fig.show()

In [33]:
import statsmodels.api as sm
from statsmodels import regression

beta_frame = []

for ticker in hy_tickers:
    print(ticker)
    x = pct_chg['SPY'].values
    y = pct_chg[ticker].values
    
    x = sm.add_constant(x)
    model = regression.linear_model.OLS(y,x).fit()
    
    x = x[:, 1]
    
    alpha = model.params[0]
    beta = model.params[1]
    
    temp_dict = {
        'alpha' : alpha,
        'beta' : beta,
        'ticker' : ticker
    }
    beta_frame.append(temp_dict)
    
beta_frame = pd.DataFrame(beta_frame)

HYG
JNK
HYLB
USHY
SHYG
BKLN
ANGL
SJNK
HYLS
SRLN
HYS
FTSL
HYMB
BSJL
PHB
BSJM
BSJK
HYEM
BSJN
FALN
PTBD
EMHY
JPHY
PGHY
HYGV
BSJO
GHYG
SPHY
GHYB
FDHY
BSJP
HYZD
HYLV
HYLD
WFHY
IHY
HYBB
HYXF
NUHY
HYGH
BKHY
BSJQ
PHYL
SNLN
KDFI
HYXU
HYDW
HYDB
SHYL
FLRT
FJNK
SFHY
SOVB
BSJR
ESHY
HYTR
HYUP
BSJS
SEIX
IBHF
FLBL
FLHY
HYHG
UJB
SPY


In [34]:
import plotly.express as px
fig = px.histogram(beta_frame, x="beta")
fig.show()

In [35]:
def calc_beta(pct_chg, ticker):
    x = pct_chg['SPY'].values
    y = pct_chg[ticker].values
    
    x = sm.add_constant(x)
    model = regression.linear_model.OLS(y,x).fit()
    
    x = x[:, 1]
    
    alpha = model.params[0]
    beta = model.params[1]
    
    temp_dict = {
        'alpha' : alpha,
        'beta' : beta,
        'ticker' : ticker
    }
    
    return temp_dict

In [13]:
# data = wb.DataReader(hy_tickers, data_source='yahoo', start='2019-7-1')['Adj Close']


In [36]:
pct_chg = data.pct_change()
pct_chg = pct_chg.iloc[1:]

In [37]:
min_date = data.index.min() + pd.DateOffset(1)

In [38]:
window_start = data.index.min()

window_stop = data.index.max() - pd.DateOffset(60)

window_total = window_stop - window_start
window_total = window_total.days

In [39]:
i = 0
start_date = window_start

rolling_beta_frame = []
while(i <= window_total):
    print(i)
    window_end = start_date + pd.DateOffset(60)
    filtered_pct_chg = pct_chg[(pct_chg.index >= start_date) & 
                        (pct_chg.index < window_end)]
    
    for ticker in hy_tickers:
        temp_dict = calc_beta(filtered_pct_chg, ticker)
        temp_dict['date'] = window_end
        rolling_beta_frame.append(temp_dict)
        
    print(start_date)
    
    i+=1
    start_date = start_date + pd.DateOffset(1)

0
2020-01-02 00:00:00
1
2020-01-03 00:00:00
2
2020-01-04 00:00:00
3
2020-01-05 00:00:00
4
2020-01-06 00:00:00
5
2020-01-07 00:00:00
6
2020-01-08 00:00:00
7
2020-01-09 00:00:00
8
2020-01-10 00:00:00
9
2020-01-11 00:00:00
10
2020-01-12 00:00:00
11
2020-01-13 00:00:00
12
2020-01-14 00:00:00
13
2020-01-15 00:00:00
14
2020-01-16 00:00:00
15
2020-01-17 00:00:00
16
2020-01-18 00:00:00
17
2020-01-19 00:00:00
18
2020-01-20 00:00:00
19
2020-01-21 00:00:00
20
2020-01-22 00:00:00
21
2020-01-23 00:00:00
22
2020-01-24 00:00:00
23
2020-01-25 00:00:00
24
2020-01-26 00:00:00
25
2020-01-27 00:00:00
26
2020-01-28 00:00:00
27
2020-01-29 00:00:00
28
2020-01-30 00:00:00
29
2020-01-31 00:00:00
30
2020-02-01 00:00:00
31
2020-02-02 00:00:00
32
2020-02-03 00:00:00
33
2020-02-04 00:00:00
34
2020-02-05 00:00:00
35
2020-02-06 00:00:00
36
2020-02-07 00:00:00
37
2020-02-08 00:00:00
38
2020-02-09 00:00:00
39
2020-02-10 00:00:00
40
2020-02-11 00:00:00
41
2020-02-12 00:00:00
42
2020-02-13 00:00:00
43
2020-02-14 00:00:0

In [40]:
rolling_beta = pd.DataFrame(rolling_beta_frame)

In [41]:
rolling_beta.index = rolling_beta['date']

In [42]:
rolling_beta = rolling_beta[['alpha','beta','ticker']]

In [43]:
rolling_beta = rolling_beta.reset_index()

rolling_beta

Unnamed: 0,date,alpha,beta,ticker
0,2020-03-02,1.061360e-04,0.287891,HYG
1,2020-03-02,1.038623e-04,0.304574,JNK
2,2020-03-02,5.493256e-05,0.289457,HYLB
3,2020-03-02,2.527733e-04,0.288749,USHY
4,2020-03-02,1.462253e-04,0.217877,SHYG
...,...,...,...,...
21900,2021-02-01,4.679605e-04,0.131521,FLBL
21901,2021-02-01,2.677304e-04,0.130063,FLHY
21902,2021-02-01,1.709961e-04,0.174488,HYHG
21903,2021-02-01,4.185255e-04,0.263375,UJB


In [44]:
rolling_b = rolling_beta.pivot(index='date', columns='ticker', values='beta')

In [45]:
import plotly.express as px

fig = px.line(rolling_b)
fig.show()

In [25]:
rolling_beta[rolling_beta['beta'] < 0]['ticker'].value_counts()

'SHYD','SJB','HYD','KCCB'

SJB     519
SEIX    307
HYD     209
SHYD    203
HYMB    202
SOVB    176
FLRT    160
KCCB    146
PTBD    111
KDFI    106
HYXU     40
PGHY     36
HYTR     34
HYZD     27
FTSL     23
HYLD     17
IHY       6
FJNK      5
FALN      2
FLBL      2
BSJK      2
Name: ticker, dtype: int64

In [50]:
rolling_b.mean(axis=1).reset_index().plot.line('date',0)

In [51]:
gme_data = wb.DataReader(['GME'], data_source='yahoo', start='2020-1-1')['Volume']

In [54]:
gme_data[gme_data.index >= '03/02/2020'].plot.line()