In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LassoLarsIC
from sklearn.metrics import r2_score
import os

## Observations

 - The results for Food, Servs, and Whlsl differ somewhat from the Table 2 being replicated, especially Food.
 - The results for the other industries are reasonably close.

## Data

In [2]:
# Industry portfolio returns: strip stray whitespace from the headers and
# index by Date (the integer comparisons below suggest a YYYYMM code -- TODO confirm).
ind = pd.read_csv('30ValueWeightedIndustry.csv')
ind.columns = [name.strip() for name in ind.columns]
ind = ind.set_index('Date')
ind_names = list(ind.columns)

# Risk-free rate: the date column arrives unnamed in the CSV.
rf = (
    pd.read_csv('tbillrt.csv')
    .rename(columns={'Unnamed: 0': 'Date'})
    .set_index('Date')
)

# Excess returns: subtract RF from every industry column (aligned on Date).
for name in ind_names:
    ind[name] = ind[name] - rf['RF']

# Next-period excess return of each industry. These become the regression
# targets; they are computed before the sample is trimmed.
for name in ind_names:
    ind[name + '_lead'] = ind[name].shift(-1)

# Keep rows with 195911 < Date < 201701.
in_sample = (ind.index > 195911) & (ind.index < 201701)
ind = ind[in_sample]

----
Example

In [3]:
# Worked example of the select-then-refit procedure for one target
# (column 1 of the lead block). The last row is dropped before fitting.
X = ind[ind_names].values[:-1, :]
Y = ind.drop(ind_names, axis=1).values[:-1, :]

# Step 1: LARS-lasso, penalty chosen by AIC.
lasso = LassoLarsIC(criterion='aic')
lasso = lasso.fit(X, Y[:, 1])

# Step 2: positions of the predictors the lasso kept (|coef| > 1e-6).
x_non0_index = np.flatnonzero(np.abs(lasso.coef_) > 1e-6)

# Step 3: plain OLS on the surviving predictors only.
ols = LinearRegression()
ols.fit(X[:, x_non0_index], Y[:, 1])

ols.coef_

# Scatter the OLS coefficients back to full width; unselected slots stay NaN.
l = [np.nan] * X.shape[1]
for slot, coef in zip(x_non0_index, ols.coef_):
    l[slot] = coef

----

## Table 2

In [4]:
def lasso_ols(df, model=None):
    """
    Post-selection OLS, one regression per target column.

    For every column of ``df`` that is not listed in the module-level
    ``ind_names`` (these are the targets):

    1. Fit ``model`` on the ``ind_names`` columns to select predictors,
    2. Drop predictors whose coefficient is numerically zero (|coef| <= 1e-6),
    3. Re-fit plain OLS on the survivors to estimate coefficients and R-squared.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in ``ind_names``; all remaining
        columns are treated as targets. The last row is dropped before
        fitting (presumably because its lead value can be missing -- TODO
        confirm).
    model : estimator, optional
        Selection model exposing ``fit(X, y)`` and ``coef_``. It is re-fitted
        in place once per target, so after the call it holds the fit for the
        last target. Defaults to ``LassoLarsIC(criterion='aic')``.

    Returns
    -------
    pandas.DataFrame
        One column per target, labelled with ``ind_names`` in order; one row
        per predictor plus a final ``'R-squared'`` row (in percent).
        Predictors that were not selected are NaN. A target for which nothing
        is selected yields an all-NaN column.
    """
    if model is None:
        model = LassoLarsIC(criterion='aic')

    X = df[ind_names].values[:-1, :]
    Y = df.drop(ind_names, axis=1).values[:-1, :]
    n_x = X.shape[1]
    result = []

    for target in range(Y.shape[1]):
        y = Y[:, target]
        # One slot per predictor plus a trailing slot for R-squared, so every
        # row has the same length even when nothing is selected.
        estimate = np.full(n_x + 1, np.nan)

        # Selection step: use the caller's model rather than a hard-coded one.
        model.fit(X, y)
        selected = np.flatnonzero(np.abs(model.coef_) > 1e-6)

        if selected.size:
            # Refit step: unpenalised OLS on the selected predictors only.
            ols = LinearRegression().fit(X[:, selected], y)
            estimate[selected] = ols.coef_
            estimate[-1] = r2_score(y, ols.predict(X[:, selected])) * 100

        result.append(estimate)

    # Rows of `result` are targets; transpose so targets become columns.
    output = pd.DataFrame(result).T
    output.index = ind_names + ['R-squared']
    output.columns = list(ind_names)
    return output

In [5]:
# Build the full coefficient table with an AIC-tuned LARS-lasso as the selector.
test = lasso_ols(df=ind, model=LassoLarsIC(criterion='aic'))

 - The results for Food, Servs, and Whlsl differ somewhat from the Table 2 being replicated, especially Food.

In [6]:
# First ten target columns; rows are the predictors plus a final 'R-squared' row.
test.iloc[:, :10]

Unnamed: 0,Food,Beer,Smoke,Games,Books,Hshld,Clths,Hlth,Chems,Txtls
Food,,0.124806,,,,,,,,
Beer,,,,,,,,,,
Smoke,,,,,,,,,,
Games,,,,,0.028013,,,,,
Books,,,,0.177221,0.035296,,,0.09522,,
Hshld,,,,,,,,,,
Clths,,0.052488,,0.053002,,0.098236,0.105597,,0.075449,0.084505
Hlth,,,,,,,,,,
Chems,,,,,,,,,,
Txtls,,,0.065474,,,,,,,


In [7]:
# Target columns 10-19; rows are the predictors plus a final 'R-squared' row.
test.iloc[:, 10:20]

Unnamed: 0,Cnstr,Steel,FabPr,ElcEq,Autos,Carry,Mines,Coal,Oil,Util
Food,,,,,,,,,,0.096359
Beer,,,,,,,,-0.270312,-0.080476,-0.101563
Smoke,,,,,,,,-0.093483,,0.016952
Games,,,,,,,,,,
Books,,,,,,,,0.131739,,
Hshld,,,,,-0.300332,,,,,-0.075963
Clths,0.035858,,,,0.036953,,,,,
Hlth,,,,,,,,,-0.128711,-0.083205
Chems,,,,,,,,,,
Txtls,,,,,,,,,,


In [8]:
# Target columns 20-29; rows are the predictors plus a final 'R-squared' row.
test.iloc[:, 20:30]

Unnamed: 0,Telcm,Servs,BusEq,Paper,Trans,Whlsl,Rtail,Meals,Fin,Other
Food,,,,,,-0.171283,,,,
Beer,-0.061603,,,,,,,,,
Smoke,-0.030345,-0.096173,-0.144139,,,-0.050044,,-0.063862,,
Games,,,,,,,,,,
Books,0.087984,0.074627,0.121394,,,0.127926,,0.058937,,
Hshld,-0.071095,,,,,,,,,
Clths,,,,0.05978,,,,0.0964,,0.074243
Hlth,,,,,,,,,,
Chems,,,,,,,,,,
Txtls,,,,,,,,,,


In [9]:
# Export the notebook with nbconvert's `html_toc` output format. As the last
# expression of the cell, the value returned by os.system is displayed.
export_cmd = 'jupyter nbconvert --to html_toc replication.ipynb'
os.system(export_cmd)

0