In [1]:
# pacotes
import numpy as np
import pandas as pd
import statsmodels.formula.api as sm

In [2]:
# Hide warning messages for the rest of the session.
# NOTE(review): with no category given, this filter ignores every warning,
# including deprecation warnings raised by pandas/statsmodels.
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the LASSO forecast file; the first CSV column becomes the index.
# NOTE(review): presumably one column per asset, indexed by intraday time -- confirm against the file.
f_lasso = pd.read_csv('../../output/data/20030102_f_lasso.csv', index_col=0)

In [4]:
# Load the benchmark forecast file; the first CSV column becomes the index.
# NOTE(review): presumably the same layout as the LASSO forecast file -- confirm against the file.
f_bmk = pd.read_csv('../../output/data/20030102_f_bmk.csv', index_col=0)

In [5]:
# Load the dependent-variable file; the first CSV column becomes the index.
# NOTE(review): presumably the realised per-asset returns used as regression targets -- confirm.
y = pd.read_csv('../../output/data/20030102_y.csv', index_col=0)


$$
r_{n, t+1}=\bar{a}_n+\bar{b}_n \cdot\left(\frac{f_{n, t}^{\mathrm{LASSO}}-\bar{m}_n^{\mathrm{LASSO}}}{\bar{s}_n^{\mathrm{LASSO}}}\right)+e_{n, t+1}
$$

In [6]:
# Standardization: subtract each column's mean and divide by its standard deviation.
f_lasso = f_lasso.sub(f_lasso.mean(), axis="columns").div(f_lasso.std(), axis="columns")

In [7]:
# Not every asset could be estimated, so keep only the columns whose
# forecasts contain no missing values.
f_lasso = f_lasso.dropna(axis="columns", how="any")

In [8]:
# Same restriction for the dependent variable: keep only the columns that
# survived in f_lasso, in the same order.
y = y.loc[:, f_lasso.columns]

In [9]:
# Also restrict the index to the window that starts at 10:04 and ends at 15:59
# (label-based slice, so both end points are included).
y = y.loc[100400:155900, :]

In [10]:
# Empty results table: one row per column of y, one column per statistic
# stored by the LASSO regression loop.
rsquared_lasso = pd.DataFrame(
    index=y.columns,
    columns=['Adj. R-Squared', 'a', 'b'],
)

In [11]:
# Scratch frame for the regression inputs; the loops below overwrite its
# columns for every asset.
ols = pd.DataFrame()

In [12]:
# Per-asset univariate OLS of y[col] on the standardized LASSO forecast.
# The shared scratch frame `ols` is overwritten on every iteration.
for col in f_lasso.columns:
    ols['y'] = y[col]
    ols['x'] = f_lasso[col]
    result = sm.ols(formula="y ~ x", data=ols).fit()
    rsquared_lasso.at[col, 'Adj. R-Squared'] = result.rsquared_adj
    # Read the coefficients by label: `params` is indexed by term name
    # ('Intercept', 'x'), and integer-position access such as `params[0]`
    # on a label-indexed Series is deprecated in pandas.
    rsquared_lasso.at[col, 'a'] = result.params['Intercept']
    rsquared_lasso.at[col, 'b'] = result.params['x']

In [13]:
# Display the per-asset results of the LASSO-only regressions.
rsquared_lasso

Unnamed: 0,Adj. R-Squared,a,b
FITB(t),0.002949,2.9e-05,5.6e-05
AGN(t),-0.002414,3.2e-05,1.1e-05
ZBRA(t),-0.002788,4e-05,-6e-06
ADBE(t),-0.000545,4.8e-05,7.2e-05
CKFR(t),0.005305,0.000149,-0.00017
MEDI(t),0.001135,6.7e-05,7.8e-05
TXT(t),0.013634,7.9e-05,0.000113
CMCSA(t),-0.000957,3.8e-05,-5.6e-05
HON(t),0.012695,6.2e-05,0.000187
SCH(t),0.008707,0.000107,-0.000145


In [14]:
# Average adjusted R-squared of the LASSO-only regressions across assets.
rsquared_lasso.loc[:, 'Adj. R-Squared'].mean()

0.003980107980533139

$$
r_{n, t+1}=\bar{a}_n+\bar{c}_n \cdot\left(\frac{f_{n, t}^{\mathrm{Bmk}}-\bar{m}_n^{\mathrm{Bmk}}}{\bar{s}_n^{\mathrm{Bmk}}}\right)+e_{n, t+1} 
$$

In [15]:
# Standardization: subtract each column's mean and divide by its standard deviation.
f_bmk = f_bmk.sub(f_bmk.mean(), axis="columns").div(f_bmk.std(), axis="columns")

In [16]:
# Same restriction for the benchmark forecasts: keep only the columns that
# survived in f_lasso, in the same order.
f_bmk = f_bmk.loc[:, f_lasso.columns]

In [17]:
# Empty results table: one row per column of y, one column per statistic
# stored by the benchmark regression loop.
rsquared_bmk = pd.DataFrame(
    index=y.columns,
    columns=['Adj. R-Squared', 'a', 'c'],
)

In [18]:
# Per-asset univariate OLS of y[col] on the standardized benchmark forecast.
# The shared scratch frame `ols` is overwritten on every iteration.
for col in f_bmk.columns:
    ols['y'] = y[col]
    ols['x'] = f_bmk[col]
    result = sm.ols(formula="y ~ x", data=ols).fit()
    rsquared_bmk.at[col, 'Adj. R-Squared'] = result.rsquared_adj
    # Read the coefficients by label: `params` is indexed by term name
    # ('Intercept', 'x'), and integer-position access such as `params[0]`
    # on a label-indexed Series is deprecated in pandas.
    rsquared_bmk.at[col, 'a'] = result.params['Intercept']
    rsquared_bmk.at[col, 'c'] = result.params['x']

In [19]:
# Display the per-asset results of the benchmark-only regressions.
rsquared_bmk

Unnamed: 0,Adj. R-Squared,a,c
FITB(t),0.00154,2.9e-05,4.9e-05
AGN(t),0.000557,3.2e-05,3.3e-05
ZBRA(t),-0.001561,4e-05,-3.4e-05
ADBE(t),-0.001475,4.8e-05,-5.6e-05
CKFR(t),0.001507,0.000149,0.000124
MEDI(t),0.00907,6.7e-05,0.000134
TXT(t),0.010367,7.9e-05,-0.000101
CMCSA(t),-0.001719,3.8e-05,4.3e-05
HON(t),-0.000291,6.2e-05,7.5e-05
SCH(t),0.000439,0.000107,7.7e-05


In [20]:
# Average adjusted R-squared of the benchmark-only regressions across assets.
rsquared_bmk.loc[:, 'Adj. R-Squared'].mean()

0.0031043900154227116

$$
r_{n, t+1}=\bar{a}_n+\bar{b}_n \cdot\left(\frac{f_{n, t}^{\mathrm{LASSO}}-\bar{m}_n^{\mathrm{LASSO}}}{\bar{s}_n^{\mathrm{LASSO}}}\right)+\bar{c}_n \cdot\left(\frac{f_{n, t}^{\mathrm{Bmk}}-\bar{m}_n^{\mathrm{Bmk}}}{\bar{s}_n^{\mathrm{Bmk}}}\right)+e_{n, t+1}
$$

In [21]:
# Empty results table: one row per column of y, one column per statistic
# stored by the two-regressor loop.
rsquared_both = pd.DataFrame(
    index=y.columns,
    columns=['Adj. R-Squared', 'a', 'b', 'c'],
)

In [22]:
# Per-asset OLS of y[col] on both standardized forecasts: x1 is the LASSO
# forecast and x2 the benchmark forecast.
# The shared scratch frame `ols` is overwritten on every iteration.
for col in f_bmk.columns:
    ols['y'] = y[col]
    ols['x1'] = f_lasso[col]
    ols['x2'] = f_bmk[col]
    result = sm.ols(formula="y ~ x1 + x2", data=ols).fit()
    rsquared_both.at[col, 'Adj. R-Squared'] = result.rsquared_adj
    # Read the coefficients by label: `params` is indexed by term name
    # ('Intercept', 'x1', 'x2'), and integer-position access such as
    # `params[0]` on a label-indexed Series is deprecated in pandas.
    rsquared_both.at[col, 'a'] = result.params['Intercept']
    rsquared_both.at[col, 'b'] = result.params['x1']
    rsquared_both.at[col, 'c'] = result.params['x2']

In [23]:
# Display the per-asset results of the regressions that use both forecasts.
rsquared_both

Unnamed: 0,Adj. R-Squared,a,b,c
FITB(t),0.003115,2.9e-05,4.9e-05,4.1e-05
AGN(t),-0.001384,3.2e-05,1.7e-05,3.6e-05
ZBRA(t),-0.004385,4e-05,-3e-06,-3.3e-05
ADBE(t),-0.001036,4.8e-05,8.9e-05,-7.5e-05
CKFR(t),0.009134,0.000149,-0.000195,0.000155
MEDI(t),0.009607,6.7e-05,7.1e-05,0.000131
TXT(t),0.023654,7.9e-05,0.000111,-9.9e-05
CMCSA(t),-0.002527,3.8e-05,-5.9e-05,4.6e-05
HON(t),0.012667,6.2e-05,0.000188,7.9e-05
SCH(t),0.009838,0.000107,-0.000149,8.4e-05


In [24]:
# Average adjusted R-squared of the two-regressor regressions across assets.
rsquared_both.loc[:, 'Adj. R-Squared'].mean()

0.00724414635974443

In [25]:
# Sanity check on synthetic data: overwrite the 'y' and 'x' columns of the
# scratch frame with y ~ Normal(mean 2, sd 1) and x = 4 * y + Normal(mean 0, sd 3).
np.random.seed(123)

# Size the draws from the frame itself so the assignment cannot fail on a
# length mismatch; fall back to the original 356 when the frame is still empty.
n_obs = len(ols) or 356
ols['y'] = np.random.normal(2, 1, n_obs)
ols['x'] = np.random.normal(0, 3, n_obs) + ols['y'] * 4

In [26]:
# Display the scratch frame to inspect the columns it currently holds.
ols

Unnamed: 0_level_0,y,x,x1,x2
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100400,0.914369,0.784732,-0.070119,1.337158
100500,2.997345,18.158784,-0.070119,-4.160602
100600,2.282978,3.466437,1.308417,-1.064912
100700,0.493705,-1.410171,-0.070119,0.145608
100800,1.421400,4.481356,-0.070119,-0.413734
...,...,...,...,...
155500,1.811703,5.755332,-1.788132,0.077631
155600,1.099991,6.474233,-0.236489,0.009532
155700,1.068998,10.105472,-1.643033,0.236525
155800,0.777263,-0.240642,-0.080157,-0.164391


In [27]:
# Fit y on x (with an intercept) using the current contents of the scratch frame.
result = sm.ols("y ~ x", ols).fit()

In [28]:
# Print the statsmodels summary table of the fit currently stored in `result`.
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.660
Model:                            OLS   Adj. R-squared:                  0.659
Method:                 Least Squares   F-statistic:                     687.6
Date:                Wed, 22 Feb 2023   Prob (F-statistic):           5.70e-85
Time:                        15:59:08   Log-Likelihood:                -313.26
No. Observations:                 356   AIC:                             630.5
Df Residuals:                     354   BIC:                             638.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.6906      0.057     12.022      0.0

In [29]:
# R-squared of the fit currently stored in `result`.
result.rsquared

0.6601446801133858

In [30]:
# Adjusted R-squared of the same fit, for comparison with the unadjusted value.
result.rsquared_adj

0.6591846368368699