## LASSO Forecast

Esse notebook tem como objetivo estimar a variável de previsão a partir do LASSO, $f_{n, t}^{\mathrm{LASSO}}$.

$$
f_{n, t}^{\mathrm{LASSO}} \stackrel{\text { def }}{=} \tilde{\alpha}_n+\sum_{n^{\prime}=1}^{3 \cdot N} \tilde{\beta}_{n, n^{\prime}} \cdot x_{n^{\prime}, t}
$$

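onde $x_{n^{\prime}, t}$ é o $n^{\prime}$-ésimo preditor — o retorno defasado (em $t-1$, $t-2$ ou $t-3$) de um dos $N$ ativos, daí os $3 \cdot N$ termos da soma —, padronizado para ter média nula e variância unitária dentro da janela de estimação de 30 minutos.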

In [1]:
# pacotes
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV

In [2]:
# Hide warning messages for the rest of the session.
# NOTE(review): with no category given this ignores every warning, so any
# solver/convergence warnings raised while fitting would be hidden as well —
# confirm that this is intended.
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the candidate predictors; the first CSV column is used as the row index.
x = pd.read_csv('../../output/data/20030102_x.csv', index_col=0)

In [4]:
# Drop the first three rows and the last row.
# NOTE(review): presumably the leading rows lack complete lagged values — TODO confirm.
# This cell overwrites x, so re-running it without reloading trims the frame again.
x = x[3:-1]

In [5]:
# Show the predictor frame after trimming.
x

Unnamed: 0_level_0,A(t-1),A(t-2),A(t-3),AA(t-1),AA(t-2),AA(t-3),AAAB(t-1),AAAB(t-2),AAAB(t-3),AAC(t-1),...,ZOOM(t-3),ZQK(t-1),ZQK(t-2),ZQK(t-3),ZRAN(t-1),ZRAN(t-2),ZRAN(t-3),ZTEL(t-1),ZTEL(t-2),ZTEL(t-3)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
93400,0.005464,0.001645,0.000000,0.002167,-0.001734,0.002602,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.000000,-0.002073,-0.002068,0.016667,0.0,0.0,-0.140357
93500,-0.001091,0.005464,0.001645,0.000865,0.002167,-0.001734,0.0,0.0,0.0,0.0,...,0.0,0.000371,0.000000,0.000000,0.005724,-0.002073,-0.002068,0.0,0.0,0.000000
93600,0.000000,-0.001091,0.005464,0.000000,0.000865,0.002167,0.0,0.0,0.0,0.0,...,0.0,0.000370,0.000371,0.000000,-0.007800,0.005724,-0.002073,0.0,0.0,0.000000
93700,0.001635,0.000000,-0.001091,-0.002599,0.000000,0.000865,0.0,0.0,0.0,0.0,...,0.0,0.003697,0.000370,0.000371,-0.004863,-0.007800,0.005724,0.0,0.0,0.000000
93800,0.000545,0.001635,0.000000,-0.002171,-0.002599,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.003697,0.000370,-0.000871,-0.004863,-0.007800,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155500,0.000522,0.000000,0.000000,0.000000,0.000000,-0.000424,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.000000,0.001923,0.000000,-0.000995,0.0,0.0,0.000000
155600,0.000000,0.000522,0.000000,0.001696,0.000000,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.000000,0.000000,-0.000596,0.001923,0.000000,0.0,0.0,0.000000
155700,0.000000,0.000000,0.000522,-0.000424,0.001696,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.001096,0.000000,0.000000,0.001655,-0.000596,0.001923,0.0,0.0,0.000000
155800,-0.001043,0.000000,0.000000,0.000000,-0.000424,0.001696,0.0,0.0,0.0,0.0,...,0.0,0.000730,0.001096,0.000000,-0.001655,0.001655,-0.000596,0.0,0.0,0.000000


In [6]:
# Load the target returns; the first CSV column is used as the row index.
y = pd.read_csv('../../output/data/20030102_y.csv', index_col=0)

In [7]:
# Same row slice as the one applied to x (first three rows and last row removed).
# This cell overwrites y, so re-running it without reloading trims the frame again.
y = y[3:-1]

In [8]:
# Show the target frame after trimming.
y

Unnamed: 0_level_0,IFUL(t),RMD(t),NI(t),HYSQ(t),HSC(t),ACDO(t),GNLB(t),DRVR(t),BJCT(t),SP(t),...,BPRX(t),DLX(t),RRGB(t),PLUM(t),CALA(t),DHB(t),RRA(t),RMHT(t),FDTR(t),DRRX(t)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
93400,0.0,0.000000,0.000499,0.000000,0.000313,-0.000568,0.005731,0.0,0.0,0.003069,...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,-0.004728
93500,0.0,0.000000,0.000000,0.000000,0.000000,-0.001421,-0.028988,0.0,0.0,0.000000,...,0.000000,-0.000953,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000
93600,0.0,0.000000,0.000998,0.000000,0.000000,-0.001708,0.000000,0.0,0.0,-0.001022,...,0.000000,0.000477,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000
93700,0.0,0.000000,-0.002497,0.000000,0.000313,-0.003426,0.000000,0.0,0.0,0.004082,...,0.000000,0.000238,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000
93800,0.0,0.000000,0.002497,0.000000,0.000000,-0.003437,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.004728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155500,0.0,0.000000,-0.002440,0.000000,0.000000,0.000827,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,0.003920,-0.003824,0.0,0.000000,0.000000,0.000000,0.0,0.000000
155600,0.0,0.000000,-0.001956,-0.000602,0.000000,-0.001103,0.000000,0.0,0.0,-0.001043,...,0.000000,0.000238,-0.000039,0.000000,0.0,0.000000,0.002732,0.000000,0.0,0.000000
155700,0.0,0.000322,0.000489,0.000000,0.001547,0.000000,0.000000,0.0,0.0,-0.001044,...,0.003656,0.000000,0.000039,0.000000,0.0,0.000000,0.004084,0.007737,0.0,0.000000
155800,0.0,0.000000,-0.000979,0.000000,0.000927,0.000000,0.000000,0.0,0.0,0.000000,...,-0.001461,-0.000238,0.004294,0.003824,0.0,0.000000,0.000000,-0.007737,0.0,0.000000


In [9]:
# stopped here (work in progress)
def LASSO_reg(x, y, t, window=30):
    """
    Fit one cross-validated LASSO regression per target column on one
    estimation window.

    The window is made of the `window` consecutive rows starting at row
    position `t`. Inside the window every predictor is standardized to zero
    mean and unit variance; predictors that cannot be standardized (constant
    inside the window, so their standard deviation is zero) are dropped.
    For each target column the penalty is selected with 10-fold `LassoCV`
    and a `Lasso` model is then fitted with that penalty.

    Parameters:
        x: DataFrame with the candidate predictors (one column per predictor).
        y: DataFrame (or Series) with the targets, using the same row order
           as `x`. The original notes describe it as 250 randomly chosen stocks.
        t: row position where the estimation window starts. The original
           notes give t = {0,...,356}.
        window: number of rows in the estimation window (default 30). It must
           be at least 10 because the penalty is chosen by 10-fold
           cross-validation.

    Returns:
        dict mapping each target column name to its fitted `Lasso` estimator.

    Raises:
        ValueError: if the window starting at `t` does not contain exactly
        `window` rows in both `x` and `y`.
    """
    # .iloc makes the row selection explicitly positional, whatever the index dtype.
    x_win = x.iloc[t:t + window]
    y_win = y.iloc[t:t + window]
    if y_win.ndim == 1:
        # Accept a single target passed as a Series.
        y_win = y_win.to_frame()

    if len(x_win) != window or len(y_win) != window:
        raise ValueError(
            "incomplete estimation window: expected %d rows starting at t=%r, "
            "got %d rows in x and %d rows in y"
            % (window, t, len(x_win), len(y_win))
        )

    # Standardize inside the window; zero-variance columns become NaN and are dropped.
    x_std = (x_win - x_win.mean()) / x_win.std()
    x_std = x_std.dropna(axis=1)

    fitted = {}
    # LassoCV handles a single target at a time, so each target column gets
    # its own penalty and its own coefficients.
    for target, y_col in y_win.items():
        model = LassoCV(cv=10, random_state=0, max_iter=10000)
        # alpha_ only exists after the cross-validated model has been fitted.
        model.fit(x_std, y_col)
        reg = Lasso(alpha=model.alpha_)
        reg.fit(x_std, y_col)
        fitted[target] = reg
    return fitted