## Benchmark Forecast


**Input**: 
* YYYYMMDD_y.csv - retornos de 250 ações escolhidas aleatoriamente.

**Output**: 
* YYYYMMDD_f_bmk.csv - previsões de retornos das 250 ações escolhidas aleatoriamente.


Este notebook tem como objetivo fazer previsões de retornos dos ativos usando um modelo benchmark. O modelo benchmark escolhido é um AR(3), cuja especificação segue abaixo.

$$
f_{n,t}^{Bmk} \stackrel{\text { def }}{=} \hat{\alpha}_n + \hat{\beta}_{n, 1} \cdot r_{n,t-1} + \hat{\beta}_{n, 2} \cdot r_{n,t-2} + \hat{\beta}_{n, 3} \cdot r_{n,t-3}
$$

In [1]:
# pacotes
import numpy as np
import pandas as pd
import datetime as dt
from statsmodels.tsa.ar_model import AutoReg

In [2]:
# oculta mensagens de avisos
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the returns panel; the first CSV column becomes the row index.
y = pd.read_csv(
    filepath_or_buffer='../../output/data/20030102_y.csv',
    index_col=0,
)

In [15]:
# Display the loaded returns panel (pandas abbreviates large frames).
y

Unnamed: 0_level_0,IFUL(t),RMD(t),NI(t),HYSQ(t),HSC(t),ACDO(t),GNLB(t),DRVR(t),BJCT(t),SP(t),...,BPRX(t),DLX(t),RRGB(t),PLUM(t),CALA(t),DHB(t),RRA(t),RMHT(t),FDTR(t),DRRX(t)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
93100,0.0,0.000000,0.001001,-0.011561,0.000000,-0.000284,0.000000,0.0,0.0,0.000000,...,0.015456,0.000119,-0.021070,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.023867
93200,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,-0.011990,0.0,0.0,0.000000,...,-0.000844,-0.000119,0.000000,0.000000,0.0,0.002972,0.000000,0.000000,0.0,0.000000
93300,0.0,0.000000,0.001000,0.011561,0.000939,-0.000851,0.023551,0.0,0.0,0.000000,...,-0.016952,0.000000,0.000000,0.000000,0.0,-0.002972,0.000000,0.000000,0.0,0.000000
93400,0.0,0.000000,0.000499,0.000000,0.000313,-0.000568,0.005731,0.0,0.0,0.003069,...,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,-0.004728
93500,0.0,0.000000,0.000000,0.000000,0.000000,-0.001421,-0.028988,0.0,0.0,0.000000,...,0.000000,-0.000953,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155600,0.0,0.000000,-0.001956,-0.000602,0.000000,-0.001103,0.000000,0.0,0.0,-0.001043,...,0.000000,0.000238,-0.000039,0.000000,0.0,0.000000,0.002732,0.000000,0.0,0.000000
155700,0.0,0.000322,0.000489,0.000000,0.001547,0.000000,0.000000,0.0,0.0,-0.001044,...,0.003656,0.000000,0.000039,0.000000,0.0,0.000000,0.004084,0.007737,0.0,0.000000
155800,0.0,0.000000,-0.000979,0.000000,0.000927,0.000000,0.000000,0.0,0.0,0.000000,...,-0.001461,-0.000238,0.004294,0.003824,0.0,0.000000,0.000000,-0.007737,0.0,0.000000
155900,0.0,0.000645,0.002934,0.000000,-0.000618,-0.001656,0.000000,0.0,0.0,0.000000,...,0.000000,0.000238,-0.016893,0.011197,0.0,0.006192,0.000000,0.000000,0.0,0.000000


In [6]:
# Empty container for the forecasts: same columns as `y`, rows labelled by
# the index of `y` from position 33 up to (but excluding) the last row.
f_bmk = pd.DataFrame(columns=y.columns, index=y.index[33:-1])

In [7]:
# Display the forecast frame as it currently stands.
f_bmk

Unnamed: 0_level_0,IFUL(t),RMD(t),NI(t),HYSQ(t),HSC(t),ACDO(t),GNLB(t),DRVR(t),BJCT(t),SP(t),...,BPRX(t),DLX(t),RRGB(t),PLUM(t),CALA(t),DHB(t),RRA(t),RMHT(t),FDTR(t),DRRX(t)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100400,,,,,,,,,,,...,,,,,,,,,,
100500,,,,,,,,,,,...,,,,,,,,,,
100600,,,,,,,,,,,...,,,,,,,,,,
100700,,,,,,,,,,,...,,,,,,,,,,
100800,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155500,,,,,,,,,,,...,,,,,,,,,,
155600,,,,,,,,,,,...,,,,,,,,,,
155700,,,,,,,,,,,...,,,,,,,,,,
155800,,,,,,,,,,,...,,,,,,,,,,


In [8]:
def AR_reg(i, t):
    """
    Fit an AR(3) model on one column of `y` and print the fit summary.

    i: positional index of the column (asset) of `y` to model
    t: positional offset of the first row of the 33-row estimation window

    Prints the model summary; the function itself returns None.
    """
    # The window holds 33 rows: the AR(3) spends the first 3 rows as lags
    # only, which leaves 30 observations for estimating the parameters.
    window = y.iloc[t:t + 33, i]
    fitted = AutoReg(window, lags=3).fit()
    print(fitted.summary())
    return None

In [12]:
def AR_forecast(i, t):
    """
    One-step-ahead AR(3) forecast for one column of `y`.

    i: positional index of the column (asset) of `y` to forecast
    t: positional offset of the first row of the 33-row estimation window;
       also the row position in `f_bmk` whose label is returned

    Returns a tuple (index, stock, f): the `f_bmk` row label the forecast
    belongs to, the column name of the asset, and the forecast value.
    """
    index = f_bmk.index[t]  # row label (time stamp) of the forecast
    stock = y.columns[i]    # name of the selected asset

    # The window holds 33 rows: the AR(3) spends the first 3 rows as lags
    # only, which leaves 30 observations for estimating the parameters.
    window = y.iloc[t:t + 33, i]
    fitted = AutoReg(window, lags=3).fit()

    # Parameters unpack as the intercept followed by the coefficients of
    # lags 1, 2 and 3.
    alpha, beta1, beta2, beta3 = fitted.params

    # The most recent observation pairs with lag 1, the one before with
    # lag 2, and so on.
    last, second_last, third_last = window.iloc[-1], window.iloc[-2], window.iloc[-3]
    f = alpha + beta1 * last + beta2 * second_last + beta3 * third_last
    return index, stock, f

In [13]:
# Fill `f_bmk` cell by cell: i walks the 250 asset columns of `y`, t walks the
# 356 estimation-window offsets. AR_forecast is called once per (i, t) and its
# result unpacked, so each AR(3) model is fitted a single time rather than
# once for every element of the returned tuple.
for i in range(250):
    for t in range(356):
        row_label, column_name, forecast_value = AR_forecast(i, t)
        f_bmk.at[row_label, column_name] = forecast_value

In [14]:
# Display the forecast frame as it currently stands.
f_bmk

Unnamed: 0_level_0,IFUL(t),RMD(t),NI(t),HYSQ(t),HSC(t),ACDO(t),GNLB(t),DRVR(t),BJCT(t),SP(t),...,BPRX(t),DLX(t),RRGB(t),PLUM(t),CALA(t),DHB(t),RRA(t),RMHT(t),FDTR(t),DRRX(t)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100400,0.0,-0.000219,-0.000605,-0.000391,0.000377,-0.000497,-0.000606,0.0,0.0,0.000578,...,0.000674,-0.000107,-0.000164,-0.000529,0.0,-0.000216,-0.000434,-0.000003,0.0,-0.0001
100500,0.0,-0.000424,0.000619,-0.000399,0.000039,0.00205,-0.000957,0.0,0.0,0.000125,...,0.000723,-0.000107,-0.000234,-0.000529,0.0,0.000708,-0.000434,-0.000004,0.0,-0.003073
100600,0.0,0.000089,0.00087,-0.000413,0.000154,0.000729,-0.000045,0.0,0.0,0.001666,...,0.000747,0.000007,-0.000201,-0.000529,0.0,-0.000237,-0.000074,0.000019,0.0,-0.002858
100700,0.0,-0.000009,-0.000028,-0.000428,0.000653,-0.00026,0.000254,0.0,0.0,0.001607,...,0.000742,0.000089,-0.000206,-0.000529,0.0,-0.000187,-0.000603,-0.000203,0.0,-0.000017
100800,0.0,0.000358,0.00037,-0.000428,0.001295,0.000336,0.000384,0.0,0.0,0.00055,...,0.000742,0.000038,-0.000211,-0.000529,0.0,-0.000187,-0.000474,-0.000172,0.0,-0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155500,0.0,-0.000065,0.000947,-0.000384,-0.000271,0.001153,-0.003492,-0.003124,0.0,0.000121,...,0.000038,0.000204,-0.001226,-0.001376,0.0,-0.002185,0.000443,0.00028,0.0,0.0
155600,0.0,-0.000057,0.001205,-0.000381,-0.000394,-0.0002,0.00169,0.0,0.0,0.000121,...,-0.000303,0.00001,0.005098,0.000099,0.0,0.000419,0.000134,0.000268,0.0,0.0
155700,0.0,-0.000058,0.000111,-0.000394,-0.000147,0.001502,-0.001752,0.0,0.0,0.000038,...,-0.000357,0.000121,0.000189,-0.00146,0.0,-0.000995,0.000624,0.000263,0.0,0.0
155800,0.0,-0.000049,0.000037,-0.000414,-0.000377,-0.000066,-0.000304,0.0,0.0,0.000285,...,-0.001291,0.000145,0.000977,0.000046,0.0,-0.001292,0.000645,-0.000916,0.0,0.0


In [16]:
# Persist the forecasts as CSV. Forward slashes keep the relative path
# portable: a backslash is a path separator only on Windows, and sequences
# such as '\.' or '\o' are invalid string escapes in Python.
f_bmk.to_csv('../../output/data/20030102_f_bmk.csv', sep=',', encoding='utf-8')