## Benchmark Forecast


**Input**: 
* YYYYMMDD_y.csv - retornos de 250 ações escolhidas aleatoriamente.

**Output**: 
* YYYYMMDD_f_bmk.csv - previsões de retornos das 250 ações escolhidas aleatoriamente.


Esse notebook tem como objetivo fazer previsões de retornos dos ativos usando um modelo benchmark. O modelo benchmark escolhido será um AR(3), cuja especificação segue abaixo.

$$
f_{n,t}^{Bmk} \stackrel{\text { def }}{=} \hat{\alpha}_n + \hat{\beta}_{n, 1} \cdot r_{n,t-1} + \hat{\beta}_{n, 2} \cdot r_{n,t-2} + \hat{\beta}_{n, 3} \cdot r_{n,t-3}
$$

In [1]:
# pacotes
import numpy as np
import pandas as pd
import datetime as dt
from statsmodels.tsa.ar_model import AutoReg

In [2]:
# oculta mensagens de avisos
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the returns table; the first CSV column is used as the row index.
returns_path = '../../output/data/20030102_y.csv'
y = pd.read_csv(returns_path, index_col=0)

In [4]:
# Display the loaded returns frame (pandas truncates the rendered view).
y

Unnamed: 0_level_0,FITB(t),AGN(t),ZBRA(t),ADBE(t),CKFR(t),MEDI(t),TXT(t),CMCSA(t),HON(t),SCH(t),...,CVX(t),JPM(t),DISH(t),CHS(t),CSCO(t),FRX(t),OSIP(t),SAFC(t),YUM(t),AT(t)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
93100,0.004079,0.000000,0.000000,0.007968,0.000000,0.006979,0.000000,0.016088,0.000000,0.001828,...,0.000000,0.000000,0.000892,0.005222,0.003123,-0.000101,-0.003065,-0.001723,0.000000,0.000000
93200,0.001525,0.000000,-0.001215,-0.000794,-0.007651,0.002011,-0.003466,-0.001681,0.000000,0.009996,...,0.000000,0.000000,0.001782,-0.002608,0.004552,-0.000607,-0.006776,0.000575,0.000000,0.000000
93300,0.003381,0.000000,0.004832,0.004754,-0.007091,0.005283,0.001157,0.006291,0.000000,0.000904,...,0.000298,0.000000,0.000890,0.001044,0.005962,-0.000608,-0.001237,0.000000,0.000000,0.006245
93400,-0.000338,0.000000,0.000000,0.003945,0.013400,0.000182,0.000000,0.006667,0.000000,0.002706,...,0.000000,0.000000,0.003551,0.001564,0.000376,-0.000912,0.000000,0.000000,0.002452,-0.002142
93500,-0.000507,0.000000,-0.000156,-0.000394,-0.013338,-0.002364,0.002309,-0.006249,0.000000,0.003597,...,-0.001042,0.000000,0.001771,0.000521,-0.002636,0.000203,-0.003409,0.002725,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155600,-0.000995,0.000000,0.000034,-0.003106,0.008088,0.001078,-0.000111,0.000000,-0.000801,0.001748,...,0.000148,0.002761,0.002145,-0.000494,0.000661,0.000100,0.000535,0.000000,0.000000,0.000941
155700,-0.000664,0.000341,-0.000377,0.002735,-0.006349,-0.000359,-0.000111,0.000404,-0.001604,0.000000,...,-0.000148,-0.000394,0.000000,0.000000,-0.001432,-0.000050,-0.000535,0.000000,0.000399,-0.000565
155800,0.001078,-0.001363,-0.001888,-0.001562,0.000579,0.000000,-0.000222,0.000000,0.002005,0.000000,...,-0.000295,0.000394,-0.000429,0.000000,-0.000404,-0.000050,0.000000,0.000422,-0.000399,-0.001696
155900,0.000249,0.001363,-0.000155,-0.000782,0.000000,0.000000,-0.000667,-0.000809,-0.000802,0.002617,...,0.000148,0.000787,-0.001716,0.000494,0.000772,-0.000100,0.001130,-0.000141,0.000399,0.001508


In [5]:
# Empty forecast table with the same columns as `y`.
# Rows start at position 33 because the first 33 observations are used up by
# the first estimation window; the last row of `y` is excluded, as in the
# original layout (reason not stated in the notebook -- TODO confirm).
# dtype=float makes this a NaN-filled numeric frame instead of an object
# frame, so the forecasts written later are stored and displayed as floats.
f_bmk = pd.DataFrame(index=y.index[33:-1], columns=y.columns, dtype=float)

In [6]:
# Display the forecast frame before it is filled.
f_bmk

Unnamed: 0_level_0,FITB(t),AGN(t),ZBRA(t),ADBE(t),CKFR(t),MEDI(t),TXT(t),CMCSA(t),HON(t),SCH(t),...,CVX(t),JPM(t),DISH(t),CHS(t),CSCO(t),FRX(t),OSIP(t),SAFC(t),YUM(t),AT(t)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100400,,,,,,,,,,,...,,,,,,,,,,
100500,,,,,,,,,,,...,,,,,,,,,,
100600,,,,,,,,,,,...,,,,,,,,,,
100700,,,,,,,,,,,...,,,,,,,,,,
100800,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155500,,,,,,,,,,,...,,,,,,,,,,
155600,,,,,,,,,,,...,,,,,,,,,,
155700,,,,,,,,,,,...,,,,,,,,,,
155800,,,,,,,,,,,...,,,,,,,,,,


In [7]:
def AR_reg(i, t):
    """
    Fit the AR(3) benchmark on one estimation window and print its summary.

    i: positional index of the asset (column of `y`) to model
    t: offset of the estimation window (original note: t = {0,...,356})

    Prints the fitted model's summary table and returns None.
    """
    start, stop = t, 33 + t
    # 33 rows per window: the original note describes a window of 30 plus the
    # 3 leading rows that an AR(3) needs as initial lags.
    window = y.iloc[:, i][start:stop]
    fitted = AutoReg(window, lags=3).fit()
    print(fitted.summary())
    return None

In [8]:
def AR_forecast(i, t):
    """
    One-step-ahead AR(3) benchmark forecast for one asset and one window.

    i: positional index of the asset (column of `y`) to forecast
    t: offset of the estimation window (original note: t = {0,...,356});
       the model is fitted on rows t .. t+32 of `y`

    Returns a 3-tuple (index, stock, f): the row label of `f_bmk` at
    position t, the column name of the asset, and the forecast value.
    """
    n_lags = 3
    # The original note describes a window of 30; an AR(3) additionally uses
    # the first 3 rows as initial lags, hence 33 rows in total.
    window = 30 + n_lags

    # Label of the row being forecast and name of the asset.
    index = f_bmk.index[t]
    stock = y.columns[i]

    # Explicit positional slicing: the row labels of `y` are integers, so
    # .iloc removes any ambiguity between positions and labels.
    df = y.iloc[t:t + window, i]

    # bmk_model is the fitted AR(3) model.
    bmk_model = AutoReg(df, lags=n_lags).fit()

    # Parameters are unpacked as intercept followed by the lag-1, lag-2 and
    # lag-3 coefficients; the forecast combines them with the three most
    # recent observations of the window.
    alpha, beta1, beta2, beta3 = bmk_model.params
    f = alpha + beta1 * df.iloc[-1] + beta2 * df.iloc[-2] + beta3 * df.iloc[-3]

    return index, stock, f

In [9]:
# Fill the forecast table cell by cell: one AR(3) fit per (asset, window).
# Bounds come from the data itself rather than the hard-coded 250 / 356, so
# the loop stays correct if the input file has a different number of assets
# or time stamps.
for i in range(y.shape[1]):
    for t in range(len(f_bmk)):
        minute, stock, forecast = AR_forecast(i, t)
        f_bmk.at[minute, stock] = forecast

In [10]:
# Display the forecast frame after the fill loop.
f_bmk

Unnamed: 0_level_0,FITB(t),AGN(t),ZBRA(t),ADBE(t),CKFR(t),MEDI(t),TXT(t),CMCSA(t),HON(t),SCH(t),...,CVX(t),JPM(t),DISH(t),CHS(t),CSCO(t),FRX(t),OSIP(t),SAFC(t),YUM(t),AT(t)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100400,0.001534,0.000429,-0.000868,-0.000201,-0.002526,-0.000648,0.000343,0.003303,-0.000985,0.001097,...,-0.001029,0.0,0.001437,-0.00026,-0.000073,-0.000059,0.000184,-0.000507,0.001751,-0.000169
100500,0.000733,0.000114,-0.00082,-0.000355,0.000151,0.001281,0.001685,-0.000365,0.000454,0.000613,...,-0.000307,0.0,0.001178,-0.000509,-0.000037,0.000009,0.000059,-0.000854,-0.00118,0.003083
100600,0.000815,-0.000041,-0.000844,-0.004314,-0.001357,0.000397,0.000226,0.001212,0.000203,0.00101,...,0.000043,0.0,0.000663,0.0012,0.000415,-0.000141,0.000581,-0.001029,0.001674,0.001007
100700,-0.000151,-0.000049,-0.000487,0.002395,-0.001305,-0.000329,0.000415,0.000036,-0.000669,0.00198,...,-0.001046,0.0,0.000113,0.00113,-0.00041,0.000284,0.000447,0.000103,-0.000751,0.000767
100800,-0.000412,-0.000109,0.000642,-0.00032,0.00103,-0.000066,0.000173,0.000171,0.00019,0.000025,...,-0.000058,0.0,-0.000132,-0.001274,0.000654,0.000342,-0.00002,-0.000341,0.0001,-0.00025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155500,0.000078,0.00007,0.002241,0.000041,-0.000789,-0.000058,-0.00021,0.000489,0.000148,0.000028,...,-0.000006,0.000136,-0.000538,0.00078,-0.000127,0.000107,0.00015,0.000006,-0.000192,-0.000198
155600,-0.000303,-0.000143,0.000359,-0.000163,0.000101,-0.000168,-0.000218,-0.000251,0.000266,-0.000515,...,-0.000076,0.000398,-0.000578,0.000042,0.000224,0.00021,-0.000398,0.000416,-0.000104,0.000433
155700,0.000397,-0.000201,0.000818,0.000502,-0.004281,-0.000052,-0.000062,-0.000667,0.000229,-0.000712,...,0.000027,-0.000762,-0.000715,0.00038,-0.000446,-0.000081,-0.00084,-0.000168,-0.000362,0.000361
155800,0.000583,0.000107,0.000621,-0.000699,0.000894,-0.000057,0.000105,-0.000831,-0.000224,-0.000165,...,-0.000086,-0.001062,-0.00031,0.000111,-0.000121,0.00006,-0.000404,-0.000033,-0.000138,0.000574


In [11]:
# Write the forecasts next to the input file. Forward slashes work on every
# platform (including Windows) and avoid the invalid escape sequences that
# backslashes produce in a normal string literal.
f_bmk.to_csv('../../output/data/20030102_f_bmk.csv', sep=',', encoding='utf-8')