In [None]:
import os
import pickle
import random

import numpy as np
import numpy.matlib
import pandas as pd
from joblib import Parallel, delayed
from scipy import optimize
from tqdm import tqdm


In [None]:
# Root directory of the competition input files. Code in this notebook builds
# paths by plain string concatenation with this value, so the trailing slash
# must be kept.
data_dir = '../input/optiver-realized-volatility-prediction/'

def calc_wap1(df):
    """Compute the level-1 weighted average price of an order book.

    Each side's best price is weighted by the size quoted on the *opposite*
    side, and the sum is divided by the total level-1 size.

    Args:
        df: Mapping/DataFrame exposing the columns ``bid_price1``,
            ``ask_price1``, ``bid_size1`` and ``ask_size1``.

    Returns:
        The element-wise weighted average price (same container type as the
        arithmetic on the input columns produces).
    """
    bid_size = df['bid_size1']
    ask_size = df['ask_size1']
    cross_weighted = df['bid_price1'] * ask_size + df['ask_price1'] * bid_size
    return cross_weighted / (bid_size + ask_size)

def log_return(series):
    """Return the first difference of the natural log of ``series``.

    Args:
        series: A pandas Series of values; ``np.log`` is applied element-wise
            and the result must support ``.diff()``.

    Returns:
        A Series of log returns; the first element is NaN because it has no
        predecessor.
    """
    log_values = np.log(series)
    return log_values.diff()

def n_return(series):
    """Return the simple (arithmetic) period-over-period return of ``series``.

    Args:
        series: A pandas Series (anything supporting ``.shift()``).

    Returns:
        ``(current - previous) / previous`` element-wise; the first element is
        NaN because it has no predecessor.
    """
    previous = series.shift()
    change = series - previous
    return change / previous

def read_train_test(input_dir=None):
    """Load ``train.csv`` and ``test.csv`` and add a ``row_id`` column to each.

    ``row_id`` is ``"<stock_id>-<time_id>"``, built from the string forms of
    the two columns.

    Args:
        input_dir: Directory containing ``train.csv`` and ``test.csv``. When
            omitted, the notebook-level ``data_dir`` is used so the location
            is configured in a single place.

    Returns:
        A ``(train, test)`` tuple of DataFrames.

    Side effects:
        Prints the number of rows in the training set.
    """
    if input_dir is None:
        input_dir = data_dir

    frames = []
    for file_name in ('train.csv', 'test.csv'):
        # os.path.join tolerates a directory given with or without a trailing slash.
        frame = pd.read_csv(os.path.join(input_dir, file_name))
        frame['row_id'] = frame['stock_id'].astype(str) + '-' + frame['time_id'].astype(str)
        frames.append(frame)

    train, test = frames
    print(f'Our training set has {train.shape[0]} rows')
    return train, test

def save_obj(obj, name):
    """Pickle ``obj`` into the file ``<name>.pkl``.

    Args:
        obj: Any picklable object.
        name: Path stem as a string; the ``.pkl`` suffix is appended.

    The file is opened in binary write mode (overwriting any existing file)
    and the object is serialized with ``pickle.DEFAULT_PROTOCOL``.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as sink:
        sink.write(pickle.dumps(obj, pickle.DEFAULT_PROTOCOL))


def load_obj(name):
    """Load and return the object pickled in ``<name>.pkl``.

    Args:
        name: Path stem as a string; the ``.pkl`` suffix is appended.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only use this on files that
        come from a trusted source.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as source:
        raw_bytes = source.read()
    return pickle.loads(raw_bytes)


In [None]:
def garch_forward(return_rate, variance, coefficients):
    """Advance the GARCH(1,1) variance recursion by one step.

    Args:
        return_rate: The return observed at the current step.
        variance: The variance at the current step.
        coefficients: Iterable of exactly three values, in the order
            ``(alpha, beta, omega)``.

    Returns:
        ``omega + alpha * return_rate**2 + beta * variance``.
    """
    alpha, beta, omega = coefficients
    shock_term = alpha * return_rate * return_rate
    persistence_term = beta * variance
    return omega + shock_term + persistence_term

def garch_for_optimization(array, series, variance):
    """Objective function for fitting GARCH(1,1) coefficients by minimization.

    Walks the series from index 1 onward (element 0 is never read),
    accumulating ``-log(variance) - return**2 / variance`` for each step and
    updating the variance with ``garch_forward`` after each observation.

    Args:
        array: The coefficients forwarded to ``garch_forward``
            (``alpha, beta, omega``).
        series: Returns, indexable with integers and exposing ``.shape``.
        variance: Variance used for the first evaluated step.

    Returns:
        The negated accumulated sum, so that a minimizer maximizes the sum.
        If the running variance becomes non-positive, a message is printed
        and the sum accumulated so far is negated and returned.
    """
    log_likelihood = 0
    n_observations = series.shape[0]
    for step in range(1, n_observations):
        observed = series[step]
        if variance <= 0:
            # The log term is undefined here; stop and keep the partial sum.
            print("Negative variance")
            break
        squared_return = observed * observed
        log_likelihood += -np.log(variance) - squared_return / variance
        variance = garch_forward(observed, variance, array)
    return -log_likelihood

In [None]:
# Load the competition tables and collect the distinct stock ids to fit.
train, test = read_train_test()
train_stock_ids = train['stock_id'].unique()

# Per-stock averaged GARCH coefficients, keyed by stock id.
dic_l_alpha = {}
dic_l_beta = {}
dic_l_omega = {}

In [None]:
def for_joblib(id, n_samples=50):
    """Estimate averaged GARCH(1,1) coefficients for a single stock.

    Reads the stock's training order book, computes simple returns of the
    level-1 WAP within each ``time_id``, fits GARCH coefficients on a random
    sample of ``time_id`` buckets and averages the fitted values.

    Args:
        id: Stock id; used to build the parquet partition path and returned
            unchanged so results can be matched back to the stock.
        n_samples: Maximum number of ``time_id`` buckets to fit. Fewer are
            used when the stock has fewer buckets.

    Returns:
        ``(mean_alpha, mean_beta, mean_omega, id)``.

    Raises:
        ValueError: If the book contains no ``time_id`` bucket to fit.
    """
    file_path_book = data_dir + "book_train.parquet/stock_id=" + str(id)
    df = pd.read_parquet(file_path_book)
    df['wap1'] = calc_wap1(df)
    name_return = 'n_return1'
    # transform() returns a result aligned with df's own index on every pandas
    # version; groupby().apply() prepends the group key on pandas >= 2.0, which
    # makes the column assignment fail.
    df[name_return] = df.groupby('time_id')['wap1'].transform(n_return)
    # Population variance (ddof=0) over all returns of the stock, used as the
    # starting variance of every fit.
    variance = df[name_return].var(ddof=0)

    # random.sample() requires a sequence (sets are rejected since Python 3.11)
    # and raises if asked for more items than are available.
    time_ids = list(df['time_id'].unique())
    sampled_time_ids = random.sample(time_ids, min(n_samples, len(time_ids)))
    if not sampled_time_ids:
        raise ValueError(f"No time_id buckets found for stock_id={id}")

    # Loop-invariant optimizer settings.
    bounds = optimize.Bounds([0, 0, 0], [1, 1, np.inf])
    initial_guess = [0.14, 0.76, 2.97]  # V-Lab's estimate

    l_alpha, l_beta, l_omega = [], [], []
    for ti in tqdm(sampled_time_ids):
        # Plain ndarray: positional indexing in the objective is much cheaper
        # than indexing a pandas Series element by element.
        serie = df.loc[df['time_id'] == ti, name_return].to_numpy()
        optimize_res_trust = optimize.minimize(
            garch_for_optimization,
            initial_guess,
            args=(serie, variance),
            method='trust-constr',
            bounds=bounds,
        )
        alpha, beta, omega = optimize_res_trust.x[:3]
        l_alpha.append(alpha)
        l_beta.append(beta)
        l_omega.append(omega)

    n_fits = len(l_alpha)
    return sum(l_alpha) / n_fits, sum(l_beta) / n_fits, sum(l_omega) / n_fits, id

In [None]:
# Fit every stock in parallel; each worker returns
# (mean alpha, mean beta, mean omega, stock id).
l = Parallel(n_jobs=-1, verbose=1)(
    delayed(for_joblib)(stock_id) for stock_id in tqdm(train_stock_ids)
)

# Unpack by name instead of rebinding `id` at notebook scope, which would
# shadow the builtin `id()` for every cell executed afterwards.
for mean_alpha, mean_beta, mean_omega, stock_id in l:
    stock_key = int(stock_id)
    dic_l_alpha[stock_key] = mean_alpha
    dic_l_beta[stock_key] = mean_beta
    dic_l_omega[stock_key] = mean_omega

In [None]:
# Persist each coefficient dictionary under its own name, then echo them.
coefficient_tables = (
    ('dic_l_alpha', dic_l_alpha),
    ('dic_l_beta', dic_l_beta),
    ('dic_l_omega', dic_l_omega),
)

for table_name, table in coefficient_tables:
    save_obj(table, table_name)

for table_name, table in coefficient_tables:
    print(table_name)
    print(table)