# Trading at the Close - Inference
-----------------------
-----------------------

## Imports
-----------

In [1]:
import sys
import numpy as np
from pathlib import Path
import pandas as pd

In [2]:
# Prepend the inference-utils directory to the module search path (only once),
# so modules located there take precedence over entries further down the list.
utils_path = Path("/kaggle/input/optiver-inference-utils")
utils_dir = str(utils_path)
if utils_dir not in sys.path:
    sys.path = [utils_dir, *sys.path]

print(sys.path)

['/kaggle/input/optiver-inference-utils', '/kaggle/working', '/kaggle/lib/kagglegym', '/kaggle/lib', '/kaggle/input/optiver-trading-at-the-close', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '', '/root/.local/lib/python3.10/site-packages', '/opt/conda/lib/python3.10/site-packages', '/root/src/BigQuery_Helper']


In [3]:
class CFG:
    """Static configuration: where the artifacts needed for inference live."""

    # Boolean flag; not read by any of the cells visible in this notebook.
    LOCAL = True

    # Root directory of the job artifacts.
    JOBS_PATH = Path("/kaggle/input/optiver-trained-artifacts/job_artifacts")

    # Feature-selection job directory and the JSON file (inside it) that holds
    # the "selected_features" list read by the inference cell.
    FEATURES_PATH = JOBS_PATH / "optiver-feature_selection-0002"
    FEATURES_NAME = "feature_names.json"

    # Directory that the inference cell searches recursively for `*.txt` model files.
    MODEL_PATH = JOBS_PATH / "optiver-tuning_lgbmregressor-0008"

## Inference
--------------

In [4]:
def zero_sum(prices, volumes):
    """Shift ``prices`` so that they sum to zero, weighting the shift by sqrt(volume).

    Each element is reduced by ``sqrt(volume_i) * sum(prices) / sum(sqrt(volumes))``,
    so the total correction equals ``sum(prices)`` and the result sums to zero.

    Args:
        prices: Array-like of values to adjust.
        volumes: Array-like of non-negative weights, same length as ``prices``.

    Returns:
        The adjusted values (same container type the arithmetic
        ``prices - sqrt(volumes) * step`` produces).
    """
    std_error = np.sqrt(volumes)
    total_weight = np.sum(std_error)

    if total_weight == 0:
        # Every weight is zero: dividing would turn the whole output into NaN/inf.
        # Fall back to uniform weights, i.e. plain mean-centring.
        std_error = std_error + 1.0
        total_weight = np.sum(std_error)

    step = np.sum(prices) / total_weight
    out = prices - std_error * step

    return out

In [5]:
import optiver2023

# Create the competition environment and the iterator that the inference loop
# further down consumes: the loop unpacks each yielded item into
# (test, revealed_targets, sample_prediction) and calls env.predict(...) once
# per item.
env = optiver2023.make_env()
iter_test = env.iter_test()

In [6]:
def load_model(
    model_type,
    booster_file
):
    """Instantiate ``model_type`` from the ``.txt`` sibling of ``booster_file``.

    Args:
        model_type: Callable accepting a ``model_file`` keyword argument
            (the inference cell passes ``lightgbm.Booster``).
        booster_file: ``pathlib.Path``-like object; its suffix is replaced
            by ``.txt`` before the path is handed over as a string.

    Returns:
        Whatever ``model_type(model_file=...)`` returns.
    """
    text_dump = booster_file.with_suffix(".txt")
    return model_type(model_file=str(text_dump))

In [7]:
# These imports rely on the utils directory having been added to sys.path earlier.
from utils.files import read_json
from utils.features import make_features, select_features
from lightgbm import Booster

# Names of the features the models are fed with.
selected_features = read_json(CFG.FEATURES_PATH.joinpath(CFG.FEATURES_NAME))["selected_features"]
counter = 0  # number of batches served so far
predictions = []

# Collect every model file below MODEL_PATH. Sorted so the ensemble is built in
# a reproducible order (glob yields paths in arbitrary order).
models_boosters = sorted(CFG.MODEL_PATH.glob("**/*.txt"))
if not models_boosters:
    # Fail before touching the API instead of dividing by zero inside the loop.
    raise FileNotFoundError(f"No '*.txt' model files found under {CFG.MODEL_PATH}")
models = [load_model(Booster, path) for path in models_boosters]


for (test, revealed_targets, sample_prediction) in iter_test:
    # Work on a copy so the raw `test` frame stays available for the volumes below.
    feat = test.copy()
    # NOTE(review): return values are discarded, so both helpers are presumably
    # expected to modify `feat` in place — confirm against utils.features.
    make_features(feat, reduce_memory=True)
    select_features(feat, selected_features)

    # Mean ensemble
    prediction = np.mean([model.predict(feat) for model in models], axis=0)

    # Re-centre the predictions so they sum to zero, weighted by sqrt(bid_size + ask_size).
    prediction = zero_sum(prediction, test.loc[:, "bid_size"] + test.loc[:, "ask_size"])
    # Assign positionally: zero_sum may hand back a pandas Series carrying
    # `test`'s index, and index alignment could silently misplace or drop rows.
    sample_prediction["target"] = np.asarray(prediction)
    env.predict(sample_prediction)
    counter += 1

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
