In [None]:
import numpy as np
import pandas as pd

import lightgbm as lgb
from sklearn.decomposition import PCA

from itertools import combinations


# Location of the competition training set on the Kaggle input mount.
TRAIN_CSV = '/kaggle/input/optiver-trading-at-the-close/train.csv'

# Raw training frame; later cells derive the features and the label from it.
train = pd.read_csv(TRAIN_CSV)

In [None]:
def feat_eng(df):
    """Derive the model features from a raw auction/order-book frame.

    Typical call: ``feat_eng(train.drop(columns="target"))``.

    The input must provide ``bid_size``, ``ask_size``, ``bid_price``,
    ``ask_price``, ``imbalance_size``, ``matched_size``, ``reference_price``,
    ``far_price``, ``near_price`` and ``wap``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw rows. The frame is NOT modified; all work happens on a new frame.

    Returns
    -------
    pandas.DataFrame
        Every input column except ``row_id``, ``time_id`` and ``date_id``
        (those that are present), followed by the engineered columns:
        notional volumes, bid/ask spread, size imbalances, one pairwise
        imbalance per price pair, one triplet imbalance per price triple,
        and ``log_return``.

    Notes
    -----
    Divisions follow pandas semantics: ``x / 0`` gives ``±inf`` and ``0 / 0``
    gives ``NaN``; nothing is clipped here.
    """
    # Dropping the identifier columns first yields a fresh frame, so the
    # caller's frame is never mutated and the result is not a selection of
    # another frame (callers add further columns to it afterwards).
    df = df.drop(columns=["row_id", "time_id", "date_id"], errors="ignore")

    # volume (size weighted by price)
    df["bid_volume"] = df.eval("bid_size * bid_price")
    df["ask_volume"] = df.eval("ask_size * ask_price")

    # bid ask spread
    df["bid_ask_spread"] = df["ask_price"] - df["bid_price"]

    # imbalance ratio
    df["imbalance_ratio"] = df.eval("imbalance_size / matched_size")

    df['imb_s1'] = df.eval("(bid_size - ask_size) / (bid_size + ask_size)")
    df['imb_s2'] = df.eval("(imbalance_size - matched_size) / (matched_size + imbalance_size)")

    price_cols = ["reference_price", "far_price", "near_price", "ask_price", "bid_price", "wap"]

    # Pairwise imbalance: normalised difference of every pair of prices.
    for a, b in combinations(price_cols, 2):
        df[f"{a}_{b}_imb"] = df.eval(f"({a} - {b}) / ({a} + {b})")

    # Triplet imbalance: (max - mid) / (mid - min) over every price triple.
    for a, b, c in combinations(price_cols, 3):
        trio = df[[a, b, c]]
        # skipna=False is essential: with the default, a missing price is
        # skipped by max/min and counted as 0 by sum, so `mid_` collapses to
        # 0 and the feature becomes a meaningless -max/min. Propagating NaN
        # marks the row as missing instead.
        max_ = trio.max(axis=1, skipna=False)
        min_ = trio.min(axis=1, skipna=False)
        mid_ = trio.sum(axis=1, skipna=False) - min_ - max_

        df[f"{a}_{b}_{c}_imb"] = (max_ - mid_) / (mid_ - min_)

    # Natural log of wap. The column keeps its historical name "log_return"
    # so feature names stay stable; NaN inputs stay NaN.
    df["log_return"] = np.log(df["wap"])

    return df

In [None]:
# Label vector as a plain NumPy array.
y = train['target'].values

# Feature matrix: engineered from every training column except the label.
features_input = train.drop(columns="target")
X = feat_eng(features_input)

In [None]:
# PCA
# Compress the raw price columns (every train column whose name contains
# "price") into a single component and append it to X as "pca_prices".
prices = [feat for feat in train.columns if "price" in feat]

# random_state pins the result in case svd_solver="auto" selects the
# randomised solver; without it the component may differ from run to run.
pca = PCA(n_components=1, random_state=42)

# Missing prices are replaced by 1 before fitting, since PCA does not accept
# NaN. NOTE(review): 1 presumably matches the scale the prices are
# normalised to -- confirm.
# fit_transform returns shape (n_rows, 1); take the single column explicitly.
X["pca_prices"] = pca.fit_transform(X[prices].fillna(1))[:, 0]

In [None]:
# Hyper-parameters for the LightGBM regressor, kept in one place so they are
# easy to tune; objective "mae" trains against absolute error.
LGBM_PARAMS = {
    "learning_rate": 0.018,
    "max_depth": 9,
    "n_estimators": 700,
    "num_leaves": 442,
    "objective": "mae",
    "reg_alpha": 0.02,
    "reg_lambda": 0.01,
}

# Fit on the full feature matrix (no hold-out split in this notebook).
model = lgb.LGBMRegressor(**LGBM_PARAMS)
model.fit(X, y)

In [None]:
# feature importance
# Pair each importance score with its column name, largest first.
feat_imp = (
    pd.Series(model.feature_importances_, index=X.columns)
    .sort_values(ascending=False)
)
print(feat_imp)

# Horizontal bar chart of the same ranking.
feat_imp.plot.barh()

In [None]:
# Competition serving API: build the environment once, then obtain the
# iterator that the inference loop below consumes.
import optiver2023

env = optiver2023.make_env()
iter_test = env.iter_test()

In [None]:
# Inference loop: for every batch served by the API, rebuild the features
# exactly as for training, predict, and hand the filled-in prediction frame
# back to the environment.
counter = 0  # number of batches processed so far
for test, revealed_targets, sample_prediction in iter_test:

    feature = feat_eng(test)
    # Reuse the PCA fitted on the training prices (same NaN -> 1 fill);
    # transform returns shape (n_rows, 1), so take the single column.
    feature["pca_prices"] = pca.transform(feature[prices].fillna(1))[:, 0]

    # Give the model exactly the training columns, in training order. Any
    # column that exists only in the served frame would otherwise change the
    # feature count or shift the feature positions seen by the model.
    feature = feature[X.columns]

    sample_prediction['target'] = model.predict(feature)
    env.predict(sample_prediction)

    counter += 1