In [1]:
import pandas as pd
import polars as pl
import numpy as np
import os, gc
from tqdm.auto import tqdm
from matplotlib import pyplot as plt
import pickle

from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
import lightgbm as lgb
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor

import warnings
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

import kaggle_evaluation.jane_street_inference_server

In [2]:
class CONFIG:
    """Static settings shared by the cells of this notebook."""

    # Seed value; not referenced by any of the cells shown here.
    seed = 42

    # Name of the column the model predicts.
    target_col = "responder_6"

    # Model input columns, in order: "weight" followed by
    # "feature_00" through "feature_78" (80 names in total).
    feature_cols = ["weight", *(f"feature_{num:02d}" for num in range(79))]

    # Currently disabled:
    # ord_ftrs = ["feature_09", "feature_10", "feature_11"]

In [3]:
# Pickled training artefact, resolved relative to the working directory.
result_path = "result_null_nolags.pkl"

# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source (e.g. this project's own training run).
with open(result_path, mode="rb") as pickle_file:
    result = pickle.load(pickle_file)

# The unpickled object is indexed by key; the fitted estimator is stored
# under "model".
model = result["model"]

# Bare trailing expression so the notebook renders the estimator's repr.
model

In [10]:
# Most recent non-None `lags` frame passed to `predict`. It is only stored
# here; nothing in `predict` reads it back.
lags_ : pl.DataFrame | None = None

def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame | pd.DataFrame:
    """Score one batch of rows with the module-level ``model``.

    Args:
        test: Batch to score. Must contain a ``row_id`` column and every
            column listed in ``CONFIG.feature_cols``.
        lags: Optional lags frame. When not ``None`` it replaces the
            module-level ``lags_``; it is not used for the prediction itself.

    Returns:
        A polars DataFrame with exactly two columns, ``row_id`` and
        ``CONFIG.target_col``, holding one Float64 prediction per row of
        ``test``, clipped to the interval [-5, 5].

    Raises:
        AssertionError: If the assembled frame has the wrong type, column
            list, or row count.
    """
    global lags_
    if lags is not None:
        lags_ = lags

    # Debugging aids, kept for reference:
    # print(test[CONFIG.feature_cols].columns)    # columns sent to the model
    # print(model.get_booster().feature_names)    # columns the model was trained on

    # The model is fed a pandas frame restricted to the configured features.
    features = test[CONFIG.feature_cols].to_pandas()
    preds = model.predict(features)
    print("predict> preds.shape =", preds.shape)

    # Clip to [-5, 5] -- presumably the valid range of the target; confirm
    # against the competition's data description.
    clipped = np.clip(preds, a_min=-5, a_max=5)

    # Use the configured target name so the output column and the checks
    # below cannot drift apart from CONFIG.
    predictions = test.select('row_id').with_columns(
        pl.Series(
            name=CONFIG.target_col,
            values=clipped,
            dtype=pl.Float64,
        )
    )

    # print(predictions)

    expected_columns = ['row_id', CONFIG.target_col]

    # The predict function must return a DataFrame
    assert isinstance(predictions, (pl.DataFrame, pd.DataFrame)), (
        f"predict must return a DataFrame, got {type(predictions).__name__}"
    )
    # with columns 'row_id' and the target column ('responder_6')
    assert list(predictions.columns) == expected_columns, (
        f"expected columns {expected_columns}, got {list(predictions.columns)}"
    )
    # and as many rows as the test data.
    assert len(predictions) == len(test), (
        f"expected {len(test)} rows, got {len(predictions)}"
    )

    return predictions

In [11]:
# Hand the `predict` callback to the competition's inference server wrapper.
inference_server = kaggle_evaluation.jane_street_inference_server.JSInferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Environment variable unset or empty: run against the local parquet
    # files (test data first, lags second).
    inference_server.run_local_gateway(('./dataset/test.parquet', './dataset/lags.parquet'))
else:
    # Variable is set -- presumably by Kaggle during the scoring rerun
    # (confirm against the competition docs): start serving.
    print("Hi")
    inference_server.serve()

predict> preds.shape = (39,)
shape: (39, 2)
┌────────┬─────────────┐
│ row_id ┆ responder_6 │
│ ---    ┆ ---         │
│ i64    ┆ f64         │
╞════════╪═════════════╡
│ 0      ┆ -0.004085   │
│ 1      ┆ -0.003021   │
│ 2      ┆ -0.004085   │
│ 3      ┆ -0.004085   │
│ 4      ┆ -0.003021   │
│ …      ┆ …           │
│ 34     ┆ -0.004085   │
│ 35     ┆ -0.003021   │
│ 36     ┆ -0.02121    │
│ 37     ┆ -0.003021   │
│ 38     ┆ -0.004085   │
└────────┴─────────────┘
