In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import pickle
import os
import polars

import xgboost as xgb
# Record the installed XGBoost version in the cell output.
print("xgb.__version__:", xgb.__version__)

import sys
# Put the artifacts dataset directory on the import path before importing
# `preproc` (presumably the module lives in that directory — TODO confirm).
sys.path.append("/kaggle/input/mcts-artifacts")
from preproc import process_test_data
import kaggle_evaluation.mcts_inference_server

xgb.__version__: 2.0.3


***
### load artifacts

In [2]:
# Location of the pickled artifact bundle used for inference.
# Alternative bundle: '/kaggle/input/mcts-artifacts/xgboost_predict_uni90.pkl'
xgboost_artifacts_path = '/kaggle/input/mcts-artifacts/xgboost_predict_fsv24.pkl'

# Deserialize the bundle from disk.
# NOTE: unpickling can execute arbitrary code, so only load files that come
# from a trusted source.
with open(xgboost_artifacts_path, mode='rb') as artifacts_file:
    xgboost_artifacts = pickle.load(artifacts_file)

# Display the number of models stored under the "models" key.
len(xgboost_artifacts["models"])

configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


15

In [3]:
class XGBoostInference:
    """Averages the predictions of several trained XGBoost models.

    The instance keeps the trained models together with the column lists and
    the fitted encoder/scaler that are forwarded to ``process_test_data`` at
    prediction time.
    """

    def __init__(
        self,
        models,
        numerical_cols,
        categorical_cols,
        encoder,
        scaler,
    ):
        """Initialize inference class with trained artifacts

        Args:
            models: Non-empty iterable of trained XGBoost models
            numerical_cols: List of numerical column names
            categorical_cols: List of categorical column names
            encoder: Fitted OrdinalEncoder for categorical features
            scaler: Fitted StandardScaler for numerical features

        Raises:
            ValueError: If ``models`` contains no model.
        """
        # Materialise the ensemble: it is iterated on every prediction call,
        # and averaging an empty ensemble would silently produce NaN.
        models = list(models)
        if not models:
            raise ValueError("models must contain at least one trained model")

        self.models = models
        self.numerical_cols = numerical_cols
        self.categorical_cols = categorical_cols
        self.encoder = encoder
        self.scaler = scaler

        print("len(numerical_cols):", len(numerical_cols))
        print("len(categorical_cols):", len(categorical_cols))

    def predict_array(self, df_test):
        """Make predictions on test data

        Args:
            df_test: pandas DataFrame containing test features

        Returns:
            numpy array with the mean prediction of all models, clipped to
            the interval [-1, 1]
        """
        # Preprocess test data
        test_processed = process_test_data(
            df_test,
            self.numerical_cols,
            self.categorical_cols,
            self.encoder,
            self.scaler,
            include_position_features=True,
            include_text_features=True,
        )

        # Model input columns: numerical first, then categorical. Copying
        # into lists guarantees that `+` concatenates (rather than adding
        # element-wise) whatever sequence type the column collections are.
        feature_cols = list(self.numerical_cols) + list(self.categorical_cols)

        # Create feature types list for XGBoost: "c" marks a categorical
        # feature, "q" every other one.
        categorical_names = set(self.categorical_cols)
        feature_types = [
            "c" if col in categorical_names else "q"
            for col in feature_cols
        ]

        # Create XGBoost DMatrix for test data
        test_dmatrix = xgb.DMatrix(
            data=test_processed[feature_cols],
            feature_types=feature_types,
            enable_categorical=True
        )

        # Average the predictions of all models, then clip to [-1, 1]
        per_model_predictions = [
            model.predict(test_dmatrix)
            for model in self.models
        ]
        predictions = np.mean(per_model_predictions, axis=0)
        predictions = np.clip(predictions, -1, 1)

        return predictions

    # The annotations below are written as strings so that they are not
    # evaluated when the class body is executed.
    def predict(self, test: "polars.DataFrame", sample_sub: "polars.DataFrame"):
        """Predict for a polars test frame and fill in the submission frame.

        Args:
            test: polars DataFrame containing test features
            sample_sub: polars DataFrame used as the submission template

        Returns:
            ``sample_sub`` with the column "utility_agent1" set to the
            predictions
        """
        test_pd = test.to_pandas()
        predictions = self.predict_array(test_pd)
        submission = sample_sub.with_columns(polars.Series("utility_agent1", predictions))
        return submission


# Build the inference wrapper from the unpickled artifact bundle: every
# constructor argument is read from the bundle entry of the same name.
_ARTIFACT_KEYS = ("models", "numerical_cols", "categorical_cols", "encoder", "scaler")
model_xgboost = XGBoostInference(
    **{key: xgboost_artifacts[key] for key in _ARTIFACT_KEYS}
)

len(numerical_cols): 219
len(categorical_cols): 8


In [4]:
# Sanity check: read the competition's test and sample-submission CSV files,
# run the model on them and display the returned submission frame.
test = polars.read_csv(
    source="/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv",
)
sample_sub = polars.read_csv(
    source="/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv",
)
model_xgboost.predict(test=test, sample_sub=sample_sub)

Id,utility_agent1
i64,f32
233234,0.141854
233235,-0.152971
233236,-0.0335


***
### inference

In [5]:
# Hand the model's `predict` method to the competition inference server.
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(
    model_xgboost.predict
)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Rerun flag is unset or empty: drive the server through the local
    # gateway using the test and sample-submission CSV files.
    inference_server.run_local_gateway(
        (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv'
        )
    )
else:
    # Rerun flag is set: start serving.
    inference_server.serve()

***