In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import pickle

import catboost as cb
print("cb.__version__:", cb.__version__)

import sys
sys.path.append("/kaggle/input/mcts-artifacts")
from preproc import process_test_data

from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import OrdinalEncoder, StandardScaler

cb.__version__: 1.2.7


***
### load artifacts


In [2]:
# Path to the pickled CatBoost inference bundle (models, feature lists, fitted preprocessors)
catboost_artifacts_path = '/kaggle/input/mcts-artifacts/catboost_predict_fs24.pkl'

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so this
# path must only ever point at an artifact bundle produced by a trusted training run.
with open(catboost_artifacts_path, 'rb') as f:
    catboost_artifacts = pickle.load(f)

# Fail fast with a readable message if the bundle lacks a key that later cells index.
required_artifact_keys = {"models", "numerical_cols", "categorical_cols", "encoder", "scaler"}
missing_artifact_keys = required_artifact_keys - set(catboost_artifacts)
assert not missing_artifact_keys, (
    f"artifact bundle is missing keys: {sorted(missing_artifact_keys)}"
)

Note: unpickling can execute arbitrary code, so only load artifacts from a source you trust. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
# Number of trained models stored in the loaded artifact bundle
len(catboost_artifacts["models"])

15

In [4]:
class ModelInference:
    """Averaged-ensemble inference wrapper around a list of trained models.

    Keeps the fitted preprocessing artifacts and the feature-name lists, and
    turns a raw test DataFrame into clipped ensemble predictions.
    """

    def __init__(self, models, numerical_cols, categorical_cols, encoder, scaler, text_cols=None):
        """Store the trained artifacts needed at inference time.

        Args:
            models: Sequence of trained models; each must expose ``predict(pool)``.
            numerical_cols: Iterable of numerical column names.
            categorical_cols: Iterable of categorical column names.
            encoder: Fitted encoder for categorical features; forwarded
                unchanged to ``process_test_data``.
            scaler: Fitted scaler for numerical features; forwarded unchanged
                to ``process_test_data``.
            text_cols: Optional iterable of text column names. ``None`` (the
                default) means no text features are passed to the model.
        """
        self.models = models
        # Copy into plain lists: predict() joins these with ``+``, which only
        # means "concatenate" for lists (tuples would raise, array-likes would
        # broadcast). The copy also isolates us from later caller-side mutation.
        self.numerical_cols = list(numerical_cols)
        self.categorical_cols = list(categorical_cols)
        self.text_cols = list(text_cols) if text_cols is not None else []
        self.encoder = encoder
        self.scaler = scaler

        print("len(numerical_cols):", len(self.numerical_cols))
        print("len(categorical_cols):", len(self.categorical_cols))

    def predict(self, df_test):
        """Predict on raw test data with the averaged ensemble.

        Args:
            df_test: pandas DataFrame containing the raw test features.

        Returns:
            numpy array holding the mean of all models' predictions,
            clipped to the interval [-1, 1].

        Raises:
            ValueError: If the instance holds no models (averaging an empty
                list would otherwise silently yield NaN).
        """
        if len(self.models) == 0:
            raise ValueError("ModelInference.predict requires at least one model")

        # NOTE(review): include_text_features is always True, even when
        # self.text_cols is empty — confirm this is what process_test_data expects.
        test_processed = process_test_data(
            df_test,
            self.numerical_cols,
            self.categorical_cols,
            self.encoder,
            self.scaler,
            include_position_features=True,
            include_text_features=True,
        )

        # Column order fed to the models: numerical, categorical, then text (if any).
        features = self.numerical_cols + self.categorical_cols + self.text_cols
        pool_params = {
            'data': test_processed[features],
            'cat_features': self.categorical_cols,
        }
        if self.text_cols:
            pool_params['text_features'] = self.text_cols

        test_pool = cb.Pool(**pool_params)

        # Average the per-model predictions element-wise across the ensemble.
        per_model_predictions = [model.predict(test_pool) for model in self.models]
        predictions = np.mean(per_model_predictions, axis=0)

        # Clamp to [-1, 1] — presumably the valid range of the target; TODO confirm.
        return np.clip(predictions, -1, 1)

# Ensemble used for this run: the first five of the stored models.
# Other subsets that were tried: [5:10], [10:], and the full list.
# Passing text_cols=["LudRules"] would additionally feed that column as a text feature.
selected_models = catboost_artifacts["models"][:5]

# Everything except the model list is forwarded from the artifact bundle as-is.
preprocessing_kwargs = {
    key: catboost_artifacts[key]
    for key in ("numerical_cols", "categorical_cols", "encoder", "scaler")
}

model_catboost = ModelInference(models=selected_models, **preprocessing_kwargs)

len(numerical_cols): 219
len(categorical_cols): 8


In [5]:
# Sanity check: run the ensemble end-to-end on the bundled test file before serving.
test = pd.read_csv("/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv")
sanity_predictions = model_catboost.predict(test)

# Turn the visual check into real checks so a broken pipeline fails here, not during serving.
assert len(sanity_predictions) == len(test), "expected one prediction per test row"
assert np.isfinite(sanity_predictions).all(), "predictions contain NaN or inf"

# Keep the predictions as the cell's last expression so they are still displayed.
sanity_predictions

array([ 0.09798549, -0.13745244, -0.03180037])

***
### inference

In [6]:
import os
import polars as pl
import kaggle_evaluation.mcts_inference_server

In [7]:
def predict(test: pl.DataFrame, sample_sub: pl.DataFrame):
    """Inference-server callback: fill the sample submission with predictions.

    Args:
        test: polars DataFrame with the test rows to score.
        sample_sub: polars DataFrame used as the submission template.

    Returns:
        ``sample_sub`` with its ``utility_agent1`` column set to the
        predictions of the global ``model_catboost``.
    """
    # The model wrapper works on pandas, so convert before predicting.
    utility_scores = model_catboost.predict(test.to_pandas())
    utility_column = pl.Series("utility_agent1", utility_scores)
    return sample_sub.with_columns(utility_column)

In [8]:
# Wrap the predict callback in the competition's inference server.
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(predict)

running_competition_rerun = bool(os.getenv('KAGGLE_IS_COMPETITION_RERUN'))

# Files handed to the local gateway when the notebook is not a competition rerun.
local_gateway_files = (
    '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
    '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv',
)

if not running_competition_rerun:
    inference_server.run_local_gateway(local_gateway_files)
else:
    inference_server.serve()

***