In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import pickle
import os
import polars

import catboost as cb
print("cb.__version__:", cb.__version__)

import sys
sys.path.append("/kaggle/input/mcts-artifacts")
from preproc import process_test_data
import kaggle_evaluation.mcts_inference_server

cb.__version__: 1.2.7


***
### load artifacts


In [2]:
# Path of the pickled CatBoost artifact bundle used for inference.
# Alternative bundle filenames previously referenced here (same directory):
#   catboost_predict_fs24.pkl, catboost_predict_full.pkl, catboost_predict_fsv2.pkl,
#   catboost_predict_fsv23.pkl, catboost_predict_fsv34.pkl, catboost_text_predict_uni90.pkl,
#   catboost_predict_int95.pkl, catboost_predict_int97.pkl, catboost_predict_int99.pkl
catboost_artifacts_path = '/kaggle/input/mcts-artifacts/catboost_predict_uni80.pkl'

# Deserialize the bundle.
# NOTE: unpickling can execute arbitrary code, so only load bundles from a trusted source.
with Path(catboost_artifacts_path).open('rb') as f:
    catboost_artifacts = pickle.load(f)

# Display how many models the bundle contains.
len(catboost_artifacts["models"])

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


15

In [3]:
class CATBoostInference:
    """Ensemble inference wrapper around a collection of trained CatBoost models.

    Preprocesses incoming test data with ``process_test_data``, scores it with
    every model, averages the per-model predictions and clips the result to
    ``[-1, 1]``.
    """

    def __init__(
        self,
        models,
        numerical_cols,
        categorical_cols,
        encoder,
        scaler,
        text_cols=None,
    ):
        """Initialize inference class with trained artifacts

        Args:
            models: Non-empty iterable of trained models exposing ``predict(pool)``
            numerical_cols: Iterable of numerical column names
            categorical_cols: Iterable of categorical column names
            encoder: Fitted encoder for categorical features; forwarded
                unchanged to ``process_test_data``
            scaler: Fitted scaler for numerical features; forwarded unchanged
                to ``process_test_data``
            text_cols: Iterable of text column names, or None when the models
                use no text features

        Raises:
            ValueError: If ``models`` is empty.
        """
        # Materialise every collection as a plain list. Feature names are later
        # combined by concatenation, which is only correct for lists: tuples
        # raise on ``tuple + list`` and pandas/numpy string containers
        # concatenate element-wise instead of appending.
        self.models = list(models)
        if not self.models:
            raise ValueError("CATBoostInference requires at least one model")
        self.numerical_cols = list(numerical_cols)
        self.categorical_cols = list(categorical_cols)
        self.text_cols = None if text_cols is None else list(text_cols)
        self.encoder = encoder
        self.scaler = scaler

        print("len(numerical_cols):", len(self.numerical_cols))
        print("len(categorical_cols):", len(self.categorical_cols))

    def _feature_columns(self):
        """Return the ordered feature names: numerical, categorical, then text (if any)."""
        features = [*self.numerical_cols, *self.categorical_cols]
        if self.text_cols is not None:
            features.extend(self.text_cols)
        return features

    def predict_array(self, df_test):
        """Make predictions on test data

        Args:
            df_test: pandas DataFrame containing test features

        Returns:
            numpy array of predictions: the mean over all models, clipped to [-1, 1]

        Raises:
            ValueError: If no models are available to predict with.
        """
        # Fail fast: averaging an empty list would silently produce NaN.
        if not self.models:
            raise ValueError("CATBoostInference requires at least one model")

        # Preprocess test data
        test_processed = process_test_data(
            df_test,
            self.numerical_cols,
            self.categorical_cols,
            self.encoder,
            self.scaler,
            include_position_features=True,
            include_text_features=True,
        )

        # Create CatBoost Pool for test data; text features are only declared
        # when text columns were configured.
        pool_kwargs = {
            'data': test_processed[self._feature_columns()],
            'cat_features': self.categorical_cols,
        }
        if self.text_cols is not None:
            pool_kwargs['text_features'] = self.text_cols

        test_pool = cb.Pool(**pool_kwargs)

        # Average the predictions of all models, then clip to [-1, 1]
        per_model_predictions = [model.predict(test_pool) for model in self.models]
        predictions = np.mean(per_model_predictions, axis=0)
        return np.clip(predictions, -1, 1)

    # Annotations are strings so that defining the class does not require
    # ``polars`` to be resolvable at definition time.
    def predict(self, test: "polars.DataFrame", sample_sub: "polars.DataFrame"):
        """Score a polars test frame and return the filled-in submission frame.

        Args:
            test: polars DataFrame of test rows; converted to pandas for scoring
            sample_sub: polars DataFrame used as the submission template

        Returns:
            ``sample_sub`` with the ``utility_agent1`` column set to the predictions
        """
        test_pd = test.to_pandas()
        predictions = self.predict_array(test_pd)
        submission = sample_sub.with_columns(polars.Series("utility_agent1", predictions))
        return submission


# Build the inference wrapper from the loaded bundle, using the full ensemble.
# To evaluate only part of the ensemble, pass a slice of
# catboost_artifacts["models"] instead (e.g. [:5], [5:10] or [10:]).
model_catboost = CATBoostInference(
    **{
        key: catboost_artifacts[key]
        for key in (
            "models",
            "numerical_cols",
            "categorical_cols",
            "text_cols",
            "encoder",
            "scaler",
        )
    }
)

len(numerical_cols): 287
len(categorical_cols): 8


In [4]:
# Sanity check: run the full predict path once on the public test file and
# display the resulting submission frame.
sample_sub = polars.read_csv("/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv")
test = polars.read_csv("/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv")
model_catboost.predict(test=test, sample_sub=sample_sub)

Id,utility_agent1
i64,f64
233234,0.127013
233235,-0.15161
233236,-0.044114


***
### inference

In [5]:
# Expose the ensemble's predict method through the competition inference server.
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(
    model_catboost.predict
)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # KAGGLE_IS_COMPETITION_RERUN is unset or empty: run the local gateway
    # against the public test file and sample submission.
    inference_server.run_local_gateway(
        (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv',
        )
    )
else:
    # KAGGLE_IS_COMPETITION_RERUN is set: serve requests instead.
    inference_server.serve()

***