In [1]:
pip install -qq /kaggle/input/wheels/lightning-2.4.0-py3-none-any.whl

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
from typing import Optional, List
import polars
import os

import torch 
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from torch.optim.lr_scheduler import OneCycleLR

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint

print(f"PyTorch version: {torch.__version__}")
print(f"PyTorch Lightning version: {pl.__version__}")

import sys
sys.path.append("/kaggle/input/mcts-artifacts")
from preproc import process_test_data
import kaggle_evaluation.mcts_inference_server

PyTorch version: 2.4.0+cpu
PyTorch Lightning version: 2.4.0


***
### load artifacts


In [3]:
# Path of the serialized artifact bundle to load. Exactly one assignment is
# active; the commented-out lines are alternative bundles.
# nn_1dcnn_artifacts_path = '/kaggle/input/mcts-artifacts/nn-1dcnn_predict_uni95.pt'

# nn_1dcnn_artifacts_path = '/kaggle/input/mcts-artifacts/nn-1dcnn_predict_fsv24.pt'
nn_1dcnn_artifacts_path = '/kaggle/input/mcts-artifacts/nn-1dcnn_predict_full.pt'
# nn_1dcnn_artifacts_path = '/kaggle/input/mcts-artifacts/nn-1dcnn_predict_uni80.pt'
# nn_1dcnn_artifacts_path = '/kaggle/input/mcts-artifacts/nn-1dcnn_predict_uni90.pt'

# Load the artifact bundle (a dict; this notebook reads the keys 'models',
# 'models_hparams', 'numerical_cols', 'categorical_cols', 'encoder', 'scaler'
# and 'lgbm_encoders' from it).
# SECURITY: weights_only=False performs full unpickling, which can execute
# arbitrary code embedded in the file -- only load bundles you produced or trust.
# map_location="cpu" remaps any tensor storages saved from another device so the
# load also succeeds on a CPU-only kernel.
nn_1dcnn_artifacts = torch.load(
    nn_1dcnn_artifacts_path,
    map_location="cpu",
    weights_only=False,
)

# Number of model state dicts in the bundle (displayed as the cell output)
len(nn_1dcnn_artifacts['models'])

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


15

In [4]:
class SoftOrdering1DCNN(pl.LightningModule):
    """1D-CNN regressor for mixed numerical / categorical tabular input.

    Categorical columns are embedded and concatenated with the numerical
    columns. The result is projected by a weight-normalised dense layer to
    ``sign_size * cha_input`` activations, reshaped to
    ``(batch, cha_input, sign_size)`` and passed through four 1D convolutions
    (with one residual connection around the 3rd and 4th). A dense output head
    followed by ``hardtanh`` produces the prediction.

    NOTE: the submodule attribute names and the legacy ``nn.utils.weight_norm``
    wrappers define the parameter names stored in ``state_dict``. Renaming a
    layer or migrating to ``torch.nn.utils.parametrizations.weight_norm``
    changes those names and would break loading of previously saved weights.

    Args:
        num_input_dim: Number of numerical input features.
        cat_input_dims: For each categorical feature, the number of rows of its
            embedding table (i.e. the number of distinct codes it may take).
        output_dim: Number of outputs of the final dense layer.
        sign_size: Length of the per-channel signal after the input projection
            is reshaped for the CNN.
        cha_input: Number of channels after the reshape.
        cha_hidden: Number of channels used by the 2nd-4th convolutions.
        K: Channel multiplier of the first (grouped) convolution.
        dropout_input: Dropout probability applied before the input projection.
        dropout_hidden: Dropout probability applied before the 2nd and 3rd
            convolutions.
        dropout_output: Dropout probability applied before the output layer.
        embedding_dropout: Dropout probability applied to the concatenated
            embeddings.
        learning_rate: Adam learning rate, also used as ``max_lr`` of the
            one-cycle schedule.
        weight_decay: Adam weight decay.
        embedding_dim: Optional embedding width per categorical feature. When
            ``None``, ``min(50, 1 + ceil(sqrt(cardinality)))`` is used.
        pct_start: Forwarded to ``OneCycleLR``.
        div_factor: Forwarded to ``OneCycleLR``.
        final_div_factor: Forwarded to ``OneCycleLR``.

    Raises:
        ValueError: If ``embedding_dim`` is given but its length differs from
            ``cat_input_dims``.
    """

    def __init__(self, 
            num_input_dim: int,
            cat_input_dims: List[int],
            output_dim: int,
            sign_size: int = 32,
            cha_input: int = 16, 
            cha_hidden: int = 32,
            K: int = 2,
            dropout_input: float = 0.2,
            dropout_hidden: float = 0.2, 
            dropout_output: float = 0.2,
            embedding_dropout: float = 0.2,
            learning_rate: float = 1e-3,
            weight_decay: float = 1e-5,
            embedding_dim: Optional[List[int]] = None,
            pct_start: float = 0.2,
            div_factor: float = 10.0,
            final_div_factor: float = 1e4):
        super().__init__()
        # Record the constructor arguments in self.hparams
        self.save_hyperparameters()

        # Initialize embedding dimensions if not provided
        if embedding_dim is None:
            embedding_dim = [min(50, int(1 + np.ceil(np.sqrt(dim)))) for dim in cat_input_dims]
        elif len(embedding_dim) != len(cat_input_dims):
            raise ValueError("Length of embedding_dim must match number of categorical features.")
        
        self.embedding_dim = embedding_dim
        self.embedding_dropout = embedding_dropout
        
        # Create embedding layers (one table per categorical feature)
        self.embeddings = nn.ModuleList(
            [nn.Embedding(dim, emb_dim) for dim, emb_dim in zip(cat_input_dims, embedding_dim)]
        )
        self.embedding_dropout_layer = nn.Dropout(self.embedding_dropout)

        # Calculate total input dimension after embeddings
        total_embedding_dim = sum(self.embedding_dim)
        total_input_dim = num_input_dim + total_embedding_dim

        # CNN architecture parameters
        hidden_size = sign_size * cha_input
        self.sign_size1 = sign_size
        self.sign_size2 = sign_size//2
        # Flattened size after the final pooling; assumes sign_size is a
        # multiple of 4 so the pooled length equals sign_size // 4.
        self.output_size = (sign_size//4) * cha_hidden
        self.cha_input = cha_input
        self.cha_hidden = cha_hidden
        self.K = K

        # Input projection
        self.batch_norm1 = nn.BatchNorm1d(total_input_dim)
        self.dropout1 = nn.Dropout(dropout_input)
        dense1 = nn.Linear(total_input_dim, hidden_size, bias=False)
        self.dense1 = nn.utils.weight_norm(dense1)

        # 1st conv layer (grouped: one group per input channel, K outputs each)
        self.batch_norm_c1 = nn.BatchNorm1d(cha_input)
        conv1 = nn.Conv1d(
            cha_input, 
            cha_input*K, 
            kernel_size=5, 
            stride=1, 
            padding=2,  
            groups=cha_input, 
            bias=False)
        self.conv1 = nn.utils.weight_norm(conv1, dim=None)
        # Halve the signal length: sign_size -> sign_size // 2
        self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size=self.sign_size2)

        # 2nd conv layer
        self.batch_norm_c2 = nn.BatchNorm1d(cha_input*K)
        self.dropout_c2 = nn.Dropout(dropout_hidden)
        conv2 = nn.Conv1d(
            cha_input*K, 
            cha_hidden, 
            kernel_size=3, 
            stride=1, 
            padding=1, 
            bias=False)
        self.conv2 = nn.utils.weight_norm(conv2, dim=None)

        # 3rd conv layer
        self.batch_norm_c3 = nn.BatchNorm1d(cha_hidden)
        self.dropout_c3 = nn.Dropout(dropout_hidden)
        conv3 = nn.Conv1d(
            cha_hidden, 
            cha_hidden, 
            kernel_size=3, 
            stride=1, 
            padding=1, 
            bias=False)
        self.conv3 = nn.utils.weight_norm(conv3, dim=None)

        # 4th conv layer (grouped: one group per channel)
        self.batch_norm_c4 = nn.BatchNorm1d(cha_hidden)
        conv4 = nn.Conv1d(
            cha_hidden, 
            cha_hidden, 
            kernel_size=5, 
            stride=1, 
            padding=2, 
            groups=cha_hidden, 
            bias=False)
        self.conv4 = nn.utils.weight_norm(conv4, dim=None)

        self.avg_po_c4 = nn.AvgPool1d(kernel_size=4, stride=2, padding=1)
        self.flt = nn.Flatten()

        # Output head
        self.batch_norm2 = nn.BatchNorm1d(self.output_size)
        self.dropout2 = nn.Dropout(dropout_output)
        dense2 = nn.Linear(self.output_size, output_dim, bias=False)
        self.dense2 = nn.utils.weight_norm(dense2)

        # Training parameters
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.pct_start = pct_start
        self.div_factor = div_factor
        self.final_div_factor = final_div_factor

        # Initialize lists to store validation outputs
        self.validation_targets = []
        self.validation_predictions = []

    def forward(self, x_num, x_cat):
        """Compute predictions for one batch.

        Args:
            x_num: Float tensor of numerical features, ``(batch, num_input_dim)``.
            x_cat: Integer tensor of categorical codes, one column per
                embedding table, ``(batch, len(cat_input_dims))``.

        Returns:
            Tensor of predictions; a trailing dimension of size 1 is squeezed
            away, so the shape is ``(batch,)`` when ``output_dim == 1``.
        """
        # Process categorical variables: column i goes through embedding table i
        embedded = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        embedded = torch.cat(embedded, dim=1)
        embedded = self.embedding_dropout_layer(embedded)
        
        # Concatenate numerical and embedded categorical features
        x = torch.cat([x_num, embedded], dim=1)

        # Input projection
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.celu(self.dense1(x))

        # Reshape for CNN: (batch, cha_input * sign_size) -> (batch, cha_input, sign_size)
        x = x.reshape(x.shape[0], self.cha_input, self.sign_size1)

        # CNN backbone
        x = self.batch_norm_c1(x)
        x = F.leaky_relu(self.conv1(x))
        x = self.ave_po_c1(x)

        x = self.batch_norm_c2(x)
        x = self.dropout_c2(x)
        x = F.leaky_relu(self.conv2(x))
        x_s = x  # saved for the residual connection below

        x = self.batch_norm_c3(x)
        x = self.dropout_c3(x)
        x = F.leaky_relu(self.conv3(x))

        x = self.batch_norm_c4(x)
        x = self.conv4(x)
        x = x + x_s  # residual: add the conv2 activations back in
        x = F.leaky_relu(x)

        x = self.avg_po_c4(x)
        x = self.flt(x)

        # Output head
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.dense2(x)
        # hardtanh with default bounds clips the output to [-1, 1]
        x = F.hardtanh(x)

        return x.squeeze(-1)

    def training_step(self, batch, batch_idx):
        """Return the MSE loss for a ``(x_num, x_cat, y)`` batch and log it as ``train_loss``."""
        x_num, x_cat, y = batch
        y_hat = self(x_num, x_cat)
        loss = F.mse_loss(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        """Log the batch MSE as ``valid_loss`` and buffer targets/predictions for the epoch-level RMSE."""
        x_num, x_cat, y = batch
        y_hat = self(x_num, x_cat)
        loss = F.mse_loss(y_hat, y)
        self.log('valid_loss', loss, prog_bar=True)
        # Store targets and predictions for later use
        self.validation_targets.append(y)
        self.validation_predictions.append(y_hat)
        return loss
    
    def predict_step(self, batch, batch_idx):
        """Return predictions for a batch of ``(x_num, x_cat)`` or ``(x_num, x_cat, y)``.

        Raises:
            ValueError: If the batch has neither 2 nor 3 elements.
        """
        if len(batch) not in (2, 3):
            # Fail with a clear message instead of an UnboundLocalError below
            raise ValueError(
                f"Expected a batch of 2 or 3 tensors, got {len(batch)}."
            )
        # A third element (the target), if present, is ignored
        x_num, x_cat = batch[0], batch[1]
        y_hat = self(x_num, x_cat)
        return y_hat

    def on_validation_epoch_end(self):
        """Log ``val_rmse`` over every validation batch of the epoch and reset the buffers."""
        # Nothing was buffered (no validation batch seen): torch.cat would fail
        if not self.validation_targets:
            return
        # Concatenate all targets and predictions
        y = torch.cat(self.validation_targets)
        y_hat = torch.cat(self.validation_predictions)
        rmse = torch.sqrt(F.mse_loss(y_hat, y))
        self.log('val_rmse', rmse, prog_bar=True)
        # Clear the lists for next epoch
        self.validation_targets.clear()
        self.validation_predictions.clear()
                
    def configure_optimizers(self):
        """Build the Adam optimizer and a per-step one-cycle learning-rate schedule."""
        optimizer = torch.optim.Adam(
            self.parameters(), 
            lr=self.learning_rate, 
            weight_decay=self.weight_decay,
        )
        scheduler = OneCycleLR(
            optimizer,
            max_lr=self.learning_rate,
            # The schedule spans every optimizer step the trainer expects to run
            total_steps=self.trainer.estimated_stepping_batches,
            pct_start=self.pct_start,
            div_factor=self.div_factor,
            final_div_factor=self.final_div_factor,
            anneal_strategy='cos',
            cycle_momentum=True,
            base_momentum=0.85,
            max_momentum=0.95,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                # Step the scheduler after every batch, not every epoch
                "interval": "step",
            },
        }

In [5]:
class SoftOrdering1DCNNInference:
    """Ensemble inference wrapper around trained ``SoftOrdering1DCNN`` models.

    Each model is paired (by position) with one LightGBM encoder whose output
    columns are appended to the categorical inputs of that model. The final
    prediction is the mean over all evaluated (encoder, model) pairs.
    """

    def __init__(
        self,
        models_state_dicts,
        models_hparams,
        numerical_cols,
        categorical_cols,
        encoder,
        scaler,
        lgbm_encoders,
    ):
        """Initialize inference class with trained artifacts
        
        Args:
            models_state_dicts: List of model state dictionaries
            models_hparams: List of model hyperparameters
            numerical_cols: List of numerical column names
            categorical_cols: List of categorical column names
            encoder: Fitted OrdinalEncoder for categorical features
            scaler: Fitted StandardScaler for numerical features
            lgbm_encoders: List of LightGBM encoders for feature engineering
        """
        self.numerical_cols = numerical_cols
        self.categorical_cols = categorical_cols
        self.encoder = encoder
        self.scaler = scaler
        self.lgbm_encoders = lgbm_encoders

        # Rebuild every model from its hyperparameters and load its weights
        self.models = []
        for state_dict, hparams in zip(models_state_dicts, models_hparams):
            model = SoftOrdering1DCNN(**hparams)
            model.load_state_dict(state_dict)
            model.eval()  # Set to evaluation mode
            self.models.append(model)

        print("len(numerical_cols):", len(numerical_cols))
        print("len(categorical_cols):", len(categorical_cols))

    @staticmethod
    def _predict_with_model(model, X_num_tensor, X_cat_tensor, batch_size):
        """Run one model over the tensors in batches and return a flat numpy array."""
        dataset = torch.utils.data.TensorDataset(X_num_tensor, X_cat_tensor)
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,  # keep row order so outputs align with the input rows
        )

        batch_predictions = []
        with torch.no_grad():
            for X_num_batch, X_cat_batch in dataloader:
                batch_predictions.append(model(X_num_batch, X_cat_batch).cpu())

        # Concatenate all batch predictions
        return torch.cat(batch_predictions).numpy().flatten()

    def predict_array(self, df_test, batch_size=512):
        """Make predictions on test data using DataLoader
        
        Args:
            df_test: pandas DataFrame containing test features
            batch_size: size of batches for inference
            
        Returns:
            numpy array of predictions (one value per row of ``df_test``),
            averaged over the evaluated (encoder, model) pairs

        Raises:
            RuntimeError: If there is no (encoder, model) pair to evaluate.
        """
        # Nothing to score: avoid torch.cat on an empty list of batches
        if len(df_test) == 0:
            return np.zeros(0)

        # Preprocess test data
        test_processed = process_test_data(
            df_test,
            self.numerical_cols,
            self.categorical_cols,
            self.encoder,
            self.scaler,
            include_position_features=False,
            include_text_features=False,
        )

        # Running sum of the per-model predictions
        predictions = np.zeros(len(df_test))

        # The numerical inputs are identical for every model: build them once
        X_num_tensor = torch.tensor(
            test_processed[self.numerical_cols].values,
            dtype=torch.float32
        )

        # Get predictions from all (encoder, model) pairs
        n_members = 0
        for lgbm_encoder, model in zip(self.lgbm_encoders, self.models):
            # Add LGBM encoder leaves features to the categorical inputs
            lgbm_features = lgbm_encoder.transform(
                test_processed[self.numerical_cols + self.categorical_cols]
            )
            X_test_cat = pd.concat(
                [test_processed[self.categorical_cols], lgbm_features],
                axis=1,
            )
            _categorical_cols = self.categorical_cols + lgbm_encoder.new_columns

            X_cat_tensor = torch.tensor(
                X_test_cat[_categorical_cols].values, 
                dtype=torch.int32
            )

            predictions += self._predict_with_model(
                model, X_num_tensor, X_cat_tensor, batch_size
            )
            n_members += 1

        if n_members == 0:
            raise RuntimeError("No (encoder, model) pairs available for inference.")

        # Average over the pairs actually evaluated: zip() stops at the shorter
        # list, so len(self.models) would be the wrong divisor if the lists
        # ever differed in length.
        predictions /= n_members
        return predictions

    def predict(self, test: polars.DataFrame, sample_sub: polars.DataFrame):
        """Predict for a polars test frame and return ``sample_sub`` with the
        ``utility_agent1`` column set to the predictions."""
        test_pd = test.to_pandas()
        predictions = self.predict_array(test_pd)
        submission = sample_sub.with_columns(polars.Series("utility_agent1", predictions))
        return submission


# Build the ensemble inference wrapper: each constructor argument is looked up
# in the loaded artifact bundle under the key paired with it below.
model_1dcnn = SoftOrdering1DCNNInference(
    **{
        init_arg: nn_1dcnn_artifacts[artifact_key]
        for init_arg, artifact_key in (
            ('models_state_dicts', 'models'),
            ('models_hparams', 'models_hparams'),
            ('numerical_cols', 'numerical_cols'),
            ('categorical_cols', 'categorical_cols'),
            ('encoder', 'encoder'),
            ('scaler', 'scaler'),
            ('lgbm_encoders', 'lgbm_encoders'),
        )
    }
)

  WeightNorm.apply(module, name, dim)


len(numerical_cols): 588
len(categorical_cols): 10


In [6]:
# sanity check #1: run the ensemble end-to-end on the competition's test.csv
# and display the resulting submission frame
sample_sub = polars.read_csv(
    "/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv"
)
test = polars.read_csv(
    "/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv"
)
model_1dcnn.predict(test=test, sample_sub=sample_sub)

Id,utility_agent1
i64,f64
233234,0.152096
233235,-0.183361
233236,0.016027


In [7]:
# # sanity check #2
# train = polars.read_csv("/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv")
#
# test = train.drop(['num_wins_agent1', 'num_draws_agent1', 'num_losses_agent1', 'utility_agent1'])
# sample_sub = train.select(['Id', 'utility_agent1'])
#
# model_1dcnn.predict(test, sample_sub)

***
### inference

In [8]:
# Wrap the ensemble's predict function in the competition inference server
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(
    model_1dcnn.predict
)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Variable unset/empty: exercise the server through the local gateway
    # using the test and sample-submission CSV files
    local_gateway_files = (
        '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
        '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv'
    )
    inference_server.run_local_gateway(local_gateway_files)
else:
    # Variable set: start serving prediction requests
    inference_server.serve()

***