In [None]:
# Install Lightning 2.4.0 from the local wheel file. The explicit `%pip` magic
# installs into the environment of the running kernel and does not depend on
# IPython's automagic being enabled (which a bare `pip ...` line does).
%pip install -qq /kaggle/input/wheels/lightning-2.4.0-py3-none-any.whl

In [None]:
import numpy as np
import pandas as pd
from typing import Optional, List
import polars
import os

import torch 
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from torch.optim.lr_scheduler import OneCycleLR

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint

# Report the installed PyTorch and Lightning versions in the cell output.
print(f"PyTorch version: {torch.__version__}")
print(f"PyTorch Lightning version: {pl.__version__}")

import sys
# Make modules stored under the artifacts dataset directory importable.
sys.path.append("/kaggle/input/mcts-artifacts")
# NOTE(review): `preproc` is presumably resolved through the path appended
# above, so this import must stay after the sys.path change — confirm.
from preproc import process_test_data
import kaggle_evaluation.mcts_inference_server

***
### load artifacts


In [None]:
# Location of the serialized inference artifacts. The loaded dictionary is read
# below through the keys 'models', 'models_hparams', 'numerical_cols',
# 'categorical_cols', 'encoder', 'scaler' and 'lgbm_encoders'.
nn_mlp_artifacts_path = '/kaggle/input/mcts-artifacts/nn-mlp_predict.pt'

# SECURITY NOTE: weights_only=False unpickles arbitrary Python objects, which
# can execute code embedded in the file. Only load artifact files you trust.
# map_location="cpu" keeps the load working on a CPU-only kernel even when the
# tensors were saved from a GPU; the models in this notebook are run on CPU.
nn_mlp_artifacts = torch.load(
    nn_mlp_artifacts_path,
    map_location="cpu",
    weights_only=False,
)

# Number of stored model state dicts (displayed as the cell output)
len(nn_mlp_artifacts['models'])

In [None]:
class MLP(pl.LightningModule):
    """Feed-forward regressor over numerical and embedded categorical inputs.

    Every categorical column is mapped through its own ``nn.Embedding``. The
    embeddings are concatenated with the numerical features, passed through a
    backbone of ``BatchNorm1d -> Linear -> ReLU -> Dropout`` blocks and then
    through a ``BatchNorm1d -> Linear`` head. The head output is clamped by
    ``hardtanh`` (PyTorch default range [-1, 1]).

    Submodule attribute names (``embeddings``, ``embedding_dropout_layer``,
    ``backbone``, ``head``) and the order of layers inside the ``Sequential``
    containers define the ``state_dict`` keys, so they must not be changed if
    previously saved weights are to be loaded.

    Args:
        num_input_dim: Number of numerical input features.
        cat_input_dims: Cardinality (``num_embeddings``) of each categorical feature.
        output_dim: Size of the head's output.
        layers: Hidden layer widths joined by ``-``, e.g. ``"512-256-128"``.
        dropout: Dropout probability applied after every backbone block.
        embedding_dropout: Dropout probability applied to the concatenated embeddings.
        learning_rate: Adam learning rate; also used as ``max_lr`` of the one-cycle schedule.
        weight_decay: Adam weight decay.
        initialization: Weight init for backbone ``Linear`` layers; one of
            ``kaiming_uniform``, ``kaiming_normal``, ``xavier_uniform``, ``xavier_normal``.
        embedding_dim: Optional embedding width per categorical feature. When
            ``None`` each width is ``min(50, 1 + ceil(sqrt(cardinality)))``.
        pct_start: ``pct_start`` forwarded to ``OneCycleLR``.
        div_factor: ``div_factor`` forwarded to ``OneCycleLR``.
        final_div_factor: ``final_div_factor`` forwarded to ``OneCycleLR``.

    Raises:
        ValueError: If ``embedding_dim`` is given with a length different from
            ``cat_input_dims``, or if ``initialization`` is not supported.
    """

    def __init__(self, 
            num_input_dim: int,
            cat_input_dims: List[int],
            output_dim: int,
            layers: str,
            dropout: float,
            embedding_dropout: float,
            learning_rate: float = 1e-3,
            weight_decay: float = 1e-5,
            initialization: str = 'kaiming_uniform',
            embedding_dim: Optional[List[int]] = None,
            pct_start: float = 0.2,
            div_factor: float = 10.0,
            final_div_factor: float = 1e4,
        ):
        super().__init__()
        # Called before `embedding_dim` is rebound below.
        self.save_hyperparameters()

        # Plain scalar settings
        self.dropout = dropout
        self.embedding_dropout = embedding_dropout
        self.pct_start = pct_start
        self.div_factor = div_factor
        self.final_div_factor = final_div_factor
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.initialization = initialization

        if embedding_dim is None:
            # Default width per categorical feature: 1 + ceil(sqrt(cardinality)), capped at 50
            embedding_dim = [min(50, int(1 + np.ceil(np.sqrt(dim)))) for dim in cat_input_dims]
        elif len(embedding_dim) != len(cat_input_dims):
            raise ValueError("Length of embedding_dim must match number of categorical features.")
        self.embedding_dim = embedding_dim

        # Build order is embeddings -> backbone -> head (backbone needs the
        # embedding widths, head needs the backbone output width).
        self.create_embeddings(cat_input_dims, embedding_dim)
        self.create_backbone(num_input_dim, layers)
        self.create_head(output_dim)

        self._init_weights()

        # Per-epoch buffers filled by validation_step and consumed/cleared by
        # on_validation_epoch_end
        self.validation_targets = []
        self.validation_predictions = []

    def create_embeddings(self, cat_input_dims: List[int], embedding_dim: List[int]):
        """Create one embedding table per categorical feature plus the shared dropout."""
        self.embeddings = nn.ModuleList(
            [nn.Embedding(dim, emb_dim) for dim, emb_dim in zip(cat_input_dims, embedding_dim)]
        )
        self.embedding_dropout_layer = nn.Dropout(self.embedding_dropout)

    def create_backbone(self, num_input_dim: int, layers: str):
        """Build the hidden stack described by ``layers`` (e.g. ``"256-128"``)."""
        # Backbone input = numerical features + all embedding widths
        prev_size = num_input_dim + sum(self.embedding_dim)

        backbone_layers = []
        for size in (int(width) for width in layers.split('-')):
            backbone_layers += [
                nn.BatchNorm1d(prev_size),
                nn.Linear(prev_size, size),
                nn.ReLU(),
                nn.Dropout(self.dropout),
            ]
            prev_size = size

        self.backbone = nn.Sequential(*backbone_layers)
        self.backbone_output_size = prev_size

    def create_head(self, output_dim: int):
        """Build the output head: batch norm followed by a linear projection."""
        self.head = nn.Sequential(
            nn.BatchNorm1d(self.backbone_output_size),
            nn.Linear(self.backbone_output_size, output_dim)
        )

    def _init_weights(self):
        """Initialise all ``Linear`` layers.

        Head layers use Xavier-uniform with the ``tanh`` gain; every other
        ``Linear`` uses the scheme named by ``self.initialization``. Biases are
        drawn uniformly from [-0.1, 0.1].

        Raises:
            ValueError: If ``self.initialization`` is not a supported scheme.
        """
        backbone_initializers = {
            'kaiming_uniform': lambda w: nn.init.kaiming_uniform_(w, nonlinearity='relu'),
            'kaiming_normal': lambda w: nn.init.kaiming_normal_(w, nonlinearity='relu'),
            'xavier_uniform': lambda w: nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu')),
            'xavier_normal': lambda w: nn.init.xavier_normal_(w, gain=nn.init.calculate_gain('relu')),
        }
        # Identify head layers once instead of re-scanning the head for every module
        head_linear_ids = {id(m) for m in self.head.modules() if isinstance(m, nn.Linear)}

        for module in self.modules():
            if not isinstance(module, nn.Linear):
                continue

            if id(module) in head_linear_ids:
                nn.init.xavier_uniform_(module.weight, gain=nn.init.calculate_gain('tanh'))
            else:
                init_fn = backbone_initializers.get(self.initialization)
                if init_fn is None:
                    raise ValueError(f"Unsupported initialization method: {self.initialization}")
                init_fn(module.weight)

            # Initialize bias to small values
            if module.bias is not None:
                nn.init.uniform_(module.bias, -0.1, 0.1)

    def forward(self, x_num, x_cat):
        """Compute predictions for a batch.

        Args:
            x_num: Numerical features; concatenated with the embeddings along dim 1.
            x_cat: Integer category codes; column ``i`` is looked up in ``self.embeddings[i]``.

        Returns:
            The hardtanh-clamped head output with a trailing singleton dimension squeezed away.
        """
        if len(self.embeddings) > 0:
            embedded = torch.cat(
                [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)],
                dim=1,
            )
            embedded = self.embedding_dropout_layer(embedded)
            x = torch.cat([x_num, embedded], dim=1)
        else:
            # No categorical features: torch.cat([]) would raise, so use the
            # numerical features on their own.
            x = x_num

        x = self.backbone(x)
        x = self.head(x)
        x = nn.functional.hardtanh(x)

        return x.squeeze(-1)

    def training_step(self, batch, batch_idx):
        """Return the MSE loss for one training batch and log it as ``train_loss``."""
        x_num, x_cat, y = batch
        y_hat = self(x_num, x_cat)
        loss = F.mse_loss(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        """Log ``valid_loss`` for one batch and buffer targets/predictions for the epoch RMSE."""
        x_num, x_cat, y = batch
        y_hat = self(x_num, x_cat)
        loss = F.mse_loss(y_hat, y)
        self.log('valid_loss', loss, prog_bar=True)
        self.validation_targets.append(y)
        self.validation_predictions.append(y_hat)
        return loss
    
    def predict_step(self, batch, batch_idx):
        """Return predictions for a batch of ``(x_num, x_cat)`` or ``(x_num, x_cat, y)``.

        Raises:
            ValueError: If the batch has neither 2 nor 3 elements.
        """
        if len(batch) == 2:
            x_num, x_cat = batch
        elif len(batch) == 3:
            x_num, x_cat, _ = batch
        else:
            # Previously fell through and failed with an UnboundLocalError
            raise ValueError(
                f"Expected a batch of 2 or 3 elements, got {len(batch)}."
            )
        return self(x_num, x_cat)

    def on_validation_epoch_end(self):
        """Log ``val_rmse`` over all buffered validation batches, then reset the buffers."""
        if not self.validation_targets:
            # Nothing was buffered this epoch; torch.cat on an empty list would raise
            return

        y = torch.cat(self.validation_targets)
        y_hat = torch.cat(self.validation_predictions)
        rmse = torch.sqrt(F.mse_loss(y_hat, y))
        self.log('val_rmse', rmse, prog_bar=True)

        # Clear the lists for next epoch
        self.validation_targets.clear()
        self.validation_predictions.clear()
                
    def configure_optimizers(self):
        """Return Adam with a one-cycle learning-rate schedule stepped every batch."""
        optimizer = torch.optim.Adam(
            self.parameters(), 
            lr=self.learning_rate, 
            weight_decay=self.weight_decay,
        )
        scheduler = OneCycleLR(
            optimizer,
            max_lr=self.learning_rate,
            # The schedule length is taken from the attached trainer
            total_steps=self.trainer.estimated_stepping_batches,
            pct_start=self.pct_start,
            div_factor=self.div_factor,
            final_div_factor=self.final_div_factor,
            anneal_strategy='cos',
            cycle_momentum=True,
            base_momentum=0.85,
            max_momentum=0.95,
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "step",
            },
        }

In [None]:
class MLPInference:
    """Ensemble inference wrapper: preprocess a frame, run every MLP, average the outputs."""

    def __init__(
        self,
        models_state_dicts,
        models_hparams,
        numerical_cols,
        categorical_cols,
        encoder,
        scaler,
        lgbm_encoders,
    ):
        """Initialize inference class with trained artifacts
        
        Args:
            models_state_dicts: List of model state dictionaries
            models_hparams: List of model hyperparameters (keyword arguments of ``MLP``)
            numerical_cols: List of numerical column names
            categorical_cols: List of categorical column names
            encoder: Fitted OrdinalEncoder for categorical features
            scaler: Fitted StandardScaler for numerical features
            lgbm_encoders: List of LightGBM encoders for feature engineering; each
                must provide ``transform(frame)`` and a ``new_columns`` list, and
                the i-th encoder is paired with the i-th model
        """
        self.numerical_cols = numerical_cols
        self.categorical_cols = categorical_cols
        self.encoder = encoder
        self.scaler = scaler
        self.lgbm_encoders = lgbm_encoders

        # Rebuild each model from its hyperparameters and restore its weights
        self.models = []
        for state_dict, hparams in zip(models_state_dicts, models_hparams):
            model = MLP(**hparams)
            model.load_state_dict(state_dict)
            model.eval()  # inference mode for dropout / batch norm
            self.models.append(model)

        print("len(numerical_cols):", len(numerical_cols))
        print("len(categorical_cols):", len(categorical_cols))

    def predict_array(self, df_test, batch_size=512):
        """Make predictions on test data using DataLoader
        
        Args:
            df_test: pandas DataFrame containing test features
            batch_size: size of batches for inference
            
        Returns:
            numpy array with one prediction per row of ``df_test``, averaged over
            all (encoder, model) pairs

        Raises:
            ValueError: If there is no (encoder, model) pair to predict with.
        """
        if len(df_test) == 0:
            # No rows: nothing to preprocess or batch (torch.cat([]) would raise)
            return np.zeros(0)

        test_processed = process_test_data(
            df_test,
            self.numerical_cols,
            self.categorical_cols,
            self.encoder,
            self.scaler,
            include_position_features=False,
            include_text_features=False,
        )

        # The numerical inputs are identical for every ensemble member, so the
        # tensor is built once outside the loop.
        X_num_tensor = torch.tensor(
            test_processed[self.numerical_cols].values,
            dtype=torch.float32,
        )

        predictions = np.zeros(len(df_test))
        n_members = 0

        for lgbm_encoder, model in zip(self.lgbm_encoders, self.models):
            # Leaf features from this member's encoder are appended to the
            # base categorical columns.
            lgbm_features = lgbm_encoder.transform(
                test_processed[self.numerical_cols + self.categorical_cols]
            )
            X_test_cat = pd.concat(
                [test_processed[self.categorical_cols], lgbm_features],
                axis=1,
            )
            member_cat_cols = self.categorical_cols + lgbm_encoder.new_columns

            X_cat_tensor = torch.tensor(
                X_test_cat[member_cat_cols].values,
                dtype=torch.int32,
            )

            dataloader = torch.utils.data.DataLoader(
                torch.utils.data.TensorDataset(X_num_tensor, X_cat_tensor),
                batch_size=batch_size,
                shuffle=False,  # keep row order aligned with df_test
            )

            batch_predictions = []
            with torch.no_grad():
                for X_num_batch, X_cat_batch in dataloader:
                    batch_predictions.append(model(X_num_batch, X_cat_batch).cpu())

            predictions += torch.cat(batch_predictions).numpy().flatten()
            n_members += 1

        if n_members == 0:
            raise ValueError("No (lgbm_encoder, model) pairs available for prediction.")

        # Divide by the number of members actually evaluated: zip() stops at the
        # shorter list, so len(self.models) could over-count.
        predictions /= n_members
        return predictions

    def predict(self, test: polars.DataFrame, sample_sub: polars.DataFrame):
        """Predict for a polars test frame and return the filled submission frame.

        Args:
            test: Test features; converted to pandas before prediction.
            sample_sub: Submission template whose ``utility_agent1`` column is
                set to the predictions.

        Returns:
            ``sample_sub`` with the ``utility_agent1`` column holding the predictions.
        """
        test_pd = test.to_pandas()
        predictions = self.predict_array(test_pd)
        submission = sample_sub.with_columns(polars.Series("utility_agent1", predictions))
        return submission


# Build the ensemble wrapper from the loaded artifacts. The two model-related
# entries are stored under keys that differ from the constructor's argument
# names; the remaining entries are forwarded under their own key names.
model_mlp = MLPInference(
    models_state_dicts=nn_mlp_artifacts['models'],
    models_hparams=nn_mlp_artifacts['models_hparams'],
    **{
        key: nn_mlp_artifacts[key]
        for key in (
            'numerical_cols',
            'categorical_cols',
            'encoder',
            'scaler',
            'lgbm_encoders',
        )
    },
)

In [None]:
# Sanity check #1: run the ensemble end-to-end on the competition's test.csv
# and sample_submission.csv and display the resulting submission frame.
test = polars.read_csv(
    source="/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv",
)
sample_sub = polars.read_csv(
    source="/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv",
)
model_mlp.predict(test=test, sample_sub=sample_sub)

In [None]:
# Sanity check #2: run the ensemble on the training file after removing the
# outcome columns, using the Id/utility_agent1 columns as the submission template.
train = polars.read_csv(
    source="/kaggle/input/um-game-playing-strength-of-mcts-variants/train.csv",
)

# Drop the win/draw/loss counts and the target before predicting
test = train.drop(
    [
        'num_wins_agent1',
        'num_draws_agent1',
        'num_losses_agent1',
        'utility_agent1',
    ]
)
sample_sub = train.select(['Id', 'utility_agent1'])

model_mlp.predict(test=test, sample_sub=sample_sub)

***
### inference

In [None]:
# Wrap the ensemble's predict method in the competition's inference server
inference_server = kaggle_evaluation.mcts_inference_server.MCTSInferenceServer(model_mlp.predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Environment variable unset/empty: exercise the server through the local
    # gateway using the bundled test and sample-submission files.
    inference_server.run_local_gateway(
        (
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/test.csv',
            '/kaggle/input/um-game-playing-strength-of-mcts-variants/sample_submission.csv'
        )
    )
else:
    # Environment variable set: hand control to the server
    inference_server.serve()

***