# About this notebook

In this MNIST image classification task, we cannot ensemble by taking the mean of the labels predicted by different models, as we would do in a regression problem. That would make no sense: averaging a predicted label (digit 6) from one model with a predicted label (digit 0) from another model would output a 3 or, even worse, a float number (a class that does not exist).

So we have two options to ensemble models:
- make predictions for several models and take the mode (most common predicted digit), just like a hard voting classifier with a majority rule; or
- perform some calculation on the probabilities the models predicted for each class (like a soft voting classifier).

In this notebook I am ensembling only two models, so a majority vote cannot break the tie whenever the two models disagree. I will therefore go with the second approach: I take a weighted combination of the probabilities each model predicted for each sample and use it to predict the sample's class.

For that we will need:
- Two Dataset classes (because the models used have different input shapes)
- Two DataLoader objects
- Two Model classes
- One inference function
- Add data (two models): best custom model, best timm model
- No training, only inference: just predict and ensemble.

You can find the models I am using on the links below, where they were trained:

[Model 1: custom CNN model](https://www.kaggle.com/hinepo/pytorch-tutorial-cv-99-67-lb-99-26)

[Model 2: ResNet50](https://www.kaggle.com/hinepo/transfer-learning-with-timm-models-and-pytorch)

# Imports

In [None]:
!pip install timm -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

import timm

# Load data

In [None]:
# Directory containing the competition CSV files, and the directory outputs are written to
INPUT_PATH = '../input/digit-recognizer/'
OUTPUT_PATH = './'

# Read the test set; the bare name on the last line displays the DataFrame in the notebook
test = pd.read_csv(f"{INPUT_PATH}test.csv")
test

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

# CFG

In [None]:
class CFG:
    """Constants shared by the cells of this notebook."""

    # number of output classes of both models
    N_CLASS = 10
    # batch size passed to the inference DataLoaders
    BATCH_SIZE = 1024
    # architecture name handed to timm.create_model
    model_name = 'resnet50'

# Dataset class (for custom model)

In [None]:
class Digit_Inference_Dataset_Custom(Dataset):
    """Inference-only dataset: serves each DataFrame row as a (1, 28, 28) float tensor.

    Every row of `df` is reshaped to one 28x28 single-channel image, so each row
    must hold 784 pixel values. No label is returned.
    """

    def __init__(self, df, augmentations = None):
        # kept for interface parity; this class never applies it
        self.augmentations = augmentations
        # divide the raw pixel values by 255 once, up front
        self.features = df.values / 255

    def __len__(self):
        # one sample per DataFrame row
        return self.features.shape[0]

    def __getitem__(self, idx):
        pixels = self.features[idx]
        # add the leading channel axis expected by the custom CNN (in_channels=1)
        single_channel = pixels.reshape((1, 28, 28))
        return torch.FloatTensor(single_channel)

# Dataset class (for ResNet)

In [None]:
class Digit_Inference_Dataset_ResNet(Dataset):
    """Inference-only dataset: serves each DataFrame row as a (3, 28, 28) float tensor.

    Every row of `df` is reshaped to a 28x28 image (784 pixel values per row) and
    that single plane is copied into three identical channels. No label is returned.
    """

    def __init__(self, df, augmentations = None):
        self.df = df
        # kept for interface parity; this class never applies it
        self.augmentations = augmentations
        # all columns are pixels here, so the whole frame is divided by 255
        self.features = df[:].values / 255

    def __len__(self):
        # one sample per DataFrame row
        return len(self.df)

    def __getitem__(self, idx):
        plane = torch.from_numpy(self.features[idx].reshape((28, 28))).float()
        # replicate the greyscale plane: the timm model is fed 3-channel images
        return torch.stack((plane, plane, plane), dim = 0)

# Model class (custom model)

In [None]:
class Digit_Custom_Model(nn.Module):
    """Small CNN classifier: two conv stages followed by two linear layers.

    Input is a batch of single-channel images; with 28x28 inputs the feature map
    after the second pooling is 64 x 4 x 4, which is what `fc1` expects.
    Output is one raw score (logit) per class, CFG.N_CLASS in total.

    Attribute names are part of the checkpoint format (state_dict keys), so they
    must not be renamed.
    """

    def __init__(self):
        super().__init__()

        # stage 1: 5x5 convolution without padding, 1 -> 32 feature maps
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, padding=0)

        # activation shared by both convolutional stages
        self.actv = nn.LeakyReLU()

        self.batchnorm1 = nn.BatchNorm2d(32)

        # 2x2 max pooling shared by both convolutional stages
        self.maxpool = nn.MaxPool2d(kernel_size=(2, 2))

        # registered on the module, but forward() does not apply it
        self.dropout = nn.Dropout(0.25)

        # stage 2: 5x5 convolution without padding, 32 -> 64 feature maps
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=0)

        self.batchnorm2 = nn.BatchNorm2d(64)

        # collapse (channels, height, width) into a single feature vector
        self.flatten = nn.Flatten()

        # dense head
        self.fc1 = nn.Linear(64 * 4 * 4, 256)

        # 1-D batch norm because it runs on the flattened features
        self.batchnorm3 = nn.BatchNorm1d(256)

        # final layer: one output per class
        self.classifier = nn.Linear(256, CFG.N_CLASS)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        conv_stages = (
            (self.conv1, self.batchnorm1),
            (self.conv2, self.batchnorm2),
        )
        # each stage: convolution -> activation -> batch norm -> max pool
        for conv, norm in conv_stages:
            x = self.maxpool(norm(self.actv(conv(x))))

        x = self.flatten(x)

        # dense head: linear -> batch norm -> classifier
        x = self.batchnorm3(self.fc1(x))
        return self.classifier(x)

# Model class (ResNet50)

In [None]:
class Digit_ResNet_Model(nn.Module):
    """Thin nn.Module wrapper around a timm model with CFG.N_CLASS outputs.

    The wrapped network is stored as `self.cnn`; that attribute name is part of
    the checkpoint format (state_dict keys are prefixed with it).
    """

    def __init__(self, model_name = CFG.model_name, pretrained = True):
        super().__init__()

        self.model_name = model_name
        # timm builds the architecture and sizes its classifier to num_classes
        backbone = timm.create_model(model_name, pretrained = pretrained, num_classes = CFG.N_CLASS)
        self.cnn = backbone

    def forward(self, x):
        """Return the wrapped model's output for the batch `x`."""
        return self.cnn(x)

# Inference and Ensemble

In [None]:
def softmax(x):
    """Return the row-wise softmax of a 2-D array of logits.

    Args:
        x: array of shape (n_samples, n_classes).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Subtracting each row's maximum leaves the softmax unchanged mathematically
    # but keeps np.exp from overflowing to inf (and the result from becoming NaN).
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

def _predict_probabilities(model, loader):
    """Return the softmax class probabilities of `model` over `loader` as one CPU tensor."""
    batch_probs = []
    # disable gradients for inference
    with torch.no_grad():
        for X in loader:
            logits = model(X.to(device))
            # softmax over the class dimension, then move off the device
            batch_probs.append(torch.softmax(logits, dim=1).cpu())
    # a single concatenation avoids re-copying the growing result on every batch
    return torch.cat(batch_probs, dim=0)


def inference(test_loader_custom, test_loader_resnet, weight_custom=0.6, weight_resnet=0.4):
    """Predict a class for every sample by soft-voting the two trained models.

    Both checkpoints are loaded, each model predicts class probabilities for its
    own loader, and the two probability tables are combined with a weighted sum.
    The loaders must yield the same samples in the same order.

    Args:
        test_loader_custom: DataLoader yielding input batches for the custom CNN.
        test_loader_resnet: DataLoader yielding input batches for the ResNet.
        weight_custom: weight of the custom model's probabilities in the ensemble.
        weight_resnet: weight of the ResNet's probabilities in the ensemble.

    Returns:
        1-D CPU tensor with the predicted class index of every sample.
    """
    start = time.time()

    ################################# model 1: custom model #################################
    model_custom = Digit_Custom_Model()
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine
    custom_state = torch.load("../input/pytorch-tutorial-cv-99-67-lb-99-26/DigitModel_ep21.pth",
                              map_location=device)
    model_custom.load_state_dict(custom_state)
    model_custom.eval()
    model_custom.to(device)

    ################################# model 2: ResNet #################################
    # pretrained=False: every weight is overwritten by the checkpoint below, so
    # fetching the pretrained weights first would be wasted work
    model_resnet = Digit_ResNet_Model(pretrained=False)
    resnet_state = torch.load("../input/transfer-learning-with-timm-models-and-pytorch/DigitModel_ep38.pth",
                              map_location=device)
    model_resnet.load_state_dict(resnet_state)
    model_resnet.eval()
    model_resnet.to(device)

    probs_custom = _predict_probabilities(model_custom, test_loader_custom)
    probs_resnet = _predict_probabilities(model_resnet, test_loader_resnet)

    # ensemble by probabilities: weighted combination of the probabilities predicted by each model
    ens = probs_custom * weight_custom + probs_resnet * weight_resnet
    # index of the highest ensembled probability = predicted digit/class
    final_predictions = torch.argmax(ens, dim=1)

    # log
    end = time.time()
    time_delta = np.round(end - start, 5)
    print('Elapsed time: ', time_delta, "s")

    return final_predictions

In [None]:
# Wrap the test DataFrame once per model, because the two models take different input shapes
inference_dataset_custom = Digit_Inference_Dataset_Custom(test)
inference_dataset_resnet = Digit_Inference_Dataset_ResNet(test)

# One DataLoader per dataset. Shuffling stays off so both loaders visit the rows
# in the same order as the DataFrame.
inference_dataloader_custom = DataLoader(inference_dataset_custom, batch_size=CFG.BATCH_SIZE, shuffle=False)
inference_dataloader_resnet = DataLoader(inference_dataset_resnet, batch_size=CFG.BATCH_SIZE, shuffle=False)

In [None]:
# Predict a class for every test sample with the two-model ensemble;
# the bare name on the last line displays the result in the notebook
final_predictions = inference(
    test_loader_custom=inference_dataloader_custom,
    test_loader_resnet=inference_dataloader_resnet,
)
final_predictions

# Submission

In [None]:
# Start from the sample submission so the file keeps the expected columns and row order
submission = pd.read_csv(INPUT_PATH + "sample_submission.csv")

# Convert the prediction tensor to a NumPy array explicitly instead of relying on
# pandas' implicit handling of a torch.Tensor
submission["Label"] = final_predictions.cpu().numpy()

submission.to_csv(OUTPUT_PATH + 'submission.csv', index = False)
submission.head()

# Check predictions

In [None]:
# Plot a few test images with the label the ensemble predicted for each of them

fig = plt.figure(figsize=(12, 12))
fig.suptitle('Visualizing Predictions', fontsize=24)

# rows [begin, end) of the test set are shown; 20 images fill the 4 x 5 grid
begin = 0
end = begin + 20

for slot, row in enumerate(range(begin, end), start=1):
    # a test row holds the flattened image; restore its 28 x 28 layout
    pixels = np.array(test.iloc[row, :]).reshape(28, 28)
    predicted = str(submission.loc[row, 'Label'])

    plt.subplot(4, 5, slot)
    plt.title("Predicted label: " + predicted, color="red")
    plt.imshow(pixels, cmap='gray')
    plt.axis('off')

Upvote if you found value in this notebook! 😀