Steps:

- Load data and visualize a sample of them
- Check for distribution across classes

- Give ERM/IRM a shot to improve the performance

In [9]:
# Run following commands if running on local
# !pip install kaggle

# Download kaggle.json from kaggle website under profile->new API section
# !kaggle competitions download -c plant-seedlings-classification
# !unzip -q plant-seedlings-classification.zip

In [10]:
# import os

# os.chdir('/kaggle/input/plant-seedlings-classification/')

In [11]:
# Folder structure
# Training data
# contains images in 12 folders, each folder contains images of a single class
# Test data
# contains all images in a single folder

# Load the data
from torchvision import datasets, transforms
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
size = (
    (image_processor.size["shortest_edge"], image_processor.size["shortest_edge"])
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)

print(size)

transforms = transforms.Compose([
    # transforms.Resize((256, 256)),
    transforms.Resize(size),
    # RandomResizedCrop being used here --> https://huggingface.co/docs/transformers/main/en/tasks/image_classification
    transforms.RandomRotation(360),
    transforms.RandomResizedCrop(size),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
])

dataset = datasets.ImageFolder('./train', transform=transforms)

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


(224, 224)


### Class Distribution

In [12]:
# # Plot class distribution
# from collections import Counter
# import matplotlib.pyplot as plt

# distribution = dict(Counter(dataset.targets))

# # Plot class distribution histogram
# plt.bar(list(map(lambda x: dataset.classes[x], distribution.keys())), distribution.values())
# plt.xticks(rotation=90)
# plt.show()

### Sampling imbalance classes

In [13]:
from torch.utils.data import DataLoader
import numpy as np
from torch.utils.data.sampler import WeightedRandomSampler

def sampler(indices):
    labels = [dataset.targets[x] for x in indices]
    print(f'label length: {len(labels)}')
    distribution = dict(Counter(labels))
    class_weights = {k: 1/v for k, v in distribution.items()}

    samples_weight = np.array([class_weights[t] for t in labels])
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    return sampler

In [14]:
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Subset
from collections import Counter

# Split validation data from training data
dataset_size = len(dataset)
indices = list(range(dataset_size))
np.random.shuffle(indices) # shuffle the dataset before splitting into train and val
print(f'dataset_size: {dataset_size}')

split = int(np.floor(0.8 * dataset_size))
train_indices, val_indices = indices[:split], indices[split:]

# 
BATCH_SIZE = 24

train = DataLoader(Subset(dataset, train_indices), sampler=sampler(train_indices), batch_size=BATCH_SIZE)
val = DataLoader(Subset(dataset, val_indices), sampler=sampler(val_indices), batch_size=BATCH_SIZE)

dataset_size: 4750
label length: 3800
label length: 950


### Visualize distribution after sampling

In [15]:
# # Plot class distribution histogram for training data
# class_counts = [0]*len(dataset.classes)

# for i, (_, label) in enumerate(train):
#     for l in label:
#         class_counts[l] += 1

# # Plot class distribution histogram
# plt.bar(dataset.classes, class_counts)
# plt.xticks(rotation=90)
# plt.show()

In [16]:
# # Plot class distribution histogram for validation data
# class_counts = [0]*len(dataset.classes)

# for i, (_, label) in enumerate(val):
#     for l in label:
#         class_counts[l] += 1

# # Plot class distribution histogram
# plt.bar(dataset.classes, class_counts)
# plt.xticks(rotation=90)
# plt.show()

### Visualize images

In [17]:
# def visualizeBatch(batch, classes=None):
#     # sample 8 indexes from BATCH_SIZE
#     indexes = np.random.choice(BATCH_SIZE, 8, replace=False)
#     for i, j in enumerate(indexes):
#         image, idx = batch[0][j], batch[1][j]
        
#         ax = plt.subplot(2, 4, i + 1)
#         image = image.cpu().numpy()
#         image = image.transpose((1, 2, 0))
#         image = (255.0 * image).astype('uint8')
        
#         plt.imshow(image)
#         if classes is not None:
#             plt.title(classes[idx])
#         plt.axis('off')
    
#     plt.tight_layout()
#     plt.show()

In [18]:
# trainBatch = next(iter(train))
# visualizeBatch(trainBatch, dataset.classes)

In [19]:
# testBatch = next(iter(test))
# visualizeBatch(testBatch)

### FineTuning resnet-50

In [20]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 
                      'mps' if torch.backends.mps.is_built() else 
                      'cpu')

In [21]:
from transformers import ResNetModel, ResNetConfig
from torch import nn
from transformers.modeling_outputs import ImageClassifierOutputWithNoAttention

# model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50").to(device)

class CustomResNet(nn.Module):
    def __init__(self, checkpoint="microsoft/resnet-50", num_classes=12):
        super(CustomResNet, self).__init__()
        self.num_classes = num_classes
        self.model = ResNetModel.from_pretrained(checkpoint)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.1)
        self.pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = torch.nn.Linear(2048, num_classes)
    
    def forward(self, x, labels=None):
        x = self.model(x)
        x = self.pooling(x[0])
        x = self.flatten(x)
        x = self.dropout(x)
        logits = self.classifier(x)
        
        loss = None
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_classes), labels.view(-1))
        
        return ImageClassifierOutputWithNoAttention(loss=loss, logits=logits)

model = CustomResNet().to(device)

Some weights of the model checkpoint at microsoft/resnet-50 were not used when initializing ResNetModel: ['classifier.1.weight', 'classifier.1.bias']
- This IS expected if you are initializing ResNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ResNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
# import sys

# # If true passed in sys argv, then load the model from checkpoint
# if len(sys.argv) > 1 and sys.argv[1] == 'True':
#     model.load_state_dict(torch.load('model.pth'))
# model.load_state_dict(torch.load('best_model.pt', map_location=torch.device(device)))

In [23]:
from tqdm import tqdm

# define training loop
def train_loop(model, train, val, optimizer, loss_fn, epochs=10):
    best_val_acc = 0
    pred_cm = torch.empty(0)
    label_cm = torch.empty(0)
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        total_correct = 0
        loops = 0
        for i, (image, label) in enumerate(tqdm(train)):
            image = image.to(device)
            label = label.to(device)
            
            optimizer.zero_grad()
            output = model(image, labels=label)
            loss = loss_fn(output, label)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            loops += 1
            predicted = output.logits.argmax(-1)
            total_correct += (predicted == label).sum().item()
        
        print(f'Epoch: {epoch}, Training Loss: {total_loss/loops:.2f}, Training Accuracy: {(total_correct/(loops*BATCH_SIZE))*100:.2f}%')
        
        model.eval()
        with torch.no_grad():
            total_loss = 0
            total_correct = 0
            loops = 0
            for i, (image, label) in enumerate(tqdm(val)):
                image = image.to(device)
                label = label.to(device)
                
                output = model(image, labels=label)
                loss = loss_fn(output, label)
                
                total_loss += loss.item()
                loops += 1
                print(output.logits.shape)
                predicted = output.logits.argmax(-1)
                total_correct += (predicted == label).sum().item()
                
                # store predicted and label for confusion matrix
                pred_cm = torch.cat((pred_cm, predicted.cpu()), 0)
                label_cm = torch.cat((label_cm, label.cpu()), 0)
                
            print(f'Epoch: {epoch}, Validation Loss: {total_loss/loops:.2f}, Validation Accuracy: {(total_correct/(loops*BATCH_SIZE))*100:.2f}%')
            
            # Save model if validation accuracy is better than previous best
            if (total_correct/(loops*BATCH_SIZE))*100 > best_val_acc:
                best_val_acc = (total_correct/(loops*BATCH_SIZE))*100
                torch.save(model.state_dict(), 'best_model.pt')
                print(f'Best model saved with validation accuracy: {best_val_acc:.2f}%')
    
    return model, pred_cm, label_cm

In [24]:
from transformers import ResNetForImageClassification

ResNetForImageClassification.from_pretrained("microsoft/resnet-50")

ResNetForImageClassification(
  (resnet): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64

In [25]:
model

CustomResNet(
  (model): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64, 64, kernel_size

In [26]:
epoch = 1
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criteria = torch.nn.CrossEntropyLoss()

In [27]:
model, pred_cm, label_cm = train_loop(model, train, val, optimizer, criteria, epochs=epoch)

  0%|          | 0/159 [00:00<?, ?it/s]

DEBUG:1


  0%|          | 0/159 [00:00<?, ?it/s]

DEBUG:2





TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not ImageClassifierOutputWithNoAttention

In [43]:
pred_cm.shape, label_cm.shape

(torch.Size([1900]), torch.Size([1900]))

In [52]:
# all unique values and their count in label_cm
print(torch.unique(label_cm, return_counts=True))
print(torch.unique(pred_cm, return_counts=True))

(tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.]), tensor([167, 145, 154, 170, 168, 150, 167, 158, 135, 148, 178, 160]))
(tensor([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        852.]), tensor([160, 141, 154, 163, 157, 159, 180, 161, 140, 150, 179, 155,   1]))


In [44]:
from sklearn.metrics import confusion_matrix

# Confusion matrix
conf_mat=confusion_matrix(pred_cm.numpy(), label_cm.numpy())
print(conf_mat)

# Per-class accuracy
class_accuracy=100*conf_mat.diagonal()/conf_mat.sum(1)
print(class_accuracy)

[[144   0   0   1   2   0  12   0   0   0   0   1   0]
 [  0 141   0   0   0   0   0   0   0   0   0   0   0]
 [  0   1 152   0   1   0   0   0   0   0   0   0   0]
 [  0   0   0 161   1   0   0   0   0   0   0   1   0]
 [  0   0   0   0 155   1   1   0   0   0   0   0   0]
 [  0   0   1   1   4 149   0   0   0   1   0   3   0]
 [ 22   0   1   1   2   0 153   0   0   0   0   1   0]
 [  1   0   0   0   0   0   0 158   0   0   0   2   0]
 [  0   0   0   2   1   0   0   0 135   2   0   0   0]
 [  0   2   0   3   0   0   0   0   0 145   0   0   0]
 [  0   1   0   1   0   0   0   0   0   0 177   0   0]
 [  0   0   0   0   2   0   1   0   0   0   0 152   0]
 [  0   0   0   0   0   0   0   0   0   0   1   0   0]]
[ 90.         100.          98.7012987   98.77300613  98.72611465
  93.71069182  85.          98.13664596  96.42857143  96.66666667
  98.88268156  98.06451613   0.        ]


In [51]:
conf_mat.shape

(13, 13)

In [45]:
import os, glob
from PIL import Image
import pandas as pd
from torchvision import transforms

transforms = transforms.Compose([
    # transforms.Resize((256, 256)),
    transforms.Resize(size),
    # RandomResizedCrop being used here --> https://huggingface.co/docs/transformers/main/en/tasks/image_classification
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
])

# create empty dataframe
df = pd.DataFrame(columns=['file', 'species'])

# Run model over test data
for file_name in tqdm(glob.glob(os.path.join('./test', '*.png'))):
    image = transforms(Image.open(file_name)).to(device)
    output = model(image.unsqueeze(0))
    predicted = output.logits.argmax(-1).item()
    
    # append to dataframe
    df = df.append({'file': file_name.split('/')[-1], 'species': dataset.classes[predicted]}, ignore_index=True)

# Save file to csv
df.to_csv('../../working/submission.csv', index=False)

  0%|          | 0/794 [00:01<?, ?it/s]


AttributeError: 'DataFrame' object has no attribute 'append'