# Starter Code - Classification on Unlabeled and Mislabeled Images

<h4> Import Libraries </h4>

In [1]:
import os
import sys

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

# Pytorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

<h4> Training Configuration </h4>
You should experiment with different hyperparameters

In [2]:
CONFIG = {"seed": 420,
          "epochs": 10,
          "img_size": 64,
          "num_classes": 30,
          "train_batch_size": 128,
          "val_batch_size": 128,
          "learning_rate": 1e-2,
          "num_workers": 2,
          "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
          # StepLR Scheduler hyperparameters
          "step_size": 10,
          "gamma": 0.95
          }

<h4> Set seed for reproducibility </h4>

In [3]:
torch.manual_seed(CONFIG["seed"])
torch.cuda.manual_seed(CONFIG["seed"])

<h4> Data Directories </h4>

In [4]:
ROOT_DIR = '/kaggle/input/classification-on-unlabeled-and-mislabeled-images/'
TRAIN_LABELED_DIR = os.path.join(ROOT_DIR, 'train/train/labeled_images/')
TRAIN_UNLABELED_DIR = os.path.join(ROOT_DIR, 'train/train/unlabeled_images/')
TEST_DIR = os.path.join(ROOT_DIR, 'test/test/images/')
SAVE_PATH = "best_model.pth"

<h4> Read the CSV </h4>

In [5]:
df = pd.read_csv(os.path.join(ROOT_DIR, 'train_annotations.csv'))
df.head()

Unnamed: 0,image_name,class_name
0,000000.JPEG,250605
1,000001.JPEG,516810
2,000002.JPEG,289648
3,000003.JPEG,688319
4,000004.JPEG,964607


<h4> Create mapping for class_name </h4>

In [6]:
class_names = df.class_name.unique()
class_to_index_mapping = {}
index_to_class_mapping = {}
for i in range(CONFIG['num_classes']):
    class_to_index_mapping[class_names[i]] = i
    index_to_class_mapping[i] = class_names[i]
class_to_index_mapping

{250605: 0,
 516810: 1,
 289648: 2,
 688319: 3,
 964607: 4,
 431115: 5,
 517908: 6,
 44558: 7,
 665003: 8,
 500192: 9,
 477273: 10,
 335856: 11,
 158226: 12,
 914948: 13,
 690093: 14,
 759848: 15,
 28368: 16,
 612747: 17,
 812491: 18,
 589958: 19,
 436479: 20,
 358316: 21,
 339246: 22,
 537498: 23,
 146725: 24,
 896857: 25,
 877556: 26,
 56639: 27,
 648396: 28,
 821885: 29}

<h4> Dataset Class </h4>

In [7]:
class CustomDataset(Dataset):
    def __init__(self, csv_path, data_dir, transform, dataset_type='train'):
        self.transform = transform
        df = pd.read_csv(csv_path)
        self.data_dir = data_dir
        
        # Split training set into train and validation
        train_data=df.sample(frac=0.8,random_state=CONFIG['seed'])
        if dataset_type == 'train':
            self.data = train_data
        elif dataset_type == 'val':
            self.data=df.drop(train_data.index)

        # For submission use full training set
        elif dataset_type == 'full-train':
                self.data = df

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]

        img_path = os.path.join(self.data_dir, row['image_name'])
        img_label = class_to_index_mapping[row.class_name]

        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)        
            
        output = {'img': img, 'label': img_label}

        return output

In [8]:
# Dataset for loading unlabeled set and test set

class UnlabeledDataset(Dataset):
    def __init__(self, data_dir, transform):
        self.transform = transform
        self.data_dir = data_dir
        self.img_names = [filename for filename in sorted(os.listdir(self.data_dir))]

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_dir, self.img_names[idx])

        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)        
            
        output = {'img': img, 'img_name': self.img_names[idx]}

        return output

<h4> Augmentations </h4>
You should experiment with adding/removing transforms

In [9]:
train_transforms = transforms.Compose([
    transforms.Resize(CONFIG['img_size']),
    transforms.RandomResizedCrop(CONFIG['img_size']),
    transforms.RandomRotation(degrees=90),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229,0.224,0.225]),
])

test_transforms = transforms.Compose([
    transforms.Resize(CONFIG['img_size']),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229,0.224,0.225]),
])

<h4> Prepare Data loaders </h4>

In [10]:
weak_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(p = 0.5),
    transforms.RandomCrop(size=32, padding=int(32*0.125), padding_mode='reflect'),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
]) # weak transforms are just random shifting and flipping

strong_transforms = transforms.Compose([
    transforms.RandomRotation(degrees = 90),
    transforms.RandomVerticalFlip(p = 0.5),
    transforms.RandomAdjustSharpness(sharpness_factor = 1, p = 0.5),
    transforms.RandomHorizontalFlip(p = 0.5),
    transforms.RandomCrop(size=32, padding=int(32*0.125), padding_mode='reflect'),
    transforms.ColorJitter(0.4, 0.4, 0.4, 0.2),
    transforms.RandomGrayscale(p = 0.2),
#     transforms.Resize(CONFIG['img_size']),
#     transforms.RandomResizedCrop(CONFIG['img_size']),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

class TransformFixMatch(object):
    def __init__(self, weak, strong):
        self.weak = weak
        self.strong = strong

    def __call__(self, x):
        weak = self.weak(x)
        strong = self.strong(x)

        return weak, strong

In [11]:
# Adding the new transforms
# cifar_labeled_trainset = CIFAR10SSL(root='./data', indexs = labeled_indices, train=True, transform = weak_transforms)
# cifar_unlabeled_trainset = CIFAR10SSL(root='./data', indexs = unlabeled_indices, train=True, transform=TransformFixMatch(weak = weak_transforms, strong = strong_transforms))
# cifar_testset = datasets.CIFAR10(root='./data', train=False, transform = val_transforms, download=False)

# cifar_labeled_trainloader = DataLoader(cifar_labeled_trainset, batch_size=64, sampler = RandomSampler(cifar_labeled_trainset))
# cifar_unlabeled_trainloader = DataLoader(cifar_unlabeled_trainset, batch_size=7 * 64, sampler = RandomSampler(cifar_unlabeled_trainset))
# cifar_testloader = DataLoader(cifar_testset, batch_size=64, shuffle = False)

In [12]:
def prepare_loaders():
    train_set = CustomDataset(
        csv_path = os.path.join(ROOT_DIR, 'train_annotations.csv'),
        data_dir = TRAIN_LABELED_DIR,
        transform = train_transforms,
        dataset_type ='train'
    )
    
    unlabeled_train_set = UnlabeledDataset(
        data_dir = TRAIN_UNLABELED_DIR,
        transform = TransformFixMatch(weak = weak_transforms, strong = strong_transforms),
    )
    
    train_loader = DataLoader(
        train_set,
        batch_size = CONFIG['train_batch_size'], 
        shuffle = True,
        num_workers = CONFIG['num_workers'], 
        pin_memory = True
    )
    
    unlabeled_train_loader = DataLoader(
        unlabeled_train_set,
        batch_size = CONFIG['train_batch_size'], 
        shuffle = True,
        num_workers = CONFIG['num_workers'], 
        pin_memory = True
    )

    val_set = CustomDataset(
        csv_path = os.path.join(ROOT_DIR, 'train_annotations.csv'),
        data_dir = TRAIN_LABELED_DIR,
        transform = test_transforms,
        dataset_type ='val'
    )
    
    val_loader = DataLoader(
        val_set,
        batch_size = CONFIG['val_batch_size'], 
        shuffle = False,
        num_workers = CONFIG['num_workers'], 
        pin_memory = True
    )

    return train_loader, unlabeled_train_loader, val_loader

In [13]:
def train_fixmatch_epoch(model, train_loader, unlabeled_train_loader, device, optimizer, epoch):
    criterion_labeled = nn.CrossEntropyLoss()
    criterion_unlabeled = nn.CrossEntropyLoss(reduction='none') # loss per example

    threshold = 0.90 # predictions smaller than 90% confidence are filtered.

    model.train()

    total_train_loss = 0.0
    dataset_size = 0
    
    bar = tqdm(enumerate(unlabeled_train_loader), total=len(unlabeled_train_loader), colour='cyan', file=sys.stdout)

    labeled_iterator = iter(train_loader)

    epoch_loss = 0

    for step, unlabeled_images in bar:

        unlabeled_images_weak, unlabeled_images_strong = unlabeled_images['img']

        unlabeled_images_weak = unlabeled_images_weak.to(device)
        unlabeled_images_strong = unlabeled_images_strong.to(device)
        try:
          labeled_data = next(labeled_iterator)
        except StopIteration as e:
          labeled_iterator = iter(train_loader)
          labeled_data = next(labeled_iterator)

        labeled_images = labeled_data['img']
        labels = labeled_data['label']

        labeled_images = labeled_images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        pred_labeled = model(labeled_images)

        # get pseudo-labels, don't propagate gradients
        with torch.no_grad():
          pred_weak = model(unlabeled_images_weak)

          # get confidence as a probability
          pred_weak_confidence = torch.nn.functional.softmax(pred_weak, dim = -1)
          max_values, max_indices = torch.max(pred_weak_confidence, dim = -1)

          # filter out unconfident predictions
          fixmatch_mask = (max_values > threshold).float()

        pred_strong = model(unlabeled_images_strong)

        loss_labeled = criterion_labeled(pred_labeled, labels)
        loss_consistency = criterion_unlabeled(pred_strong, max_indices) * fixmatch_mask
        loss_consistency = loss_consistency.mean()

        loss = loss_labeled + loss_consistency

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        bar.set_postfix(Epoch=epoch, LabeledLoss=loss_labeled.item(), ConsistencyLoss = loss_consistency.item(), FractionMasked=(1 - fixmatch_mask.float().mean()).item())

    return epoch_loss


<h4> Create model </h4>
You should experiment with different models by modifying existing ones or constructing new models from scratch

In [14]:
from torchvision.models import resnet18

class ResnetModel(nn.Module):
    def __init__(self, num_classes):
        super(ResnetModel, self).__init__()
        self.backbone = resnet18()
        
        # Change the classification head to have num_classes output neurons
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)
    
    def forward(self, x):
        x = self.backbone(x)
        
        return x

In [15]:
model = ResnetModel(num_classes=CONFIG['num_classes'])
model.to(CONFIG['device'])

ResnetModel(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, trac

<h4> Training Helpers </h4>
You are encouraged to experiment with different scheduler types and parameters

In [16]:
criterion = nn.CrossEntropyLoss()

optimizer = optim.AdamW(model.parameters(), lr=CONFIG['learning_rate'])
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=CONFIG['step_size'], gamma=CONFIG['gamma'])

<h4> Training function </h4>

<h4> Validation function </h4>

In [17]:
def valid_epoch(model, dataloader, device, epoch):
    model.eval()
    
    total_val_loss = 0.0
    dataset_size = 0
    
    correct = 0

    bar = tqdm(enumerate(dataloader), total=len(dataloader), colour='cyan', file=sys.stdout)
    for step, data in bar:
        images = data['img'].to(device)
        labels = data['label'].to(device)
        
        batch_size = images.shape[0]

        pred = model(images)
        loss = criterion(pred, labels)
        
        _, predicted = torch.max(pred, 1)
        correct += (predicted == labels).sum().item()
        
        total_val_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = np.round(total_val_loss / dataset_size, 2)
    
        accuracy = np.round(100 * correct / dataset_size, 2)

        bar.set_postfix(Epoch=epoch, Valid_Acc=accuracy, Valid_Loss=epoch_loss)

    return accuracy, epoch_loss

<h4> Build submission file </h4>

In [18]:
def build_submission(model, dataloader, device, submission_file):
    model.eval()
    
    all_predictions = []
    all_image_names = []

    for data in dataloader:
        images = data['img'].to(device)
        img_names = data['img_name']
        pred = model(images)
        _, predicted = torch.max(pred, 1)
        
        predicted = predicted.cpu().numpy().tolist()
        all_predictions.extend(predicted)
        all_image_names.extend(img_names)
    
    all_predictions = [index_to_class_mapping[prediction] for prediction in all_predictions]
    data = list(zip(all_image_names, all_predictions))
    submission_df = pd.DataFrame(data=data, columns=['image_name', 'class_name'])
    submission_df.to_csv(submission_file, index=False)
    print(f"Submission saved to {submission_file}")

<h4> Run Training </h4>

In [19]:
def run_training(model, device, optimizer, num_epochs):
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    
    top_accuracy = 0.0
    
    train_loader, unlabeled_train_loader, val_loader = prepare_loaders()
    for epoch in range(num_epochs):
        
        train_loss = train_fixmatch_epoch(model, train_loader, unlabeled_train_loader, device, optimizer, num_epochs)
        with torch.no_grad():
            val_accuracy, val_loss = valid_epoch(model, val_loader, device, epoch)
            if val_accuracy > top_accuracy:
                print(f"Validation Accuracy Improved ({top_accuracy} ---> {val_accuracy})")
                top_accuracy = val_accuracy
                torch.save(model.state_dict(), SAVE_PATH)
                print("Model Saved")
        print()

In [20]:
run_training(model, CONFIG['device'],optimizer, CONFIG['epochs'])

[INFO] Using GPU: Tesla T4

100%|[36m██████████[0m| 94/94 [01:02<00:00,  1.50it/s, ConsistencyLoss=0.0138, Epoch=10, FractionMasked=0, LabeledLoss=3.22]
100%|[36m██████████[0m| 5/5 [00:01<00:00,  2.95it/s, Epoch=0, Valid_Acc=3.17, Valid_Loss=35.1]
Validation Accuracy Improved (0.0 ---> 3.17)
Model Saved

100%|[36m██████████[0m| 94/94 [00:46<00:00,  2.02it/s, ConsistencyLoss=0.000162, Epoch=10, FractionMasked=0, LabeledLoss=3.03]
100%|[36m██████████[0m| 5/5 [00:00<00:00,  7.67it/s, Epoch=1, Valid_Acc=8.83, Valid_Loss=12.6]
Validation Accuracy Improved (3.17 ---> 8.83)
Model Saved

100%|[36m██████████[0m| 94/94 [00:45<00:00,  2.07it/s, ConsistencyLoss=4.86e-5, Epoch=10, FractionMasked=0, LabeledLoss=2.95]
100%|[36m██████████[0m| 5/5 [00:00<00:00,  7.52it/s, Epoch=2, Valid_Acc=8.67, Valid_Loss=5.74]

100%|[36m██████████[0m| 94/94 [00:45<00:00,  2.07it/s, ConsistencyLoss=6.32e-5, Epoch=10, FractionMasked=0, LabeledLoss=2.76]
100%|[36m██████████[0m| 5/5 [00:00<00:00,  7.17it

In [21]:
print("Loading best model for submission")
model.load_state_dict(torch.load(SAVE_PATH))

test_set = UnlabeledDataset(TEST_DIR, test_transforms)

test_loader = DataLoader(
        test_set,
        batch_size = CONFIG['val_batch_size'], 
        shuffle = False,
        num_workers = CONFIG['num_workers'], 
        pin_memory = True
)

build_submission(model, test_loader, CONFIG['device'], 'submission.csv')

Loading best model for submission
Submission saved to submission.csv
