In [13]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms, models
from torchvision.datasets import ImageFolder, DatasetFolder

from tqdm import tqdm

import numpy as np
import os

In [14]:
# Seed everything.
torch.manual_seed(0)

<torch._C.Generator at 0x7d963931a830>

In [15]:
class DataModule:
    def __init__(
        self,
        dataset_path,
        transform,
        batch_size,
        num_workers,
    ):
        print(dataset_path)

        # Create the training dataset
        self.dataset = ImageFolder(dataset_path, transform = transform)
        
        # Split the dataset in 5
        self.dataset_list = torch.utils.data.random_split(self.dataset, [0.7, 0.3])

        self.batch_size = batch_size
        self.num_workers = num_workers
    
    def val_dataloader_list(self):
        """ Returns the list containing a training DataLoader and a test Dataloader """
        val_loader_list = [DataLoader(
                            x,
                            batch_size = self.batch_size,
                            shuffle = False,
                            num_workers = self.num_workers,
                            ) for x in self.dataset_list]
        return val_loader_list

    def train_dataloader(self, i):
        """
        i: indicates which train set has to be used (i is 0, 1, 2, 3 or 4)

        Returns a DataLoader containing 80% of the training set, and the pseudo-labelled images
        """
        self.concat = torch.utils.data.ConcatDataset([self.dataset_list[j] for j in range(5) if j != i]
                                                     + [self.pseudo_labelled_dataset])

        train_loader = DataLoader(
                        self.concat,
                        batch_size = self.batch_size,
                        shuffle = True,
                        num_workers = self.num_workers
                        )
        
        return train_loader



In [16]:
dataset_path = '/kaggle/input/hackathon-qb/images'
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()])

In [29]:
data = DataModule(dataset_path, transform, 32, 4)

/kaggle/input/hackathon-qb/images


In [18]:
sets = data.val_dataloader_list()
training_set, test_set = sets[0], sets[1]

In [19]:
class TransformerFinetune(nn.Module):
    """ Model using vit_b_16 as backbone with a linear classifier """
    def __init__(self, num_classes, frozen = False):
        super().__init__()
        
        self.backbone = torchvision.models.vit_b_16()
        self.backbone.heads = nn.Identity()
        if frozen:
            for param in self.backbone.parameters():
                param.requires_grad = False
                
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [38]:
def train(model, device, dataloader, epoch, rate = 1e-4):
    
    model.to(device)
    model.train()

    optimizer = torch.optim.SGD(model.parameters(), lr=rate)

    loss_fn = nn.CrossEntropyLoss()
    train_losses = []
    accuracy = []
    for t in tqdm(range(epoch)):
        for i, (input_data, target) in enumerate(dataloader):

            input_data, target = input_data.to(device), target.to(device)

            #on calcule la prediction 
            y_pred = model(input_data)
            loss = loss_fn(y_pred, target)
            optimizer.zero_grad()
            #back propagation
            loss.backward()
            optimizer.step()
        train_losses.append(loss.detach().item())
        accuracy = success_rate(model, test_set)

    return train_losses, accuracy

In [41]:
def success_rate(model,dataloader):
    nb = 0
    success = 0
    results = []
    for i, (input, target) in enumerate(dataloader):
        input, target = input.to(device), target.to(device)
        with torch.no_grad():
          y_pred = model(input)
        pred = torch.argmax(y_pred, dim = -1)
        for i in range(int(pred.shape[0])):
            if pred[i] == target[i]:
                success += 1
            nb += 1
    return success / nb

In [42]:
model_vit1 = TransformerFinetune(2, True)

In [43]:
train(model_vit1, device, training_set, 10, rate = 1e-3)

100%|██████████| 10/10 [00:57<00:00,  5.76s/it]


([0.6795843839645386,
  0.699249267578125,
  0.710032045841217,
  0.7148244976997375,
  0.7165431380271912,
  0.716803789138794,
  0.7164103388786316,
  0.7157447934150696,
  0.7149845361709595,
  0.7142083048820496],
 0.6220472440944882)

In [44]:
success_rate(model, test_set)

0.6299212598425197