In [1]:
from google.colab import drive
drive.mount('/content/drive')
!unzip drive/MyDrive/SDSC4016_hw2_colab/food-11.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: __MACOSX/food-11/testing/00/._2170.jpg  
  inflating: food-11/testing/00/1451.jpg  
  inflating: __MACOSX/food-11/testing/00/._1451.jpg  
  inflating: food-11/testing/00/0997.jpg  
  inflating: __MACOSX/food-11/testing/00/._0997.jpg  
  inflating: food-11/testing/00/2158.jpg  
  inflating: __MACOSX/food-11/testing/00/._2158.jpg  
  inflating: food-11/testing/00/0029.jpg  
  inflating: __MACOSX/food-11/testing/00/._0029.jpg  
  inflating: food-11/testing/00/1337.jpg  
  inflating: __MACOSX/food-11/testing/00/._1337.jpg  
  inflating: food-11/testing/00/1323.jpg  
  inflating: __MACOSX/food-11/testing/00/._1323.jpg  
  inflating: food-11/testing/00/1445.jpg  
  inflating: __MACOSX/food-11/testing/00/._1445.jpg  
  inflating: food-11/testing/00/0983.jpg  
  inflating: __MACOSX/food-11/testing/00/._0983.jpg  
  inflating: food-11/testing/00/1335.jpg  
  inflating: __MACOSX/food-11/testing/00/._1335.jpg  
  inflat

In [2]:
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import ConcatDataset, DataLoader, Subset
from torchvision.datasets import DatasetFolder
import shutil
from tqdm.auto import tqdm
from torchvision.models import resnet101

In [3]:
train_tfm_ = [
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.
    transforms.RandomRotation(40),
    transforms.RandomHorizontalFlip(p=0.8),
    transforms.RandomVerticalFlip(p=0.1),
    # transforms.GaussianBlur(21,10)
    # ToTensor() should be the last one of the transforms.
]

train_tfm = transforms.Compose([
    transforms.Resize((128,128)),
    transforms.RandomApply(train_tfm_, p=0.9),
    transforms.ToTensor()
])

# We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Batch size for training, validation, and testing.
# A greater batch size usually gives a more stable gradient.
# But the GPU memory is limited, so please adjust it carefully.
batch_size = 200

# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = DatasetFolder("/content/food-11/training/labeled", loader=lambda x: Image.open(x), extensions="jpg", transform=train_tfm)
valid_set = DatasetFolder("/content/food-11/validation", loader=lambda x: Image.open(x), extensions="jpg", transform=test_tfm)
unlabeled_set = DatasetFolder("/content/food-11/training/unlabeled", loader=lambda x: Image.open(x), extensions="jpg", transform=train_tfm)
test_set = DatasetFolder("/content/food-11/testing", loader=lambda x: Image.open(x), extensions="jpg", transform=test_tfm)


# Construct data loaders.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [4]:
def get_pseudo_labels(dataset, model, threshold, batch_size=128):
    # This function generates pseudo-labels of a dataset using a given model.
    # It returns an instance of DatasetFolder containing images whose prediction confidences exceed a given threshold.
    # You are NOT allowed to use any models trained on external data for pseudo-labeling.
    k = 0
    image_path = '/content/food-11/labeling'
    if not os.path.exists(image_path):
        os.mkdir(image_path)
        
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Make sure the model is in eval mode.
    model.eval()
    
    # Define softmax function.
    softmax = nn.Softmax(dim=-1)

    unlabeled_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

    # Iterate over the dataset by batches.
    for batch in tqdm(unlabeled_loader):
        imgs, labels = batch

        # Forward the data
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))
        
        # Obtain the probability distributions by applying softmax on logits.
        probs = softmax(logits)
        
        for index, prob in enumerate(probs):
            for i in prob:
                if i > threshold:
                    label = torch.argmax(prob)
                    img = imgs[index]
                    img = transforms.ToPILImage()(img)
                    
                    if not os.path.exists(image_path + f'/{label}'):
                        os.mkdir(image_path + f'/{label}')
                    
                    k+=1
                    img.save(f"{image_path}/{label}/{k}.png")
                    break

    try:
        dataset = DatasetFolder("/content/food-11/labeling", loader=lambda x: Image.open(x), extensions="jpg", transform=None)
        
        # need to delete folder directory after create dataset
        os.rmdir(image_path)
        
        # Turn off the eval mode.
        model.train()
        
        return dataset
    
    except FileNotFoundError:
        print('No Class recognized by the model')
        
        # Turn off the eval mode.
        model.train()
        
        if os.path.exists(image_path):
            shutil.rmtree(image_path, ignore_errors = True)
        
        return None

In [5]:
# "cuda" only when GPUs are available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize a model, and put it on the device specified.
model = resnet101(pretrained=False).to(device)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Sequential(nn.Linear(2048, 11), nn.Softmax(dim=1))
model = model.to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=1e-5, momentum=0.9)

# The number of training epochs.
n_epochs = 60

# Whether to do semi-supervised learning.
do_semi = True

loss_record = {'train': [], 'dev': []}
acc_record = {'train': [], 'dev': []}

error = 1000
cnt = 0
early_stop = 10

for epoch in range(n_epochs):
    # In each epoch, relabel the unlabeled dataset for semi-supervised learning.
    # Then you can combine the labeled dataset and pseudo-labeled dataset for the training.
    if do_semi:
        # Obtain pseudo-labels for unlabeled data using trained model.
        try:
            if valid_acc:
                if valid_acc < 0.2:
                    threshold = 0.45
                elif valid_acc < 0.5:
                    threshold = 0.7
                elif valid_acc < 0.7:
                    threshold = 0.8
        except:
            threshold = 0.3
        pseudo_set = get_pseudo_labels(unlabeled_set, model, threshold)
        if pseudo_set == None:
            pass
        else: 
            # Construct a new dataset and a data loader for training.
            # This is used in semi-supervised learning only.
            concat_dataset = ConcatDataset([train_set, pseudo_set])
            train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    # Iterate the training set by batches.
    for batch in tqdm(train_loader):
        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs)

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels)

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels).float().mean()



  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

No Class recognized by the model


  0%|          | 0/15 [00:00<?, ?it/s]

In [6]:
# Make sure the model is in eval mode.
# Some modules like Dropout or BatchNorm affect if the model is in training mode.
model.eval()

# Initialize a list to store the predictions.
predictions = []

# Iterate the testing set by batches.
for batch in tqdm(test_loader):
    # A batch consists of image data and corresponding labels.
    # But here the variable "labels" is useless since we do not have the ground-truth.
    # If printing out the labels, you will find that it is always 0.
    # This is because the wrapper (DatasetFolder) returns images and labels for each batch,
    # so we have to create fake labels to make it work normally.
    imgs, labels = batch

    # We don't need gradient in testing, and we don't even have labels to compute loss.
    # Using torch.no_grad() accelerates the forward process.
    with torch.no_grad():
        logits = model(imgs.to(device))

    # Take the class with greatest logit as prediction and record it.
    predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())

  0%|          | 0/16 [00:00<?, ?it/s]

In [9]:
# Save predictions into the file.
with open("drive/MyDrive//SDSC4016_hw2_colab/predict.csv", "w") as f:

    # The first row must be "Id, Category"
    f.write("Id,Category\n")

    # For the rest of the rows, each image id corresponds to a predicted class.
    for i, pred in  enumerate(predictions):
         f.write(f"{i},{pred}\n")