# MLDL2 Homework 3

In [58]:
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import numpy as np
from torch.utils.data import Dataset
import torch.nn.functional as F
from tqdm.auto import tqdm
from torch.utils.data import Dataset, RandomSampler, Subset
import matplotlib.pyplot as plt


# 1. Load the Caltech UCSD Birds-200 Dataset

In [60]:
#This cell is for Dataloader for CUB200 data.

BATCH_SIZE =64
class CUB_Dataset(Dataset):
    def __init__(self,img_file, label_file, transform=None):
        self.img =np.load(img_file)
        self.labels = np.load(label_file)
        self.transform = transform

    def __len__(self):
        return len(self.img)

    def __getitem__(self, idx):
        image = self.img[idx]
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]

        return image,label

cub_bird_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

cub_train_dataset = CUB_Dataset(img_file="./CUB_train_images.npy",
                                        label_file="./CUB_train_labels.npy", transform=cub_bird_transform)
cub_train_loader = torch.utils.data.DataLoader(cub_train_dataset, batch_size=BATCH_SIZE, shuffle=True)

cub_val_dataset = CUB_Dataset(img_file="./CUB_val_images.npy",
                                        label_file="./CUB_val_labels.npy",transform=cub_bird_transform)
cub_val_loader = torch.utils.data.DataLoader(cub_val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [61]:
#In this training, I'll use mixmatch method. This cell is for augmentation of labeled image batch with stochastic aug method. 
def random_aug(batch):
    aug_data = []
    transform1 = transforms.Compose([transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomGrayscale(p=1),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    transform2 = transforms.Compose([transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomResizedCrop((224, 224)),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    transform3 = transforms.Compose([transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomHorizontalFlip(p=1),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    transform4 = transforms.Compose([transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225]),
                transforms.ToPILImage(),
                                                 transforms.RandomVerticalFlip(p=1),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    transform5 = transforms.Compose([transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225]), transforms.ToPILImage(),
                                                 transforms.RandomRotation([-90, 90]),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
    
    transform7 = transforms.Compose([transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225]), transforms.ToPILImage(),
                                                 transforms.RandomAffine([-180, 180]),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                                ])    
    transform8 = transforms.Compose([transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225]), transforms.ToPILImage(),
                                                 transforms.ColorJitter(brightness=(0, 1), contrast=(0.2, 0.9), saturation= (0.5, 0.9), hue=(-0.2, 0.2)),
                                                  transforms.ToTensor(),
                                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                                ])    
    for data in batch:
            a= np.random.randint(0, 7, size =1)
            
            if a ==0:    
                aug_data.append(transform1(data))
            elif a ==1:
                aug_data.append(transform2(data))
            elif a ==2:
                aug_data.append(transform3(data))
            elif a ==3:
                aug_data.append(transform4(data))

            elif a == 4:            
                aug_data.append(transform5(data))
            elif a ==5:
                aug_data.append(data)
            elif a ==6:
                aug_data.append(transform7(data))
            elif a ==7:
                aug_data.append(transform8(data)) 

    aug_data = torch.stack(aug_data, dim =0) 
    aug_data = aug_data.view(-1, 3, 224 ,224)
    aug_data = aug_data.to(device)
    return aug_data           


In [13]:
#Before the training CUB-data, I'll pretrain the model with Cifar100. This cell is for Dataset & DataLoader for pre-training the model

BATCH_SIZE = 32

cifar10_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))
    ])

cifar100_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.507, 0.487, 0.441), (0.267, 0.256, 0.276))
    ])

cifar10_train_dataset = datasets.CIFAR10(root="./data/", train=True, transform=cifar10_transform)
cifar100_train_dataset = datasets.CIFAR100(root="./data/", train=True, transform=cifar100_transform)

cifar10_train_loader = torch.utils.data.DataLoader(dataset=cifar10_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
cifar100_train_loader = torch.utils.data.DataLoader(dataset=cifar100_train_dataset, batch_size=64, shuffle=True)

In [63]:
#Dataset for unlabeled bird data in cifar 10.

BATCH_SIZE = 64


cifar10_train_dataset = datasets.CIFAR10(root="./data/", train=True)

bird_data = []

for i, data in enumerate(cifar10_train_dataset):
    if data[1] ==2:
        bird_data.append(data)
 

class Bird_dataset(Dataset):
    def __init__(self, data_file, transforms =None):
        self.data =data_file
        self.transforms = transforms
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data = self.data[idx]
        if self.transforms:
            
            image = self.transforms(data[0])
        return image
    
transform = transforms.Compose([transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))
    ])
Bird_dataset = Bird_dataset(bird_data, transform)
cifar_bird_loader = torch.utils.data.DataLoader(Bird_dataset, batch_size=BATCH_SIZE, shuffle=True)



# 3. Define the Model Architecture

Here we define the model. Below is very simple model with CNN. You can customize your own model and note that you are not limited to use any methods. **But you are not allowed to use pretrained weight**

In [64]:
#This cell is for augmentation of unlabeled image batch with stochastic augmentation method in mixmatch.
    
def batch_aug(batch): #This function is for stochastic augmentation for unlabeled data.# I chose K=2 as parameter.
    aug_data = []
    transform1 = transforms.Compose([transforms.Normalize(mean=[-0.491/0.247, -0.482/0.244, -0.447/0.262], std=[1/0.247, 1/0.244, 1/0.262]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomGrayscale(p=1),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))])

    transform2 = transforms.Compose([transforms.Normalize(mean=[-0.491/0.247, -0.482/0.244, -0.447/0.262], std=[1/0.247, 1/0.244, 1/0.262]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomHorizontalFlip(p=0.5),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))])

    transform4 = transforms.Compose([transforms.Normalize(mean=[-0.491/0.247, -0.482/0.244, -0.447/0.262], std=[1/0.247, 1/0.244, 1/0.262]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomResizedCrop((224, 224)),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))])
    transform5 = transforms.Compose([transforms.Normalize(mean=[-0.491/0.247, -0.482/0.244, -0.447/0.262], std=[1/0.247, 1/0.244, 1/0.262]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomVerticalFlip(p=1),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))])

    transform6 = transforms.Compose([transforms.Normalize(mean=[-0.491/0.247, -0.482/0.244, -0.447/0.262], std=[1/0.247, 1/0.244, 1/0.262]),
                                                 transforms.ToPILImage(),
                                                 transforms.RandomHorizontalFlip(p=0.5),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))])
    transform7 = transforms.Compose([transforms.Normalize(mean=[-0.491/0.247, -0.482/0.244, -0.447/0.262], std=[1/0.247, 1/0.244, 1/0.262]),
                                                 transforms.ToPILImage(),
                                                transforms.RandomAffine([-180, 180]),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))])

    transform8 = transforms.Compose([transforms.Normalize(mean=[-0.491/0.247, -0.482/0.244, -0.447/0.262], std=[1/0.247, 1/0.244, 1/0.262]),
                                                 transforms.ToPILImage(),
                                                 transforms.ColorJitter(brightness=(0, 1), contrast=(0.4, 0.8), saturation= (0.7, 0.9), hue=(-0.2, 0.2)),
                                                  transforms.ToTensor(),
                                                 transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.244, 0.262))])

    for data in batch:
        data = data.to('cpu')
        a= np.random.randint(0, 4, size =2)
        if a[0] ==0:    
            aug_data.append(transform1(data))
        elif a[0] ==1:
            aug_data.append(transform2(data))
        elif a[0] ==2:
            aug_data.append(data)
        elif a[0] ==3:
            aug_data.append(transform4(data))

        if a[1] == 0:            
            aug_data.append(transform5(data))
        elif a[1] ==1:
            aug_data.append(transform6(data))
        elif a[1] ==2:
            aug_data.append(transform7(data))
        elif a[1] ==3:
            aug_data.append(transform8(data)) 

    aug_data = torch.stack(aug_data, dim =0) 
    aug_data = aug_data.view(-1, 3, 224 ,224)
    aug_data = aug_data.to(device)
    return aug_data   

def making_guessed_batch(unlabeled_batch_inputs, model, T, trans_num): #This function is for making guessed_label
        
    unlabeled_aug_input2 = batch_aug(unlabeled_batch_inputs)
    unlabeled_aug_input2= unlabeled_aug_input2.to(device)
    unlabeled_aug_outputs = model(unlabeled_aug_input2)
    
    softmax = nn.Softmax(dim=1)

    unlabeled_aug_output = softmax(unlabeled_aug_outputs)
    
    #Get meaned guessed label for same unlabeled data
    unlabeled_aug_outputs = unlabeled_aug_output.view(-1, trans_num, 200)
    unlabeled_mean_tensor = unlabeled_aug_outputs.mean(dim=1)
    
    # #sharpening the guessed label

    unlabeled_mean_tensor = torch.log(unlabeled_mean_tensor)
    unlabeled_mean_tensor = unlabeled_mean_tensor / T
    unlabeled_mean_tensor = softmax(unlabeled_mean_tensor)
    
    unlabeled_mean_tensor_unsqueezed =  unlabeled_mean_tensor.unsqueeze(1)

    unlabeled_mean_tensor_unsqueezed2 = unlabeled_mean_tensor_unsqueezed.repeat(1, trans_num, 1)

    result_guess_tensor = unlabeled_mean_tensor_unsqueezed2.view(-1, 200)
    
    return unlabeled_aug_input2, result_guess_tensor        

def mix_up(input1, target1, input2, target2): #This function is for mix up step of mixmatch

    one_hot_target= torch.nn.functional.one_hot(target1, num_classes=200)

    input1 = input1.to(device)
    
    one_hot_target= one_hot_target.to(device)
    
    all_inputs = torch.cat([input1, input2], dim=0)
    all_targets = torch.cat([one_hot_target, target2], dim=0)
    
    idx = torch.randperm(all_inputs.size(0))
    
    input_a, input_b = all_inputs, all_inputs[idx]
    target_a, target_b = all_targets, all_targets[idx]

    mixed_input = []
    mixed_target = []
    for i in range(len(all_inputs)):
        l = np.random.beta(0.75, 0.75)
        l = max(l, 1-l)
        a= l * input_a[i] + (1 - l) * input_b[i]
        b= l * target_a[i] + (1 - l) * target_b[i]
        mixed_input.append(a)
        mixed_target.append(b)

    mixed_input = torch.stack(mixed_input, dim =0)
    mixed_input = mixed_input.view(-1, 3, 224, 224)
    mixed_target = torch.stack(mixed_target, dim =0)
    mixed_target = mixed_target.view(-1, 200)

    labeled_img, unlabeled_img= torch.split(mixed_input, [input1.size(0), input2.size(0)], dim =0)
    labeled_target, unlabeled_target= torch.split(mixed_target, [input1.size(0), input2.size(0)], dim =0)
    
    
    return labeled_img, labeled_target, unlabeled_img , unlabeled_target 

#This code is for random selection of unlabeled batch for each labeled batch

def get_random_indices(dataset, num_samples):
    sampler = RandomSampler(dataset, replacement=False, num_samples=num_samples)
    indices = list(sampler)
    return indices

def get_random_subset(dataset, num_samples):
    random_indices = get_random_indices(dataset, num_samples)
    subset = Subset(dataset, random_indices)
    subset_data = [subset[i] for i in range(len(subset))]
    return torch.stack(subset_data)

In [71]:
# Baseline of the Model is Alexnet. I tuned the # of parameter as dataset is not much huge in CUB-200 
from torchsummary import summary

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(12, 12), stride=4, padding=0)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=124, kernel_size=(5, 5), stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(3, 2)
        self.conv3 = nn.Conv2d(in_channels=124, out_channels=168, kernel_size=(3, 3), stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=168, out_channels=168, kernel_size=(3, 3), stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=168, out_channels=64, kernel_size=(3, 3), stride=1, padding=0)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 5 * 5, 700)
        self.fc2 = nn.Linear(700, 100)
        self.drop1 = nn.Dropout(p=0.4)
        self.bn = torch.nn.BatchNorm1d(700)


    def forward(self, x):
        x = self.conv1(x) #(64, 54, 54)

        x= F.relu(x)
        x= self.pool(x) #(64, 27, 27)
        x= self.conv2(x) #(124, 27, 27)

        x= F.relu(x)
        x= self.pool(x) #(124, 13, 13)
        x= self.conv3(x) #(168, 13, 13)
        # x=self.bn_conv3(x)
        x= F.relu(x)
        x= self.conv4(x) #(168, 13, 13)
        # x= self.bn_conv3(x)
        x= F.relu(x)
        x= self.conv5(x) #(64, 11, 11)|
        x= F.relu(x)
        x= self.pool2(x) #(64, 5, 5)

        x = x.view(-1, 64*5*5) 
        x=self.fc1(x)
        x= self.bn(x)
        x = self.drop1(F.relu(x))
        x = self.fc2(x)
        return x
    
device = torch.device('mps')
print(device)
model = Net()
summary(model, ( 3, 224, 224))
model.to(device)


mps
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 54, 54]          27,712
         MaxPool2d-2           [-1, 64, 27, 27]               0
            Conv2d-3          [-1, 124, 27, 27]         198,524
         MaxPool2d-4          [-1, 124, 13, 13]               0
            Conv2d-5          [-1, 168, 13, 13]         187,656
            Conv2d-6          [-1, 168, 13, 13]         254,184
            Conv2d-7           [-1, 64, 11, 11]          96,832
         MaxPool2d-8             [-1, 64, 5, 5]               0
            Linear-9                  [-1, 700]       1,120,700
      BatchNorm1d-10                  [-1, 700]           1,400
          Dropout-11                  [-1, 700]               0
           Linear-12                  [-1, 100]          70,100
Total params: 1,957,108
Trainable params: 1,957,108
Non-trainable params: 0
-----------------------

Net(
  (conv1): Conv2d(3, 64, kernel_size=(12, 12), stride=(4, 4))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 124, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(124, 168, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(168, 168, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(168, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1600, out_features=700, bias=True)
  (fc2): Linear(in_features=700, out_features=100, bias=True)
  (drop1): Dropout(p=0.4, inplace=False)
  (bn): BatchNorm1d(700, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

# 4. Train the network

In [256]:
#Pre-training model by cifar100 data

import warnings
warnings.filterwarnings('ignore')

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas = (0.9, 0.999), weight_decay=5e-4) 

for epoch in range(20):  
    n_iter = 0
    loss_total = 0
    acc_total = 0
    model.train()
    for data in tqdm(cifar100_train_loader):
        inputs, targets = data[0].to(device), data[1].to(device)
        outputs = model(inputs)

        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        pred = outputs.argmax(dim=-1)
        correct = pred.eq(targets).float().sum()
        acc = correct / targets.shape[0]
        loss_total += loss.item()
        acc_total += acc
        n_iter += 1
    print(f"Epoch: {epoch+1} - Loss: {loss_total/n_iter:.3f} - Acc: {acc_total/n_iter:.3f}")

print('Finished Training')

PATH = "./model_cifar100.pth"
torch.save(model.state_dict(), PATH)


100%|██████████| 782/782 [01:24<00:00,  9.27it/s]


Epoch: 1 - Loss: 2.913 - Acc: 0.279


100%|██████████| 782/782 [01:24<00:00,  9.30it/s]


Epoch: 2 - Loss: 2.635 - Acc: 0.332


100%|██████████| 782/782 [01:24<00:00,  9.30it/s]


Epoch: 3 - Loss: 2.454 - Acc: 0.373


100%|██████████| 782/782 [01:24<00:00,  9.29it/s]


Epoch: 4 - Loss: 2.301 - Acc: 0.406


100%|██████████| 782/782 [01:24<00:00,  9.28it/s]


Epoch: 5 - Loss: 2.164 - Acc: 0.432


100%|██████████| 782/782 [01:24<00:00,  9.27it/s]


Epoch: 6 - Loss: 2.056 - Acc: 0.457


100%|██████████| 782/782 [01:24<00:00,  9.30it/s]


Epoch: 7 - Loss: 1.961 - Acc: 0.475


100%|██████████| 782/782 [01:24<00:00,  9.31it/s]


Epoch: 8 - Loss: 1.881 - Acc: 0.493


100%|██████████| 782/782 [01:24<00:00,  9.29it/s]


Epoch: 9 - Loss: 1.799 - Acc: 0.512


100%|██████████| 782/782 [01:23<00:00,  9.31it/s]


Epoch: 10 - Loss: 1.740 - Acc: 0.526


100%|██████████| 782/782 [01:24<00:00,  9.27it/s]


Epoch: 11 - Loss: 1.667 - Acc: 0.541


100%|██████████| 782/782 [01:24<00:00,  9.25it/s]


Epoch: 12 - Loss: 1.619 - Acc: 0.555


100%|██████████| 782/782 [01:25<00:00,  9.15it/s]


Epoch: 13 - Loss: 1.564 - Acc: 0.566


100%|██████████| 782/782 [01:23<00:00,  9.33it/s]


Epoch: 14 - Loss: 1.530 - Acc: 0.575


100%|██████████| 782/782 [01:23<00:00,  9.31it/s]


Epoch: 15 - Loss: 1.482 - Acc: 0.585


100%|██████████| 782/782 [01:23<00:00,  9.32it/s]


Epoch: 16 - Loss: 1.442 - Acc: 0.595


100%|██████████| 782/782 [01:24<00:00,  9.30it/s]


Epoch: 17 - Loss: 1.405 - Acc: 0.604


100%|██████████| 782/782 [01:23<00:00,  9.32it/s]


Epoch: 18 - Loss: 1.373 - Acc: 0.613


100%|██████████| 782/782 [01:23<00:00,  9.32it/s]


Epoch: 19 - Loss: 1.343 - Acc: 0.618


100%|██████████| 782/782 [01:23<00:00,  9.33it/s]

Epoch: 20 - Loss: 1.311 - Acc: 0.626
Finished Training





In [72]:
#Tuning the last layer of model output by # of class of CUB-200 
model = Net()
PATH = "./model_cifar100_train.pth"
model.load_state_dict(torch.load(PATH))
model.fc2 = nn.Linear(model.fc2.in_features, 200)

summary(model.to('cpu'), ( 3, 224, 224))
model.to(device)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 54, 54]          27,712
         MaxPool2d-2           [-1, 64, 27, 27]               0
            Conv2d-3          [-1, 124, 27, 27]         198,524
         MaxPool2d-4          [-1, 124, 13, 13]               0
            Conv2d-5          [-1, 168, 13, 13]         187,656
            Conv2d-6          [-1, 168, 13, 13]         254,184
            Conv2d-7           [-1, 64, 11, 11]          96,832
         MaxPool2d-8             [-1, 64, 5, 5]               0
            Linear-9                  [-1, 700]       1,120,700
      BatchNorm1d-10                  [-1, 700]           1,400
          Dropout-11                  [-1, 700]               0
           Linear-12                  [-1, 200]         140,200
Total params: 2,027,208
Trainable params: 2,027,208
Non-trainable params: 0
---------------------------

Net(
  (conv1): Conv2d(3, 64, kernel_size=(12, 12), stride=(4, 4))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 124, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(124, 168, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(168, 168, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(168, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1600, out_features=700, bias=True)
  (fc2): Linear(in_features=700, out_features=200, bias=True)
  (drop1): Dropout(p=0.4, inplace=False)
  (bn): BatchNorm1d(700, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [73]:
#Training model for 50 epoch

import warnings
warnings.filterwarnings('ignore')
EPOCH = 50
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas = (0.9, 0.999) , weight_decay = 5e-4) 

for epoch in range(EPOCH):  
    n_iter = 0
    loss_total = 0
    acc_total = 0
    model.train()
    
    for data in tqdm(cub_train_loader):
        optimizer.zero_grad()
        inputs1, labels1 = data
        length = len(inputs1)
        labels1 = labels1.to(device)
        inputs2= get_random_subset(Bird_dataset, length) #Get unlabeled image batch for each labeled batch
        
        aug_input1 = random_aug(inputs1) # Stochastic augmentation of image of labeled batch
        aug_input1  = aug_input1.to(device)
        labels1 = labels1.to(device)
        inputs2 = inputs2.to(device)

        model.eval()
        with torch.no_grad():
            unlabeled_img, unlabeled_label = making_guessed_batch(inputs2, model, 0.5, 2) #Get sharpen guess label of augmented unlabeled data.

            aug_label_img , aug_label_tar, aug_unlabel_img , aug_unlabel_tar = mix_up( aug_input1, labels1, unlabeled_img, unlabeled_label)
            #Get mixup data set for augmented labeled batch and augmented unlabeled batch
        model.train()
        
        output1 = model(aug_label_img)

        loss1 = criterion1(output1, aug_label_tar) 

        output2 = model(aug_unlabel_img)

        loss2 = criterion2(output2, aug_unlabel_tar)

        loss = loss1  +150*(1/200)*np.clip(epoch /EPOCH, 0.0, 1.0)* loss2
        
        loss.backward()
        optimizer.step()
        loss_total += loss.item()
        n_iter += 1

    print(f"Epoch: {epoch+1} - Loss: {loss_total/n_iter:.3f}")
    
    correct = 0
    total = 0
    with torch.no_grad():
        model.eval()
        for data in cub_val_loader:
            images, labels = data
            outputs = model(images.to(device))
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted.cpu() == labels).sum().item()

        print(f'Accuracy of the network on the 2897 validation images: {100 * correct / total:.2f} %')

print('Finished Training')

100%|██████████| 94/94 [01:21<00:00,  1.15it/s]


Epoch: 1 - Loss: 5.198
Accuracy of the network on the 2897 validation images: 5.28 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 2 - Loss: 4.910
Accuracy of the network on the 2897 validation images: 7.70 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 3 - Loss: 4.833
Accuracy of the network on the 2897 validation images: 10.18 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 4 - Loss: 4.738
Accuracy of the network on the 2897 validation images: 10.22 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 5 - Loss: 4.648
Accuracy of the network on the 2897 validation images: 13.46 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 6 - Loss: 4.551
Accuracy of the network on the 2897 validation images: 12.43 %


100%|██████████| 94/94 [01:18<00:00,  1.20it/s]


Epoch: 7 - Loss: 4.497
Accuracy of the network on the 2897 validation images: 14.29 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 8 - Loss: 4.474
Accuracy of the network on the 2897 validation images: 15.50 %


100%|██████████| 94/94 [01:18<00:00,  1.20it/s]


Epoch: 9 - Loss: 4.437
Accuracy of the network on the 2897 validation images: 14.12 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 10 - Loss: 4.384
Accuracy of the network on the 2897 validation images: 16.74 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 11 - Loss: 4.341
Accuracy of the network on the 2897 validation images: 15.71 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 12 - Loss: 4.326
Accuracy of the network on the 2897 validation images: 15.74 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 13 - Loss: 4.282
Accuracy of the network on the 2897 validation images: 17.09 %


100%|██████████| 94/94 [01:18<00:00,  1.19it/s]


Epoch: 14 - Loss: 4.238
Accuracy of the network on the 2897 validation images: 18.57 %


100%|██████████| 94/94 [01:18<00:00,  1.19it/s]


Epoch: 15 - Loss: 4.213
Accuracy of the network on the 2897 validation images: 17.36 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 16 - Loss: 4.176
Accuracy of the network on the 2897 validation images: 18.12 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 17 - Loss: 4.136
Accuracy of the network on the 2897 validation images: 17.67 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 18 - Loss: 4.120
Accuracy of the network on the 2897 validation images: 18.23 %


100%|██████████| 94/94 [01:18<00:00,  1.19it/s]


Epoch: 19 - Loss: 4.101
Accuracy of the network on the 2897 validation images: 18.16 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 20 - Loss: 4.112
Accuracy of the network on the 2897 validation images: 19.47 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 21 - Loss: 4.146
Accuracy of the network on the 2897 validation images: 21.33 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 22 - Loss: 4.219
Accuracy of the network on the 2897 validation images: 20.06 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 23 - Loss: 4.007
Accuracy of the network on the 2897 validation images: 21.95 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 24 - Loss: 4.035
Accuracy of the network on the 2897 validation images: 20.06 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 25 - Loss: 3.968
Accuracy of the network on the 2897 validation images: 19.02 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 26 - Loss: 3.952
Accuracy of the network on the 2897 validation images: 19.57 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 27 - Loss: 3.964
Accuracy of the network on the 2897 validation images: 20.12 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 28 - Loss: 3.981
Accuracy of the network on the 2897 validation images: 20.23 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 29 - Loss: 4.014
Accuracy of the network on the 2897 validation images: 20.88 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 30 - Loss: 3.948
Accuracy of the network on the 2897 validation images: 23.13 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 31 - Loss: 3.898
Accuracy of the network on the 2897 validation images: 21.61 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 32 - Loss: 3.873
Accuracy of the network on the 2897 validation images: 22.13 %


100%|██████████| 94/94 [01:17<00:00,  1.21it/s]


Epoch: 33 - Loss: 3.857
Accuracy of the network on the 2897 validation images: 21.40 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 34 - Loss: 3.849
Accuracy of the network on the 2897 validation images: 23.40 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 35 - Loss: 3.868
Accuracy of the network on the 2897 validation images: 22.20 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 36 - Loss: 3.799
Accuracy of the network on the 2897 validation images: 22.47 %


100%|██████████| 94/94 [01:18<00:00,  1.20it/s]


Epoch: 37 - Loss: 3.786
Accuracy of the network on the 2897 validation images: 23.37 %


100%|██████████| 94/94 [01:18<00:00,  1.19it/s]


Epoch: 38 - Loss: 3.891
Accuracy of the network on the 2897 validation images: 21.57 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 39 - Loss: 3.893
Accuracy of the network on the 2897 validation images: 22.75 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 40 - Loss: 3.777
Accuracy of the network on the 2897 validation images: 24.65 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 41 - Loss: 3.824
Accuracy of the network on the 2897 validation images: 21.19 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 42 - Loss: 3.747
Accuracy of the network on the 2897 validation images: 24.30 %


100%|██████████| 94/94 [01:20<00:00,  1.16it/s]


Epoch: 43 - Loss: 3.720
Accuracy of the network on the 2897 validation images: 23.16 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 44 - Loss: 3.731
Accuracy of the network on the 2897 validation images: 22.78 %


100%|██████████| 94/94 [01:20<00:00,  1.16it/s]


Epoch: 45 - Loss: 3.975
Accuracy of the network on the 2897 validation images: 20.57 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 46 - Loss: 3.750
Accuracy of the network on the 2897 validation images: 23.99 %


100%|██████████| 94/94 [01:20<00:00,  1.16it/s]


Epoch: 47 - Loss: 3.703
Accuracy of the network on the 2897 validation images: 23.47 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 48 - Loss: 3.694
Accuracy of the network on the 2897 validation images: 24.68 %


100%|██████████| 94/94 [01:21<00:00,  1.15it/s]


Epoch: 49 - Loss: 3.686
Accuracy of the network on the 2897 validation images: 22.68 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 50 - Loss: 3.660
Accuracy of the network on the 2897 validation images: 24.99 %
Finished Training


In [74]:
#Training model for 10 epoch more.

EPOCH = 10
for epoch in range(EPOCH):  
    n_iter = 0
    loss_total = 0
    acc_total = 0
    model.train()
    
    for data in tqdm(cub_train_loader):
        optimizer.zero_grad()
        inputs1, labels1 = data
        length = len(inputs1)
        labels1 = labels1.to(device)
        inputs2= get_random_subset(Bird_dataset, length) #Get unlabeled image batch for each labeled batch
        
        aug_input1 = random_aug(inputs1) # Stochastic augmentation of image of labeled batch
        aug_input1  = aug_input1.to(device)
        labels1 = labels1.to(device)
        inputs2 = inputs2.to(device)

        model.eval()
        with torch.no_grad():
            unlabeled_img, unlabeled_label = making_guessed_batch(inputs2, model, 0.5, 2) #Get sharpen guess label of augmented unlabeled data.

            aug_label_img , aug_label_tar, aug_unlabel_img , aug_unlabel_tar = mix_up( aug_input1, labels1, unlabeled_img, unlabeled_label)
            #Get mixup data set for augmented labeled batch and augmented unlabeled batch
        model.train()
        
        output1 = model(aug_label_img)

        loss1 = criterion1(output1, aug_label_tar) 

        output2 = model(aug_unlabel_img)

        loss2 = criterion2(output2, aug_unlabel_tar)

        loss = loss1  +150*(1/200)*np.clip(epoch /EPOCH, 0.0, 1.0)* loss2
        
        loss.backward()
        optimizer.step()
        loss_total += loss.item()
        n_iter += 1

    print(f"Epoch: {epoch+1} - Loss: {loss_total/n_iter:.3f}")
    
    correct = 0
    total = 0
    with torch.no_grad():
        model.eval()
        for data in cub_val_loader:
            images, labels = data
            outputs = model(images.to(device))
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted.cpu() == labels).sum().item()

        print(f'Accuracy of the network on the 2897 validation images: {100 * correct / total:.2f} %')

print('Finished Training')

100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 1 - Loss: 3.628
Accuracy of the network on the 2897 validation images: 25.20 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 2 - Loss: 3.635
Accuracy of the network on the 2897 validation images: 24.47 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 3 - Loss: 3.609
Accuracy of the network on the 2897 validation images: 23.23 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 4 - Loss: 3.587
Accuracy of the network on the 2897 validation images: 24.85 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 5 - Loss: 3.615
Accuracy of the network on the 2897 validation images: 24.51 %


100%|██████████| 94/94 [01:19<00:00,  1.19it/s]


Epoch: 6 - Loss: 3.583
Accuracy of the network on the 2897 validation images: 24.96 %


100%|██████████| 94/94 [01:18<00:00,  1.19it/s]


Epoch: 7 - Loss: 3.615
Accuracy of the network on the 2897 validation images: 23.92 %


100%|██████████| 94/94 [01:17<00:00,  1.21it/s]


Epoch: 8 - Loss: 3.591
Accuracy of the network on the 2897 validation images: 25.23 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 9 - Loss: 3.573
Accuracy of the network on the 2897 validation images: 25.03 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 10 - Loss: 3.620
Accuracy of the network on the 2897 validation images: 24.82 %
Finished Training


In [75]:
#Training model for 10 epoch more.

EPOCH = 5
for epoch in range(EPOCH):  
    n_iter = 0
    loss_total = 0
    acc_total = 0
    model.train()
    
    for data in tqdm(cub_train_loader):
        optimizer.zero_grad()
        inputs1, labels1 = data
        length = len(inputs1)
        labels1 = labels1.to(device)
        inputs2= get_random_subset(Bird_dataset, length) #Get unlabeled image batch for each labeled batch
        
        aug_input1 = random_aug(inputs1) # Stochastic augmentation of image of labeled batch
        aug_input1  = aug_input1.to(device)
        labels1 = labels1.to(device)
        inputs2 = inputs2.to(device)

        model.eval()
        with torch.no_grad():
            unlabeled_img, unlabeled_label = making_guessed_batch(inputs2, model, 0.5, 2) #Get sharpen guess label of augmented unlabeled data.

            aug_label_img , aug_label_tar, aug_unlabel_img , aug_unlabel_tar = mix_up( aug_input1, labels1, unlabeled_img, unlabeled_label)
            #Get mixup data set for augmented labeled batch and augmented unlabeled batch
        model.train()
        
        output1 = model(aug_label_img)

        loss1 = criterion1(output1, aug_label_tar) 

        output2 = model(aug_unlabel_img)

        loss2 = criterion2(output2, aug_unlabel_tar)

        loss = loss1  +150*(1/200)* loss2
        
        loss.backward()
        optimizer.step()
        loss_total += loss.item()
        n_iter += 1

    print(f"Epoch: {epoch+1} - Loss: {loss_total/n_iter:.3f}")
    
    correct = 0
    total = 0
    with torch.no_grad():
        model.eval()
        for data in cub_val_loader:
            images, labels = data
            outputs = model(images.to(device))
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted.cpu() == labels).sum().item()

        print(f'Accuracy of the network on the 2897 validation images: {100 * correct / total:.2f} %')

print('Finished Training')

100%|██████████| 94/94 [01:18<00:00,  1.20it/s]


Epoch: 1 - Loss: 3.540
Accuracy of the network on the 2897 validation images: 24.20 %


100%|██████████| 94/94 [01:13<00:00,  1.28it/s]


Epoch: 2 - Loss: 3.739
Accuracy of the network on the 2897 validation images: 24.89 %


100%|██████████| 94/94 [01:15<00:00,  1.24it/s]


Epoch: 3 - Loss: 3.724
Accuracy of the network on the 2897 validation images: 24.75 %


100%|██████████| 94/94 [01:19<00:00,  1.18it/s]


Epoch: 4 - Loss: 3.540
Accuracy of the network on the 2897 validation images: 25.51 %


100%|██████████| 94/94 [01:20<00:00,  1.17it/s]


Epoch: 5 - Loss: 3.609
Accuracy of the network on the 2897 validation images: 24.40 %
Finished Training


# 6. Test and Submit

You can modify your TestDataset, but you should be mindful to align it with the training dataset and its transformations.

In [68]:
class TestDataset(Dataset):
    def __init__(self, img_file, transform=None):
        self.img =np.load(img_file)
        self.transform = transform

    def __len__(self):
        return len(self.img)

    def __getitem__(self, idx):
        image = self.img[idx]
        if self.transform is not None:
            image = self.transform(image)

        return image

test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

test_dataset = TestDataset(img_file="./CUB_test_images.npy",transform=test_transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE,
                                         shuffle=False)

## **Do not modify the cell below!!!!**


In [76]:
def test(model, test_loader):
  model.eval()
  test_predictions = []

  with torch.inference_mode():
      for i, data in enumerate(tqdm(test_loader)):
          data = data.float().to(device)
          output = model(data)
          test_predictions.append(output.cpu())

  return torch.cat(test_predictions, dim=0)

In [77]:
# Save test output npy file
predictions = test(model, test_loader)
np.save('./ID_NAME_HW3', predictions.numpy())

100%|██████████| 46/46 [00:04<00:00,  9.78it/s]
