In [22]:
import torch
from torch import optim
from torch import Tensor
import matplotlib.pyplot as plt
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
import torchvision.transforms as tt
import torch.nn as nn
import torch.nn.functional as F 

from utils import *
import config
import random

from typing import Type

from Classify import Classifier

from PIL import Image

from tqdm import tqdm

In [23]:
print_config()

RANDOM_SEED   :  11042004
DATA_DIR      :    ./data
USED_DATA     :     MNIST
NUM_LABELLED  :      1000
DEVICE        :    cuda:0
EPOCHS        :        50
BATCH_SIZE    :        64
LEARNING_RATE :      0.01
SCHED         :      True
GAN_BATCH_SIZE:       128


In [24]:
set_random_seed(config.RANDOM_SEED)
random.seed(config.RANDOM_SEED)

Setting seeds ...... 



In [25]:
name = "GANSSL"

In [5]:
PATH = get_PATH(name)
PATH

'MNIST/CNN/_1000'

In [6]:
class ToPILImage():
    def __init__(self, mode=None):
        self.tt = tt.ToPILImage(mode)

    def __call__(self, pic):
        """
        Args:
            pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.

        Returns:
            PIL Image: Image converted to PIL Image.

        """
        if type(pic) == Image.Image:
            return pic
        else:
            return self.tt(pic)

    def __repr__(self) -> str:
        return self.tt.__repr__()

In [7]:
mean = [0.5]*3
std = [0.5]*3
if config.USED_DATA == "CIFAR10":

	train_tfm = tt.Compose([
		ToPILImage(),
		tt.RandomCrop(32, padding=4, padding_mode='edge'),
		tt.RandomHorizontalFlip(),
		tt.ToTensor(),
		tt.Normalize(mean, std, inplace=True)
	])

if config.USED_DATA == "MNIST":
	train_tfm = tt.Compose([
		tt.Normalize(mean, std, inplace=True)
	])

test_tfm = tt.Compose([
	ToPILImage(),
	tt.ToTensor(),
	tt.Normalize(mean, std)
])

In [8]:
train_ds, test_ds, classes = load_data(train_tfm, test_tfm)

In [9]:
X_full = train_ds.data
y_full = train_ds.targets

In [13]:
test_ds.data.shape

torch.Size([10000, 28, 28])

In [10]:
classes

['0 - zero',
 '1 - one',
 '2 - two',
 '3 - three',
 '4 - four',
 '5 - five',
 '6 - six',
 '7 - seven',
 '8 - eight',
 '9 - nine']

In [11]:
n_classes = len(classes)
channels = 1 # MNIST
n_classes, channels

(10, 1)

In [12]:
# test dataloader
test_dl = CreateDataLoader(Tensor(test_ds.data).permute(0, 3, 1, 2), torch.ByteTensor(test_ds.targets), 512, test_tfm, device=config.DEVICE)

RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 3 is not equal to len(dims) = 4

In [None]:
print(test_ds.data.shape)

(10000, 32, 32, 3)


In [None]:
X_sup, y_sup, X_unsup, _ = supervised_samples(Tensor(X_full), Tensor(y_full), config.NUM_LABELLED, n_classes, get_unsup=True)

In [None]:
X_sup = X_sup.permute(0, 3, 1, 2)
X_unsup = X_unsup.permute(0,3, 1, 2)

torch.Size([49000, 32, 32, 3])


In [None]:
model = Classifier(channels, n_classes).to(config.DEVICE, non_blocking=True)
model

Classifier(
  (conv): ConvModel(
    (initial): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
    )
    (Conv): Sequential(
      (0): ConvBn(
        (Conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (Bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): ReLU(inplace=True)
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (1): ConvBn(
        (Conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (Bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): ReLU(inplace=True)
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (2): ConvBn(
        (Conv): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (Bn): BatchNorm2d(512, eps=1e-05

In [None]:
import copy 
from torch import ByteTensor

In [None]:
class SelfTraining: 
    def __init__(self, model: Classifier, X_sup: Tensor, y_sup: Tensor, X_unsup: Tensor, test_dl: DeviceDataLoader, transform, num_rounds): 
        '''
            Input of self-training model:
            model: classifier
            num_rounds: number of self_training rounds
            sup_samples: number of supervised samples
        '''
        self.model = model 
        self.X_sup = X_sup
        self.y_sup = y_sup
        self.X_unsup = X_unsup
        self.transform = transform
        self.test_dataloader = test_dl
        
        self.num_rounds = num_rounds
    

    def CalDisagreement(self, h1: Classifier, h2: Classifier, dataset: CustomDataSet): 
        '''
            Calculate disagreement between teacher model and student model
            h1: Teacher model 
            h2: Student model
        '''
        disagreement = 0
        for x, _ in dataset: 
            disagreement += (torch.argmax(h1(x.unsqueeze(0))) == torch.argmax(h2(x.unsqueeze(0))))
        
        return disagreement/len(dataset)

    def random_sampling(self, sample_fraction: float, dataset: CustomDataSet, n: int): 
        dataset_set: list[CustomDataSet] = []
        for _ in range(n): 
            
            idx = random.sample(range(0, len(dataset)), int(len(dataset)*sample_fraction))
            data_X = dataset.x[idx]
            data_y = dataset.y[idx]

            dataset_set.append(CustomDataSet(data_X, data_y))
        return dataset_set
    
    def random_sampling(self, idx, sample_fraction, n):
        subsets_idx = []
        for _ in range(n):
            subset_idx = random.sample(idx, int(len(idx) * sample_fraction))
            subsets_idx.append(subset_idx)
        
        return subsets_idx

    def selfTraining(self, epochs, lr, batch_size: int, sample_fraction: float, n: int, opt_func: Type[optim.Optimizer] = optim.Adam, sched = True, PATH = ".", save_best = False, device = 'cpu'): 
        teacher_model = copy.deepcopy(self.model)
        for _ in range(self.num_rounds): 
            sup_dl = CreateDataLoader(self.X_sup, self.y_sup, batch_size=batch_size, transform=self.transform, device=device)
            student_model = copy.deepcopy(teacher_model) 
            student_model.fit(epochs, lr, sup_dl, self.test_dataloader, opt_func=opt_func, sched=sched, PATH=PATH, save_best=save_best)
            d=dict()
            labels = []

            print("start")
            
            unsup_dl = CreateDataLoader(self.X_unsup, None, batch_size=batch_size, transform=self.transform, device = device)
            confidence = Tensor().to(device)
            for i, x in enumerate(unsup_dl): 
                # print(x.get_device())
                x = Tensor(x)
                prob = F.softmax(student_model(x), dim=1)
                # add 0.1 into label in order to prevent collapsing at label 0
                labels = Tensor([0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 5.1, 7.1, 8.1, 9.1]).to(device)
                a = torch.matmul(prob, labels)
                confidence = torch.cat((confidence, a))
            
            print(confidence)
            print("threshold")
            break
            threshold = np.median(np.array(list(d.values())))
            threshold_idx = []

            for i in range(len(d)):
                if d[i] > threshold:
                    threshold_idx.append(i)
            
            print("sampling")
            # randomly sample sample_fraction of threshold_ds
            dataset_idx = self.random_sampling(threshold_idx, sample_fraction=sample_fraction, n=n)

            max = 0

            for subset_idx in dataset_idx:
                print(':)')
                model = model = Classifier(channels, n_classes).to(config.DEVICE)
                model.train()
                # calculate U\U[i]
                unlabel = self.X_unsup
                unlabel_i = self.X_unsup[subset_idx]

                counterpart_idx = []

                for i in range(len(self.X_unsup)):
                    if i not in subset_idx:
                        counterpart_idx.append(i)
                
                counterpart = self.X_unsup[counterpart_idx]

                y_counterpart = torch.argmax(teacher_model(counterpart), dim=1)  # shape: len(counterpart) x 1
                
                print(y_counterpart)
            break
            # '''
            for I in range(len(dataset_idx)): 
                model = Classifier(channels, n_classes).to(config.DEVICE)
                model.train()
                # calculate U\U[i]
                unlabel = self.X_unsup
                unlabel_i = dataset_set[I].x

                counterpart = Tensor().type_as(unlabel)
                
                # '''debugging'''
                # testing = True
                # print(threshold_X.shape) 
                # for i in range(60): 
                #     for j in range(i, 61): 
                #         if torch.equal(threshold_X[i] ,threshold_X[j]): 
                #             testing = False
                
                # print(testing)
                # break

                for i in range(unlabel.shape[0]): 
                    check = True
                    for j in range(unlabel_i.shape[0]): 
                       if torch.equal(unlabel[i], unlabel_i[j]):
                           check = False
                           break
                    if check:
                       counterpart = torch.cat((counterpart, unlabel[i].unsqueeze(0)))


                # generate label of data in U\U[i] by teacher_model classifier 
                y_counterpart = Tensor().type_as(unlabel)
                for x in counterpart: 
                    y_counterpart = torch.cat((y_counterpart, teacher_model(x.unsqueeze(0)).unsqueeze(0)))

                X_data = torch.cat((self.X_sup, unlabel_i, counterpart))
                y_data = torch.cat((self.y_sup, dataset_set[I].y ,y_counterpart)).to(dtype=torch.int)
                dl = CreateDataLoader(X_data, y_data, config.BATCH_SIZE, train_tfm, config.DEVICE)
               
                model.fit(config.EPOCHS, config.LEARNING_RATE, dl, test_dl, opt_func=optim.Adam, save_best=False)
                if self.CalDisagreement(student_model.classify, model, unlabeled_dataset) > max: 
                    max = self.CalDisagreement(student_model, model, unlabeled_dataset)
                    best = dataset_set[I]
            
            labeled_dataset.x = torch.cat((labeled_dataset.x, best.x))
            labeled_dataset.y = torch.cat((labeled_dataset.y, best.y))
            # remove sample from best dataset from unlabled dataset 
            for i in range(self.X_unsup.shape[0]): 
                check = True
                for j in range(best.x.shape[0]): 
                    if torch.equal(self.X_unsup[i], best.x[j]):
                        check = False
                        break
                    if not check:
                        self.X_unsup = torch.cat((self.X_unsup[:i], self.X_unsup[:i+1]))
                        unlabeled_dataset.y = torch.cat((unlabeled_dataset.y[:i], unlabeled_dataset.y[:i+1]))
            # reassign teacher model 
            teacher_model = student_model
        # return best model 
        self.model = teacher_model

In [None]:
selftraining = SelfTraining(model, X_sup, y_sup.to(dtype = torch.uint8), X_unsup, test_dl, transform=train_tfm, num_rounds=3)

In [None]:
dl = selftraining.selfTraining(1, 0.0001, 64, 0.4, 10, device=config.DEVICE)

Epoch [0]


  0%|          | 0/16 [00:00<?, ?it/s]

100%|██████████| 16/16 [00:01<00:00,  8.41it/s]


train_loss: 2.4043, val_loss: 2.4159, train_acc: 0.1620, val_acc: 0.1000, lrs: 0.0000->0.0000
start


OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Including non-PyTorch memory, this process has 17179869184.00 GiB memory in use. Of the allocated memory 10.60 GiB is allocated by PyTorch, and 153.81 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
torch.cuda.memory_summary(device=None, abbreviated=False)

