In [3]:
import numpy as np
import torch
from torch import Tensor
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import MNIST
import random
from tqdm import tqdm

import matplotlib.pyplot as plt

from CNNmodel import ConvModel
from utils import * 

In [4]:
print_config()

RANDOM_SEED  :  110404
DATA_DIR     :  ./data
USED_DATA    :   MNIST
NUM_LABELLED :    1000
DEVICE       :  cuda:0
EPOCHS       :      50
BATCH_SIZE   :      64
LEARNING_RATE:    0.01


In [6]:
random_seed = config.RANDOM_SEED
torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

In [5]:
DATA_DIR = "./data"
n_classes = 10
device = config.DEVICE
num_labelled = config.NUM_LABELLED
epochs = config.EPOCHS
batch_size = config.BATCH_SIZE
learning_rate = config.LEARNING_RATE

In [8]:
X_train, y_train, X_test, y_test, classes = load_data(0, 1)

In [9]:
X_sup, y_sup, X_unsup, y_unsup = supervised_samples(X_train, y_train, config.NUM_LABELLED, n_classes, get_unsup=True)

In [10]:
print(X_sup.shape)

torch.Size([1000, 1, 28, 28])


In [11]:
print(X_train.shape)

torch.Size([60000, 1, 28, 28])


In [12]:
X_train = X_train.to(device, dtype = torch.float32) 
X_test = X_test.to(device, dtype = torch.float32)
print(X_train.get_device())

0


In [13]:
y_train = y_train.to(device)
y_test = y_test.to(device)

In [14]:
print(y_train)

tensor([5, 0, 4,  ..., 5, 6, 8], device='cuda:0')


In [15]:
class Generator(nn.Module):
	def __init__(self, inp_size, out_size) -> None:
		super().__init__()


		self.NN = nn.Sequential(
			nn.Linear(inp_size, 256*7*7),
			nn.LeakyReLU(negative_slope=0.2),
		)

		self.CONV = nn.Sequential(
			nn.ConvTranspose2d(256, 128, (3, 3), (2, 2)),
			nn.BatchNorm2d(128),
			nn.LeakyReLU(0.2),
			nn.ConvTranspose2d(128, 64, (3, 3), (1, 1)),
			nn.BatchNorm2d(64),
			nn.LeakyReLU(0.2),
			
		)

		self.out = nn.Sequential(
			nn.ConvTranspose2d(64, out_size[0], (3, 3), (2, 2)),
			nn.AdaptiveAvgPool2d((out_size[1], out_size[2])),
			nn.Sigmoid()
		)

		self.optimizer = optim.Adam(self.parameters(), lr = 0.0002, betas=[0.5, 0.999])
		self.criterion = nn.BCELoss()
	
	def forward(self, X: Tensor):
		X = self.NN(X)
		X = X.view(-1, 256, 7, 7)
		X = self.CONV(X)

		X = self.out(X)

		return X

In [16]:
class Feature_Extractor(nn.Module):
	def __init__(self, inp_channel) -> None:
		super().__init__()


		self.CNN = ConvModel(inp_channel)

		self.dropout = nn.Sequential(
			nn.Dropout(0.4)
		)

	
	def forward(self, X: Tensor):
		X = self.CNN(X)
		X = self.dropout(X)
		return X

In [17]:
class Classify(nn.Module):
	def __init__(self, feature_extractor: nn.Module, num_classes) -> None:
		super().__init__()

		self.CNN = feature_extractor

		self.out = nn.Sequential(
			nn.Linear(512, num_classes)
		)

		self.optimizer = optim.Adam(self.parameters(), lr = 0.0002, betas= [0.5, 0.999])

		self.criterion = nn.CrossEntropyLoss()
	
	
	def forward(self, X: Tensor):
		X = self.CNN(X)
		X = self.out(X)

		return X

In [18]:
class Discriminator(nn.Module):
	def __init__(self, feature_extractor) -> None:
		super().__init__()
		self.CNN = feature_extractor

		self.out = nn.Sequential(
			nn.Linear(512, 1),
			nn.Sigmoid()
		)
	
		self.optimizer = optim.Adam(self.parameters(), lr=0.0002, betas=[0.5, 0.999])
		self.criterion = nn.BCELoss()

	def forward(self, X: Tensor):
		X = self.CNN(X)

		X = self.out(X)

		return X

In [19]:
class SGAN:
	def __init__(self, image_size, num_classes, feature_extractor: nn.Module, latent_size = 100, lr=0.0002):

		self.latent_size = latent_size

		CNN = feature_extractor.to(device)

		self.generator = Generator(latent_size, image_size).to(device)

		self.classify = Classify(CNN, num_classes).to(device)
		self.discriminator = Discriminator(CNN).to(device)

		self.history = {}
	
	def __call__(self, X: torch.Tensor):
		return torch.argmax(self.classify(X))
	
	def save(self, PATH = "./"):
		torch.save(self.classify.state_dict(), PATH + "/classify.pt")
		torch.save(self.discriminator.state_dict(), PATH + "/discriminator.pt")
		torch.save(self.generator.state_dict(), PATH + "/generator.pt")
	
	def validation(self, X: Tensor, y: Tensor):
		self.classify.eval()

		num_data = y.shape[0]

		run_size = 10000

		current = 0
		correct = 0
		
		while current < num_data:
			correct += torch.count_nonzero(torch.argmax(self.classify(X[current: current + run_size]), 1) == torch.argmax(y[current: current + run_size], 1))
			current += run_size

		return (correct.float().item() / num_data)


	def training_step(self, model: nn.Module, optimizer: optim.Optimizer, criterion: nn.modules.loss._Loss, X: Tensor, y: Tensor):
		out: Tensor = model(X)
		loss: Tensor = criterion(out, y)
		

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

		return loss
	

	def fit(self, X: Tensor, y: Tensor, X_sup: Tensor, y_sup: Tensor, sup_samples, epochs = 100, batch_size = 64, val: bool = True, gen_epochs = 25):

		X_sup, y_sup, X_val, y_val = supervised_sampling(X_sup, y_sup, sup_samples)

		datasets = CustomDataSet(X, y)

		sup_datasets = CustomDataSet(X_sup, y_sup)

		dataloader = DataLoader(datasets, batch_size=batch_size, shuffle=True)

		sup_dataloader = DataLoader(sup_datasets, batch_size=batch_size//2, shuffle=True)

		for epoch in range(epochs):
			self.classify.train()
			self.discriminator.train()
			self.generator.train()

			# print(f"epoch: {epoch}\nclassify: ")
			
			# for classify
			for inputs, labels in tqdm(sup_dataloader):
				sup_loss = self.training_step(self.classify, self.classify.optimizer, self.classify.criterion, inputs.to(device), labels.to(device))

			# print(f'GAN:')
			# for discriminator and generator
			if (epoch < gen_epochs): 
				for inputs, _ in tqdm(dataloader):
					real_loss = self.training_step(self.discriminator, self.discriminator.optimizer, self.discriminator.criterion, inputs, torch.ones((inputs.shape[0], 1)).to(device))
					
					z = torch.randn((inputs.shape[0], self.latent_size)).to(device)
					gen_out = self.generator(z)
					fake_loss = self.training_step(self.discriminator, self.discriminator.optimizer, self.discriminator.criterion, gen_out, torch.zeros(inputs.shape[0], 1).to(device))
					gen_out = self.generator(z)
					gen_loss = self.training_step(self.discriminator, self.generator.optimizer, self.discriminator.criterion, gen_out, torch.ones((inputs.shape[0], 1)).to(device))

				if val: 
					train_acc = self.validation(X_sup, y_sup)

					val_acc = self.validation(X_val, y_val)
					
					print(f"train acc: {train_acc*100:.2f}%, val acc: {val_acc*100:.2f}%, classification_loss: {sup_loss:.2f}, discrimination_loss: {(real_loss+fake_loss)/2:.2f}, generation_loss: {gen_loss:.2f}")
			
			else: 
				if val:
					print(f"classification_loss: {sup_loss:.2f}")

In [27]:
import copy 
from torch import IntTensor
class SelfTraining: 
    def __init__(self, model: SGAN, X: Tensor, y: Tensor, num_rounds, sup_samples: int): 
        '''
            Input of self-training model:
            model: SGAN
            labeled_dataset: labelled dataset 
            unlabeled_dataset: unlabelled dataset 
            num_rounds: number of self_training rounds
        '''
        self.model = model 
        self.X = X 
        self.y = y
        X_sup, y_sup, X_unsup, y_unsup = supervised_samples(X, y, config.NUM_LABELLED, n_classes, get_unsup=True)
        X_sup.to(device) 
        y_sup.to(device) 
        X_unsup.to(device) 
        y_unsup.to(device) 

        print(X_sup.get_device())
        print(X_unsup.get_device())
        self.labeled_dataset = CustomDataSet(X_sup, y_sup)
        self.unlabeled_dataset = CustomDataSet(X_unsup, y_unsup)
        self.num_rounds = num_rounds
        self.sup_samples = sup_samples
        
    def CalDisagreement(self, h1: Classify, h2: Classify, dataset: CustomDataSet): 
        '''
            Calculate disagreement between teacher model and student model
            h1: Teacher model 
            h2: Student model
        '''
        disagreement = 0
        for x, _ in dataset: 
            disagreement += (torch.argmax(h1(x.unsqueeze(0))) == torch.argmax(h2(x.unsqueeze(0))))
        
        return disagreement/len(dataset)
    def training_step(self, model: nn.Module, optimizer: optim.Optimizer, criterion: nn.modules.loss._Loss, X: Tensor, y: Tensor):
        out: Tensor = model(X).to(dtype = torch.float)
        y = y.to(dtype=torch.float)
        loss: Tensor = criterion(out, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    

    def random_sampling(self, sample_fraction: float, dataset: CustomDataSet, n: int): 
        dataset_set: list[CustomDataSet] = []
        for _ in range(n): 
            
            idx = random.sample(range(0, len(dataset)), int(len(dataset)*sample_fraction))
            data_X = dataset.x[idx]
            data_y = dataset.y[idx]

            dataset_set.append(CustomDataSet(data_X, data_y))
        return dataset_set

    def selfTraining(self, batch_size: int, sample_fraction: float, n: int): 
        labeled_dataset = copy.copy(self.labeled_dataset)
        unlabeled_dataset = copy.copy(self.unlabeled_dataset)
        teacher_model = copy.copy(self.model)
        for _ in range(self.num_rounds): 
            student_model = copy.copy(teacher_model)
           
            student_model.fit(self.X, 
                              self.y, 
                              labeled_dataset.x, 
                              labeled_dataset.y,
                              sup_samples=self.sup_samples, epochs = epochs, batch_size = batch_size)
            d=dict()
            labels = []
            for x in (unlabeled_dataset.x): 
                # print(x.get_device())
                c_labels = student_model.classify(x.unsqueeze(0))
                # print(c_labels[0])
                labels.append(c_labels)
                a = torch.sum(Tensor([i*c_labels[0][i] for i in range(len(c_labels[0]))]))
                # print(a.shape)
                if not isinstance(a, Tensor): 
                    print(type(a))
                d[x] = a
            
            threshold = np.median(np.array(list(d.values())))
            threshold_X = Tensor().to(device)
            threshold_y = IntTensor().to(device)
            threshold_idx = IntTensor().to(device)
        
            for i, x in enumerate(d):
                if d[x] > threshold:
                    threshold_X = torch.cat((threshold_X, x.unsqueeze(0)))
                    threshold_y = torch.cat((threshold_y, Tensor(torch.argmax(labels[i]).to(device).unsqueeze(0))))
            threshold_ds = CustomDataSet(threshold_X, threshold_y)
            # randomly sample sample_fraction of threshold_ds
            dataset_set = self.random_sampling(sample_fraction=sample_fraction, dataset=threshold_ds, n=n)
            max = 0
            for I in range(len(dataset_set)): 
                model = Classify(ConvModel(1), n_classes).to(device)
                model.train()
                # calculate U\U[i]
                unlabel = unlabeled_dataset.x
                unlabel_i = dataset_set[I].x

                counterpart = Tensor().type_as(unlabel)
                
                # '''debugging'''
                # testing = True
                # print(threshold_X.shape) 
                # for i in range(60): 
                #     for j in range(i, 61): 
                #         if torch.equal(threshold_X[i] ,threshold_X[j]): 
                #             testing = False
                
                # print(testing)
                # break

                for i in range(unlabel.shape[0]): 
                    check = True
                    for j in range(unlabel_i.shape[0]): 
                       if torch.equal(unlabel[i], unlabel_i[j]):
                           check = False
                           break
                    if check:
                       counterpart = torch.cat((counterpart, unlabel[i].unsqueeze(0)))


                # generate label of data in U\U[i] by teacher_model classifier 
                y_counterpart = Tensor().type_as(unlabel)
                for x in counterpart: 
                    y_counterpart = torch.cat((y_counterpart, teacher_model(x.unsqueeze(0)).unsqueeze(0)))

                X_data = torch.cat((labeled_dataset.x, unlabel_i, counterpart))
                y_data = one_hot(torch.cat((labeled_dataset.y, dataset_set[I].y ,y_counterpart)).to(dtype=torch.int))
               
                print(y_data.shape)
                print(model(X_data).shape)


                self.training_step(model, model.optimizer, model.criterion, X_data.to(device), y_data.to(device))
                if self.CalDisagreement(student_model.classify, model, unlabeled_dataset) > max: 
                    max = self.CalDisagreement(student_model, model, unlabeled_dataset)
                    best = dataset_set[I]
            
            labeled_dataset.x = torch.cat((labeled_dataset.x, best.x))
            labeled_dataset.y = torch.cat((labeled_dataset.y, best.y))
            # remove sample from best dataset from unlabled dataset 
            for i in range(unlabeled_dataset.x.shape[0]): 
                check = True
                for j in range(best.x.shape[0]): 
                    if torch.equal(unlabeled_dataset.x[i], best.x[j]):
                        check = False
                        break
                    if not check:
                        unlabeled_dataset.x = torch.cat((unlabeled_dataset.x[:i], unlabeled_dataset.x[:i+1]))
                        unlabeled_dataset.y = torch.cat((unlabeled_dataset.y[:i], unlabeled_dataset.y[:i+1]))
            # reassign teacher model 
            teacher_model = student_model
        
        # return best model 
        self.model = teacher_model

In [28]:
model = SGAN([1, 28, 28], n_classes, ConvModel(1), lr= learning_rate)

In [29]:
self_training = SelfTraining(model, X_train, y_train, num_rounds = 10, sup_samples = num_labelled)

0
0


In [30]:
self_training.selfTraining(batch_size, 0.4, 10)

100%|██████████| 25/25 [00:01<00:00, 23.95it/s]
100%|██████████| 938/938 [01:13<00:00, 12.74it/s]


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)