In [1]:
import numpy as np
import torch
from torch import Tensor
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import MNIST
import random
from tqdm import tqdm

import matplotlib.pyplot as plt

from CNNmodel import ConvModel

In [2]:
random_seed = 0
torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

In [3]:
DATA_DIR = "./data"
n_classes = 10
num_labelled = 125
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [4]:
def one_hot(y):
	one_hot_y = torch.zeros((len(y), 10))
	one_hot_y[np.arange(len(y)), y] = 1
	return one_hot_y

In [5]:
data = MNIST(DATA_DIR, train = True, download=True)
data_X = data.data.unsqueeze(1).float().to(device)
data_X /= 255
data_Y = one_hot(data.targets).to(device)

num_data = data_Y.shape[0]

In [6]:
def supervised_samples(X: Tensor, y: Tensor, n_samples, val_ratio = 0.2):
	num_data = y.shape[0]

	ix = np.random.randint(0, num_data, n_samples)

	val = ix[:int(n_samples*val_ratio)]
	sup = ix[int(n_samples*val_ratio):]

	X_sup = X[sup]
	y_sup = y[sup]
	X_val = X[val]
	y_val = y[val]

	return X_sup, y_sup, X_val, y_val

In [7]:
X_sup, y_sup, X_val, y_val = supervised_samples(data_X, data_Y, num_labelled)
X_val.shape

torch.Size([25, 1, 28, 28])

In [8]:
class CustomDataSet(Dataset):
	def __init__(self, x: Tensor, y: Tensor) -> None:
		self.x: Tensor = x
		self.y: Tensor = y
		self.n_samples = len(y)
	
	def __getitem__(self, index):
		return self.x[index], self.y[index]
	
	def __len__(self):
		return self.n_samples

In [9]:
class Generator(nn.Module):
	def __init__(self, inp_size, out_size) -> None:
		super().__init__()


		self.NN = nn.Sequential(
			nn.Linear(inp_size, 256*7*7),
			nn.LeakyReLU(negative_slope=0.2),
		)

		self.CONV = nn.Sequential(
			nn.ConvTranspose2d(256, 128, (3, 3), (2, 2)),
			nn.BatchNorm2d(128),
			nn.LeakyReLU(0.2),
			nn.ConvTranspose2d(128, 64, (3, 3), (1, 1)),
			nn.BatchNorm2d(64),
			nn.LeakyReLU(0.2),
			
		)

		self.out = nn.Sequential(
			nn.ConvTranspose2d(64, out_size[0], (3, 3), (2, 2)),
			nn.AdaptiveAvgPool2d((out_size[1], out_size[2])),
			nn.Sigmoid()
		)

		self.optimizer = optim.Adam(self.parameters(), lr = 0.0002, betas=[0.5, 0.999])
		self.criterion = nn.BCELoss()
	
	def forward(self, X: Tensor):
		X = self.NN(X)
		X = X.view(-1, 256, 7, 7)
		X = self.CONV(X)

		X = self.out(X)

		return X

In [10]:
class Feature_Extractor(nn.Module):
	def __init__(self, inp_channel) -> None:
		super().__init__()


		self.CNN = ConvModel(inp_channel)

		self.dropout = nn.Sequential(
			nn.Dropout(0.4)
		)

	
	def forward(self, X: Tensor):
		X = self.CNN(X)
		X = self.dropout(X)
		return X

In [11]:
class Classify(nn.Module):
	def __init__(self, feature_extractor: nn.Module, num_classes) -> None:
		super().__init__()

		self.CNN = feature_extractor

		self.out = nn.Sequential(
			nn.Linear(512, num_classes),
			nn.Softmax(1)
		)

		self.optimizer = optim.Adam(self.parameters(), lr = 0.0002, betas= [0.5, 0.999])

		self.criterion = nn.BCELoss()
	
	
	def forward(self, X: Tensor):
		X = self.CNN(X)
		X = self.out(X)

		return X

In [12]:
class Discriminator(nn.Module):
	def __init__(self, feature_extractor) -> None:
		super().__init__()
		self.CNN = feature_extractor

		self.out = nn.Sequential(
			nn.Linear(512, 1),
			nn.Sigmoid()
		)
	
		self.optimizer = optim.Adam(self.parameters(), lr=0.0002, betas=[0.5, 0.999])
		self.criterion = nn.BCELoss()

	def forward(self, X: Tensor):
		X = self.CNN(X)

		X = self.out(X)

		return X

In [13]:
class SGAN:
	def __init__(self, image_size, num_classes, feature_extractor: nn.Module, latent_size = 100, lr=0.0002):

		self.latent_size = latent_size

		CNN = feature_extractor.to(device)

		self.generator = Generator(latent_size, image_size).to(device)

		self.classify = Classify(CNN, num_classes).to(device)
		self.discriminator = Discriminator(CNN).to(device)

		self.history = {}
	
	def __call__(self, X: torch.Tensor):
		return torch.argmax(self.classify(X))
	
	def save(self, PATH = "./"):
		torch.save(self.classify.state_dict(), PATH + "/classify.pt")
		torch.save(self.discriminator.state_dict(), PATH + "/discriminator.pt")
		torch.save(self.generator.state_dict(), PATH + "/generator.pt")
	
	def validation(self, X: Tensor, y: Tensor):
		self.classify.eval()

		num_data = y.shape[0]

		run_size = 10000

		current = 0
		correct = 0
		
		while current < num_data:
			correct += torch.count_nonzero(torch.argmax(self.classify(X[current: current + run_size]), 1) == torch.argmax(y[current: current + run_size], 1))
			current += run_size

		return (correct.float().item() / num_data)


	def training_step(self, model: nn.Module, optimizer: optim.Optimizer, criterion: nn.modules.loss._Loss, X: Tensor, y: Tensor):
		out: Tensor = model(X)

		loss: Tensor = criterion(out, y)

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

		return loss
	

	def fit(self, X: Tensor, y: Tensor, sup_samples, epochs = 100, batch_size = 64, val: bool = False):

		X_sup, y_sup, X_val, y_val = supervised_samples(X, y, sup_samples)

		datasets = CustomDataSet(X, y)

		sup_datasets = CustomDataSet(X_sup, y_sup)

		dataloader = DataLoader(datasets, batch_size=batch_size, shuffle=True)

		sup_dataloader = DataLoader(sup_datasets, batch_size=batch_size//2, shuffle=True)

		for epoch in range(epochs):
			self.classify.train()
			self.discriminator.train()
			self.generator.train()

			# print(f"epoch: {epoch}\nclassify: ")
			
			# for classify
			for inputs, labels in tqdm(sup_dataloader):
				sup_loss = self.training_step(self.classify, self.classify.optimizer, self.classify.criterion, inputs.to(device), labels.to(device))

			# print(f'GAN:')
			# for discriminator and generator
			for inputs, _ in tqdm(dataloader):
				real_loss = self.training_step(self.discriminator, self.discriminator.optimizer, self.discriminator.criterion, inputs, torch.ones((inputs.shape[0], 1)).to(device))

				z = torch.randn((inputs.shape[0], self.latent_size)).to(device)
				gen_out = self.generator(z)
				fake_loss = self.training_step(self.discriminator, self.discriminator.optimizer, self.discriminator.criterion, gen_out, torch.zeros(inputs.shape[0], 1).to(device))
				gen_out = self.generator(z)
				gen_loss = self.training_step(self.discriminator, self.generator.optimizer, self.discriminator.criterion, gen_out, torch.ones((inputs.shape[0], 1)).to(device))

			
			# train_acc = self.validation(X_sup, y_sup)

			# val_acc = self.validation(X_val, y_val)
			
			# print(f"train acc: {train_acc*100:.2f}%, val acc: {val_acc*100:.2f}%, classification_loss: {sup_loss:.2f}, discrimination_loss: {(real_loss+fake_loss)/2:.2f}, generation_loss: {gen_loss:.2f}")

In [14]:
import torch

# Assuming your tensor is named 'tensor'
tensor = torch.randn(1, 28, 28)  # Example tensor of size (1, 28, 28)

# Unsqueeze the tensor
unsqueezed_tensor = tensor.unsqueeze(1)

# Check the size of the unsqueezed tensor
print(unsqueezed_tensor.size())  # Output: torch.Size([1, 1, 28, 28])


torch.Size([1, 1, 28, 28])


In [60]:
import copy 
class SelfTraining: 
    def __init__(self, model: SGAN, X: Tensor, y: Tensor, num_rounds, sup_samples: int): 
        '''
            Input of self-training model:
            model: SGAN
            labeled_dataset: labelled dataset 
            unlabeled_dataset: unlabelled dataset 
            num_rounds: number of self_training rounds
        '''
        self.model = model 
        X_sup, y_sup, X_val, y_val = self.supervised_sampling(X, y, 125)

        self.labeled_dataset = CustomDataSet(X_sup, y_sup)
        self.unlabeled_dataset = CustomDataSet(X_val, y_val)
        self.num_rounds = num_rounds
        self.sup_samples = sup_samples
        
    
    def supervised_sampling(self, X: Tensor, y: Tensor, n_samples: int, val_ratio = 0.02):
        num_data = y.shape[0]

        print(f'n_samples is {n_samples}')
        ix = np.random.randint(0, num_data, n_samples)
        

        sup = ix[:int(n_samples*val_ratio)]
        val = ix[int(n_samples*val_ratio):]

        X_sup = X[sup]
        y_sup = y[sup]
        X_val = X[val]
        y_val = y[val]

        return X_sup, y_sup, X_val, y_val
    def CalDisagreement(self, h1: Classify, h2: Classify, dataset: CustomDataSet): 
        '''
            Calculate disagreement between teacher model and student model
            h1: Teacher model 
            h2: Student model
        '''
        disagreement = 0
        for x, _ in dataset: 
            disagreement += (h1(x) == h2(x))
        
        return disagreement/len(dataset)
    def training_step(self, model: nn.Module, optimizer: optim.Optimizer, criterion: nn.modules.loss._Loss, X: Tensor, y: Tensor):
        out: Tensor = model(X)

        loss: Tensor = criterion(out, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    def random_sampling(self, sample_fraction: float, dataset: CustomDataSet, n: int, teacher_model: Classify): 
        dataset_set:list[CustomDataSet] = []
        random.seed(42) 
        for _ in range(n): 
            data_X = []
            data_y = []
            for i in range(0, int(len(dataset)*sample_fraction)): 
                print(len(dataset))
                j = random.randint(0, len(dataset)-1)
                X, y = dataset[j]
                data_X.append(X) 
                data_y.append(torch.argmax(teacher_model(X)))
            dataset_set.append(CustomDataSet(data_X, data_y))
        return dataset_set

    def selfTraining(self, batch_size: int, sample_fraction: float, n: int): 
        labeled_dataset = copy.copy(self.labeled_dataset)
        unlabeled_dataset = copy.copy(self.unlabeled_dataset)
        teacher_model = copy.copy(self.model)
        for _ in range(self.num_rounds): 
            student_model = copy.copy(teacher_model) 
            # student_model.fit(torch.cat((labeled_dataset.x, unlabeled_dataset.x), 0), 
            #                   torch.cat((labeled_dataset.y, unlabeled_dataset.y), 0), 
            #                   sup_samples=self.sup_samples, epochs = 100, batch_size = batch_size)
            d=dict()
            labels = []
            for x, _ in unlabeled_dataset: 
                x = x.unsqueeze(1)
                c_labels = student_model.classify(x)
                # print(c_labels[0])
                labels.append(c_labels)
                a = torch.sum(Tensor([i*c_labels[0][i] for i in range(len(c_labels[0]))]))
                # print(a.shape)
                if not isinstance(a, Tensor): 
                    print(type(a))
                d[x] = a
            
            threshold = np.median(np.array(list(d.values())))
            threshold_X = []
            threshold_y = []
        
            for i, x in enumerate(d):
                if d[x] > threshold: 
                    threshold_X.append(x)
                    threshold_y.append(torch.argmax(labels[i]))
            threshold_ds = CustomDataSet(threshold_X, threshold_y)
            # randomly sample sample_fraction of threshold_ds
            dataset_set = self.random_sampling(sample_fraction=sample_fraction, dataset=threshold_ds, n=n, teacher_model=teacher_model)
            max = 0
            for i in range(n): 
                model = Classify(ConvModel(1), n_classes)
                model.train()
                # calculate U\U[i]
                unlabel = set(unlabeled_dataset.x)
                unlabel_i = set(dataset_set[i].x) 
                counterpart = list(unlabel - unlabel_i)
                # print(type(labeled_dataset.x), type(dataset_set[i].x), type(counterpart))
                X_data = labeled_dataset.x + Tensor(dataset_set[i].x) + Tensor(counterpart)
                y_data = labeled_dataset.y + dataset_set[i].y + [torch.argmax(teacher_model(x)) for x in counterpart]
                
                self.training_step(model, model.optimizer, model.criterion, X_data.to(device), y_data.to(device))
                if self.CalDisagreement(student_model, model, unlabeled_dataset) > max: 
                    max = self.CalDisagreement(student_model, model, unlabeled_dataset)
                    best = dataset_set[i]
            
            labeled_dataset.x = list(set(labeled_dataset.x) + set(best.x))
            labeled_dataset.y = list(set(labeled_dataset.y) + set(best.y))
            # reassign teacher model 
            teacher_model = student_model
        
        # return best model 
        self.model = teacher_model

In [61]:
model = SGAN([1, 28, 28], n_classes, ConvModel(1))

In [62]:
self_training = SelfTraining(model, data_X, data_Y, num_rounds = 10, sup_samples = 125)

n_samples is 125


In [63]:
self_training.selfTraining(64, 0.4, 10)

61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61
61


ValueError: only one element tensors can be converted to Python scalars

In [None]:
model.fit(data_X, data_Y, sup_samples=num_labelled, epochs=100, batch_size=64)

In [None]:
model.validation(data_X, data_Y)

0.74225