In [1]:
# Core PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Data loading and augmentation
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms

# Clustering (for offline K-means)
from sklearn.cluster import KMeans
import numpy as np

# Optional utilities
import random, copy, math

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.RandomHorizontalFlip(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = datasets.CIFAR10(root='./data', train=True,
                                download=True, transform=transform)

testset = datasets.CIFAR10(root='./data', train=False,
                                download=True, transform=transform)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


170499072it [00:03, 46614616.02it/s]                               


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


### Create a class to make a Semi Supervised dataloader

In [23]:
class SemiSupervisedDataset(Dataset):
    def __init__(self, dataset, M):
        self.dataset = dataset
        self.M = M
        self.indices = list(range(len(dataset)))
        random.shuffle(self.indices)  # Shuffle indices for randomness
        self.labeled_indices = self.indices[M:]
        self.unlabeled_indices = self.indices[:M]
        self.unlabeled_dset = self.dataset[self.unlabeled_indices]
        self.labeled_dset = self.dataset[self.labeled_indices]

    def __len__(self):
        return len(self.labeled_dset)

    def __getitem__(self, idx):
        if idx < self.labeled_dset.__len__():
            x, y = self.labeled_dset[idx]
            return x, y
        else:
            raise IndexError("Index out of range")

def semi_supervised_batch_loader(dataset, batch_size, M):
    """
    Creates a DataLoader that returns batches of (labeled_x, labeled_y, unlabeled_x).
    Args:
        dataset: The original dataset (e.g., trainset).
        batch_size: The size of the batch.
        M: Number of labeled instances.
    """
    semi_supervised_dataset = SemiSupervisedDataset(dataset, M)
    labeled_x, labeled_y, unlabeled_x = [], [], semi_supervised_dataset.unlabeled_dset

    for idx in range(len(semi_supervised_dataset.labeled_dset)):
        x, y = semi_supervised_dataset[idx]
        labeled_x.append(x)
        labeled_y.append(y)

        # Yield a batch when the batch size is reached
        if len(labeled_x) == batch_size:
            yield labeled_x, labeled_y, unlabeled_x
            labeled_x, labeled_y, unlabeled_x = [], [], []


In [24]:
trainset_semisupervised = SemiSupervisedDataset(trainset, M=1000)
trainloader = semi_supervised_batch_loader(trainset, batch_size=64, M=1000)

TypeError: list indices must be integers or slices, not list


### Create a basic CNN model for dimensionality reduction

In [5]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x, return_logits=False):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        x_logits = F.relu(self.fc2(x))
        x = self.fc3(x_logits)
        if return_logits:
            return x, x_logits
        else:
            return x

### Create Fully Connected Layer

In [6]:
class FinalConnected(nn.Module):
    def __init__(self, input_layers, num_classes=10):
        super(FinalConnected, self).__init__()
        self.fc = nn.Linear(input_layers, input_layers // 2)
        self.fc2 = nn.Linear(input_layers // 2, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.fc(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

### Train on dataset

In [7]:
model = SimpleCNN()
optim = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [20]:
strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32, padding=4),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomApply([transforms.GaussianBlur(3)], p=0.2),
])

weak_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32, padding=4),
])


In [22]:
device = 'gpu' if torch.cuda.is_available() else 'cpu'
if device == 'gpu':
    model = model.cuda()
    criterion = criterion.cuda()


for epoch in range(10):  
    for labeled_x, labeled_y, unlabeled_x in trainloader:
        print(len(labeled_x), len(labeled_y), len(unlabeled_x))
        labeled_x = torch.stack(labeled_x).to(device)
        labeled_y = torch.tensor(labeled_y).to(device)
        unlabeled_x_batch = torch.stack(unlabeled_x).to(device)

        # ----- Feature and output pass -----
        z_labeled, logits_labeled = model(labeled_x, return_logits=True)
        z_unlabeled, logits_unlabeled = model(unlabeled_x_batch, return_logits=True)

        # ----- Loss 1: Supervised -----
        L_sup = criterion(logits_labeled, labeled_y)

        # ----- Loss 2: K-means clustering on unlabeled features -----
        with torch.no_grad():
            z_unlabeled_np = z_unlabeled.cpu().numpy()
            kmeans = KMeans(n_clusters=10, n_init=10).fit(z_unlabeled_np)
            pseudo_labels = torch.tensor(kmeans.labels_).to(torch.long).to(device)
            cluster_centers = torch.tensor(kmeans.cluster_centers_).to(device)
        L_kmeans = torch.mean((z_unlabeled - cluster_centers[pseudo_labels]) ** 2) 

        # ----- Loss 3: Consistency -----
        x_weak, x_strong = weak_transform(unlabeled_x_batch), strong_transform(unlabeled_x_batch)
        z_weak, logits_weak = model(x_weak, return_logits=True)
        z_strong, logits_strong = model(x_strong, return_logits=True)
        L_consistency = F.kl_div(F.log_softmax(logits_weak, dim=1), F.softmax(logits_strong, dim=1), reduction='batchmean')

        # ----- Combine -----
        loss = L_sup + 0.25 * L_kmeans + 0.25 * L_consistency

        # ----- Backpropagation -----
        optim.zero_grad()
        loss.backward()
        optim.step()

    print(f'Epoch [{epoch+1}/10], Loss: {loss.item():.4f}')

0 0 64


RuntimeError: stack expects a non-empty TensorList