In [1]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import datasets

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch import optim
from torch.utils.data import Dataset, DataLoader

from matplotlib import pyplot as plt

from torch.utils.data import Dataset, DataLoader
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Hyperparameters
in_dim = 784
out_dim = 10 
hid_dim = 300

n_epoch = 20
lr = 1e-4
batch_size = 64
MAX_ESC = 10

In [50]:
#################
# MNIST dataset #
#################

#--------------------#
# Data preprocessing #
#--------------------#
'''
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
'''

labeled_portion = 0.8
train_portion = 0.8

#------------------------#
# Load and split dataset #
#------------------------#

full_train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(),  download=True)

labeled_size = int(labeled_portion * len(full_train_dataset))
unlabeled_size = len(full_train_dataset) - labeled_size
labeled_data, unlabeled_data = torch.utils.data.random_split(full_train_dataset, [labeled_size, unlabeled_size])

train_size = int(train_portion * len(labeled_data))
val_size = len(labeled_data) - train_size
train_data, val_data = torch.utils.data.random_split(labeled_data, [train_size, val_size])

test_data = torchvision.datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

#------------------#
# Build dataloader #
#------------------#

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
unlabeled_loader = torch.utils.data.DataLoader(unlabeled_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

#-------------------#
# Show dataset info #
#-------------------#

print(f'Train data\t{len(train_data)}')
print(f'Unabeled data\t{len(unlabeled_data)}')
print(f'Val data\t{len(val_data)}')
print(f'Test data\t{len(test_data)}')

48000
Train data	38400
Unabeled data	12000
Val data	9600
Test data	10000


In [5]:
#########
# Model #
#########

model = torch.nn.Sequential(
    torch.nn.Linear(in_dim, hid_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(hid_dim, out_dim),
)

loss_fcn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# optimizer = optim.Adam(model.parameters(), lr=lr)

# print(model)
# print(next(model.parameters()).device)
model.cuda()
# print(next(model.parameters()).device)

Sequential(
  (0): Linear(in_features=784, out_features=300, bias=True)
  (1): ReLU()
  (2): Linear(in_features=300, out_features=10, bias=True)
)
cpu
cuda:0


In [51]:
############
# Training #
############

n_batch = len(train_loader)
best_val_acc = 0
esc = 0


# Initialization 
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

model.apply(init_weights)

# Start training 
for epoch in range(n_epoch):
    correct_cnt, total_loss, total_cnt, train_loss, val_loss = 0, 0, 0, 0, 0
    
    for batch, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = images.to(device)
            labels = labels.to(device)
        
        predictions = model(images.view(-1, in_dim))
        loss = loss_fcn(predictions, labels)
    
        model.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate the training loss and accuracy of each iteration
        _, pred_labels = torch.max(predictions, 1)
        total_cnt += images.size(0)
        correct_cnt += (pred_labels == labels).sum().item()
        train_loss += loss.item()
        
        # Show the training information
        if batch % 100 == 0 or batch == len(train_loader):
            acc = correct_cnt / total_cnt
            print(
                f"Epoch [{epoch+1}/{n_epoch}], Step [{batch}/{n_batch}], Train loss: {loss.item():.6f}, Train acc: {acc * 100:.3f} %"
            )
    
    # Validating
    model.eval()

    with torch.no_grad():  # No need BP
        total_cnt, correct_cnt = 0, 0
        
        for batch, (images, labels) in enumerate(val_loader, 1):
            
            # Put input tensor to GPU if it's available
            if torch.cuda.is_available():
                images = images.to(device)
                labels = labels.to(device)
                # images, labels = images.cuda(), labels.cuda()

            # Forward pass
            predictions = model(images.view(-1, in_dim))
            loss = loss_fcn(predictions, labels)
            
            # Calculate the training loss and accuracy of each iteration
            _, pred_labels = torch.max(predictions, 1)
            total_cnt += images.size(0)
            correct_cnt += (pred_labels == labels).sum().item()
            val_loss += loss.item()

        val_acc = correct_cnt / total_cnt
        print(f"val_acc: {val_acc * 100:.3f} %, {esc} / {MAX_ESC}", end=' ')
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc

            # Save trained model
            torch.save(model.state_dict(), f"./checkpoint/NN.pth" )
            print('(model updated!)')
            esc = 0
        else:
            print('(model dropped)')
            esc += 1

        
    if esc > MAX_ESC:
        break

model.train()

print('Finish training')

Epoch [1/20], Step [0/600], Train loss: 2.427868, Train acc: 7.812 %
Epoch [1/20], Step [100/600], Train loss: 2.401922, Train acc: 6.745 %
Epoch [1/20], Step [200/600], Train loss: 2.476192, Train acc: 6.600 %
Epoch [1/20], Step [300/600], Train loss: 2.475604, Train acc: 6.722 %
Epoch [1/20], Step [400/600], Train loss: 2.394784, Train acc: 6.940 %
Epoch [1/20], Step [500/600], Train loss: 2.359475, Train acc: 7.148 %
val_acc: 8.438 %, 0 / 10 (model updated!)
Epoch [2/20], Step [0/600], Train loss: 2.331833, Train acc: 9.375 %
Epoch [2/20], Step [100/600], Train loss: 2.409046, Train acc: 9.452 %
Epoch [2/20], Step [200/600], Train loss: 2.419545, Train acc: 9.530 %
Epoch [2/20], Step [300/600], Train loss: 2.318299, Train acc: 9.463 %
Epoch [2/20], Step [400/600], Train loss: 2.334238, Train acc: 9.691 %
Epoch [2/20], Step [500/600], Train loss: 2.249120, Train acc: 9.952 %
val_acc: 12.146 %, 0 / 10 (model updated!)
Epoch [3/20], Step [0/600], Train loss: 2.397802, Train acc: 9.375 

In [None]:

def evaluate(dataloader, mode):
    with torch.no_grad():  # No need BP
        for batch, (images, labels) in enumerate(dataloader, 1):
            
            # Put input tensor to GPU if it's available
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()

            # Forward pass
            predictions = model(images.view(-1, in_dim))
            loss = loss_fcn(predictions, labels)
            
            # Calculate the training loss and accuracy of each iteration
            _, pred_labels = torch.max(predictions, 1)
            total_cnt += images.size(0)
            correct_cnt += (pred_labels == labels).sum().item()
            val_loss += loss.item()

        val_acc = correct_cnt / total_cnt
        print(f"val_acc: {val_acc * 100:.3f} %, {esc} / {MAX_ESC}", end=' ')
        
        if mode is 'validation':
            if val_acc > best_val_acc:
                best_val_acc = val_acc

                # Save trained model
                torch.save(model.state_dict(), f"./checkpoint/NN.pth" )
                print('(model updated!)')
                esc = 0
            else:
                print('(model dropped)')
                esc += 1

In [41]:
###########
# Testing #
###########

model.load_state_dict(torch.load("./checkpoint/NN.pth"))
model.cuda()

with torch.no_grad(): # No need BP
    
    # Record variables and containers
    n_correct = 0
    n_samples = 0
    n_class_correct = [0] * 10
    n_class_samples = [0] * 10
    
    # Loop through batches in test_loader
    for images, labels in test_loader:
        
        # Get the GPU support
        if torch.cuda.is_available():
            images = images.to(device)
            labels = labels.to(device)
        
        # Predict via forward pass
        predictions = model(images.view(-1, 784))
        _, pred_labels = torch.max(predictions, 1)
        n_samples += labels.size(0)
        n_correct += (pred_labels == labels).sum().item()
        
        # Record correctness of each classes in this batch
        for i in range(labels.size(0)):
            label = labels[i]
            pred = pred_labels[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1
    
    # Overall performance
    acc = 100.0 * n_correct / n_samples
    print(f'Testing accuracy of network:\t{acc:.2f} %')

Testing accuracy of network:	76.35 %


In [7]:
# Load model
model.load_state_dict(torch.load("./checkpoint/NN.pth"))
model = model.to(device)

# Find threshold

confidence_list = np.array([])

model.eval()

with torch.no_grad(): # No need BP
    
    # Record variables and containers
    n_correct = 0
    n_samples = 0
    n_class_correct = [0] * 10
    n_class_samples = [0] * 10
    
    # Loop through batches in test_loader
    for images, labels in train_loader:
        
        # Get the GPU support
        if torch.cuda.is_available():
            images = images.to(device)
            labels = labels.to(device)
        
        # Predict via forward pass
        predictions = model(images.view(-1, 784))
        confidence, predicted_labels = torch.max(predictions, dim=1)        
        confidence_list = np.concatenate((confidence_list, confidence.cpu()), axis=0)
        
        n_samples += labels.size(0)
        n_correct += (predicted_labels == labels).sum().item()
        
        # Record correctness of each classes in this batch
        for i in range(labels.size(0)):
            label = labels[i]
            pred = predicted_labels[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1
    
    # Overall performance
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of network:\t{acc:.2f} %')

print(len(confidence_list))
print(confidence_list.max())
print(confidence_list.min())
threshold = np.sort(confidence_list)[int(len(confidence_list) / 2)]
print(threshold)

Accuracy of network:	72.15 %
38400
3.727449655532837
-0.008956225588917732
1.2272506952285767


In [8]:
###################
# Pseudo-labeling #
###################

import itertools

confident_unlabels = []
pseudo_labels = []

# Load model
model.load_state_dict(torch.load("./checkpoint/NN.pth"))
model = model.to(device)

with torch.no_grad(): # No need BP
    
    # Record variables and containers
    n_correct = 0
    n_samples = 0
    n_class_correct = [0] * 10
    n_class_samples = [0] * 10
    
    # Loop through batches in test_loader

    # top = itertools.islice(unlabeled_loader, 5)
    for images, labels in unlabeled_loader:
        
        # Get the GPU support
        if torch.cuda.is_available():
            images = images.to(device)
            labels = labels.to(device)
        
        # Predict via forward pass
        predictions = model(images.view(-1, 784))
        
        confidence, predicted_labels = torch.max(predictions, dim=1)
        
        n_samples += labels.size(0)
        n_correct += (predicted_labels == labels).sum().item()

        for i, c in enumerate(confidence):
            if c.item() > threshold:                
                confident_unlabels.append(images[i])
                pseudo_labels.append(predicted_labels[i])
        
        # Record correctness of each classes in this batch
        for i in range(labels.size(0)):
            label = labels[i]
            pred = predicted_labels[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1
    
    # Overall performance
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of network:\t{acc:.2f} %')

print(unlabeled_size)
print(len(confident_unlabels))
print(len(pseudo_labels))
confident_unlabels = torch.stack(confident_unlabels)
pseudo_labels = torch.stack(pseudo_labels)

Accuracy of network:	72.66 %
12000
5920
5920


In [9]:
# original_dataset = torch.empty(1, 28, 28)
# original_labels = torch.empty((1,), dtype=torch.long)

original_dataset = None
original_labels = None

for images, labels in train_loader:
    if original_dataset is None:
        original_dataset = images
    else:
        original_dataset = torch.cat((original_dataset, images))
    
    if original_labels is None:
        original_labels = labels
    else:
        original_labels = torch.cat((original_labels, labels))
    
    # print(images.shape)
    # print(original_dataset.shape)
    # print(labels.shape)
    # original_dataset = torch.cat((original_dataset, images), 0)
    # original_labels = torch.cat((original_labels, labels), 0)
    # original_dataset.append(images)
    # original_labels.append(labels)

# original_dataset = torch.stack(original_dataset)
# original_labels = torch.stack(original_labels)
print(original_dataset.shape)
print(original_labels.shape)

torch.Size([38400, 1, 28, 28])
torch.Size([38400])


In [30]:
print(confident_unlabels.shape)
print(pseudo_labels.shape)

torch.Size([5920, 1, 28, 28])
torch.Size([5920])


In [31]:
new_dataset = torch.cat((original_dataset, confident_unlabels.cpu()))
new_labels = torch.cat((original_labels, pseudo_labels.cpu()))
print(len(new_dataset))
print(len(new_labels))

44320
44320


In [32]:
class mydata(Dataset):
    def __init__(self):
        self.samples = new_dataset
        self.labels = new_labels
        self.n_samples = len(new_dataset)

    def __getitem__(self, index):
        # print(self.samples[index].unsqueeze(0).shape)
        return self.samples[index], self.labels[index]
    
    def __len__(self):
        return self.n_samples

new_data = mydata()
new_loader = torch.utils.data.DataLoader(new_data, batch_size=batch_size, shuffle=True)


In [40]:
###########
# Retrain #
###########

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model.load_state_dict(torch.load("./checkpoint/NN.pth"))

n_batch = len(new_loader)

def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

model.apply(init_weights)

############
# Training #
############
best_val_acc = 0
esc = 0

model = model.to(device)

for epoch in range(n_epoch):
    correct_cnt, total_loss, total_cnt, train_loss, val_loss = 0, 0, 0, 0, 0
    
    for batch, (images, labels) in enumerate(new_loader):
        if torch.cuda.is_available():
            images = images.to(device)
            labels = labels.to(device)
        
        predictions = model(images.view(-1, in_dim))
        loss = loss_fcn(predictions, labels)
    
        model.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate the training loss and accuracy of each iteration
        _, pred_labels = torch.max(predictions, 1)
        total_cnt += images.size(0)
        correct_cnt += (pred_labels == labels).sum().item()
        train_loss += loss.item()
        
        # Show the training information
        if batch % 100 == 0 or batch == len(new_loader):
            acc = correct_cnt / total_cnt
            print(
                f"Epoch [{epoch+1}/{n_epoch}], Step [{batch}/{n_batch}], Train loss: {loss.item():.6f}, Train acc: {acc * 100:.3f} %"
            )
    
    ##############
    # Validating #
    ##############
    
    model.eval()

    with torch.no_grad():  # No need BP
        for batch, (images, labels) in enumerate(val_loader, 1):
            
            # Put input tensor to GPU if it's available
            if torch.cuda.is_available():
                images = images.to(device)
                labels = labels.to(device)
                # images, labels = images.cuda(), labels.cuda()

            # Forward pass
            predictions = model(images.view(-1, in_dim))
            loss = loss_fcn(predictions, labels)
            
            # Calculate the training loss and accuracy of each iteration
            _, pred_labels = torch.max(predictions, 1)
            total_cnt += images.size(0)
            correct_cnt += (pred_labels == labels).sum().item()
            val_loss += loss.item()

        val_acc = correct_cnt / total_cnt
        print(f"val_acc: {val_acc * 100:.3f} %, {esc} / {MAX_ESC}", end=' ')
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc

            # Save trained model
            torch.save(model.state_dict(), f"./checkpoint/NN.pth" )
            print('(model updated!)')
            esc = 0
        else:
            print('(model dropped)')
            esc += 1

        
    if esc > MAX_ESC:
        break

model.train()

print('Finish training')

Epoch [1/20], Step [0/693], Train loss: 2.471171, Train acc: 3.125 %
Epoch [1/20], Step [100/693], Train loss: 2.423783, Train acc: 3.311 %
Epoch [1/20], Step [200/693], Train loss: 2.405116, Train acc: 3.234 %
Epoch [1/20], Step [300/693], Train loss: 2.326942, Train acc: 3.364 %
Epoch [1/20], Step [400/693], Train loss: 2.346068, Train acc: 3.671 %
Epoch [1/20], Step [500/693], Train loss: 2.366001, Train acc: 4.023 %
Epoch [1/20], Step [600/693], Train loss: 2.357221, Train acc: 4.352 %
val_acc: 5.045 %, 0 / 10 (model updated!)
Epoch [2/20], Step [0/693], Train loss: 2.341733, Train acc: 7.812 %
Epoch [2/20], Step [100/693], Train loss: 2.345186, Train acc: 7.596 %
Epoch [2/20], Step [200/693], Train loss: 2.269467, Train acc: 8.388 %
Epoch [2/20], Step [300/693], Train loss: 2.273064, Train acc: 8.768 %
Epoch [2/20], Step [400/693], Train loss: 2.297820, Train acc: 9.348 %
Epoch [2/20], Step [500/693], Train loss: 2.292525, Train acc: 9.858 %
Epoch [2/20], Step [600/693], Train los