In [2]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the helper modules imported further down) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preparing the Dataset

In [4]:
# Directory layout used by this notebook.
import os

# Model inputs (the MNIST files) are stored in a `data` folder located
# inside the current working directory.
DATA_DIR_PATH = os.path.join(
    os.getcwd(),
    'data',
)

In [6]:
import torch
from torchvision import transforms
from torchvision.datasets import MNIST

# Image data is usually standardised with the dataset-wide mean and standard
# deviation, so that the normalised data has mean 0 and standard deviation 1.
# For MNIST the mean is 0.1307 and the standard deviation is 0.3081.
# Background: https://datascience.stackexchange.com/questions/46228/how-mean-and-deviation-come-out-with-mnist-dataset
mean, std = 0.1307, 0.3081

# Per-image preprocessing: convert to a tensor first, then normalise with the
# single-channel statistics defined above.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(mean,), std=(std,)),
])

# Train and test splits share the same storage root and preprocessing.
mnist_train_dataset = MNIST(root=DATA_DIR_PATH, train=True, transform=transform, download=True)
mnist_test_dataset = MNIST(root=DATA_DIR_PATH, train=False, transform=transform, download=True)

# There is one class per digit (0-9), hence 10 classes.
n_classes = 10

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting /mnt/d/Contrastive Learning/data/MNIST/raw/train-images-idx3-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting /mnt/d/Contrastive Learning/data/MNIST/raw/train-labels-idx1-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting /mnt/d/Contrastive Learning/data/MNIST/raw/t10k-images-idx3-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

Extracting /mnt/d/Contrastive Learning/data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /mnt/d/Contrastive Learning/data/MNIST/raw
Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!


## Common Setup

In [8]:
import torch
from torch.optim import lr_scheduler
import torch.optim as optim
# NOTE(review): `Variable` is not referenced in any cell visible here —
# confirm it is unused before removing the import.
from torch.autograd import Variable

# `fit` is the training loop called in the baseline section below; it comes
# from the `trainer` module (presumably project-local — verify).
from trainer import fit
import numpy as np

# Notebook-wide flag: True when a CUDA device is available. Later cells read
# it to decide whether to move the model and the image batches to the GPU.
cuda = torch.cuda.is_available()

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Legend labels for the ten digit classes, in class-index order.
mnist_classes = [str(digit) for digit in range(10)]

# One hex colour per digit class: entry i is the colour used for class i.
colors = [
    '#1f77b4',
    '#ff7f0e',
    '#2ca02c',
    '#d62728',
    '#9467bd',
    '#8c564b',
    '#e377c2',
    '#7f7f7f',
    '#bcbd22',
    '#17becf',
]


def plot_embeddings(embeddings, targets, xlim=None, ylim=None):
    """Scatter-plot the first two embedding dimensions, one colour per digit class.

    Args:
        embeddings: array-like of shape (n_samples, >=2); column 0 is drawn on
            the x axis and column 1 on the y axis.
        targets: array-like of length n_samples holding the class index
            (0-9) of every row in ``embeddings``.
        xlim: optional ``(low, high)`` pair for the x-axis limits.
        ylim: optional ``(low, high)`` pair for the y-axis limits.

    The figure is drawn through the pyplot state machine; nothing is returned.
    """
    # Accept plain lists as well: comparing a list with an int (`targets == i`)
    # yields a single False and would silently select no points.
    embeddings = np.asarray(embeddings)
    targets = np.asarray(targets)

    plt.figure(figsize=(10, 10))

    for i in range(10):
        inds = np.where(targets == i)[0]
        # The scatter keyword is `color` (singular); `colors` is not a valid
        # property of the collection scatter creates and makes the call fail.
        plt.scatter(embeddings[inds, 0], embeddings[inds, 1], alpha=0.5, color=colors[i])

    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if ylim:
        plt.ylim(ylim[0], ylim[1])
    # One scatter call is issued per class (even for empty classes), so the
    # legend entries line up with `mnist_classes` in order.
    plt.legend(mnist_classes)


def extract_embeddings(dataloader, model):
    """Run ``model.get_embedding`` over a whole dataloader and collect the results.

    Args:
        dataloader: iterable yielding ``(images, target)`` batches and exposing
            a ``dataset`` attribute whose ``len`` is the total sample count.
        model: object providing ``eval()`` and ``get_embedding(images)``.
            It is switched to eval mode and left in that mode.

    Returns:
        ``(embeddings, labels)`` as NumPy arrays. ``embeddings`` has shape
        ``(len(dataloader.dataset), D)`` where ``D`` is taken from the model's
        output (the original code only supported ``D == 2``); ``labels`` has
        shape ``(len(dataloader.dataset),)``. Rows are stored in iteration
        order; rows never produced by the loader remain zero.
    """
    n_samples = len(dataloader.dataset)
    embeddings = None  # allocated lazily, once the embedding width is known
    labels = np.zeros(n_samples)

    with torch.no_grad():
        model.eval()

        k = 0
        for images, target in dataloader:
            # `cuda` is the notebook-level flag; the batch follows the model to the GPU.
            if cuda:
                images = images.cuda()
            batch = model.get_embedding(images).detach().cpu().numpy()

            if embeddings is None:
                # Size the buffer from the actual output instead of assuming 2 columns.
                embeddings = np.zeros((n_samples,) + batch.shape[1:])

            embeddings[k:k + len(images)] = batch
            labels[k:k + len(images)] = target.numpy()
            k += len(images)

    if embeddings is None:
        # No batch was produced: keep the historical 2-column shape.
        embeddings = np.zeros((n_samples, 2))
    return embeddings, labels





## Baseline: Classification with Softmax

We'll train the model for classification and use the outputs of the penultimate layer as embeddings.

In [12]:
# Network definitions and the accuracy metric used by the softmax baseline.
from networks import EmbeddingNet, ClassificationNet
from metrics import AccumulatedAccuracyMetric

# --- Data loaders ------------------------------------------------------------
batch_size = 256
# Worker / pinned-memory options are only passed when a GPU is available.
kwargs = dict(num_workers=1, pin_memory=True) if cuda else {}

train_loader = torch.utils.data.DataLoader(
    mnist_train_dataset, batch_size=batch_size, shuffle=True, **kwargs
)
test_loader = torch.utils.data.DataLoader(
    mnist_test_dataset, batch_size=batch_size, shuffle=False, **kwargs
)

# --- Network -----------------------------------------------------------------
embedding_net = EmbeddingNet()
model = ClassificationNet(embedding_net, n_classes=n_classes)

if cuda:
    model.cuda()

# --- Loss, optimiser and schedule --------------------------------------------
# NOTE(review): NLLLoss expects log-probabilities as input — presumably
# ClassificationNet ends in a log-softmax; confirm in networks.py.
loss_fn = torch.nn.NLLLoss()
lr = 1e-2

optimizer = optim.Adam(model.parameters(), lr=lr)
# Scale the learning rate by 0.1 every 8 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1, last_epoch=-1)
n_epochs = 20
log_interval = 50

TypeError: __init__() missing 2 required positional arguments: 'embedding_net' and 'n_classes'

In [10]:
# Train the softmax baseline, tracking AccumulatedAccuracyMetric.
# This cell relies on the loaders, model, loss, optimiser and scheduler
# created in the setup cell directly above, so that cell must run first.
fit(
    train_loader,
    test_loader,
    model,
    loss_fn,
    optimizer,
    scheduler,
    n_epochs,
    cuda,
    log_interval,
    metrics=[AccumulatedAccuracyMetric()],
)

NameError: name 'train_loader' is not defined