In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, utils
from torch. utils.data import DataLoader
from torchvision.transforms import Resize, Grayscale, ToTensor, Compose, Normalize

In [2]:
# For reproducibility: seed NumPy and PyTorch and pin cuDNN to deterministic behaviour.
import numpy as np

np.random.seed(0)
# Ask cuDNN for deterministic algorithms ...
torch.backends.cudnn.deterministic = True
# ... and switch off its benchmark-based autotuner. The flag is spelled `benchmark`;
# assigning to a misspelled name only creates an unused attribute and changes nothing.
torch.backends.cudnn.benchmark = False
# Kept as the last expression so the cell still displays the returned Generator.
torch.manual_seed(0)

<torch._C.Generator at 0x7f69712bd510>

# Load Data and Init Models

In [3]:
batch_size = 128
test_batch_size = 100

# Transformations
# MNIST is resized to 32, SVHN is converted to a single grayscale channel;
# both are then turned into tensors and normalised with mean 0.5 / std 0.5.
mnist_transformations = Compose(
    [Resize(32), ToTensor(), Normalize((0.5,), (0.5,))]
)
svhn_transformations = Compose(
    [Grayscale(num_output_channels=1), ToTensor(), Normalize((0.5,), (0.5,))]
)

# Data Source (all four splits are downloaded into ../data when missing)
svhn_train = datasets.SVHN(
    '../data', split='train', download=True, transform=svhn_transformations
)
svhn_test = datasets.SVHN(
    '../data', split='test', download=True, transform=svhn_transformations
)
mnist_train = datasets.MNIST(
    '../data', train=True, download=True, transform=mnist_transformations
)
mnist_test = datasets.MNIST(
    '../data', train=False, download=True, transform=mnist_transformations
)

# Data loaders: every loader shuffles; train and test only differ in batch size
_train_loader_options = {"batch_size": batch_size, "shuffle": True}
_test_loader_options = {"batch_size": test_batch_size, "shuffle": True}

svhn_train_loader = DataLoader(svhn_train, **_train_loader_options)
svhn_test_loader = DataLoader(svhn_test, **_test_loader_options)
mnist_test_loader = DataLoader(mnist_test, **_test_loader_options)
mnist_train_loader = DataLoader(mnist_train, **_train_loader_options)

Using downloaded and verified file: ../data/train_32x32.mat
Using downloaded and verified file: ../data/test_32x32.mat


In [4]:
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

In [5]:
# Definition of Encoder network
class Encoder(nn.Module):
    """Convolutional feature extractor producing a 2048-dimensional feature vector.

    ``seq1`` is three 5x5 convolutions (each followed by BatchNorm and ReLU) with a
    stride-2 max-pool after the first two. Its output is flattened to 4096 values
    per sample, which matches single-channel 32x32 inputs (64 channels x 8 x 8).
    ``seq2`` projects those 4096 values to 2048 features.
    """

    def __init__(self):
        super().__init__()

        conv_layers = [
            nn.Conv2d(1, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(stride=2, kernel_size=3, padding=1),
            nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(stride=2, kernel_size=3, padding=1),
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        ]
        self.seq1 = nn.Sequential(*conv_layers)

        dense_layers = [
            nn.Linear(4096, 2048),
            nn.BatchNorm1d(2048),
            nn.ReLU(),
        ]
        self.seq2 = nn.Sequential(*dense_layers)

    def forward(self, x):
        """Return the encoded features for a batch ``x``; dropout is applied only in training mode."""
        conv_maps = self.seq1(x)
        flattened = conv_maps.view(conv_maps.size(0), 4096)
        hidden = self.seq2(flattened)
        return F.dropout(hidden, training=self.training)

In [6]:
# Definition of classifier network
class Classifier(nn.Module):
    """Two-layer classification head mapping 2048 input features to 10 raw scores.

    No softmax is applied here; the outputs are unnormalised logits.
    """

    def __init__(self):
        super().__init__()
        head_layers = [
            nn.Linear(2048, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Linear(1024, 10),
        ]
        self.seq = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the 10 logits per sample for the feature batch ``x``."""
        logits = self.seq(x)
        return logits

In [7]:
# Build the two classifiers first, then the encoder, and move each to the selected device
cls1_model, cls2_model = (Classifier().to(device) for _ in range(2))
enc_model = Encoder().to(device)

In [8]:
# Method for xavier initialization
def init_weights(m):
    """Initialise a single module in place; intended for ``model.apply(init_weights)``.

    Linear and Conv2d layers get Xavier-normal weights and, when they have a bias,
    a constant bias of 0.01. Every other module type is left untouched.

    Args:
        m: the module handed over by ``nn.Module.apply``.
    """
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_normal_(m.weight)
        # Layers built with bias=False have m.bias set to None; skip them instead of crashing.
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)

In [9]:
# Apply xavier initialization to each network and print its layer summary
for _net in (cls1_model, cls2_model, enc_model):
    print(_net.apply(init_weights))

Classifier(
  (seq): Sequential(
    (0): Linear(in_features=2048, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=1024, out_features=10, bias=True)
  )
)
Classifier(
  (seq): Sequential(
    (0): Linear(in_features=2048, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=1024, out_features=10, bias=True)
  )
)
Encoder(
  (seq1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True

# Init tensorboard writer

In [10]:
# Init tensorboard writer
# SummaryWriter is constructed without arguments, so the library's default log
# directory is used. `tb` is the handle the later cells log images, graphs,
# embeddings and scalars through.
from torch.utils.tensorboard import SummaryWriter

tb = SummaryWriter()

In [11]:
# Log one batch of MNIST images plus the encoder / classifier graphs to tensorboard.
# Gradients are not needed for logging, hence torch.no_grad().
# NOTE(review): no .eval() call precedes this cell in the visible code, so these
# forward passes presumably run in training mode (dropout active, BatchNorm
# running statistics updated) — confirm that this is intended.
with torch.no_grad():
    # `labels` is fetched together with the images but is not used in this cell
    images_mnist, labels = next(iter(mnist_train_loader))
    images_mnist = images_mnist.to(device)
    # Encoder output, used below as the example input for the classifier graph
    temp = enc_model(images_mnist)
    
    # Send mnist images to tensorboard
    grid = utils.make_grid(images_mnist)
    tb.add_image("original images mnist", grid)
    
    # Send graphs of the model to tensorboard
    tb.add_graph(enc_model, images_mnist)
    tb.add_graph(cls1_model, temp)
    # Drop the temporary features and release cached GPU memory
    del temp
    torch.cuda.empty_cache()
    
# Same for one batch of SVHN images (images only, no model graph).
with torch.no_grad():
    images_svhn, labels = next(iter(svhn_train_loader))
    images_svhn = images_svhn.to(device)
    
    # Send svhn images to tensorboard
    grid = utils.make_grid(images_svhn)
    tb.add_image("original images svhn", grid)
    # No graph is logged for the SVHN batch; only the CUDA cache is released here
    torch.cuda.empty_cache()

def get_images_and_encoding(n, loader):
    """Encode up to ``n`` batches from ``loader`` with ``enc_model``.

    The encoder is switched to eval mode for the duration of the call, so dropout
    is off and BatchNorm running statistics are not modified, and its previous
    train/eval mode is restored afterwards. This keeps snapshots taken at
    different points of training comparable. Batches come from one pass over
    ``loader`` rather than from a fresh iterator per batch.

    Args:
        n: number of batches to encode. At least one batch is always encoded,
            even when ``n`` is smaller than 1.
        loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(enc, labels)``: the CPU encodings and the matching labels, each
        concatenated along dimension 0. Fewer than ``n`` batches are returned
        when ``loader`` runs out first.

    Raises:
        RuntimeError: if ``loader`` yields no batch at all (nothing to concatenate).
    """
    n_batches = max(n, 1)
    was_training = enc_model.training
    enc_model.eval()

    encoded_batches = []
    label_batches = []
    try:
        with torch.no_grad():  # no autograd graph is kept for these forward passes
            # zip() advances range() first, so no extra batch is pulled from the loader
            for _, (images, batch_labels) in zip(range(n_batches), loader):
                features = enc_model(images.to(device))
                encoded_batches.append(features.to('cpu'))
                label_batches.append(batch_labels)
    finally:
        # Restore the caller's mode and release cached GPU memory
        enc_model.train(was_training)
        torch.cuda.empty_cache()

    # Concatenate once at the end instead of re-copying the result after every batch
    return torch.cat(encoded_batches, 0), torch.cat(label_batches, 0)


def snapshot_latent_space(flavour_text):
    """Log encoder features of MNIST and SVHN training batches as a tensorboard embedding.

    Ten batches are requested from each training loader. The metadata stored per
    point is ``label + 10 * domain`` with domain 0 for MNIST and 1 for SVHN.

    Args:
        flavour_text: tag under which the embedding is stored.
    """
    mnist_enc, mnist_labels = get_images_and_encoding(10, mnist_train_loader)
    svhn_enc, svhn_labels = get_images_and_encoding(10, svhn_train_loader)

    enc = torch.cat((mnist_enc, svhn_enc), 0)
    labels = torch.cat((mnist_labels, svhn_labels), 0)

    # Build the domain marker from the actual number of samples per domain rather
    # than assuming each domain contributes exactly half of the rows.
    domain = torch.cat((torch.zeros(mnist_labels.shape[0]),
                        torch.ones(svhn_labels.shape[0])), 0)
    tb.add_embedding(enc,
                     metadata=labels + 10 * domain,
                     tag=flavour_text)

In [12]:
# Make snapshot of latent space
# Taken before the training cells below run, so it serves as the baseline
# embedding to compare against the snapshot logged after training.
snapshot_latent_space("Latent Space without training")

In [13]:
# Create optimizers and learning rate schedulers
# All three networks share the same Adam settings and the same step schedule
# (learning rate multiplied by 0.1 at epochs 10, 20, 30 and 40).
_adam_settings = {"lr": 0.0002, "weight_decay": 0.0005}
_lr_schedule = {"milestones": [10, 20, 30, 40], "gamma": 0.1}

opt_enc = optim.Adam(enc_model.parameters(), **_adam_settings)
opt_cls1 = optim.Adam(cls1_model.parameters(), **_adam_settings)
opt_cls2 = optim.Adam(cls2_model.parameters(), **_adam_settings)

enc_scheduler = optim.lr_scheduler.MultiStepLR(opt_enc, **_lr_schedule)
cls1_scheduler = optim.lr_scheduler.MultiStepLR(opt_cls1, **_lr_schedule)
cls2_scheduler = optim.lr_scheduler.MultiStepLR(opt_cls2, **_lr_schedule)

In [14]:
# Discrepancy function
def discrepancy(out1, out2):
    """Mean absolute difference between the softmax probabilities of two logit batches.

    The classifiers return raw logits (their outputs also feed other losses),
    so the softmax over dim 1 is applied here.
    """
    probs1 = F.softmax(out1, dim=1)
    probs2 = F.softmax(out2, dim=1)
    return (probs1 - probs2).abs().mean()

In [15]:
# Function to test the performance of the network
def test(test_loader):
    """Return the ensemble accuracy, in percent, over ``test_loader``.

    The encoder and both classifiers are switched to eval mode and left there;
    callers that continue training must call ``.train()`` again. The prediction
    is the argmax of the sum of the two classifiers' logits.

    Args:
        test_loader: iterable yielding ``(data, labels)`` batches.

    Returns:
        A 0-dim CPU tensor holding the accuracy in percent.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    enc_model.eval()
    cls1_model.eval()
    cls2_model.eval()
    correct_amount = torch.zeros((), dtype=torch.long)
    size = 0

    # Evaluation needs no gradients; skipping the autograd graph saves memory and time
    with torch.no_grad():
        for data, labels in test_loader:
            data, labels = data.to(device), labels.to(device)

            feat = enc_model(data)
            # Get ensemble of the output
            output_ensemble = cls1_model(feat) + cls2_model(feat)
            pred_ensemble = output_ensemble.max(1)[1]
            correct_amount += pred_ensemble.eq(labels).sum().cpu()
            size += labels.size(0)

    if size == 0:
        raise ValueError("test_loader yielded no samples")
    return 100. * correct_amount / size

# Train/Test

In [16]:
# Parameters
source_only_epochs = 2  # Epochs of source-only training before domain adaptation starts
n_epochs = 50  # Epochs of the adaptation training loop
num_k = 4  # Number of times the last step (minimize discrepancy) is repeated per batch
cross_entropy_loss = nn.CrossEntropyLoss().to(device)

# Per-epoch accuracy histories, filled by the training loop
svhn_train_res, svhn_test_res, mnist_test_res = [], [], []

In [17]:
# Train the network on source only before performing domain adaptation
for epoch in range(source_only_epochs):
    for _net in (cls1_model, cls2_model, enc_model):
        _net.train()

    for x_s, y_s in svhn_train_loader:
        x_s = x_s.to(device)
        y_s = y_s.to(device)
        for _opt in (opt_cls1, opt_cls2, opt_enc):
            _opt.zero_grad()

        # STEP 1 of the training - simply learn on source data:
        # both classifiers share the encoder features and their losses are summed
        feat_s = enc_model(x_s)
        out_s1 = cls1_model(feat_s)
        out_s2 = cls2_model(feat_s)
        loss_s = cross_entropy_loss(out_s1, y_s) + cross_entropy_loss(out_s2, y_s)
        loss_s.backward()

        for _opt in (opt_enc, opt_cls1, opt_cls2):
            _opt.step()

# Evaluate once after source-only training, in the same order as reported below
acc1, acc2, acc3 = (
    test(_loader)
    for _loader in (svhn_train_loader, svhn_test_loader, mnist_test_loader)
)
print("Accuracy after source only learning")
for _name, _acc in (
    ("SVHN Train Accuracy", acc1),
    ("SVHN Test Accuracy", acc2),
    ("MNIST Test Accuracy", acc3),
):
    print(_name, _acc.item())

Accuracy after source only learning
SVHN Train Accuracy 93.14331817626953
SVHN Test Accuracy 89.92394256591797
MNIST Test Accuracy 62.4900016784668


In [18]:
# Training of the model
# Each SVHN (source) batch is paired with an MNIST (target) batch and goes through three steps:
#   1. train the encoder and both classifiers on the labelled source batch,
#   2. update only the classifiers: source loss minus target discrepancy,
#   3. update only the encoder, num_k times, to reduce the target discrepancy.
for epoch in range(n_epochs):
    cls1_model.train()
    cls2_model.train()
    enc_model.train()

    # Create iterator over mnist (as we will use it inside of the training)
    mn = iter(mnist_train_loader)
    for x_s, y_s in svhn_train_loader:

        # Fetch the next target batch; restart the MNIST iterator once it is exhausted.
        # The built-in next() is used because iterators expose __next__, not a .next() method.
        try:
            x_t = next(mn)[0]
        except StopIteration:
            mn = iter(mnist_train_loader)
            x_t = next(mn)[0]
        x_t = x_t.to(device)
        x_s, y_s = x_s.to(device), y_s.to(device)
        opt_cls1.zero_grad()
        opt_cls2.zero_grad()
        opt_enc.zero_grad()

        # STEP 1 of the training - simply learn on source data
        feat_s = enc_model(x_s)
        out_s1 = cls1_model(feat_s)
        out_s2 = cls2_model(feat_s)

        loss_s1 = cross_entropy_loss(out_s1, y_s)
        loss_s2 = cross_entropy_loss(out_s2, y_s)
        loss_s = loss_s1 + loss_s2
        loss_s.backward()
        opt_enc.step()
        opt_cls1.step()
        opt_cls2.step()

        opt_cls1.zero_grad()
        opt_cls2.zero_grad()
        opt_enc.zero_grad()

        # STEP 2 of the training - maximize discrepancy
        feat_s = enc_model(x_s)
        out_s1 = cls1_model(feat_s)
        out_s2 = cls2_model(feat_s)

        feat_t = enc_model(x_t)
        out_t1 = cls1_model(feat_t)
        out_t2 = cls2_model(feat_t)

        loss_s1 = cross_entropy_loss(out_s1, y_s)
        loss_s2 = cross_entropy_loss(out_s2, y_s)
        loss_s = loss_s1 + loss_s2
        loss_dis = discrepancy(out_t1, out_t2)
        # Subtracting the discrepancy means minimising this loss maximises it
        loss = loss_s - loss_dis
        loss.backward()
        # Only the classifiers are stepped; the encoder gradients produced by
        # backward() are discarded by the zero_grad() calls below.
        opt_cls1.step()
        opt_cls2.step()

        opt_cls1.zero_grad()
        opt_cls2.zero_grad()
        opt_enc.zero_grad()

        # Step 3 of the training - minimize discrepancy, repeat it num_k times
        for i in range(num_k):
            feat_t = enc_model(x_t)
            out_t1 = cls1_model(feat_t)
            out_t2 = cls2_model(feat_t)
            loss_dis = discrepancy(out_t1, out_t2)
            loss_dis.backward()
            # Only the encoder is stepped here; classifier gradients are cleared below
            opt_enc.step()

            opt_cls1.zero_grad()
            opt_cls2.zero_grad()
            opt_enc.zero_grad()

    # Test the network after each epoch and report the results
    svhn_train_res.append(test(svhn_train_loader))
    svhn_test_res.append(test(svhn_test_loader))
    mnist_test_res.append(test(mnist_test_loader))
    print(f"\n\nEpoch {epoch}\n")
    print("SVHN Train Accuracy", svhn_train_res[-1].item())
    print("SVHN Test Accuracy", svhn_test_res[-1].item())
    print("MNIST Test Accuracy", mnist_test_res[-1].item())
    # Pass the epoch as global_step so each epoch gets its own point on the curve
    tb.add_scalar("SVHN Train Accuracy", svhn_train_res[-1].item(), global_step=epoch)
    tb.add_scalar("SVHN Test Accuracy", svhn_test_res[-1].item(), global_step=epoch)
    tb.add_scalar("MNIST Test Accuracy", mnist_test_res[-1].item(), global_step=epoch)

    # Save states of the networks (the files are overwritten every epoch)
    torch.save(enc_model.state_dict(), "encoder.pt")
    torch.save(cls1_model.state_dict(), "classifier1.pt")
    torch.save(cls2_model.state_dict(), "classifier2.pt")

    # Step into the learning rate scheduler
    enc_scheduler.step()
    cls1_scheduler.step()
    cls2_scheduler.step()



Epoch 0

SVHN Train Accuracy 58.5732421875
SVHN Test Accuracy 58.48571014404297
MNIST Test Accuracy 81.76000213623047


Epoch 1

SVHN Train Accuracy 69.25208282470703
SVHN Test Accuracy 69.6412124633789
MNIST Test Accuracy 82.06999969482422


Epoch 2

SVHN Train Accuracy 62.930503845214844
SVHN Test Accuracy 62.27335739135742
MNIST Test Accuracy 87.19999694824219


Epoch 3

SVHN Train Accuracy 74.95802307128906
SVHN Test Accuracy 73.77842712402344
MNIST Test Accuracy 90.48999786376953


Epoch 4

SVHN Train Accuracy 81.39154052734375
SVHN Test Accuracy 79.64044189453125
MNIST Test Accuracy 93.0199966430664


Epoch 5

SVHN Train Accuracy 83.137451171875
SVHN Test Accuracy 81.5842056274414
MNIST Test Accuracy 94.1500015258789


Epoch 6

SVHN Train Accuracy 75.00579833984375
SVHN Test Accuracy 73.93976593017578
MNIST Test Accuracy 95.27999877929688


Epoch 7

SVHN Train Accuracy 83.92645263671875
SVHN Test Accuracy 82.6329116821289
MNIST Test Accuracy 95.12000274658203


Epoch 8

SVHN Tr

In [19]:
# Make snapshot of latent space after training
# Logged under its own tag so it can be compared with the pre-training snapshot.
snapshot_latent_space("Latent Space after training all")
# Close the writer; no further tb.* calls appear in the visible cells after this point.
tb.close()

In [20]:
from matplotlib import pyplot as plt


def _save_accuracy_plot(title, accuracies, filename):
    """Draw one accuracy-per-epoch curve on its own figure and save it to ``filename``.

    A separate figure per curve is required: drawing everything on the implicit
    pyplot figure would make each later PNG also contain the earlier curves.

    Args:
        title: figure title.
        accuracies: per-epoch accuracies; every entry must be convertible with ``float()``.
        filename: path of the image file to write.
    """
    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.set_ylabel("Accuracy")
    ax.set_xlabel("Epoch")
    # Epochs are numbered from 1 on the x axis; values are converted to plain floats
    ax.plot(range(1, len(accuracies) + 1), [float(acc) for acc in accuracies])
    fig.savefig(filename)


_save_accuracy_plot("Accuracy on SVHN train", svhn_train_res, "svhn_train_res.png")
_save_accuracy_plot("Accuracy on SVHN test", svhn_test_res, "svhn_test_res.png")
_save_accuracy_plot("Accuracy on MNIST test", mnist_test_res, "mnist_test_res.png")