In [None]:
# Based on the MNIST example from the PyTorch examples

In [2]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.utils.data
import time

In [3]:
class Net(nn.Module):
    """Small convolutional classifier: two conv layers followed by two linear layers.

    ``forward`` reshapes the convolutional features to rows of 320 values
    (20 channels x 4 x 4, which is what a 28x28 single-channel input yields
    after two 5x5 convolutions and two 2x2 poolings) and returns
    log-probabilities over 10 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        """Return log-softmax scores (taken over dim 1) for the batch ``x``."""
        # Stage 1: convolution -> 2x2 max-pool -> ReLU.
        stage1 = F.max_pool2d(self.conv1(x), 2)
        stage1 = F.relu(stage1)

        # Stage 2: convolution -> channel dropout -> 2x2 max-pool -> ReLU.
        stage2 = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(F.max_pool2d(stage2, 2))

        # Classifier head on the flattened feature maps.
        flat = stage2.view(-1, 320)
        hidden = F.relu(self.fc1(flat))
        # Functional dropout needs the mode passed explicitly to be disabled in eval.
        hidden = F.dropout(hidden, training=self.training)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [4]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, taking one optimizer step per batch.

    The model is switched to training mode, the NLL loss of its outputs
    against the batch targets is back-propagated, and a progress line is
    printed every 100 batches. ``epoch`` is only used in that log line.
    """
    log_template = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print(log_template.format(
            epoch, seen, len(train_loader.dataset), percent_done, batch_loss.item()))

def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [5]:
# Seed torch's global RNG so weight initialisation, DataLoader shuffling and
# dropout are repeatable when the notebook is run top to bottom.
SEED = 0
torch.manual_seed(SEED)

device = torch.device("cpu")
batch_size = 64

# One preprocessing pipeline shared by the train and test splits:
# convert to a tensor, then normalise with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# download=True fetches the files into ../data when they are missing and is a
# no-op when they are already there, so this cell also works on a fresh machine.
train_dataset = datasets.MNIST('../data', train=True, download=True,
                               transform=mnist_transform)
test_dataset = datasets.MNIST('../data', train=False, download=True,
                              transform=mnist_transform)

# Three disjoint index ranges of the training set:
#   0-24999 for model1, 25000-49999 for model2, 50000-59999 for the combined phase.
model1_train = torch.utils.data.Subset(train_dataset, range(25000))
model2_train = torch.utils.data.Subset(train_dataset, range(25000, 50000))
combine_train = torch.utils.data.Subset(train_dataset, range(50000, 60000))

model1_train_loader = torch.utils.data.DataLoader(model1_train, batch_size=batch_size, shuffle=True)
model2_train_loader = torch.utils.data.DataLoader(model2_train, batch_size=batch_size, shuffle=True)
combine_train_loader = torch.utils.data.DataLoader(combine_train, batch_size=batch_size, shuffle=True)

# Loader over the whole training set, used by the single-model baseline.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation aggregates over every sample, so the order is irrelevant and
# shuffling would only consume random numbers.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [15]:
# Baseline: a single network trained on the loader over the full training set.
model = Net()

optimizer = optim.SGD(model.parameters(), lr=0.05)

# time.perf_counter() is monotonic, so the measured interval cannot be skewed
# by system clock adjustments the way a difference of time.time() values can.
single_model_start_time = time.perf_counter()
for epoch in range(1, 20):  # epochs 1..19
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
single_model_end_time = time.perf_counter()


Test set: Average loss: 0.1212, Accuracy: 9625/10000 (96%)


Test set: Average loss: 0.0810, Accuracy: 9750/10000 (98%)


Test set: Average loss: 0.0586, Accuracy: 9826/10000 (98%)


Test set: Average loss: 0.0531, Accuracy: 9832/10000 (98%)


Test set: Average loss: 0.0486, Accuracy: 9849/10000 (98%)


Test set: Average loss: 0.0468, Accuracy: 9849/10000 (98%)


Test set: Average loss: 0.0437, Accuracy: 9861/10000 (99%)


Test set: Average loss: 0.0424, Accuracy: 9880/10000 (99%)


Test set: Average loss: 0.0403, Accuracy: 9883/10000 (99%)


Test set: Average loss: 0.0379, Accuracy: 9887/10000 (99%)


Test set: Average loss: 0.0406, Accuracy: 9882/10000 (99%)


Test set: Average loss: 0.0381, Accuracy: 9877/10000 (99%)


Test set: Average loss: 0.0362, Accuracy: 9894/10000 (99%)


Test set: Average loss: 0.0329, Accuracy: 9906/10000 (99%)




Test set: Average loss: 0.0324, Accuracy: 9897/10000 (99%)


Test set: Average loss: 0.0332, Accuracy: 9893/10000 (99%)


Test set: Average loss: 0.0343, Accuracy: 9904/10000 (99%)


Test set: Average loss: 0.0301, Accuracy: 9899/10000 (99%)


Test set: Average loss: 0.0312, Accuracy: 9898/10000 (99%)



In [None]:
# Train both models separately, each on its own subset of the training data.
model1 = Net()
model2 = Net()

optimizer1 = optim.SGD(model1.parameters(), lr=0.05)
optimizer2 = optim.SGD(model2.parameters(), lr=0.05)

# time.perf_counter() is monotonic, so each measured interval cannot be skewed
# by system clock adjustments the way a difference of time.time() values can.
model1_start_time = time.perf_counter()
for epoch in range(1, 20):  # epochs 1..19
    train(model1, device, model1_train_loader, optimizer1, epoch)
    test(model1, device, test_loader)
model1_end_time = time.perf_counter()

model2_start_time = time.perf_counter()
for epoch in range(1, 20):  # epochs 1..19
    train(model2, device, model2_train_loader, optimizer2, epoch)
    test(model2, device, test_loader)
model2_end_time = time.perf_counter()

In [14]:
# Combined training: on each batch from combine_train_loader, every model is
# trained to match the class predicted by the other model. The dataset's true
# labels are not used in this phase.
combine_train_start_time = time.perf_counter()  # monotonic clock for interval timing
for epoch in range(1, 20):  # epochs 1..19
    model1.train()
    model2.train()
    for batch_idx, (data, _labels) in enumerate(combine_train_loader):
        data = data.to(device)
        optimizer1.zero_grad()
        optimizer2.zero_grad()
        output1 = model1(data)
        output2 = model2(data)
        # Hard pseudo-labels: index of the largest log-probability per sample.
        # These are integer index tensors, so no gradient flows through them.
        pred1 = output1.argmax(dim=1)
        pred2 = output2.argmax(dim=1)
        loss1 = F.nll_loss(output1, pred2)
        loss1.backward()
        optimizer1.step()
        loss2 = F.nll_loss(output2, pred1)
        loss2.backward()
        optimizer2.step()
        if batch_idx % 100 == 0:
            # Report both losses; loss2 needs .item() called, and the template
            # needs a placeholder for it, otherwise it is silently dropped.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss1: {:.6f}\tLoss2: {:.6f}'.format(
                epoch, batch_idx * len(data), len(combine_train_loader.dataset),
                100. * batch_idx / len(combine_train_loader),
                loss1.item(), loss2.item()))
combine_train_end_time = time.perf_counter()



In [16]:
# Elapsed seconds for the single-model training/evaluation loop.
single_model_end_time - single_model_start_time

839.3560137748718

In [21]:
# Elapsed seconds for model1's separate training plus the combined phase
# (model2's separate training time is not included in this sum).
(
    model1_end_time - model1_start_time
    + combine_train_end_time - combine_train_start_time
)

705.5945563316345

In [19]:
# Evaluate both models on the test set: model2 first, then model1.
for trained_model in (model2, model1):
    test(trained_model, device, test_loader)


Test set: Average loss: 0.0736, Accuracy: 9776/10000 (98%)


Test set: Average loss: 0.0735, Accuracy: 9772/10000 (98%)

