In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import torch.optim as optim
import math

In [2]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        nb_hidden = 200
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(5*5 * 64, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.fc1(x.view(-1, 5*5 * 64)))
        x = self.fc2(x)
        # print("x = ",x.size(), x)
        return x

In [3]:
#2 
def train_model(model, train_input, train_target, mini_batch_size):
    # We do this with mini-batches
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=η, momentum=0.3)
    # print("Size = ", train_input.size(), train_target.size())
    acc_loss = 0
    for b in range(0, train_input.size(0), mini_batch_size):
        #--------------
          # torch.narrow(input, dim, start, length) → Tensor
        #--------------
        # print("train_input.narrow",train_input.narrow(0, b, mini_batch_size).size())
        output = model(train_input.narrow(0, b, mini_batch_size))#表示取出train_input中第0维上索引从b开始到index+mini_batch_size-1的所有元素
        loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
        acc_loss = acc_loss + loss.item()
        model.zero_grad()
        loss.backward()
        optimizer.step()

    return model, acc_loss 

In [4]:
import dlc_practical_prologue as prologue
N = 1000
data = prologue.generate_pair_sets(N)
train_data = data[:3]
test_data = data[3:]

train_input, train_target, train_classes = train_data[0], train_data[1], train_data[2]
test_input, test_target, test_classes = test_data[0], test_data[1], test_data[2]
print(train_input.size(), train_target.size(), train_classes.size())

train_input_1 =  train_input[:,0].unsqueeze(1)
train_input_2 =  train_input[:,1].unsqueeze(1)
train_classes_1 =  train_classes[:,0]
train_classes_2 =  train_classes[:,1]
test_input_1 =  test_input[:,0].unsqueeze(1)
test_input_2 =  test_input[:,1].unsqueeze(1)
test_classes_1 =  test_classes[:,0]
test_classes_2 =  test_classes[:,1]
print(test_input_1.size(), test_classes_1.size(),test_classes_1[1])

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw

torch.Size([1000, 2, 14, 14]) torch.Size([1000]) torch.Size([1000, 2])
torch.Size([1000, 1, 14, 14]) torch.Size([1000]) tensor(4)


In [5]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size):
  error = 0
  output_class = torch.ones(data_input.size(0))
  for b in range(0, data_input.size(0), mini_batch_size): 
    output = model(data_input.narrow(0, b, mini_batch_size))
    
    for i in range(mini_batch_size): 
      output_class[b+i] = torch.argmax(output[i])
      if output_class[b+i] != data_target[b+i]:
        error += 1
  acc = 1 - (error/data_input.size(0))
  return acc, output_class

In [34]:
model = Net()
η = 0.002 #If it too big, it will fail!
mini_batch_size = 50
nb_epochs =30
#print(train_input, train_target)
for e in range(nb_epochs):
    #print(train_input_1, train_classes_1)
    model, acc_loss = train_model(model, train_input_1, train_classes_1, mini_batch_size)
    acc, output_class_1 = compute_nb_errors(model, test_input_1, test_classes_1, mini_batch_size)
    # print('epoch :', e, ' loss :', round(acc_loss))
    # print('acc : ', acc)
    # print('----------')
print('test_input_1 acc : ', acc)
model1 = model #save first input training model

test_input_1 acc :  0.933


In [35]:
# [Method 1: Train on the same Model] train_input_1 & train_input_2  
for e in range(nb_epochs):
    model, acc_loss = train_model(model, train_input_2, train_classes_2, mini_batch_size)
    acc, output_class_2 = compute_nb_errors(model, test_input_2, test_classes_2, mini_batch_size)
    # print('epoch :', e, ' loss :', round(acc_loss))
    # print('acc : ', acc)
    # print('----------')
print('test_input_2 acc : ', acc)

test_input_2 acc :  0.942


In [36]:
# [Method 2: Train on two Models] train_input_1 & train_input_2  
model2 = Net()
for e in range(nb_epochs):
    model2, acc_loss2 = train_model(model2, train_input_2, train_classes_2, mini_batch_size)
    acc2, output_class2_2 = compute_nb_errors(model2, test_input_2, test_classes_2, mini_batch_size)
    # print('epoch :', e, ' loss :', round(acc_loss2))
    # print('acc : ', acc2)
    # print('----------')
print('test_input_2 acc : ', acc2)

test_input_2 acc :  0.938


In [9]:
def compute_num_errors(data_ouput, data_target):
  error = 0
  for i in range(data_ouput.size(0)): 
      if data_ouput[i] != data_target[i]:
        error += 1
  acc = 1 - (error/data_ouput.size(0))
  return acc

In [10]:
def predict_target(data_output1, data_output2):
  output = torch.ones(len(data_output1))
  for i in range(len(data_output1)):
    if data_output1[i].item() > data_output2[i].item():
        output[i] = 0
    else:
        output[i] = 1
    
    #print("**",test_classes_1[i], test_classes_2[i], test_target[i])
    #print(data_output1[i].item(), data_output2[i].item(), output[i])
  return output

In [37]:
# [Method 1: Train on the same Model] train_input_1 & train_input_2  
acc1, output_class_1 = compute_nb_errors(model, test_input_1, test_classes_1, mini_batch_size)
acc2, output_class_2 = compute_nb_errors(model, test_input_2, test_classes_2, mini_batch_size)
target_output = predict_target(output_class_1, output_class_2)
#print(target_output.size())
target_acc = compute_num_errors(target_output, test_target)
print('acc : ', target_acc)

acc :  0.964


In [38]:
# [Method 2: Train on two Models] train_input_1 & train_input_2  
acc1, output_class_2_1 = compute_nb_errors(model1, test_input_1, test_classes_1, mini_batch_size)
acc2, output_class_2_2 = compute_nb_errors(model2, test_input_2, test_classes_2, mini_batch_size)
target_output2 = predict_target(output_class_2_1, output_class_2_2)
#print(target_output.size())
target_acc = compute_num_errors(target_output2, test_target)
print('acc : ', target_acc)

acc :  0.962


In [71]:
for e in range(nb_epochs):
    model, acc_loss = train_model(model, train_input_2, train_classes_2, mini_batch_size)
    acc = compute_nb_errors(model, test_input_2, test_classes_2, mini_batch_size)
    print('epoch :', e, ' loss :', round(acc_loss))
    print('acc : ', acc)
    print('----------')

epoch : 0  loss : 0
acc :  (0.951, tensor([6., 2., 6., 7., 6., 6., 7., 8., 2., 6., 9., 9., 3., 4., 7., 1., 0., 1.,
        3., 4., 1., 1., 2., 2., 8., 8., 3., 5., 9., 2., 0., 7., 2., 3., 6., 2.,
        6., 1., 4., 7., 9., 4., 7., 7., 4., 8., 8., 4., 9., 4., 1., 5., 0., 1.,
        5., 1., 8., 7., 1., 9., 8., 5., 6., 4., 4., 2., 4., 1., 2., 8., 3., 3.,
        6., 1., 4., 7., 1., 5., 8., 8., 4., 1., 1., 3., 6., 9., 8., 4., 7., 0.,
        3., 5., 4., 8., 0., 9., 7., 4., 1., 2., 3., 7., 7., 3., 8., 9., 0., 7.,
        2., 2., 7., 5., 9., 2., 4., 9., 6., 5., 1., 5., 9., 9., 6., 3., 7., 6.,
        3., 0., 2., 4., 0., 8., 0., 4., 7., 4., 3., 1., 3., 1., 5., 5., 8., 7.,
        8., 7., 9., 8., 3., 2., 3., 6., 3., 6., 8., 2., 6., 1., 1., 8., 1., 9.,
        0., 3., 9., 4., 4., 5., 1., 0., 3., 7., 7., 1., 6., 7., 6., 4., 8., 3.,
        3., 7., 1., 2., 8., 0., 1., 5., 3., 7., 1., 8., 7., 2., 4., 5., 8., 5.,
        2., 7., 7., 1., 1., 7., 3., 9., 7., 3., 9., 0., 0., 8., 6., 6., 1., 2.,
     

In [72]:
model1 = Net()
model2 = Net()
η = 0.2
mini_batch_size = 2
nb_epochs = 25
#print(train_input, train_target)
for e in range(nb_epochs):
    print("Data = ", train_data[0].size(), train_data[2].size())
    model, acc_loss = train_model(model1, train_data[0][0], train_data[2][0], mini_batch_size)

    acc = compute_nb_errors(model, test_input, test_target)
    print('epoch :', e, ' loss :', round(acc_loss, 3))
    print('acc : ', acc)
    print('----------')

for e in range(nb_epochs):
    model, acc_loss = train_model(model2, train_data[0][1], train_data[2][1], mini_batch_size)

    acc = compute_nb_errors(model, test_input, test_target)
    print('epoch :', e, ' loss :', round(acc_loss, 3))
    print('acc : ', acc)
    print('----------')

Data =  torch.Size([1000, 2, 14, 14]) torch.Size([1000, 2])


RuntimeError: ignored

In [None]:
class Net2(nn.Module):
    def __init__(self):
        super().__init__()
        nb_hidden = 200
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(2*32, 2*32, kernel_size=5)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(9 * 64, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        x1 = x[0]
        print("x1= ", x1)
        x1 = F.relu(F.max_pool2d(self.conv1(x1), kernel_size=2))
        x1 = F.relu(F.max_pool2d(self.conv2(x1), kernel_size=2))
        x1 = F.relu(self.conv3(x1))
        x1 = F.relu(self.fc1(x1.view(-1, 9 * 64)))
        x1 = self.fc2(x1)
        x2 = x[1]
        x2 = F.relu(F.max_pool2d(self.conv1(x2), kernel_size=2))
        x2 = F.relu(F.max_pool2d(self.conv2(x2), kernel_size=2))
        x2 = F.relu(self.conv3(x2))
        x2 = F.relu(self.fc1(x2.view(-1, 9 * 64)))
        x2 = self.fc2(x2)
        y = [x1, x2]
        return y