Q 1-1

In [None]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [None]:
# Training hyperparameters shared by every model in this notebook.
learning_rate = 0.1    # step size handed to each Adam optimizer
training_epochs = 15   # number of full passes over the training loader
batch_size = 100       # samples per mini-batch for both data loaders

# Seed Python's and PyTorch's global RNGs so that the stochastic parts of the
# notebook (DataLoader shuffling, default layer initialisation, Xavier init,
# dropout masks and the randomly picked test sample) repeat on a fresh
# "Restart Kernel -> Run All".
SEED = 777
random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Build the MNIST training split first, then the test split. Both use the same
# root directory, the same ToTensor() transform and download=True; only the
# `train` flag differs between them.
mnist_train, mnist_test = (
    dsets.MNIST(root='MNIST_data/',
                train=is_train,
                transform=transforms.ToTensor(),
                download=True)
    for is_train in (True, False)
)

In [None]:
# Training loader: reshuffled on every pass; a trailing partial batch is
# dropped so every training step sees exactly `batch_size` samples.
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

# Test loader: fixed order, and drop_last=False so that every test sample is
# evaluated even when `batch_size` does not evenly divide the test set. With
# the default batch_size of 100 this yields the same batches as before.
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          drop_last=False)

In [None]:
# Fully connected layers of the baseline network: 784 -> 100 -> 100 -> 10.
linear1 = torch.nn.Linear(in_features=784, out_features=100, bias=True)
linear2 = torch.nn.Linear(in_features=100, out_features=100, bias=True)
linear3 = torch.nn.Linear(in_features=100, out_features=10, bias=True)

# One BatchNorm1d per hidden layer, each normalising 100 features.
bn1 = torch.nn.BatchNorm1d(num_features=100)
bn2 = torch.nn.BatchNorm1d(num_features=100)

# Activation and dropout modules (dropout probability 0.3).
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

In [None]:
# Overwrite the weight matrix of each Linear layer in place with Xavier
# (Glorot) uniform initialisation, in the order linear1, linear2, linear3.
# A loop is used instead of bare trailing calls so the notebook does not echo
# the last layer's entire weight tensor as the cell output.
for layer in (linear1, linear2, linear3):
    torch.nn.init.xavier_uniform_(layer.weight)

In [None]:
# Baseline network: two hidden stages of Linear -> BatchNorm -> ReLU -> Dropout
# followed by the output Linear layer. The same `relu` and `dropout` module
# objects are reused in both hidden stages.
model = torch.nn.Sequential(
    linear1, bn1, relu, dropout,   # first hidden stage
    linear2, bn2, relu, dropout,   # second hidden stage
    linear3,                       # output layer
)

In [None]:
# Cross-entropy loss applied to the model outputs and the class labels.
criterion = torch.nn.CrossEntropyLoss()
# Adam over all parameters of the baseline `model`, using `learning_rate`.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Number of mini-batches that `train_loader` yields per epoch; the training
# loops divide each batch loss by this value to build a per-epoch mean loss.
train_total_batch = len(train_loader)

In [None]:
# Train the baseline `model` for `training_epochs` passes over `train_loader`,
# printing the mean mini-batch loss after every epoch.
for epoch in range(training_epochs):
    model.train()  # put the model (dropout / batch-norm layers) in training mode
    avg_cost = 0

    for X, Y in train_loader:
        # Flatten every 28x28 image into a 784-element row for the first Linear layer.
        X = X.view(-1, 28 * 28)

        optimizer.zero_grad()
        prediction = model(X)
        loss = criterion(prediction, Y)
        loss.backward()
        optimizer.step()

        # Accumulate a detached copy of the loss: adding the live `loss`
        # tensor would chain every batch's autograd history into `avg_cost`
        # and keep it alive for the whole epoch.
        avg_cost += loss.detach() / train_total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    # Remember the most recent epoch's mean loss of the baseline model.
    avg_cost_o = avg_cost
print('Learning finished')

In [None]:
# Evaluate the baseline `model` on every batch of `test_loader`, then show its
# prediction for one randomly chosen test image.
with torch.no_grad():
    model.eval()  # switch dropout / batch-norm layers to inference behaviour

    correct = 0   # number of correctly classified test samples so far
    total = 0     # number of test samples seen so far
    for X_test, Y_test in test_loader:
        # Use the *test* batch here; the previous code reused the leftover
        # training variables X / Y and never looked at the test data.
        X_test = X_test.view(-1, 28 * 28)

        prediction = model(X_test)
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        correct += correct_prediction.sum().item()
        total += Y_test.size(0)

    # Accuracy over all evaluated samples (not just the final batch). Kept as a
    # 0-dim tensor so later cells can still call `.item()` on `accuracy_o`.
    accuracy = torch.tensor(correct / max(total, 1))
    print('Accuracy:', accuracy.item())
    accuracy_o = accuracy

    r = random.randint(0, len(mnist_test) - 1)
    # `mnist_test.data` holds raw 0-255 pixel values, whereas the model was
    # trained on ToTensor() output scaled to [0, 1]; rescale to match.
    X_single_data = mnist_test.data[r:r + 1].view(-1, 28 * 28).float() / 255.0
    Y_single_data = mnist_test.targets[r:r + 1]

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Q 1-2

In [None]:
# Variant A: hidden widths shrink through the network, 784 -> 200 -> 50 -> 10.
a1 = torch.nn.Linear(in_features=784, out_features=200, bias=True)
a2 = torch.nn.Linear(in_features=200, out_features=50, bias=True)
a3 = torch.nn.Linear(in_features=50, out_features=10, bias=True)

# Fresh activation / dropout modules (these rebind the notebook-level names).
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

# Batch-norm layers sized to the two hidden widths.
a_bn1 = torch.nn.BatchNorm1d(num_features=200)
a_bn2 = torch.nn.BatchNorm1d(num_features=50)

# Xavier-uniform weights for a1, a2, a3, applied in that order.
for a_layer in (a1, a2, a3):
    torch.nn.init.xavier_uniform_(a_layer.weight)

# Same stage layout as the baseline: Linear -> BatchNorm -> ReLU -> Dropout,
# twice, followed by the output Linear layer.
model_a = torch.nn.Sequential(
    a1, a_bn1, relu, dropout,
    a2, a_bn2, relu, dropout,
    a3,
)
criterion = torch.nn.CrossEntropyLoss()
optimizer_a = torch.optim.Adam(params=model_a.parameters(), lr=learning_rate)

In [None]:
# Variant B: narrow first hidden layer, wide second one, 784 -> 50 -> 200 -> 10.
b1 = torch.nn.Linear(in_features=784, out_features=50, bias=True)
b2 = torch.nn.Linear(in_features=50, out_features=200, bias=True)
b3 = torch.nn.Linear(in_features=200, out_features=10, bias=True)

# Fresh activation / dropout modules (these rebind the notebook-level names).
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.3)

# Batch-norm layers sized to the two hidden widths.
b_bn1 = torch.nn.BatchNorm1d(num_features=50)
b_bn2 = torch.nn.BatchNorm1d(num_features=200)

# Xavier-uniform weights for b1, b2, b3, applied in that order.
for b_layer in (b1, b2, b3):
    torch.nn.init.xavier_uniform_(b_layer.weight)

# Linear -> BatchNorm -> ReLU -> Dropout, twice, then the output Linear layer.
model_b = torch.nn.Sequential(
    b1, b_bn1, relu, dropout,
    b2, b_bn2, relu, dropout,
    b3,
)
criterion = torch.nn.CrossEntropyLoss()
optimizer_b = torch.optim.Adam(params=model_b.parameters(), lr=learning_rate)

In [None]:
# Train model_a and model_b side by side: each mini-batch from `train_loader`
# is fed to both models, and each model is updated by its own optimizer.
# The mean mini-batch loss of both models is printed after every epoch.
for epoch in range(training_epochs):
    model_a.train()
    model_b.train()
    avg_cost_a = 0
    avg_cost_b = 0

    for X, Y in train_loader:
        # Flatten every 28x28 image into a 784-element row.
        X = X.view(-1, 28 * 28)

        # --- update model A ---
        optimizer_a.zero_grad()
        prediction_a = model_a(X)
        loss_a = criterion(prediction_a, Y)
        loss_a.backward()
        optimizer_a.step()

        # Detach before accumulating so the running mean does not keep every
        # batch's autograd history alive for the whole epoch.
        avg_cost_a += loss_a.detach() / train_total_batch

        # --- update model B on the same batch ---
        optimizer_b.zero_grad()
        prediction_b = model_b(X)
        loss_b = criterion(prediction_b, Y)
        loss_b.backward()
        optimizer_b.step()

        avg_cost_b += loss_b.detach() / train_total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost a =', '{:.9f}'.format(avg_cost_a),'cost b =', '{:.9f}'.format(avg_cost_b))
    

In [None]:
# Evaluate model_a and model_b on every batch of `test_loader` and report the
# accuracy of each over all evaluated samples.
with torch.no_grad():
    model_a.eval()
    model_b.eval()

    correct_a = 0   # correctly classified samples, model A
    correct_b = 0   # correctly classified samples, model B
    total_ab = 0    # test samples seen (shared by both models)
    for X_test, Y_test in test_loader:
        # Use the *test* batch; the previous code reused the leftover training
        # variables X / Y, so the test data was never evaluated.
        X_test = X_test.view(-1, 28 * 28)

        prediction_a = model_a(X_test)
        correct_prediction_a = torch.argmax(prediction_a, 1) == Y_test
        correct_a += correct_prediction_a.sum().item()

        prediction_b = model_b(X_test)
        correct_prediction_b = torch.argmax(prediction_b, 1) == Y_test
        correct_b += correct_prediction_b.sum().item()

        total_ab += Y_test.size(0)

    # Kept as 0-dim tensors so later cells can still call `.item()` on them.
    accuracy_a = torch.tensor(correct_a / max(total_ab, 1))
    accuracy_b = torch.tensor(correct_b / max(total_ab, 1))

    print('Accuracy_a:', accuracy_a.item(), 'Accuracy_b', accuracy_b.item())

In [None]:
# Side-by-side accuracy of the baseline model (o) and the two variants (a, b),
# as computed by the evaluation cells.
print(
    'Accuracy_o:', accuracy_o.item(),
    'Accuracy_a:', accuracy_a.item(),
    'Accuracy_b', accuracy_b.item(),
)