Q1-1)

In [5]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pylab as plt
import random
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 파라미터 설정 (learning rate, training epochs, batch_size)
learning_rate = 0.1
training_epochs = 15
batch_size = 100


#train과 test set으로 나누어 MNIST data 불러오기
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)
linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)
relu = torch.nn.ReLU()
bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)
dropout = torch.nn.Dropout(p=0.3)

#xavier initialization을 이용하여 각 layer의 weight 초기화
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

bn_model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3).to(device)

# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss().to(device) 

#optimizer 정의하기 (Adam optimizer를 사용할 것!)
bn_optimizer = torch.optim.Adam(bn_model.parameters(), lr=learning_rate)

#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)
train_losses = []
train_accs = []
test_total_batch = len(test_loader)

valid_losses = []
valid_accs = []


#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    bn_model.train()
    avg_cost=0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        bn_optimizer.zero_grad()
        bn_prediction = bn_model(X)
        bn_loss = criterion(bn_prediction, Y)
        bn_loss.backward()
        bn_optimizer.step()

        avg_cost += bn_loss / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

with torch.no_grad():
    bn_model.eval() 
    bn_loss, nn_loss, bn_acc, nn_acc = 0, 0, 0, 0
    
    for i, (X, Y) in enumerate(train_loader):
            X = X.view(-1, 28 * 28).to(device)
            Y = Y.to(device)

            bn_prediction = bn_model(X)
            bn_correct_prediction = torch.argmax(bn_prediction, 1) == Y
            bn_loss += criterion(bn_prediction, Y)
            bn_acc += bn_correct_prediction.float().mean()

    bn_loss, bn_acc = bn_loss / train_total_batch, bn_acc / train_total_batch
    
    train_losses.append([bn_loss, nn_loss])
    train_accs.append([bn_acc, nn_acc])
    
    print("Accuracy: ", bn_acc.item())
    
    
    
    ##Test set에서 random으로 data를 뽑아 Label과 Prediction을 비교하는 코드 
    r = random.randint(0, len(mnist_test)-1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *28).float()
    Y_single_data = mnist_test.test_labels[r:r + 1]
    
    print('Label: ', Y_single_data.item())
    single_prediction = bn_model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())


Epoch: 0001 cost = 0.512086689
Epoch: 0002 cost = 0.367699087
Epoch: 0003 cost = 0.330312788
Epoch: 0004 cost = 0.317997634
Epoch: 0005 cost = 0.297968209
Epoch: 0006 cost = 0.289918840
Epoch: 0007 cost = 0.278227717
Epoch: 0008 cost = 0.263488799
Epoch: 0009 cost = 0.261512011
Epoch: 0010 cost = 0.254014283
Epoch: 0011 cost = 0.258301258
Epoch: 0012 cost = 0.248831883
Epoch: 0013 cost = 0.242640913
Epoch: 0014 cost = 0.240847632
Epoch: 0015 cost = 0.240734071
Learning finished
Accuracy:  0.9788986444473267
Label:  7
Prediction:  7




Q1-2)

In [6]:
linear1 = torch.nn.Linear(784, 200, bias=True)
linear2 = torch.nn.Linear(200, 150, bias=True)
linear3 = torch.nn.Linear(150, 10, bias=True)


bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(150)

In [7]:
#xavier initialization을 이용하여 각 layer의 weight 초기화
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

bn_model = torch.nn.Sequential(linear1, bn1, relu, dropout, 
                            linear2, bn2, relu, dropout, 
                            linear3).to(device)

# Loss Function 정의하기 (CrossEntropy를 사용할 것!)
criterion = torch.nn.CrossEntropyLoss().to(device) 

#optimizer 정의하기 (Adam optimizer를 사용할 것!)
bn_optimizer = torch.optim.Adam(bn_model.parameters(), lr=learning_rate)

#cost 계산을 위한 변수 설정
train_total_batch = len(train_loader)
train_losses = []
train_accs = []
test_total_batch = len(test_loader)

valid_losses = []
valid_accs = []


#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
for epoch in range(training_epochs):
    bn_model.train()
    avg_cost=0
    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드
    for X, Y in train_loader:
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        bn_optimizer.zero_grad()
        bn_prediction = bn_model(X)
        bn_loss = criterion(bn_prediction, Y)
        bn_loss.backward()
        bn_optimizer.step()

        avg_cost += bn_loss / train_total_batch
        
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

with torch.no_grad():
    bn_model.eval() 
    bn_loss, nn_loss, bn_acc, nn_acc = 0, 0, 0, 0
    
    for i, (X, Y) in enumerate(train_loader):
            X = X.view(-1, 28 * 28).to(device)
            Y = Y.to(device)

            bn_prediction = bn_model(X)
            bn_correct_prediction = torch.argmax(bn_prediction, 1) == Y
            bn_loss += criterion(bn_prediction, Y)
            bn_acc += bn_correct_prediction.float().mean()

    bn_loss, bn_acc = bn_loss / train_total_batch, bn_acc / train_total_batch
    
    train_losses.append([bn_loss, nn_loss])
    train_accs.append([bn_acc, nn_acc])
    
    print("Accuracy: ", bn_acc.item())

Epoch: 0001 cost = 0.460198015
Epoch: 0002 cost = 0.331224680
Epoch: 0003 cost = 0.291717321
Epoch: 0004 cost = 0.280150235
Epoch: 0005 cost = 0.260297269
Epoch: 0006 cost = 0.243855476
Epoch: 0007 cost = 0.232145831
Epoch: 0008 cost = 0.238162726
Epoch: 0009 cost = 0.241705492
Epoch: 0010 cost = 0.225788429
Epoch: 0011 cost = 0.218761802
Epoch: 0012 cost = 0.216190457
Epoch: 0013 cost = 0.213295698
Epoch: 0014 cost = 0.209682345
Epoch: 0015 cost = 0.202548966
Learning finished
Accuracy:  0.9821653962135315


Cost는 감소하고 accuracy는 증가하였다!