## Q1)

In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import matplotlib.pylab as plt
import random

In [2]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
device = torch.device('cuda')
learning_rate = 0.1
training_epochs = 15
batch_size = 100


#train과 test set으로 나누어 MNIST data 불러오기

train_dataset= dsets.MNIST(root='./data',
                           train=True,
                           transform=transforms.ToTensor(),
                           download=True)
test_dataset = dsets.MNIST(root ='./data',  
                           train = False,  
                           transform = transforms.ToTensor())


#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, 
                                           batch_size = batch_size, 
                                           shuffle = True,
                                           drop_last=True) 
  
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, 
                                          batch_size = batch_size, 
                                          shuffle = False,
                                          drop_last=True) 


# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 100, bias=True)
linear2 = torch.nn.Linear(100, 100, bias=True)
linear3 = torch.nn.Linear(100, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.03)
bn1 = torch.nn.BatchNorm1d(100)
bn2 = torch.nn.BatchNorm1d(100)

#xavier initialization을 이용하여 각 layer의 weight 초기화

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)



# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2,  relu, dropout,
                            linear3).to(device)



# Loss Function 정의하기 (CrossEntropy를 사용할 것!)

criterion = torch.nn.CrossEntropyLoss().to(device)




#optimizer 정의하기 (Adam optimizer를 사용할 것!)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)




#cost 계산을 위한 변수 설정

train_total_batch = len(train_loader)


In [3]:
#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 
model.train()
for epoch in range(training_epochs):
    
    avg_cost = 0

    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        bn_loss = criterion(hypothesis, Y)
        bn_loss.backward()
        optimizer.step()

        avg_cost += bn_loss / train_total_batch

    
      
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')


Epoch: 0001 cost = 0.300638944
Epoch: 0002 cost = 0.195512712
Epoch: 0003 cost = 0.160685763
Epoch: 0004 cost = 0.148514271
Epoch: 0005 cost = 0.139201418
Epoch: 0006 cost = 0.123943336
Epoch: 0007 cost = 0.133087322
Epoch: 0008 cost = 0.118435383
Epoch: 0009 cost = 0.104033753
Epoch: 0010 cost = 0.097112417
Epoch: 0011 cost = 0.104472876
Epoch: 0012 cost = 0.099545717
Epoch: 0013 cost = 0.098316140
Epoch: 0014 cost = 0.088534087
Epoch: 0015 cost = 0.087023184
Learning finished


In [4]:
#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

with torch.no_grad():
    model.eval()
    accuracy=0

    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.8269999623298645
Label:  2
Prediction:  2




## Q1-2)

### i) Hidden layer node 수 증가

In [5]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
device = torch.device('cuda:0')
learning_rate = 0.1
training_epochs = 15
batch_size = 100


#train과 test set으로 나누어 MNIST data 불러오기

train_dataset= dsets.MNIST(root='./data',
                           train=True,
                           transform=transforms.ToTensor(),
                           download=True)
test_dataset = dsets.MNIST(root ='./data',  
                           train = False,  
                           transform = transforms.ToTensor())


#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, 
                                           batch_size = batch_size, 
                                           shuffle = True,
                                           drop_last=True) 
  
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, 
                                          batch_size = batch_size, 
                                          shuffle = False,
                                          drop_last=True) 


# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 200, bias=True)
linear2 = torch.nn.Linear(200, 150, bias=True)
linear3 = torch.nn.Linear(150, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.03)
bn1 = torch.nn.BatchNorm1d(200)
bn2 = torch.nn.BatchNorm1d(150)

#xavier initialization을 이용하여 각 layer의 weight 초기화

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)



# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2,  relu, dropout,
                            linear3).to(device)



# Loss Function 정의하기 (CrossEntropy를 사용할 것!)

criterion = torch.nn.CrossEntropyLoss().to(device)


#optimizer 정의하기 (Adam optimizer를 사용할 것!)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


#cost 계산을 위한 변수 설정

train_total_batch = len(train_loader)



#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 

for epoch in range(training_epochs):
    model.train()
    avg_cost = 0

    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        bn_loss = criterion(hypothesis, Y)
        bn_loss.backward()
        optimizer.step()

        avg_cost += bn_loss / train_total_batch

    
      
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')



#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

with torch.no_grad():
    model.eval()
    accuracy=0

    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())


Epoch: 0001 cost = 0.305562794
Epoch: 0002 cost = 0.179520071
Epoch: 0003 cost = 0.155518338
Epoch: 0004 cost = 0.135117471
Epoch: 0005 cost = 0.127021611
Epoch: 0006 cost = 0.121332474
Epoch: 0007 cost = 0.106230646
Epoch: 0008 cost = 0.103180945
Epoch: 0009 cost = 0.098527424
Epoch: 0010 cost = 0.090761736
Epoch: 0011 cost = 0.088379502
Epoch: 0012 cost = 0.085002348
Epoch: 0013 cost = 0.073775344
Epoch: 0014 cost = 0.083117932
Epoch: 0015 cost = 0.078070484
Learning finished
Accuracy: 0.8282999992370605
Label:  8
Prediction:  5




### ii) Hidden layer node 수 감소

In [6]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
device = torch.device('cuda:0')
learning_rate = 0.1
training_epochs = 15
batch_size = 100


#train과 test set으로 나누어 MNIST data 불러오기

train_dataset= dsets.MNIST(root='./data',
                           train=True,
                           transform=transforms.ToTensor(),
                           download=True)
test_dataset = dsets.MNIST(root ='./data',  
                           train = False,  
                           transform = transforms.ToTensor())


#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, 
                                           batch_size = batch_size, 
                                           shuffle = True,
                                           drop_last=True) 
  
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, 
                                          batch_size = batch_size, 
                                          shuffle = False,
                                          drop_last=True) 


# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 75, bias=True)
linear2 = torch.nn.Linear(75, 50, bias=True)
linear3 = torch.nn.Linear(50, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.03)
bn1 = torch.nn.BatchNorm1d(75)
bn2 = torch.nn.BatchNorm1d(50)

#xavier initialization을 이용하여 각 layer의 weight 초기화

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)



# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2,  relu, dropout,
                            linear3).to(device)



# Loss Function 정의하기 (CrossEntropy를 사용할 것!)

criterion = torch.nn.CrossEntropyLoss().to(device)


#optimizer 정의하기 (Adam optimizer를 사용할 것!)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


#cost 계산을 위한 변수 설정

train_total_batch = len(train_loader)



#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 

for epoch in range(training_epochs):
    model.train()
    avg_cost = 0

    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        bn_loss = criterion(hypothesis, Y)
        bn_loss.backward()
        optimizer.step()

        avg_cost += bn_loss / train_total_batch

    
      
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')



#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

with torch.no_grad():
    model.eval()
    accuracy=0

    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Epoch: 0001 cost = 0.303691298
Epoch: 0002 cost = 0.202788413
Epoch: 0003 cost = 0.170268387
Epoch: 0004 cost = 0.154466957
Epoch: 0005 cost = 0.145586014
Epoch: 0006 cost = 0.140276745
Epoch: 0007 cost = 0.133625925
Epoch: 0008 cost = 0.130788818
Epoch: 0009 cost = 0.114367209
Epoch: 0010 cost = 0.119733892
Epoch: 0011 cost = 0.117148787
Epoch: 0012 cost = 0.110618398
Epoch: 0013 cost = 0.107757166
Epoch: 0014 cost = 0.114211902
Epoch: 0015 cost = 0.097845942
Learning finished
Accuracy: 0.8382999897003174
Label:  4
Prediction:  4




In [8]:
# 파라미터 설정 (learning rate, training epochs, batch_size)
device = torch.device('cuda:0')
learning_rate = 0.1
training_epochs = 15
batch_size = 100


#train과 test set으로 나누어 MNIST data 불러오기

train_dataset= dsets.MNIST(root='./data',
                           train=True,
                           transform=transforms.ToTensor(),
                           download=True)
test_dataset = dsets.MNIST(root ='./data',  
                           train = False,  
                           transform = transforms.ToTensor())


#dataset loader에 train과 test할당하기(batch size, shuffle, drop_last 잘 설정할 것!)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, 
                                           batch_size = batch_size, 
                                           shuffle = True,
                                           drop_last=True) 
  
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, 
                                          batch_size = batch_size, 
                                          shuffle = False,
                                          drop_last=True) 


# Layer 쌓기 (조건: 3개의 Layer 사용, DropOut 사용 (p=0.3), ReLU 함수 사용, Batch normalization하기)
# 각 Layer의 Hidden node 수 : 1st Layer (784,100), 2nd Layer(100,100),3rd Layer(100,10)

linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.03)
bn1 = torch.nn.BatchNorm1d(256)
bn2 = torch.nn.BatchNorm1d(256)

#xavier initialization을 이용하여 각 layer의 weight 초기화

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)



# torch.nn.Sequential을 이용하여 model 정의하기(쌓는 순서: linear-Batch Normalization Layer - ReLU- DropOut)

model = torch.nn.Sequential(linear1, bn1, relu, dropout,
                            linear2, bn2,  relu, dropout,
                            linear3).to(device)



# Loss Function 정의하기 (CrossEntropy를 사용할 것!)

criterion = torch.nn.CrossEntropyLoss().to(device)




#optimizer 정의하기 (Adam optimizer를 사용할 것!)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)




#cost 계산을 위한 변수 설정

train_total_batch = len(train_loader)



#Training epoch (cost 값 초기 설정(0으로)과 model의 train 설정 꼭 할 것) 

for epoch in range(training_epochs):
    model.train()
    avg_cost = 0

    
#train dataset을 불러오고(X,Y 불러오기), back propagation과 optimizer를 사용하여 loss를 최적화하는 코드

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        bn_loss = criterion(hypothesis, Y)
        bn_loss.backward()
        optimizer.step()

        avg_cost += bn_loss / train_total_batch

    
      
        
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')



#test data로 모델의 정확도를 검증하는 코드 (model의 evaluation mode 설정 꼭 할 것)
#X_test 불러올 때 view를 사용하여 차원 변환할 것/ Y_test를 불러올때 labels사용
#accuracy의 초기 값 설정(0으로) 꼭 할 것

with torch.no_grad():
    model.eval()
    accuracy=0

    X_test = test_dataset.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = test_dataset.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(test_dataset) - 1)
    X_single_data = test_dataset.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = test_dataset.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Epoch: 0001 cost = 0.310552359
Epoch: 0002 cost = 0.184533507
Epoch: 0003 cost = 0.150780648
Epoch: 0004 cost = 0.136335269
Epoch: 0005 cost = 0.120968997
Epoch: 0006 cost = 0.118954331
Epoch: 0007 cost = 0.108289070
Epoch: 0008 cost = 0.105005130
Epoch: 0009 cost = 0.090616472
Epoch: 0010 cost = 0.090751357
Epoch: 0011 cost = 0.093109652
Epoch: 0012 cost = 0.086847045
Epoch: 0013 cost = 0.083666965
Epoch: 0014 cost = 0.076678947
Epoch: 0015 cost = 0.077916332
Learning finished
Accuracy: 0.7560999989509583
Label:  0
Prediction:  0




In [12]:
!pip install tensorboardX

Collecting tensorboardX
[?25l  Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)
[K     |█                               | 10kB 22.2MB/s eta 0:00:01[K     |██▏                             | 20kB 2.9MB/s eta 0:00:01[K     |███▏                            | 30kB 4.0MB/s eta 0:00:01[K     |████▎                           | 40kB 4.3MB/s eta 0:00:01[K     |█████▎                          | 51kB 3.5MB/s eta 0:00:01[K     |██████▍                         | 61kB 3.7MB/s eta 0:00:01[K     |███████▍                        | 71kB 4.2MB/s eta 0:00:01[K     |████████▌                       | 81kB 4.6MB/s eta 0:00:01[K     |█████████▌                      | 92kB 4.9MB/s eta 0:00:01[K     |██████████▋                     | 102kB 4.6MB/s eta 0:00:01[K     |███████████▊                    | 112kB 4.6MB/s eta 0:00:01[K     |████████████▊                   | 122kB 4.

In [16]:
from tensorboardX import SummaryWriter
writer = SummaryWriter('run/mnist_graph')

In [17]:
writer.add_scalars('loss/L1_loss', 0.2, 13)

AttributeError: ignored