In [0]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [0]:
input_size=784
hidden_size=400
output_size=10
epochs=10
batch_size=100
learning_rate=0.001

In [0]:
train_dataset= datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset=datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

In [0]:
train_loader=torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader=torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [0]:
class Net(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(Net,self).__init__()
    self.fc1=nn.Linear(input_size,hidden_size) # 1. layer
    self.relu=nn.ReLU() 
    self.fc2=nn.Linear(hidden_size, hidden_size) # 2.layer
    self.fc3=nn.Linear(hidden_size, output_size)# 3. layer
    
  def forward(self,x):
    output=self.fc1(x)
    output=self.relu(output)
    output=self.fc2(output)
    output=self.relu(output)
    output=self.fc3(output)
    return output

In [0]:
net=Net(input_size, hidden_size, output_size)
CUDA=torch.cuda.is_available()
if CUDA:
  net=net.cuda()

In [14]:
#print(list(net.parameters()))
net.parameters

<bound method Module.parameters of Net(
  (fc1): Linear(in_features=784, out_features=400, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=400, out_features=400, bias=True)
  (fc3): Linear(in_features=400, out_features=10, bias=True)
)>

In [0]:
# Loss and optimizer
criterion=nn.CrossEntropyLoss() # classification problem, more than 2 classifications
optimizer=torch.optim.Adam(net.parameters(), lr=learning_rate)

In [33]:
# training
correct_train=0
total_train=0
for epoch in range(epochs):
  for i, (images, labels) in enumerate(train_loader):
    images=Variable(images.view(-1,28*28)) # flatten image data
    labels=Variable(labels)
    if CUDA: # transfer to CUDA
      images=images.cuda()
      labels=labels.cuda()
    optimizer.zero_grad() # clear param_grad in eq. param=param-(lr*pram_grad)
    outputs=net(images) # forward pass
    max_predicted_data, predicted= torch.max(outputs.data,1) # find the max of softmax and map the predicted list
    
    total_train+=labels.size(0) # increment batch size
    
    if CUDA: 
      correct_train+=(predicted.cpu()==labels.cpu()).sum() # sum() function works only on CPU, not on GPU  
    else:
      correct_train+=(predicted==labels).sum()
     
    loss=criterion(outputs,labels) # loss 
    loss.backward() # backpropagation

    optimizer.step() # update weights
    
    if(i+1)%100==0:
      #print('Epoch [{}/{}], Iteration [{}/{}]%'.format
      #     (epoch+1,epochs, i+1,len(train_dataset)//batch_size))
      print('Epoch [{}/{}], Iteration [{}/{}], Training Loss:{}, Training Accuracy:{}%'.format
           (epoch+1,epochs, i+1,len(train_dataset)//batch_size, loss.data, (100*correct_train/total_train)))
print("Training model is finished!")

Epoch [1/10], Iteration [100/600], Training Loss:0.09528876096010208, Training Accuracy:96%
Epoch [1/10], Iteration [200/600], Training Loss:0.20748241245746613, Training Accuracy:96%
Epoch [1/10], Iteration [300/600], Training Loss:0.10818041115999222, Training Accuracy:96%
Epoch [1/10], Iteration [400/600], Training Loss:0.04674253612756729, Training Accuracy:96%
Epoch [1/10], Iteration [500/600], Training Loss:0.10555576533079147, Training Accuracy:96%
Epoch [1/10], Iteration [600/600], Training Loss:0.08904946595430374, Training Accuracy:97%
Epoch [2/10], Iteration [100/600], Training Loss:0.04823092371225357, Training Accuracy:97%
Epoch [2/10], Iteration [200/600], Training Loss:0.04080823436379433, Training Accuracy:97%
Epoch [2/10], Iteration [300/600], Training Loss:0.04175937548279762, Training Accuracy:97%
Epoch [2/10], Iteration [400/600], Training Loss:0.024706119671463966, Training Accuracy:97%
Epoch [2/10], Iteration [500/600], Training Loss:0.019872425124049187, Training

In [35]:
# test
correct=0
total=0
for images, labels in test_loader:
  images=Variable(images.view(-1,28*28))
  if CUDA:
    images=images.cuda()
  outputs=net(images)
  _,predicted=torch.max(outputs.data,1)
  total+=labels.size(0)
  if CUDA:
    correct+=(predicted.cpu()==labels.cpu()).sum()
  else:
    correct+=(predicted==labels).sum()
  print("Test Accuracy: %d " % (100*correct/total))



Test Accuracy: 100 
Test Accuracy: 98 
Test Accuracy: 98 
Test Accuracy: 98 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 96 
Test Accuracy: 96 
Test Accuracy: 96 
Test Accuracy: 96 
Test Accuracy: 96 
Test Accuracy: 96 
Test Accuracy: 96 
Test Accuracy: 96 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 96 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accuracy: 97 
Test Accura