In [0]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch
import numpy as np
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [0]:
# parametrs 
input_size=784  # input size of the image (28*28=784)
hidden_size=400 # hiiden layer neuron size
out_size=10  # out put layer label size
epochs=30     
batch_size=100    
learning_rate=0.001

In [7]:
train_dataset=datasets.MNIST(root='.\data', train=True, transform=transforms.ToTensor(),download=True)
test_dataset=datasets.MNIST(root='.\data', train=False, transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [0]:
# make data itrable to loading it to a loader,  shuffle the training data to make it independent of the order
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size, shuffle=True)
test_loader=torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [9]:
print(len(train_loader))
print(len(train_loader))

600
600


In [0]:
# Neural Net Initialization
class Net(nn.Module):
    def __init__(self,input_size,hidden_size,out_size):
        super(Net, self).__init__()
        self.fc1=nn.Linear(input_size, hidden_size) # first Layer
        self.relu=nn.ReLU()
        self.fc2=nn.Linear(hidden_size,hidden_size) # hidden Layer1
        self.fc3=nn.Linear(hidden_size,out_size)
    def forward(self, x):
        out=self.fc1(x)
        out=self.relu(out)
        out=self.fc2(out)
        out=self.relu(out)
        out=self.fc3(out)
        return out


In [0]:
# create an object of  a class
net=Net(input_size,hidden_size,out_size)
CUDA=torch.cuda.is_available()
if CUDA:
    net=net.cuda()
# the LOSS function comes along softmax, no need to declare loss function
criterian=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(net.parameters(), lr=learning_rate)


In [12]:
# training
correct_train=0
total_train=0
for epoch in range(epochs):
    for i, (images,labels) in enumerate(train_loader):
        # flattern the image from(batch,1,28,28) -->(100,1,28,28) where 1 represent the channel gray scale
        # to size (100,784) and wrap it in avariable
        images=Variable(images.view(-1,28*28))
        labels=Variable(labels)
        
        if CUDA:
            images=images.cuda()
            labels=labels.cuda()
        # clear the param_grad in param=param - lr*param_grad so it wont accumulated
        optimizer.zero_grad()
        outputs=net(images)  # forward pass
        _,predicted=torch.max(outputs.data,1) # prediction
        total_train +=labels.size(0)
        if CUDA:
            correct_train +=(predicted.cpu()==labels.cpu()).sum()
        else:
            correct_train +=(predicted==labels).sum()
        loss=criterian(outputs,labels)   # difference between actual and predicted(loss function)
        loss.backward()  # backpropagation
        optimizer.step()  # update the weights
        if (i+1)%100==0 :
            print('epoch[{}/{}],Iteration[{}/{}],trainingloss :{},trainingAccuracy:{}%'.format(epoch+1,epochs,i+1,len(train_dataset)//batch_size,loss.data[0],(100*correct_train/total_train)))
        



epoch[1/30],Iteration[100/600],trainingloss :0.30798834562301636,trainingAccuracy:82%
epoch[1/30],Iteration[200/600],trainingloss :0.22854702174663544,trainingAccuracy:86%
epoch[1/30],Iteration[300/600],trainingloss :0.1981799453496933,trainingAccuracy:89%
epoch[1/30],Iteration[400/600],trainingloss :0.15040524303913116,trainingAccuracy:90%
epoch[1/30],Iteration[500/600],trainingloss :0.09021374583244324,trainingAccuracy:91%
epoch[1/30],Iteration[600/600],trainingloss :0.08337249606847763,trainingAccuracy:92%
epoch[2/30],Iteration[100/600],trainingloss :0.2190277874469757,trainingAccuracy:92%
epoch[2/30],Iteration[200/600],trainingloss :0.06463165581226349,trainingAccuracy:93%
epoch[2/30],Iteration[300/600],trainingloss :0.034868475049734116,trainingAccuracy:93%
epoch[2/30],Iteration[400/600],trainingloss :0.08129429817199707,trainingAccuracy:94%
epoch[2/30],Iteration[500/600],trainingloss :0.09453517198562622,trainingAccuracy:94%
epoch[2/30],Iteration[600/600],trainingloss :0.04430421

In [14]:
# testing sets
correct=0
total=0
for images,labels in test_loader:
    images=Variable(images.view(-1,28*28))
    
    if CUDA:
        images=images.cuda()
    output=net(images)
    _,predict=torch.max(output.data, 1)
    total +=labels.size(0)
    if CUDA:
        correct +=(predict.cpu()==labels.cpu()).sum()
    else:
        correct +=(predict==labels).sum()
print('final test accuracy: %d %%' %( 100 * correct/total ))

final test accuracy: 98 %
