In [3]:
#import necessary packages
import numpy as np
from torch import nn, optim
import torch
from torchvision import transforms, datasets
from torch.utils.data import random_split, DataLoader

In [4]:
#transforms.ToTensor() converts images into numbers with three color channels
#transforms.Normalize() normalizes the tensor with a mean and std
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,),(0.5,))
                                ])

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
#load MNIST dataset to the working directory, and if the file is not alreay there, download it
trainset = datasets.MNIST('MNIST', train = True,
                                    download = True, transform = transform
                                  )
'''testset = datasets.MNIST('MNIST', train = False,
                                    download = True, transform = transform
                                )'''

"testset = datasets.MNIST('MNIST', train = False,\n                                    download = True, transform = transform\n                                )"

In [7]:
#split data into train and test sets
train, test = random_split(trainset, [55000, 5000])

In [8]:
#load the data for training, and give the batchsize accordingly
trainloader = DataLoader(train, batch_size = 64, shuffle=True)
testloader = DataLoader(test, batch_size = 64, shuffle=True)

In [9]:
#see the shape of the images and labels
dataiter = iter(trainloader)
images, labels = dataiter.next()

print(images.shape)
print(labels.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [10]:
#create the model here 
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    #nn.Dropout(0.2),
    nn.Linear(128,64),
    nn.ReLU(),
    #nn.Dropout(0.2),
    nn.Linear(64,10),
    nn.LogSoftmax(dim=1)
)
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax(dim=1)
)


In [11]:
#here we give the loss type and optmizer properties
from time import time
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
time0 = time ()
epochs = 30

In [12]:
#train the model here
for epoch in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images = images.view(images.shape[0],-1)
        

        #1: Feed image to the network       
        output = model.forward(images)
        #2: Calculate the loss
        loss = criterion(output,labels)
        #3: Cleaning the gradient
        optimizer.zero_grad()
        #4: Accumulate the partial partial derivative wet params
        loss.backward()
        #5: Step in the opposite direction of the gradient
        optimizer.step()
        #Add the loss
        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(epoch+1, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)


Epoch 1 - Training loss: 2.236738299214563
Epoch 2 - Training loss: 2.0190170716407687
Epoch 3 - Training loss: 1.636873779047367
Epoch 4 - Training loss: 1.217534734481989
Epoch 5 - Training loss: 0.9142357387515001
Epoch 6 - Training loss: 0.7242602819620176
Epoch 7 - Training loss: 0.6089548842158429
Epoch 8 - Training loss: 0.5380934163581493
Epoch 9 - Training loss: 0.4914513547753179
Epoch 10 - Training loss: 0.45844964514984643
Epoch 11 - Training loss: 0.43459881284209184
Epoch 12 - Training loss: 0.41589466965822286
Epoch 13 - Training loss: 0.4010923460125923
Epoch 14 - Training loss: 0.38896381541393527
Epoch 15 - Training loss: 0.3784743438279906
Epoch 16 - Training loss: 0.3697805682067261
Epoch 17 - Training loss: 0.3619996112792991
Epoch 18 - Training loss: 0.3554578417608904
Epoch 19 - Training loss: 0.348635112936067
Epoch 20 - Training loss: 0.3432929854666771
Epoch 21 - Training loss: 0.3380547388348468
Epoch 22 - Training loss: 0.3332468438633653
Epoch 23 - Training

In [24]:
#evaluate the model with the test data here
correct_count, all_count = 0, 0
for images,labels in testloader:
  for i in range(len(labels)):
    img = images[i].view(1, 784) #feed one image at a time
    with torch.no_grad():
      logps = model.forward(img) #feed the image to the forward pass    
      ps = torch.exp(logps)      #take the exponent of the output from forward pass
      probab = list(ps.numpy()[0]) #make a list of the number got from the forward pass
      pred_label = probab.index(max(probab)) #predicted label
      true_label = labels.numpy()[i] #actual label
      if(true_label == pred_label):
        correct_count += 1
      all_count += 1

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 5000

Model Accuracy = 0.91


In [14]:
#we can save the model with the command below
torch.save(model, './my_mnist_model.pt') 

In [15]:
#We can feed a new image with right size to the network and make a prediction
from PIL import Image
# Image.open() can also open other image types
img = Image.open("download.png")
# WIDTH and HEIGHT are integers
resized_img = img.resize((28, 28))
resized_img.save("resized_download.png")

In [25]:
#Check the shape of the image
import matplotlib.pyplot as plt 
img = plt.imread('resized_download.png')
img.shape

(28, 28, 4)

In [27]:
#Convert the image to gray scale i.e: image with one channel only
from PIL import Image
img = Image.open("resized_download.png") #for example image size : 28x28x3
img1 = img.convert('L')  #convert a gray scale
print(img1.size)


(28, 28)


In [28]:
#expand one dimension
y = np.expand_dims(img1, axis=0)
print(y.shape)
#y = torch.from_numpy(y)
y = torch.tensor(y,dtype=torch.float)
y.shape
y = y.view(1,784)
y.shape

(1, 28, 28)


torch.Size([1, 784])

In [30]:
#Feed the image to the network
with torch.no_grad():
    logps = model(y)    
    ps = torch.exp(logps)
    probab = list(ps.numpy()[0])
    pred_label = probab.index(max(probab))
print("Probability = ", probab)
print("Prediction is: ", pred_label)

Probability =  [8.852069e-14, 7.155863e-07, 0.9999865, 0.0, 0.0, 2.5422273e-15, 2.6654467e-26, 1.7665733e-31, 1.2805907e-05, 9.480968e-23]
Prediction is:  2
