In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets 
import torchvision.transforms as transforms

In [2]:
# Hyper-parameters shared by every cell below.
input_size = 28 * 28     # one input neuron per pixel of a 28x28 image (784)
hidden_size = 400        # hidden-layer width, roughly (input_size + out_size) / 2
out_size = 10            # one output per class label (0-9)
epochs = 20              # number of full passes over the training set
batch_size = 100         # samples processed per iteration
learning_rate = 1e-3     # optimizer step size

In [3]:
# Fashion-MNIST through torchvision:
# https://pytorch.org/docs/stable/torchvision/datasets.html#fashion-mnist
# Both splits live under ./data and are converted to tensors on access.

# Training split (download requested).
train_dataset = datasets.FashionMNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Test split (no download flag is passed here).
test_dataset = datasets.FashionMNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [4]:
# Mini-batch iterators over the two splits, both using batch_size samples per batch.
# Training batches are shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

![nn mnist](https://user-images.githubusercontent.com/30661597/61593615-5eb8bf00-ac14-11e9-8087-f880971b3543.png)


In [5]:
class Net(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        input_size: number of features per input sample.
        hidden_size: width of both hidden layers.
        out_size: number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, out_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)    # input  -> hidden
        self.fc2 = nn.Linear(hidden_size, hidden_size)   # hidden -> hidden
        self.fc3 = nn.Linear(hidden_size, out_size)      # hidden -> output scores
        self.relu = nn.ReLU()                            # shared by both hidden layers
        self.init_weights()

    def init_weights(self):
        """Re-initialise the fc1 and fc2 weights with Kaiming-normal init.

        fc3 and all biases keep the values nn.Linear gave them.
        """
        for layer in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(layer.weight)

    def forward(self, x):
        """Return the raw (un-normalised) output scores for input ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [6]:
# Build the network from the hyper-parameters defined earlier in the notebook.
net = Net(input_size, hidden_size, out_size)

# Run on the GPU when one is available; the CUDA flag is reused by the later cells.
CUDA = torch.cuda.is_available()
net = net.cuda() if CUDA else net

# CrossEntropyLoss applies (log-)softmax internally, so the model outputs raw
# scores and needs no explicit Softmax layer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [7]:
# The bound method is referenced, not called: the cell displays its repr,
# which embeds the layer-by-layer summary of the network.
net.parameters

<bound method Module.parameters of Net(
  (fc1): Linear(in_features=784, out_features=400, bias=True)
  (fc2): Linear(in_features=400, out_features=400, bias=True)
  (fc3): Linear(in_features=400, out_features=10, bias=True)
  (relu): ReLU()
)>

In [8]:
# for i, (images, labels) in enumerate(train_loader):   
#         #Flatten the image from size (batch,1,28,28) --> (100,1,28,28) where 1 represents the number of channels (grayscale-->1),
#         # to size (100,784) and wrap it in a variable
#         # 100 is the batch size, 1 is the grayscale channel
#         print(images.size())
#         # -1 figure out the dimension.
#         images = images.view(-1, 28*28)
#         print(images.size())

In [9]:
# Train the network: one full pass over train_loader per epoch, then report
# that epoch's mean batch loss and training accuracy.
num_batches = len(train_loader)   # batches per epoch (600 in the recorded run)

# Net contains only Linear/ReLU layers, so this is a no-op today; it keeps the
# loop correct if Dropout/BatchNorm layers are added later.
net.train()

for epoch in range(epochs):
    correct_train = 0     # correctly classified training samples this epoch
    running_loss = 0.0    # sum of per-batch mean losses this epoch

    for images, labels in train_loader:
        # Flatten each image: (batch, 1, 28, 28) -> (batch, 784).
        images = images.view(-1, 28*28)
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        outputs = net(images)
        loss = criterion(outputs, labels)   # mismatch between predicted scores and true labels

        optimizer.zero_grad()   # PyTorch accumulates gradients across backward passes; reset first
        loss.backward()         # backpropagation
        optimizer.step()        # update the weights

        # Book-keeping with plain Python numbers (no autograd history, no
        # device tensors kept alive between iterations).
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        correct_train += (predicted == labels).sum().item()

    # Report once per epoch, unconditionally. The previous guard
    # `(i+1) % 100 == 0` ran after the batch loop on the leaked loop variable,
    # so it only printed when the batch count was a multiple of 100.
    print('Epoch [{}/{}], Iteration [{}/{}], Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format
          (epoch+1, epochs, num_batches, num_batches, running_loss/num_batches, 100*correct_train/len(train_dataset)))
print("DONE TRAINING!")

Epoch [1/20], Iteration [600/600], Training Loss: 0.488, Training Accuracy: 82.727%
Epoch [2/20], Iteration [600/600], Training Loss: 0.355, Training Accuracy: 86.977%
Epoch [3/20], Iteration [600/600], Training Loss: 0.317, Training Accuracy: 88.247%
Epoch [4/20], Iteration [600/600], Training Loss: 0.292, Training Accuracy: 89.162%
Epoch [5/20], Iteration [600/600], Training Loss: 0.278, Training Accuracy: 89.553%
Epoch [6/20], Iteration [600/600], Training Loss: 0.258, Training Accuracy: 90.297%
Epoch [7/20], Iteration [600/600], Training Loss: 0.246, Training Accuracy: 90.598%
Epoch [8/20], Iteration [600/600], Training Loss: 0.235, Training Accuracy: 91.152%
Epoch [9/20], Iteration [600/600], Training Loss: 0.223, Training Accuracy: 91.570%
Epoch [10/20], Iteration [600/600], Training Loss: 0.213, Training Accuracy: 91.988%
Epoch [11/20], Iteration [600/600], Training Loss: 0.203, Training Accuracy: 92.247%
Epoch [12/20], Iteration [600/600], Training Loss: 0.196, Training Accurac

In [10]:
# Evaluate classification accuracy on the test split.
# Net contains only Linear/ReLU layers, so eval() changes nothing today; it
# keeps the measurement correct if Dropout/BatchNorm layers are added later.
net.eval()

correct = 0   # correctly classified test samples
with torch.no_grad():   # inference only: no gradients are needed
    for images, labels in test_loader:
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()
        # Flatten each image: (batch, 1, 28, 28) -> (batch, 784).
        images = images.view(-1, 28*28)
        outputs = net(images)
        predicted = outputs.argmax(dim=1)   # index of the highest score = predicted class
        correct += (predicted == labels).sum().item()

# The image count is taken from the dataset so the message always matches the
# denominator used for the percentage.
print('Accuracy of the network on the {} test images: {} %'.format(len(test_dataset), 100 * correct / len(test_dataset)))

Accuracy of the network on the 10000 test images: 89.67 %
