In [1]:
import torch as t
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms

In [10]:
# Device configuration
# Use the GPU when CUDA is usable in this environment; otherwise fall back to the CPU.
if t.cuda.is_available():
    device = t.device('cuda')
else:
    device = t.device('cpu')

In [11]:
# Model dimensions
input_size = 784       # length of one flattened image (28*28, see the reshape before the forward pass)
hidden_size = 500      # width of the single hidden layer
num_classes = 10       # number of output scores produced by the network

# Optimisation settings
batch_size = 64
num_epochs = 5
learning_rate = 0.01

# Reproducibility: weight initialisation and the shuffled DataLoader both draw
# from torch's global RNG, so seed it once before either is created.
# NOTE: this makes CPU runs repeatable; some GPU kernels may still be non-deterministic.
seed = 0
t.manual_seed(seed);  # trailing ';' keeps the returned Generator out of the cell output

In [5]:
# MNIST train/test splits, both stored under the same root directory.
# ToTensor() converts each image to a tensor when a sample is fetched.
mnist_root = '../../data/'

# Training split; download=True asks torchvision to fetch the files if needed.
train_dataset = torchvision.datasets.MNIST(
    root=mnist_root,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Held-out split; no download flag is passed here, so the files are expected
# to be under `mnist_root` already (the training split above requests them).
test_dataset = torchvision.datasets.MNIST(
    root=mnist_root,
    train=False,
    transform=transforms.ToTensor(),
)

In [6]:
# Mini-batch iterators over the two splits.
# Training batches are reshuffled every epoch.
train_loader = t.utils.data.DataLoader(dataset=train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True)

# Evaluation does not benefit from shuffling: accuracy is order-independent,
# and a fixed order makes per-batch inspection repeatable.
test_loader = t.utils.data.DataLoader(dataset=test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False)

In [28]:
# Feed Forward NN
# Fully connected neural network with one hidden layer

class FF_NN(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in the last dimension of the input.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are created in forward order; the attribute names become the
        # prefixes of the state_dict keys (fc1.*, fc2.*).
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, X):
        """Return the raw output scores for `X`.

        `X` must have `input_size` features in its last dimension. No softmax
        is applied here; the result is the direct output of the second linear
        layer.
        """
        hidden = self.relu1(self.fc1(X))
        return self.fc2(hidden)

In [29]:
# Build the network and place its parameters on the selected device.
model = FF_NN(input_size=input_size,
              hidden_size=hidden_size,
              num_classes=num_classes)
model = model.to(device)

# Cross-entropy loss computed from the model's raw output scores and integer labels.
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the model.
optimizer = t.optim.Adam(params=model.parameters(), lr=learning_rate)

In [30]:
# Train the model
total_step = len(train_loader)  # number of mini-batches per epoch
for epoch in range(num_epochs):
    # `step` is 1-based so it can be printed and tested directly.
    for step, (images, labels) in enumerate(train_loader, start=1):
        # Flatten every image to a 784-long vector and move the batch to the model's device.
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before computing new ones.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 100 steps.
        if step % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, step, total_step, loss.item()))

Epoch [1/5], Step [100/600], Loss: 0.2501
Epoch [1/5], Step [200/600], Loss: 0.3223
Epoch [1/5], Step [300/600], Loss: 0.2263
Epoch [1/5], Step [400/600], Loss: 0.0795
Epoch [1/5], Step [500/600], Loss: 0.1081
Epoch [1/5], Step [600/600], Loss: 0.1786
Epoch [2/5], Step [100/600], Loss: 0.0752
Epoch [2/5], Step [200/600], Loss: 0.0563
Epoch [2/5], Step [300/600], Loss: 0.4314
Epoch [2/5], Step [400/600], Loss: 0.0549
Epoch [2/5], Step [500/600], Loss: 0.2403
Epoch [2/5], Step [600/600], Loss: 0.0369
Epoch [3/5], Step [100/600], Loss: 0.0037
Epoch [3/5], Step [200/600], Loss: 0.0294
Epoch [3/5], Step [300/600], Loss: 0.1528
Epoch [3/5], Step [400/600], Loss: 0.0752
Epoch [3/5], Step [500/600], Loss: 0.1525
Epoch [3/5], Step [600/600], Loss: 0.0206
Epoch [4/5], Step [100/600], Loss: 0.0182
Epoch [4/5], Step [200/600], Loss: 0.0343
Epoch [4/5], Step [300/600], Loss: 0.2521
Epoch [4/5], Step [400/600], Loss: 0.2178
Epoch [4/5], Step [500/600], Loss: 0.2209
Epoch [4/5], Step [600/600], Loss:

In [34]:
# Test the model
# Evaluate in eval mode and restore the previous mode afterwards, so re-running
# the training cell later behaves the same as before this cell ran. (This
# network has no dropout/batch-norm, so the mode does not change its outputs.)
was_training = model.training
model.eval()

# Gradients are not needed for evaluation; no_grad() skips building the graph.
with t.no_grad():
    total = 0
    correct = 0
    for images, labels in test_loader:
        # The model was moved to `device`, so its inputs must be moved there too;
        # otherwise the forward pass fails on a CUDA machine. Labels follow so
        # the comparison below happens on a single device.
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        pred = model(images)
        # Index of the highest score along the class dimension = predicted class.
        _, predicted = t.max(pred, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print("Test Accuracy:{}%".format(100*(correct/total)))

model.train(was_training)

Test Accuracy:96.89999999999999%


In [35]:
# Save the model
# Only the state_dict (the parameter tensors keyed by layer name) is written,
# not the module object itself.
checkpoint_path = "FF_NN.ckpt"
t.save(model.state_dict(), checkpoint_path)