In [1]:
from matplotlib import pyplot as plt
import numpy as np
from torchvision import transforms, datasets
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,)),  # mean value = 0.1307, standard deviation value = 0.3081
])

In [3]:
data_path = './MNIST'

data_test   = datasets.MNIST(root = data_path, train= True, download=True, transform= transform)
data_train  = datasets.MNIST(root = data_path, train= False, download=True, transform= transform)

In [4]:
print("the number of your training data (must be 10,000) = ", data_train.__len__())
print("hte number of your testing data (must be 60,000) = ", data_test.__len__())

the number of your training data (must be 10,000) =  10000
hte number of your testing data (must be 60,000) =  60000


In [5]:
data_loader = torch.utils.data.DataLoader(data_train, batch_size=20, shuffle=True, drop_last=True)

In [10]:
import torch.nn as nn
import torch.nn.functional as F
import torch

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # layer definition
        self.fc1 = nn.Linear(784, 256, bias = True)
        self.fc2 = nn.Linear(256, 256, bias = True)
        self.fc3 = nn.Linear(256, 10, bias = True)
        self.dropout = nn.Dropout(p=0.2)
        self.RELU = nn.ReLU()
        
        # weight init
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.xavier_uniform_(self.fc3.weight)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.RELU(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = self.RELU(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        return x
        
net = Net()

In [11]:
net

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (RELU): ReLU()
)

In [13]:
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-4, weight_decay=1e-3)
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)

In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [15]:
net.to(device)

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (RELU): ReLU()
)

In [16]:
train_l = []
train_acc = []

test_l = []
test_acc = []

In [None]:
net.train()
num_epochs = 20
for epoch in range(num_epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for X, Y in data_loader: 
        X = X.view(-1, 28*28).to(device) 
        Y = Y.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        outputs = net(X)
        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()

    with torch.no_grad(): 
        net.eval() 
        X_train = data_train.data.view(-1, 28*28).float().to(device) 
        Y_train = data_train.targets.to(device) 
        prediction = net(X_train)
        train_loss = criterion(prediction, Y_train) 
        predicted_classes = torch.argmax(prediction, 1) 
        correct_count = (predicted_classes == Y_train) 
        train_accuracy = correct_count.float().mean()
        train_l.append(train_loss.item()) 
        train_acc.append(train_accuracy.item())

        X_test = data_test.data.view(-1, 28*28).float().to(device) 
        Y_test = data_test.targets.to(device) 
        prediction = net(X_test)
        test_loss = criterion(prediction, Y_test) 
        predicted_classes = torch.argmax(prediction, 1) 
        correct_count = (predicted_classes == Y_test) 
        test_accuracy = correct_count.float().mean()

        test_l.append(test_loss.item()) 
        test_acc.append(test_accuracy.item())

    print("train acc: {} loss: {}".format(train_accuracy, train_loss))
    print("test acc: {} loss: {}".format(test_accuracy, test_loss))

print('Finished Training')

train acc: 0.12489999830722809 loss: 99.99818420410156
test acc: 0.12626667320728302 loss: 100.08952331542969
