In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [18]:
# ---------------------------------------------------------------------------
# Configuration cell: hyperparameters, compute device, datasets and loaders.
# Every name assigned here is read by the model/training cell further down.
# ---------------------------------------------------------------------------

# Hyperparameters
input_size = 784        # flattened image length (the training loop reshapes to 28*28)
hidden_size = 10        # hidden width; only consumed by the (currently disabled) Net model
num_classes = 10        # number of output logits
num_epochs = 100        # full passes over the training set
batch_size = 128        # samples per mini-batch for both loaders
learning_rate = 0.005   # step size handed to the optimizer

# Device configuration: use CUDA when PyTorch reports it as available, else CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# MNIST dataset.
# The training split is created first with download=True so the files end up
# under ./data; the test split passes no download flag and therefore expects
# the files to be there already.
train_dataset = dsets.MNIST(
    root='./data', train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = dsets.MNIST(
    root='./data', train=False, transform=transforms.ToTensor()
)

# Data loaders: training batches are reshuffled, test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

# Neural network model
class Net(torch.nn.Module):
    """Fully connected classifier with one hidden layer.

    The forward pass is ``fc1 -> ReLU -> fc2`` and returns the raw output of
    ``fc2`` (no softmax is applied here).

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units produced by ``fc1``.
        num_classes: Number of values produced by ``fc2`` per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are created in this order on purpose: parameter
        # initialisation draws from the global RNG, so the order matters
        # for seeded runs.
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return ``fc2(relu(fc1(x)))`` for the input tensor ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
    
class LinearNet(torch.nn.Module):
    """Single affine layer mapping inputs straight to class scores.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of values produced per input row.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # The only learnable layer; kept under the name ``fc1`` so that
        # parameter names stay ``fc1.weight`` / ``fc1.bias``.
        self.fc1 = torch.nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Return the raw output of ``fc1`` for the input tensor ``x``."""
        return self.fc1(x)

# Model selection: the single-layer LinearNet is the active model.
# Uncomment the next line (and comment out the LinearNet line) to train the
# one-hidden-layer Net instead.
# model = Net(input_size, hidden_size, num_classes).to(device)

model = LinearNet(input_size, num_classes).to(device)

# Loss and optimizer.
# Both models return the raw output of their last Linear layer, which is what
# CrossEntropyLoss consumes together with integer class labels.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one optimisation pass over train_loader per epoch, followed
# by an accuracy measurement over the whole of test_loader.
total_step = len(train_loader)
for epoch in range(num_epochs):
    # Evaluation below switches the module to eval mode, so switch back to
    # training mode at the start of every epoch. This is a no-op for the two
    # models defined in this notebook but matters for any model containing
    # dropout or batch-norm layers.
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each image to one row and move the batch to the configured
        # device. The batch dimension is kept explicit so the number of rows
        # always matches the number of labels; a mismatch between the
        # flattened length and the model's input size then surfaces as a
        # shape error in the first Linear layer.
        images = images.reshape(images.size(0), -1).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of the current mini-batch every 100 steps.
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Test the model after each epoch. Note that the module is left in eval
    # mode once the last epoch has finished.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.reshape(images.size(0), -1).to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score in each row.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # The image count in the message is the number actually evaluated
        # rather than a hard-coded figure.
        print('Accuracy of the network on the {} test images after {} epoch is : {} %'.format(total, epoch+1, 100 * correct / total))

# Test the model
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         images = images.reshape(-1, 28*28).to(device)
#         labels = labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

#     print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


Epoch [1/100], Step [100/469], Loss: 0.3725
Epoch [1/100], Step [200/469], Loss: 0.5069
Epoch [1/100], Step [300/469], Loss: 0.3222
Epoch [1/100], Step [400/469], Loss: 0.3588
Accuracy of the network on the 10000 test images after 1 epoch is : 92.02 %
Epoch [2/100], Step [100/469], Loss: 0.4136
Epoch [2/100], Step [200/469], Loss: 0.2625
Epoch [2/100], Step [300/469], Loss: 0.2484
Epoch [2/100], Step [400/469], Loss: 0.2563
Accuracy of the network on the 10000 test images after 2 epoch is : 92.29 %
Epoch [3/100], Step [100/469], Loss: 0.3060
Epoch [3/100], Step [200/469], Loss: 0.2697
Epoch [3/100], Step [300/469], Loss: 0.2151
Epoch [3/100], Step [400/469], Loss: 0.1575
Accuracy of the network on the 10000 test images after 3 epoch is : 92.61 %
Epoch [4/100], Step [100/469], Loss: 0.3414
Epoch [4/100], Step [200/469], Loss: 0.0838
Epoch [4/100], Step [300/469], Loss: 0.3298
Epoch [4/100], Step [400/469], Loss: 0.1974
Accuracy of the network on the 10000 test images after 4 epoch is : 

In [17]:
# Inspect the model: for every named parameter print its name, its shape and
# its current values.
for name, param in model.named_parameters():
    print(name, param.shape, param.data)

fc1.weight torch.Size([10, 784]) tensor([[ 0.0266,  0.0298,  0.0208,  ...,  0.0197, -0.0107,  0.0326],
        [-0.0321, -0.0165, -0.0162,  ...,  0.0280, -0.0326, -0.0200],
        [-0.0348, -0.0004,  0.0238,  ..., -0.0194, -0.0023,  0.0220],
        ...,
        [-0.0145, -0.0268, -0.0184,  ..., -0.0173, -0.0189, -0.0331],
        [ 0.0016, -0.0006,  0.0010,  ..., -0.0209,  0.0007,  0.0005],
        [-0.0223,  0.0186,  0.0107,  ...,  0.0254, -0.0232,  0.0088]],
       device='cuda:0')
fc1.bias torch.Size([10]) tensor([-1.4811,  1.7778,  0.3291, -0.8535,  0.4202,  2.4721, -0.8108,  1.4979,
        -2.5406, -0.5720], device='cuda:0')
