In [33]:
import os

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration - use the GPU whenever CUDA is available, otherwise
# fall back to the CPU. Try training on the CPU as well and observe what happens.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Seed torch's global random number generator.
# Step - your seed is the last 6 digits of your A#, excluding any leading zeros.
torch.manual_seed(278024)


<torch._C.Generator at 0x7f26683ae090>

In [34]:
# Check your current working directory.
# Uses the standard library rather than the `!pwd` shell escape: no subprocess
# is spawned, and the cell also works on platforms without a `pwd` command.
print(os.getcwd())

3602.64s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


/home/sean/sourcecode/school/deep-learning/hw3


In [35]:
# Step - Set the correct batch size.
# The same batch size is used by both the training and the test loader.
batch_size = 20

# MNIST is stored under "./data" (the folder is created in the current
# directory). Only the training split asks for the download.
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, transform=transforms.ToTensor()
)

# Data loaders for batching: the training data is shuffled, the test data is not.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)



In [36]:
# Hyper-parameters - specified a priori.

# Network architecture
input_size = 28 * 28  # 784 features: each image is reshaped to a 28*28 vector before the model
num_classes = 10      # output layer has 10 nodes because we want to predict 10 classes

# Training parameters
# Step - define the number of epochs and observe the changes.
num_epochs = 10


# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Two linear layers with a ReLU activation between them.

    Args:
        input_size: number of features in each input vector.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Step - Define the network architecture. Use ReLU activation.
        # Linear unit from the input features to the hidden layer.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        # ReLU activation unit applied to the hidden layer.
        self.relu = nn.ReLU()
        # Linear unit from the hidden layer to the outputs
        # (number of classes for MNIST).
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Forward propagate ``x`` through fc1, ReLU and fc2, returning fc2's output."""
        return self.fc2(self.relu(self.fc1(x)))



# Loss function (the optimizer is built later, inside the training loop,
# because it needs the model's parameters).
# Step - Define a proper loss function.
criterion = nn.CrossEntropyLoss()



In [37]:
import itertools



# Train the model
# Step - choose different learning rates and store them in a list and observe the changes
learning_rates = [.1]
# Vary this number and observe the changes, define a list of possible values
hidden_sizes = [100, 1000]

# Number of mini-batches per epoch. It does not depend on the configuration,
# so it is computed once instead of once per sweep iteration.
total_step = len(train_loader)

# Sweep every (hidden_size, learning_rate) combination: train a fresh model
# for each one and report its accuracy on the test set.
for hidden_size, learning_rate in itertools.product(hidden_sizes, learning_rates):
    # Define the model object and its optimizer.
    model = NeuralNet(input_size, hidden_size, num_classes).to(device)
    # Step - SGD over all model parameters with the current learning rate.
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Select training mode explicitly. No layer in this network behaves
    # differently between modes, but the loop stays correct if dropout or
    # batch-norm layers are added later.
    model.train()
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Reshape each batch to rows of 28*28 values and move the tensors
            # to the configured device.
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)

            # Forward pass: network outputs with the current weights, then the
            # loss from comparing those outputs with the true labels.
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize: clear the gradients left over from the
            # previous step, backpropagate the loss to get a gradient for
            # each weight, then update the weights using the learning rate.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print progress every 100 steps.
            if (i+1) % 100 == 0:
                print(
                    f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{total_step}], "
                    f"Loss: {loss.item():.4f}"
                )

    # Test the model once training has finished.
    model.eval()
    with torch.no_grad():  # no gradients are needed in the test phase (saves memory)
        correct = 0
        total = 0
        for images, labels in test_loader:
            # Same reshaping and device placement as in training.
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)

            # The predicted class is the index of the largest output score.
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Report the number of images actually evaluated rather than a fixed
        # count, and avoid dividing by zero if the test loader is empty.
        accuracy = 100 * correct / total if total else float("nan")
        print(f"Accuracy of the network on the {total} test images: {accuracy} %, with learning rate: {learning_rate}, and {hidden_size} hidden neurons")

Epoch [1/10], Step [100/3000], Loss: 0.5199
Epoch [1/10], Step [200/3000], Loss: 0.3954
Epoch [1/10], Step [300/3000], Loss: 0.5237
Epoch [1/10], Step [400/3000], Loss: 0.2990
Epoch [1/10], Step [500/3000], Loss: 0.4709
Epoch [1/10], Step [600/3000], Loss: 0.2801
Epoch [1/10], Step [700/3000], Loss: 0.1271
Epoch [1/10], Step [800/3000], Loss: 0.0724
Epoch [1/10], Step [900/3000], Loss: 0.3436
Epoch [1/10], Step [1000/3000], Loss: 0.1587
Epoch [1/10], Step [1100/3000], Loss: 0.2273
Epoch [1/10], Step [1200/3000], Loss: 0.1285
Epoch [1/10], Step [1300/3000], Loss: 0.5680
Epoch [1/10], Step [1400/3000], Loss: 0.4211
Epoch [1/10], Step [1500/3000], Loss: 0.1628
Epoch [1/10], Step [1600/3000], Loss: 0.2717
Epoch [1/10], Step [1700/3000], Loss: 0.0979
Epoch [1/10], Step [1800/3000], Loss: 0.4230
Epoch [1/10], Step [1900/3000], Loss: 0.0553
Epoch [1/10], Step [2000/3000], Loss: 0.1103
Epoch [1/10], Step [2100/3000], Loss: 0.0917
Epoch [1/10], Step [2200/3000], Loss: 0.2444
Epoch [1/10], Step 

In [38]:
# Save the model checkpoint for future use.
# `model` is the network bound by the last iteration of the training loop,
# so only that configuration's weights are written.
torch.save(
    model.state_dict(),
    'model.ckpt',
)