In [1]:
#import the necessary packages
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time
import psutil


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|███████████████████████████| 9912422/9912422 [00:00<00:00, 11760833.64it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|█████████████████████████████████| 28881/28881 [00:00<00:00, 795819.69it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|████████████████████████████| 1648877/1648877 [00:00<00:00, 7130622.98it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|███████████████████████████████████| 4542/4542 [00:00<00:00, 445772.39it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [2]:
#define transformations to prepare dataset for training neural network
#ToTensor - converts PIL Image/ Numpy Arrays into PyTorch tensor
#Normalize - normalizes tensor images with mean and sd
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

#load datasets as train and test
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

#create dataloaders
#load in 64 samples at a time
#shuffled at every epoch to prevent learning unintended patterns/ overfitting
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

#define a simple neural network
class SimpleNN(nn.Module):
    #initializes layers of the neural network
    def __init__(self):
        #constructor of parent class
        super(SimpleNN, self).__init__()
        #defines 3 linear (fully connected) layers
        self.fc1 = nn.Linear(28 * 28, 128) #matches dimension size of input images, with 128 features in the layer
        self.fc2 = nn.Linear(128, 64) 
        self.fc3 = nn.Linear(64, 10) #10 matches number of classification classes

    #defines forward pass of the neural network
    def forward(self, x):
        #flattens input tensor
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        #should return classification class, a digit 0-9
        return x

In [3]:
#function to get system metrics (cpu usage and memory)
def get_system_metrics():
    cpu_usage = psutil.cpu_percent()
    memory_info = psutil.virtual_memory()
    return cpu_usage, memory_info.percent

In [4]:
#function for training loop
#set number of epoch to 5
def train_model(model, train_loader, criterion, optimizer, device, num_epochs=5):
    model.train() #sets model to training mode
    total_training_time = 0  #initialize total training time
    model.to(device)  # move model to the specified device
    #loops over each 5 epoch
    for epoch in range(num_epochs):
        #starts timer for time parameters
        start_time = time.time()
        epoch_loss = 0
        #inner loop iterates over the batches of data from the training dataset
        for batch_idx, (data, target) in enumerate(train_loader): 
            data, target = data.to(device), target.to(device)  # move data and target to the specified device
            optimizer.zero_grad() #clears the gradients of optimized tensors
            output = model(data) #passes training data through model
            loss = criterion(output, target) #calculates loss (how well the model's predictions match the target values)
            loss.backward()
            optimizer.step() #updates model params
            epoch_loss += loss.item()

        end_time = time.time()
        total_epoch_time = end_time - start_time #calculates total time taken for epoch
        total_training_time += total_epoch_time  # Accumulate total training time

        cpu_usage, memory_usage = get_system_metrics()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_loader):.4f}, Time: {total_epoch_time:.2f}s, CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage}%')

    #print total training time after all epochs
    print(f'Total Training Time: {total_training_time:.2f}s')

In [5]:
#inference loop
#create function for evaluation, with model and test data as parameters
def evaluate_model(model, test_loader, device):
    model.eval() #sets model to evaluation mode
    model.to(device)  # move model to the specified device
    #initialize metircs
    total_correct = 0
    total_samples = 0
    total_inference_time = 0

    with torch.no_grad(): #disables gradient calculation (reduces memory usage and speeds up)
        for batch_idx, (data, target) in enumerate(test_loader): #loops through batches from the test dataset
            data, target = data.to(device), target.to(device)  # move data and target to the specified device
            start_time = time.time()
            output = model(data)
            end_time = time.time()
            
            inference_time = end_time - start_time #calculates inference time for current batch
            total_inference_time += inference_time #adds up each inference time
            
            _, predicted = torch.max(output.data, 1) #finds the class w highest predicted score for each sample in the batch
            total_correct += (predicted == target).sum().item() #compares predicted with actual label, counts the total num of correct predictions
            total_samples += target.size(0) #gets the number of samples in the current batch and adds to count of total samples processed

    accuracy = total_correct / total_samples 
    avg_inference_time = total_inference_time / len(test_loader)
    throughput = total_samples / total_inference_time #computes the num of samples processed per second
    
    cpu_usage, memory_usage = get_system_metrics() #uses function from above
    
    print(f'Accuracy: {accuracy:.4f}, Average Inference Time: {avg_inference_time:.4f}s, Throughput: {throughput:.2f} samples/s, CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage}%')


In [6]:
#function to run the entire workflow on a specified device
def run_experiment(device):
    #initialize the model
    model = SimpleNN()
    #define loss function and optimizer
    criterion = nn.CrossEntropyLoss() #CEL measures how well the model's predictions match the actual labels, best for classification
    optimizer = optim.SGD(model.parameters(), lr=0.01) #SGD updates the model params 

    #train the model by running training loop
    train_model(model, train_loader, criterion, optimizer, device, num_epochs=5)
    #evaluate the model using function 
    evaluate_model(model, test_loader, device)

In [7]:
#run the experiment on CPU
print("Running on CPU:")
run_experiment('cpu')


Running on CPU:
Epoch [1/5], Loss: 1.0518, Time: 2.18s, CPU Usage: 15.8%, Memory Usage: 84.3%
Epoch [2/5], Loss: 0.3828, Time: 2.13s, CPU Usage: 85.0%, Memory Usage: 84.3%
Epoch [3/5], Loss: 0.3252, Time: 2.09s, CPU Usage: 86.9%, Memory Usage: 84.3%
Epoch [4/5], Loss: 0.2941, Time: 2.11s, CPU Usage: 82.7%, Memory Usage: 84.3%
Epoch [5/5], Loss: 0.2694, Time: 2.07s, CPU Usage: 81.2%, Memory Usage: 84.3%
Total Training Time: 10.58s
Accuracy: 0.9281, Average Inference Time: 0.0001s, Throughput: 956337.27 samples/s, CPU Usage: 82.4%, Memory Usage: 84.4%


In [8]:
#run the experiment on GPU (if available)
if torch.cuda.is_available():
    print("Running on GPU:")
    run_experiment('cuda')
else:
    print("CUDA is not available. Skipping GPU run.")


CUDA is not available. Skipping GPU run.
