In [6]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# torch.cuda.get_device_name() raises when CUDA is unavailable, so only query
# the GPU name on the CUDA path. The CUDA output is unchanged.
if device.type == "cuda":
    print(f"Using {device}:", torch.cuda.get_device_name())
else:
    print(f"Using {device}")

Using cuda: GeForce RTX 3050 Laptop GPU


### Hyperparameters

In [3]:
# --- Model dimensions ---
input_size = 28 * 28   # 28x28 images flattened -> 784 features
num_classes = 10       # one output per digit
hidden_size = 128

# --- Training schedule ---
num_epochs, batch_size = 5, 100
learning_rate = 1e-3


**input_size:**  
Each MNIST image is 28×28 pixels, which we flatten to a 784-element vector  

**hidden_size:**  
Number of neurons in each of the network's two hidden layers  

**num_classes:**  
We have 10 possible outputs (digits 0-9)  

**num_epochs:**  
Number of complete passes through the training dataset  

**batch_size:**  
Number of images processed together in one training step (one optimizer update)  

**learning_rate:**  
Controls how much we adjust our model in response to errors  

### Data Preparation

In [4]:
# Preprocessing applied to every image, in order:
#   1. ToTensor   - convert the image to a tensor
#   2. Normalize  - shift/scale the single channel by the given mean and std
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST training-set mean and
# standard deviation - confirm against the dataset.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [7]:
# MNIST training split, stored under ./data, with `transform` applied to each image.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

100%|██████████| 9.91M/9.91M [00:38<00:00, 258kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 120kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 965kB/s] 
100%|██████████| 4.54k/4.54k [00:00<00:00, 2.28MB/s]


In [10]:
from torch.utils.data import DataLoader

# Serve the training set in mini-batches of `batch_size` samples, with shuffling enabled.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

#### Neural network architecture

In [12]:
class NeuralNet(nn.Module):
    """Fully connected classifier: two ReLU-activated hidden layers and a linear output layer.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output values (one per class).
    """

    def __init__(self,input_size,hidden_size,num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size,hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size,hidden_size)
        self.fc3 = nn.Linear(hidden_size,num_classes)

    def forward(self,x):
        """Flatten ``x`` to rows of ``input_size`` features and return the output of ``fc3``.

        No softmax is applied; the raw outputs of the last linear layer are returned.
        """
        # Flatten with the width this instance was built with (fc1's input
        # size), not a module-level global, so the model works for any
        # input_size passed to the constructor.
        out = x.reshape(-1, self.fc1.in_features)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out
        

#### Model instance

In [13]:
# Build the network from the notebook's hyperparameters and move it to the selected device.
model = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
).to(device)

In [None]:
# Loss function: cross-entropy between the model outputs and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters, with step size `learning_rate`.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [16]:
import time
import matplotlib.pyplot as plt
def _plot_training_loss(train_losses):
    """Plot one average-loss value per epoch, with epochs numbered from 1."""
    plt.figure(figsize=(10,5))
    plt.plot(range(1, len(train_losses) + 1), train_losses, 'b-',label='Training Loss')
    plt.title('Training Loss vs. Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()


def train_model():
    """Train the notebook-level ``model`` for ``num_epochs`` passes over ``train_loader``.

    Uses the notebook-level ``criterion``, ``optimizer`` and ``device``. Prints the
    batch loss every 100 steps, the average loss after each epoch and the total
    training time, then plots the per-epoch average loss.

    Returns:
        list[float]: Average training loss of each epoch, in order.
    """
    total_step = len(train_loader)
    train_losses = []

    # Put the model in training mode explicitly, so the loop stays correct if
    # the model gains mode-dependent layers such as dropout or batch norm.
    model.train()

    # perf_counter() is monotonic; time.time() can jump if the system clock is
    # adjusted mid-run, which would corrupt the reported duration.
    start_time = time.perf_counter()
    for epoch in range(num_epochs):
        running_loss = 0.0

        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss

            if (i+1) % 100 == 0:
                print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {batch_loss:.4f}')

        avg_loss = running_loss / total_step
        train_losses.append(avg_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}')
    elapsed = time.perf_counter() - start_time
    print(f'Training time: {elapsed:.2f} seconds')

    _plot_training_loss(train_losses)

    return train_losses
            
    

In [17]:
def save_model(path='model.pth', net=None):
    """Save a model's ``state_dict`` to ``path`` with ``torch.save``.

    Args:
        path: Destination file. Defaults to ``'model.pth'``, the location this
            function always wrote to before the parameter existed.
        net: Module whose ``state_dict()`` is saved. When ``None`` (the
            default), the notebook-level ``model`` is used.
    """
    # Resolve the global lazily so an explicit `net` works even when no
    # notebook-level `model` exists.
    target = model if net is None else net
    torch.save(target.state_dict(), path)
    print(f'Model saved to {path}')

In [None]:
# Run the training loop: prints progress, plots the loss curve, and returns the
# list of per-epoch average losses.
train_model()

Epoch [1/5], Step [100/600], Loss: 0.2977
Epoch [1/5], Step [200/600], Loss: 0.1595
Epoch [1/5], Step [300/600], Loss: 0.3387
Epoch [1/5], Step [400/600], Loss: 0.1361
Epoch [1/5], Step [500/600], Loss: 0.2144
Epoch [1/5], Step [600/600], Loss: 0.1226
Epoch [1/5], Average Loss: 0.2897
Epoch [2/5], Step [100/600], Loss: 0.0689
