In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
Input_layer_size = 784 # fixed size of the input: 28 x 28 = 784 values per flattened image
Out_layer_size = 10 # fixed size of the output: one score per digit class (10 digits)

In [3]:
class Net(nn.Module):
    """Fully connected classifier: input -> one ReLU hidden layer -> log-softmax output.

    The layer attributes are deliberately named ``fc1`` and ``fc2``: the
    parameter names derived from them (``fc1.weight``, ``fc1.bias``, ...) are
    used elsewhere in this notebook to name the exported ``.bin`` files, so
    renaming them changes the exported file names.

    Args:
        hidden_size: Number of neurons in the hidden layer (default 10).
        input_size: Number of features per flattened sample. ``None`` (default)
            uses the notebook-level ``Input_layer_size``.
        output_size: Number of output classes. ``None`` (default) uses the
            notebook-level ``Out_layer_size``.
    """

    def __init__(self, hidden_size=10, input_size=None, output_size=None):
        super(Net, self).__init__()
        # Defaults are resolved at call time (not at definition time) so that
        # `Net()` keeps following the notebook constants even if they are edited
        # and the config cell is re-run.
        if input_size is None:
            input_size = Input_layer_size
        if output_size is None:
            output_size = Out_layer_size
        # To change the hidden layer width, pass a different `hidden_size`;
        # both layers below pick it up automatically.
        self.fc1 = nn.Linear(input_size, hidden_size)   # Input layer to Hidden layer
        # If you want another layer, add one between these two
        self.fc2 = nn.Linear(hidden_size, output_size)  # Hidden layer to Output layer

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, output_size)``.

        ``x`` may have any shape whose elements regroup into rows of
        ``fc1.in_features`` values (e.g. ``(batch, 1, 28, 28)`` for the default
        784-feature input).
        """
        # If you add more layers, you need to change the following logic.
        # Flatten using the size fc1 was actually built with (no hard-coded 784);
        # reshape, unlike view, also accepts non-contiguous tensors.
        x = x.reshape(-1, self.fc1.in_features)
        # relu is the activation for hidden layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # log_softmax is applied only to the output layer; it pairs with F.nll_loss
        return F.log_softmax(x, dim=1)

In [4]:
# Data preprocessing: convert each image to a tensor, then normalize it.
# Do not modify these steps -- the same preprocessing is applied during inference.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
]
transform = transforms.Compose(preprocessing_steps)

In [5]:
# Dataset loading: both splits live under the local 'data' directory and share
# the same preprocessing pipeline.
train_dataset = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='data', train=False, transform=transform)

# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=1000,
    shuffle=False,
)

In [6]:
# Training functions
def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    """Run one training epoch and return the average per-sample training loss.

    Args:
        model: Module whose forward pass returns per-class log-probabilities
            (the loss used here is ``F.nll_loss``).
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches; its ``dataset``
            attribute is read for the progress message.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
        log_interval: Print progress every ``log_interval`` batches (default
            100, the previous fixed behavior). ``0`` or ``None`` disables the
            progress output.

    Returns:
        float: Loss averaged over all samples seen in this epoch, or NaN when
        the loader yielded no batches.
    """
    model.train()
    logging_enabled = bool(log_interval) and log_interval > 0
    loss_sum = 0.0
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # nll_loss returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average. The running sum
        # stays a tensor so no host/device sync is forced on every batch.
        batch_size = len(data)
        loss_sum = loss_sum + loss.detach() * batch_size
        samples_seen += batch_size

        if logging_enabled and batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    if samples_seen == 0:
        return float('nan')
    return float(loss_sum) / samples_seen

In [7]:
def validate(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader``, print a summary and return the metrics.

    Args:
        model: Module whose forward pass returns per-class log-probabilities
            (the loss used here is ``F.nll_loss``).
        device: Device every batch is moved to before the forward pass.
        test_loader: Iterable of ``(data, target)`` batches; the length of its
            ``dataset`` attribute is used as the sample count.

    Returns:
        tuple[float, float]: ``(average_loss, accuracy)`` where ``average_loss``
        is the per-sample loss and ``accuracy`` is a percentage in [0, 100].
        Both are NaN when the dataset is empty.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():  # evaluation only: no gradients needed
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum (not mean) per batch so the final division yields a true per-sample average
            total_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    if num_samples:
        average_loss = total_loss / num_samples
        accuracy = 100. * correct / num_samples
    else:
        # Nothing was evaluated: report NaN rather than dividing by zero.
        average_loss = accuracy = float('nan')

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        average_loss, correct, num_samples, accuracy))
    return average_loss, accuracy

In [8]:
# Setup device, model, and optimizer
# Seed PyTorch's random number generator before the model is created so that
# weight initialization and DataLoader shuffling repeat on a fresh
# "Restart Kernel -> Run All". (Some GPU kernels may still be non-deterministic.)
SEED = 0
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Training loop
NUM_EPOCHS = 9  # Adjust the number of epochs as needed
for epoch in range(1, NUM_EPOCHS + 1):  # epochs are numbered from 1 in the logs
    train(model, device, train_loader, optimizer, epoch)
    validate(model, device, test_loader)


Test set: Average loss: 0.3300, Accuracy: 9087/10000 (91%)


Test set: Average loss: 0.2906, Accuracy: 9160/10000 (92%)


Test set: Average loss: 0.2813, Accuracy: 9186/10000 (92%)


Test set: Average loss: 0.2754, Accuracy: 9203/10000 (92%)


Test set: Average loss: 0.2784, Accuracy: 9183/10000 (92%)


Test set: Average loss: 0.2667, Accuracy: 9242/10000 (92%)


Test set: Average loss: 0.2620, Accuracy: 9229/10000 (92%)


Test set: Average loss: 0.2626, Accuracy: 9236/10000 (92%)


Test set: Average loss: 0.2614, Accuracy: 9244/10000 (92%)



In [9]:
import numpy as np

# Function to save model weights and biases as binary files.
# You can save these weights and biases under any names, but make sure the names
# match the ones loaded by the main.cpp C code we use on DE1-SoC.
def save_weights_and_biases(model, output_dir=None):
    """Write every named parameter of ``model`` to its own raw binary file.

    Each file is named after the parameter with dots replaced by underscores
    (``fc1.weight`` -> ``fc1_weight.bin``) and contains the flattened values in
    row-major order as 32-bit floats in the machine's native byte order, with
    no header.

    Args:
        model: Module whose ``named_parameters()`` are exported.
        output_dir: Directory to write into; created if missing. ``None``
            (default) writes into the current working directory, as before.
    """
    import os  # local import: only this function needs it

    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)

    for name, parameter in model.named_parameters():
        # detach() drops autograd tracking; the cast pins the on-disk element
        # size to 4 bytes even if the model was converted to another dtype.
        # NOTE(review): presumably the C loader reads 32-bit floats -- confirm
        # against main.cpp.
        values = np.ascontiguousarray(parameter.detach().cpu().numpy(), dtype=np.float32)
        file_name = f"{name.replace('.', '_')}.bin"
        if output_dir is not None:
            file_name = os.path.join(output_dir, file_name)
        values.ravel().tofile(file_name)
        print(f"Saved {file_name}")

In [10]:
# Export the trained parameters: one .bin file per named parameter is written
# relative to the current working directory.
save_weights_and_biases(model)

Saved fc1_weight.bin
Saved fc1_bias.bin
Saved fc2_weight.bin
Saved fc2_bias.bin
