<a href="https://colab.research.google.com/github/lr-crypto/MNIST-on-FPGA/blob/main/py_AI/colab/my_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# My Model

## MNIST References

* [Colab Pytorch example](https://github.com/rafaela00castro/pytorch-hands-on/blob/master/mnist_cnn.ipynb)
* [Interactive MNIST Demo](https://adamharley.com/nn_vis/cnn/2d.html)

## Setup: Imports and Device

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plt


In [2]:
# Prefer the GPU when CUDA is available; otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device:{device}")


Using device:cpu


## Design Model

In [4]:
class CNN(torch.nn.Module):
    """Two-stage convolutional classifier that returns log-probabilities.

    The flatten step in ``forward`` expects every sample to leave the second
    convolution stage as 20 channels of 4x4 values. A 1x28x28 input produces
    exactly that: 28 -> 24 -> 10 through stage one, then 10 -> 8 -> 4 through
    stage two.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 10 channels with a 5x5 kernel, then 5x5 max pooling
        # with stride 2 (a 28x28 input becomes 24x24, then 10x10).
        self.conv_layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=5, stride=2),
        )
        # Stage 2: 10 -> 20 channels with a 3x3 kernel, then 2x2 max pooling
        # with stride 2 (10x10 becomes 8x8, then 4x4).
        self.conv_layer2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classifier head: 320 flattened features -> 50 hidden units -> 10 classes.
        self.fc1 = nn.Linear(in_features=20 * 4 * 4, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        features = self.conv_layer2(self.conv_layer1(x))
        # Collapse channels and spatial positions into one vector per sample.
        flat = features.view(-1, 20 * 4 * 4)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)



In [9]:
# Build the network, place its parameters on the selected device, and list its layers.
net = CNN()
net.to(device)  # nn.Module.to moves the module in place and returns the same object
print(net)

CNN(
  (conv_layer1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=5, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_layer2): Sequential(
    (0): Conv2d(10, 20, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


## Train Model

In [7]:
def get_data_loader(is_train, batch_size=16):
    """Build a DataLoader over the MNIST training or test split.

    Images are converted to tensors and normalized before batching.

    Args:
        is_train: True for the training split, False for the test split.
        batch_size: Number of samples per batch. Defaults to 16.

    Returns:
        A DataLoader yielding (images, labels) batches. Only the training
        split is shuffled; evaluation does not depend on sample order, so
        the test split is read sequentially.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # Mean and standard deviation of the MNIST dataset.
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    # An empty root stores/looks up the data relative to the current working
    # directory; download=True fetches it when it is not already present.
    dataset = MNIST("", is_train, transform=transform, download=True)
    return DataLoader(dataset, batch_size=batch_size, shuffle=bool(is_train))

def evaluate(test_data, net):
    """Compute the classification accuracy of ``net`` over ``test_data``.

    Args:
        test_data: Iterable of (inputs, labels) batches.
        net: Model whose output has one score per class along dim 1.

    Returns:
        Fraction of correctly classified samples as a float, or 0.0 when
        ``test_data`` yields no samples.

    Note:
        The model is switched to eval mode and left in that mode; callers
        that continue training must call ``net.train()`` again.
    """
    # Take the device from the model itself so this function does not depend
    # on a notebook-level global. A model without parameters leaves the
    # batches where they are.
    first_param = next(net.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    n_correct = 0
    n_total = 0
    net.eval()
    with torch.no_grad():
        for x, y in test_data:
            if target_device is not None:
                x, y = x.to(target_device), y.to(target_device)
            outputs = net(x)  # the network takes image batches directly; no flattening needed
            preds = torch.argmax(outputs, dim=1)
            n_correct += (preds == y).sum().item()
            n_total += y.size(0)

    # Avoid ZeroDivisionError when the loader is empty.
    if n_total == 0:
        return 0.0
    return n_correct / n_total


In [11]:
# Load both MNIST splits.
train_data = get_data_loader(is_train=True)
test_data = get_data_loader(is_train=False)

# Report accuracy before this training run, then set up the Adam optimizer.
print("Initial accuracy:", evaluate(test_data, net))
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

# Three passes over the training split, scoring on the test split after each.
for epoch in range(3):
    net.train()  # evaluate() leaves the network in eval mode; switch dropout back on
    for images, labels in train_data:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        log_probs = net(images)  # image batches go in as-is; the model flattens internally
        # The network returns log-probabilities, so NLL is the matching loss.
        loss = F.nll_loss(log_probs, labels)
        loss.backward()
        optimizer.step()

    acc = evaluate(test_data, net)
    print(f"Epoch {epoch+1}, Accuracy: {acc:.4f}")


Initial accuracy: 0.9883
Epoch 1, Accuracy: 0.9871
Epoch 2, Accuracy: 0.9882
Epoch 3, Accuracy: 0.9896


In [12]:
# Save only the learned weights (state_dict). The path is held in one variable
# so the printed message always names the file that was actually written.
model_path = 'mnist_model_cnn.pth'
torch.save(net.state_dict(), model_path)
print(f"Model saved as {model_path}")

Model saved as mnist_model.pth


## Export Model with OpenVINO

In [13]:
pip install openvino==2024.6.0

Collecting openvino==2024.6.0
  Downloading openvino-2024.6.0-17404-cp311-cp311-manylinux2014_x86_64.whl.metadata (8.3 kB)
Collecting openvino-telemetry>=2023.2.1 (from openvino==2024.6.0)
  Downloading openvino_telemetry-2025.2.0-py3-none-any.whl.metadata (2.3 kB)
Downloading openvino-2024.6.0-17404-cp311-cp311-manylinux2014_x86_64.whl (44.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.7/44.7 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading openvino_telemetry-2025.2.0-py3-none-any.whl (25 kB)
Installing collected packages: openvino-telemetry, openvino
Successfully installed openvino-2024.6.0 openvino-telemetry-2025.2.0


In [14]:
import openvino as ov

In [16]:
# Convert in inference mode so dropout is inactive in the exported graph,
# whatever mode earlier cells left the network in. The input shape is fixed
# to a single 1x28x28 image (batch size 1).
net.eval()
ov_model = ov.convert_model(net, input=[1, 1, 28, 28])

In [17]:
# Serialize the converted model to mnist_cnn.xml.
ov.save_model(ov_model, "mnist_cnn.xml")

## DLA Compile Commands

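Example commands for the FPGA AI DLA compiler and benchmark tool. They reference `./mnist_3x.xml`; to compile the model exported above, pass `mnist_cnn.xml` as `--network-file` instead.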

```

dla_compiler --march $COREDLA_ROOT/example_architectures/AGX7_Performance.arch --network-file ./mnist_3x.xml --foutput-format=open_vino_hetero --o $COREDLA_WORK/demo/mnist_perf.bin --batch-size=1 --fanalyze-performance

dla_compiler --march $COREDLA_ROOT/example_architectures/AGX7_Performance.arch --network-file ./mnist_3x.xml --foutput-format=open_vino_hetero --o $COREDLA_WORK/demo/mnist_stream.bin --batch-size=1 --fanalyze-performance --ffolding-option=0


uio-devices restart
export compiled_model=~/resnet-50-tf/mnist_perf.bin
export imgdir=~/resnet-50-tf/sample_images
export archfile=~/resnet-50-tf/AGX7_Performance.arch
cd ~/app
export COREDLA_ROOT=/home/root/app
./dla_benchmark -b=1 -cm $compiled_model -d=HETERO:FPGA,CPU -i $imgdir -niter=5 -plugins_xml_file ./plugins.xml -arch_file $archfile -api=async -perf_est  -nireq=4 -bgr


dla_compiler --fanalyze-area --march $COREDLA_ROOT/example_architectures/AGX7_Performance.arch

dla_compiler --gen-arch --mmax-resources=427200,2713,1518 --gen-min-sb=2048 --network-file ./mnist_3x.xml --march=$COREDLA_ROOT/example_architectures/AGX7_Performance.arch --mmax-resources-alm-util=75 --fassumed-fmax-core=300 --network-weightings=1 2

dla_compiler --fanalyze-area --march ./generated_arch.arch

dla_compiler --gen-arch --mmax-resources=427200,2713,1518 --gen-min-sb=2048 --network-file ./mnist_3x.xml --march=$COREDLA_ROOT/example_architectures/AGX7_Performance.arch --mmax-resources-alm-util=75 --fassumed-fmax-core=300 --network-weightings=1 --mtarget-fps=100.0

dla_compiler --fanalyze-area --march ./generated_arch.arch
```