In [1]:
# Download the crawford/emnist dataset archive with the Kaggle CLI.
# Per the captured cell output it is saved as emnist.zip in /kaggle/working.
!kaggle datasets download -d crawford/emnist

Dataset URL: https://www.kaggle.com/datasets/crawford/emnist
License(s): CC0-1.0
Downloading emnist.zip to /kaggle/working
 99%|██████████████████████████████████████▋| 1.23G/1.24G [00:07<00:00, 181MB/s]
100%|███████████████████████████████████████| 1.24G/1.24G [00:07<00:00, 178MB/s]


In [2]:
!pip install torch torchvision openvino

Collecting openvino
  Downloading openvino-2024.4.0-16579-cp310-cp310-manylinux2014_x86_64.whl.metadata (8.3 kB)
Collecting openvino-telemetry>=2023.2.1 (from openvino)
  Downloading openvino_telemetry-2024.1.0-py3-none-any.whl.metadata (2.3 kB)
Downloading openvino-2024.4.0-16579-cp310-cp310-manylinux2014_x86_64.whl (42.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[?25hDownloading openvino_telemetry-2024.1.0-py3-none-any.whl (23 kB)
Installing collected packages: openvino-telemetry, openvino
Successfully installed openvino-2024.4.0 openvino-telemetry-2024.1.0


In [None]:
!pip install openvino-dev[ONNX]

Collecting openvino-dev[ONNX]
  Downloading openvino_dev-2024.4.0-16579-py3-none-any.whl.metadata (16 kB)
Collecting networkx<=3.1.0 (from openvino-dev[ONNX])
  Downloading networkx-3.1-py3-none-any.whl.metadata (5.3 kB)
Collecting fastjsonschema<2.18,>=2.15.1 (from openvino-dev[ONNX])
  Downloading fastjsonschema-2.17.1-py3-none-any.whl.metadata (2.0 kB)
Collecting onnx<=1.16.0,>=1.8.1 (from openvino-dev[ONNX])
  Downloading onnx-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading fastjsonschema-2.17.1-py3-none-any.whl (23 kB)
Downloading networkx-3.1-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading onnx-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m87.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m


In [11]:
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time
import os
import zipfile
from openvino.runtime import Core
import numpy as np

# Select the compute device: CUDA when a GPU is visible, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Extract the downloaded Kaggle archive into the working directory.
# NOTE(review): the datasets.EMNIST calls further down use download=True with their
# own root folder, so it looks like these extracted files are never read - confirm.
with zipfile.ZipFile('/kaggle/working/emnist.zip') as zip_ref:
    zip_ref.extractall(path='/kaggle/working/')

# Define the CNN model
class SimpleCNN(nn.Module):
    """Small two-convolution CNN for single-channel 28x28 character images.

    Layout: conv(1->32, 3x3) -> ReLU -> conv(32->64, 3x3) -> ReLU -> 2x2 max-pool
    -> channel dropout -> flatten -> linear(9216->128) -> ReLU -> dropout
    -> linear(128->num_classes) -> log-softmax.

    Args:
        num_classes: Number of output logits. The default of 62 is kept for
            backward compatibility. The EMNIST "balanced" split used in this
            notebook has 47 classes (62 is the "byclass" count), so with the
            default the surplus logits are simply never used as targets.
    """

    def __init__(self, num_classes=62):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout is appropriate here: it is applied to the 4-D feature maps.
        self.dropout1 = nn.Dropout2d(0.25)
        # Plain element-wise dropout: this layer is applied to the flattened (N, 128)
        # activations, and Dropout2d on 2-D input is deprecated and emits a warning.
        self.dropout2 = nn.Dropout(0.5)
        # 28x28 input -> 26x26 -> 24x24 after the two 3x3 convs -> 12x12 after pooling;
        # 64 channels * 12 * 12 = 9216 flattened features.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes)."""
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)

# Build the EMNIST "balanced" train/test datasets and their loaders.
# Preprocessing: convert each image to a tensor, then normalise its single channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Only the training split requests a download; the test split reuses the same root.
train_dataset = datasets.EMNIST(
    root='/kaggle/working/emnist',
    split='balanced',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.EMNIST(
    root='/kaggle/working/emnist',
    split='balanced',
    train=False,
    transform=transform,
)

# Shuffled mini-batches for training, large fixed-order batches for evaluation.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

# Training function
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches.
        optimizer: Optimiser stepped once per batch.
        epoch: Epoch number, used only in the progress message.

    Returns:
        None. Progress is printed on every 100th batch.
    """
    model.train()
    criterion = nn.functional.nll_loss  # the model is expected to emit log-probabilities
    for batch_idx, batch in enumerate(train_loader):
        data, target = batch
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

        # Only report on every 100th batch.
        if batch_idx % 100 != 0:
            continue
        print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
              f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

# Testing function
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += nn.functional.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')
    return accuracy

# Train on the selected device (the GPU when one is available)
model_gpu = SimpleCNN().to(device)
optimizer_gpu = optim.Adam(model_gpu.parameters())

# CUDA kernels run asynchronously: wait for the device before each clock reading
# so the measured interval covers all of the queued training work.
if device.type == "cuda":
    torch.cuda.synchronize()
start_time = time.time()
for epoch in range(1, 3):  # Train for 2 epochs
    train(model_gpu, device, train_loader, optimizer_gpu, epoch)
if device.type == "cuda":
    torch.cuda.synchronize()
gpu_train_time = time.time() - start_time

gpu_accuracy = test(model_gpu, device, test_loader)

# Convert to ONNX.
# The exported graph has a fixed batch size, and infer_openvino pads every batch up
# to it, so derive the dummy batch from the loader instead of repeating the literal.
model_gpu.eval()  # export with dropout disabled, independent of what ran before
dummy_input = torch.randn(test_loader.batch_size, 1, 28, 28).to(device)
torch.onnx.export(model_gpu, dummy_input, "emnist_model.onnx")

# Load the ONNX file with OpenVINO and compile it for the CPU device
core = Core()
ov_model = core.read_model("emnist_model.onnx")
compiled_model = core.compile_model(ov_model, "CPU")

# Inference function for OpenVINO model
def infer_openvino(model, data):
    """Run ``data`` through a fixed-batch compiled model and return its first output.

    The model's batch size is read from the first dimension of
    ``model.input(0).get_shape()``. Input is processed in chunks of that size;
    a short final chunk is zero-padded along the batch axis and the padded rows
    are dropped from the result, so batches smaller *or* larger than the model's
    batch size are both accepted.

    Args:
        model: Callable compiled model. ``model(array)[0]`` must yield an array
            with one row per input sample.
        data: torch tensor whose first dimension is the batch dimension.

    Returns:
        torch tensor holding the model's first output for exactly
        ``data.shape[0]`` samples.
    """
    # detach()/cpu() keep the NumPy conversion valid for tensors that track
    # gradients or live on an accelerator.
    samples = data.detach().cpu().numpy()
    model_batch = int(model.input(0).get_shape()[0])
    n_samples = samples.shape[0]

    pieces = []
    # max(..., 1) preserves the behaviour for an empty batch: one all-padding
    # pass through the model, zero rows returned.
    for start in range(0, max(n_samples, 1), model_batch):
        chunk = samples[start:start + model_batch]
        n_valid = chunk.shape[0]
        if n_valid < model_batch:
            # Pad only the batch axis, whatever the rank of the input is.
            pad_width = [(0, model_batch - n_valid)] + [(0, 0)] * (chunk.ndim - 1)
            chunk = np.pad(chunk, pad_width, 'constant')
        result = np.asarray(model(chunk)[0])
        pieces.append(result[:n_valid])  # drop the rows produced by padding
    return torch.from_numpy(np.concatenate(pieces, axis=0))

# Accuracy of the OpenVINO model on the test set.
# This pass is not timed: the reported CPU time comes from the dedicated loop below.
correct = 0
for data, target in test_loader:
    output = infer_openvino(compiled_model, data)
    pred = output.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()

cpu_accuracy = 100. * correct / len(test_dataset)  # percentage over the whole test set

# GPU Inference Time and FPS Measurement
model_gpu.eval()  # make sure dropout is disabled while timing inference
gpu_inference_frames = 0
# CUDA kernels run asynchronously: wait for the device before each clock reading
# so the interval covers the work that was actually executed.
if device.type == "cuda":
    torch.cuda.synchronize()
start_time = time.time()
with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device)
        output = model_gpu(data)
        gpu_inference_frames += data.size(0)  # Increment frames processed by batch size
if device.type == "cuda":
    torch.cuda.synchronize()
gpu_inference_time = time.time() - start_time
gpu_inference_fps = gpu_inference_frames / gpu_inference_time if gpu_inference_time > 0 else 0

# CPU (OpenVINO) Inference Time and FPS Measurement
cpu_inference_frames = 0
start_time = time.time()
for data, target in test_loader:
    output = infer_openvino(compiled_model, data)
    cpu_inference_frames += data.size(0)  # Increment frames processed by batch size
cpu_inference_time = time.time() - start_time
cpu_inference_fps = cpu_inference_frames / cpu_inference_time if cpu_inference_time > 0 else 0


# Print the results
print(f"GPU Training Time: {gpu_train_time:.2f} seconds")
print(f"GPU Inference Accuracy: {gpu_accuracy:.2f}%")
print(f"GPU Inference Time: {gpu_inference_time:.2f} seconds")
print(f"GPU Inference FPS: {gpu_inference_fps:.2f}")
print(f"CPU (OpenVINO) Inference Time: {cpu_inference_time:.2f} seconds")
print(f"CPU (OpenVINO) Inference Accuracy: {cpu_accuracy:.2f}%")
print(f"CPU (OpenVINO) Inference FPS: {cpu_inference_fps:.2f}")



Test set: Average loss: 0.4387, Accuracy: 16049/18800 (85.37%)

GPU Training Time: 52.58 seconds
GPU Inference Accuracy: 85.37%
GPU Inference Time: 3.78 seconds
GPU Inference FPS: 4967.58
CPU (OpenVINO) Inference Time: 5.88 seconds
CPU (OpenVINO) Inference Accuracy: 85.37%
CPU (OpenVINO) Inference FPS: 3195.59
