In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# -------------------------------
# CONFIG
# -------------------------------
DATA_DIR = "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/data_fpga"
OUTPUT_H_DIR = "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights"
os.makedirs(OUTPUT_H_DIR, exist_ok=True)

BATCH_SIZE = 32
EPOCHS = 15
LR = 0.001
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
INT_BITS = 6
FRAC_BITS = 10

# -------------------------------
# CUSTOM DATASET CLASS
# -------------------------------
class NPZDataset(Dataset):
    def __init__(self, npz_path):
        data = np.load(npz_path)
        self.X = data['X'].astype(np.float32) / 255.0  # normalize
        self.y = data['y'].astype(np.int64)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        x = torch.tensor(self.X[idx].transpose(2,0,1))  # HWC -> CHW
        y = torch.tensor(self.y[idx])
        return x, y

# -------------------------------
# LOAD DATA
# -------------------------------
train_dataset = NPZDataset(os.path.join(DATA_DIR, "train.npz"))
test_dataset  = NPZDataset(os.path.join(DATA_DIR, "test.npz"))

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

NUM_CLASSES = len(np.unique(train_dataset.y))

# -------------------------------
# SMALL FPGA-FRIENDLY CNN
# -------------------------------
class FPGA_CNN(nn.Module):
    def __init__(self, num_classes):
        super(FPGA_CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2,2)
        self.fc1 = nn.Linear(32*8*8, 64)
        self.fc2 = nn.Linear(64, num_classes)
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# -------------------------------
# TRAINING LOOP
# -------------------------------
model = FPGA_CNN(NUM_CLASSES).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * imgs.size(0)
        _, predicted = outputs.max(1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
    
    acc = correct / total * 100
    print(f"Epoch [{epoch+1}/{EPOCHS}] Loss: {running_loss/total:.4f} Train Acc: {acc:.2f}%")

# -------------------------------
# TEST ACCURACY
# -------------------------------
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        outputs = model(imgs)
        _, predicted = outputs.max(1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
print(f"Test Accuracy: {correct/total*100:.2f}%")

# -------------------------------
# FIXED-POINT CONVERSION
# -------------------------------
def float_to_fixed(w, int_bits=INT_BITS, frac_bits=FRAC_BITS):
    scale = 2**frac_bits
    w_fp = np.round(w * scale).astype(np.int16)
    return w_fp

def save_hls_array(filename, arr, name):
    arr_flat = arr.flatten()
    with open(filename, "w") as f:
        f.write(f"const short {name}[] = {{")
        f.write(",".join(map(str, arr_flat)))
        f.write("};\n")

# -------------------------------
# EXTRACT AND SAVE WEIGHTS
# -------------------------------
layers = ['conv1','conv2','conv3','fc1','fc2']
for layer_name in layers:
    layer = getattr(model, layer_name)
    weight_fp = float_to_fixed(layer.weight.detach().cpu().numpy())
    bias_fp   = float_to_fixed(layer.bias.detach().cpu().numpy())
    save_hls_array(os.path.join(OUTPUT_H_DIR, f"{layer_name}_weight.h"), weight_fp, f"{layer_name}_weight")
    save_hls_array(os.path.join(OUTPUT_H_DIR, f"{layer_name}_bias.h"), bias_fp, f"{layer_name}_bias")

print("All weights and biases saved as fixed-point .h files in:", OUTPUT_H_DIR)

Epoch [1/15] Loss: 0.2605 Train Acc: 91.94%
Epoch [2/15] Loss: 0.0833 Train Acc: 96.77%
Epoch [3/15] Loss: 0.0621 Train Acc: 97.72%
Epoch [4/15] Loss: 0.0462 Train Acc: 98.05%
Epoch [5/15] Loss: 0.0409 Train Acc: 98.47%
Epoch [6/15] Loss: 0.0337 Train Acc: 98.64%
Epoch [7/15] Loss: 0.0263 Train Acc: 99.05%
Epoch [8/15] Loss: 0.0189 Train Acc: 99.25%
Epoch [9/15] Loss: 0.0218 Train Acc: 99.21%
Epoch [10/15] Loss: 0.0175 Train Acc: 99.37%
Epoch [11/15] Loss: 0.0132 Train Acc: 99.55%
Epoch [12/15] Loss: 0.0078 Train Acc: 99.72%
Epoch [13/15] Loss: 0.0151 Train Acc: 99.46%
Epoch [14/15] Loss: 0.0089 Train Acc: 99.67%
Epoch [15/15] Loss: 0.0046 Train Acc: 99.84%
Test Accuracy: 98.64%
All weights and biases saved as fixed-point .h files in: C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights


In [11]:
import os
import numpy as np
import torch

# -------------------------------
# CONFIG
# -------------------------------
OUTPUT_H_DIR = "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights"
os.makedirs(OUTPUT_H_DIR, exist_ok=True)

FRAC_BITS = 10  # Q-format fraction bits

# -------------------------------
# SAVE HLS WEIGHTS AS weight_t (ap_fixed) COMPATIBLE
# -------------------------------
def save_hls_array_nd(filename, arr, name, frac_bits=FRAC_BITS):
    """
    Save a numpy array as HLS C-style multi-dimensional array compatible with 'weight_t' (ap_fixed).
    Values are scaled to fixed-point and written as float literals.
    """
    scale = 2 ** frac_bits
    arr_fp = np.round(arr * scale) / scale  # scale and back to float

    # Generate shape string
    shape_str = "".join(f"[{d}]" for d in arr_fp.shape)

    # Recursive function to format nested arrays with float literals
    def format_array(a):
        if a.ndim == 1:
            return "{" + ",".join(f"{x:.6f}" for x in a) + "}"
        else:
            return "{" + ",".join(format_array(x) for x in a) + "}"

    # Write to file using 'weight_t'
    with open(filename, "w") as f:
        f.write(f"const weight_t {name}{shape_str} = {format_array(arr_fp)};\n")

    print(f"Saved '{filename}' as weight_t | scale={scale} | max={arr_fp.max():.6f} | min={arr_fp.min():.6f}")
# -------------------------------
# EXTRACT AND SAVE MODEL WEIGHTS
# -------------------------------
# Example: Replace with your actual PyTorch model
# model = MyCNNModel().to('cpu')  # ensure model is on CPU
# model.load_state_dict(torch.load('model.pth'))

layers = ['conv1','conv2','conv3','fc1','fc2']

for layer_name in layers:
    layer = getattr(model, layer_name)
    weight_arr = layer.weight.detach().cpu().numpy()
    bias_arr   = layer.bias.detach().cpu().numpy()
    
    save_hls_array_nd(os.path.join(OUTPUT_H_DIR, f"{layer_name}_weight.h"), weight_arr, f"{layer_name}_weight")
    save_hls_array_nd(os.path.join(OUTPUT_H_DIR, f"{layer_name}_bias.h"), bias_arr, f"{layer_name}_bias")

print("✅ All weights and biases saved as HLS fixed-point arrays in:", OUTPUT_H_DIR)

Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\conv1_weight.h' as weight_t | scale=1024 | max=0.263672 | min=-0.278320
Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\conv1_bias.h' as weight_t | scale=1024 | max=0.195312 | min=-0.140625
Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\conv2_weight.h' as weight_t | scale=1024 | max=0.378906 | min=-0.397461
Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\conv2_bias.h' as weight_t | scale=1024 | max=0.175781 | min=-0.137695
Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\conv3_weight.h' as weight_t | scale=1024 | max=0.382812 | min=-0.523438
Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\conv3_bias.h' as weight_t | scale=1024 | max=0.070312 | min=-0.123047
Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\fc1_weight.h' as weight_t | scale=1024 | max=0.376953 | min=-0.300781
Saved 'C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\fc1_bias

In [None]:
def save_hls_array_nd(filename, arr, name):
    """
    Save a numpy array as HLS C-style multi-dimensional fixed-point array.
    Handles conv/fc layers of arbitrary shape.
    """
    # Convert to fixed-point Q6.10
    arr_fp = np.round(arr * (2**FRAC_BITS)).astype(np.int16)
    
    # Determine C-style array shape string
    shape_str = "".join([f"[{d}]" for d in arr_fp.shape])
    
    with open(filename, "w") as f:
        f.write(f"const short {name}{shape_str} = {{")
        
        # Recursive function to write nested arrays
        def write_nested(a):
            if a.ndim == 1:
                f.write("{" + ",".join(map(str, a)) + "}")
            else:
                f.write("{")
                for i, sub in enumerate(a):
                    write_nested(sub)
                    if i != len(a)-1:
                        f.write(",")
                f.write("}")
        
        write_nested(arr_fp)
        f.write("};\n")

In [2]:
import time
from PIL import Image

# -------------------------------
# 1️⃣ SAVE THE MODEL
# -------------------------------
MODEL_PATH = os.path.join(OUTPUT_H_DIR, "fpga_cnn.pth")
torch.save(model.state_dict(), MODEL_PATH)
print("Model saved to:", MODEL_PATH)

Model saved to: C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/hls_weights\fpga_cnn.pth


In [5]:


# -------------------------------
# 2️⃣ LOAD MODEL (optional)
# -------------------------------
model = FPGA_CNN(NUM_CLASSES).to(DEVICE)
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
model.eval()

# -------------------------------
# 3️⃣ IMAGE INFERENCE
# -------------------------------
# List of image paths
# List of image paths
IMAGE_PATHS = [
    "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/cat.png",
    "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/dog.png",
    "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/face.jpg",
    "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/plane.jpg",
    "C:/Users/nvgok/OneDrive/Apps/AISOC/trybest/car.jpg"
]

def preprocess_image(img_path, target_size=(32,32)):
    img = Image.open(img_path).convert('RGB')
    img = img.resize(target_size, Image.LANCZOS)
    img_np = np.array(img, dtype=np.float32)/255.0   # normalize
    img_tensor = torch.tensor(img_np.transpose(2,0,1)).unsqueeze(0)  # CHW + batch
    return img_tensor.to(DEVICE)

# Make sure class_names is the same order as labels used in dataset
class_names = ['face', 'plane', 'car', 'cat', 'dog']
# You already have this from dataset preprocessing step
# class_names = ['class0', 'class1', ...]  # replace with your actual classes

model.eval()
with torch.no_grad():
    for img_path in IMAGE_PATHS:
        img_tensor = preprocess_image(img_path)
        
        start_time = time.time()
        output = model(img_tensor)
        end_time = time.time()
        
        pred_class_idx = output.argmax(1).item()
        pred_class_name = class_names[pred_class_idx]   # map index to name
        pred_score = torch.softmax(output, dim=1)[0, pred_class_idx].item()
        inference_time = (end_time - start_time) * 1000  # ms
        
        print(f"Image: {os.path.basename(img_path)}")
        print(f"  Predicted class: {pred_class_name}")
        print(f"  Confidence: {pred_score:.4f}")
        print(f"  Inference time: {inference_time:.3f} ms\n")

Image: cat.png
  Predicted class: cat
  Confidence: 1.0000
  Inference time: 1.000 ms

Image: dog.png
  Predicted class: dog
  Confidence: 0.9987
  Inference time: 0.999 ms

Image: face.jpg
  Predicted class: face
  Confidence: 1.0000
  Inference time: 0.998 ms

Image: plane.jpg
  Predicted class: plane
  Confidence: 0.9962
  Inference time: 1.000 ms

Image: car.jpg
  Predicted class: car
  Confidence: 0.9994
  Inference time: 0.000 ms

