In [None]:
from google.colab import drive
import os

# Mount Google Drive at /content/drive (Colab-only: relies on google.colab).
drive.mount('/content/drive')

# Make the project folder the working directory; every relative path used by
# later cells (model checkpoints, .mif files, headers) resolves against it.
os.chdir('/content/drive/MyDrive/FPGA_Acce_pytorch')

# Verify current working directory
print("Current working directory:", os.getcwd())

Mounted at /content/drive
Current working directory: /content/drive/MyDrive/FPGA_Acce_pytorch


In [None]:
# Sanity check: print the working directory and list the files it contains.
!pwd
!ls

/content/drive/MyDrive/FPGA_Acce_pytorch
 biasValues.h			    'w_b (1)'
 data				     w_b_test
 mnist_simple_nn.pth		     weightsandbiases_final_128.txt
 mnist_simple_nn_with_accuracy.pth   weightsandbiases_final_2.txt
 sigContent.mif			     weightsandbiases_final_normalized.txt
 testData			     weightsandbiases_final.txt
 w_b				     weightValues.h


In [None]:
#Sigmoid Generation

# -*- coding: utf-8 -*-
"""
Created on Mon Oct  6 19:23:47 2025

@author: Rhodz
"""


import math

def genSigContent(dataWidth,sigmoidSize,weightIntSize,inputIntSize,verbose=True):
    """Write the sigmoid lookup table to "sigContent.mif", one binary word per line.

    The table has 2**sigmoidSize entries. The first entry corresponds to
    x = -2**(weightIntSize+inputIntSize-1) and x grows by 2**-fractBits per
    entry, where fractBits = sigmoidSize-(weightIntSize+inputIntSize),
    floored at 0.

    Parameters:
        dataWidth: bit width passed to DtoB for every table entry.
        sigmoidSize: number of address bits of the table.
        weightIntSize: integer bits of the weights.
        inputIntSize: integer bits of the inputs; the entries are encoded with
            dataWidth-inputIntSize fractional bits.
        verbose: when True (default, the original behaviour) print every
            (value, encoded word) pair; pass False to keep notebook output short.
    """
    # Sigmoid size smaller than the integer part of the MAC result leaves no
    # fractional address bits.
    fractBits = max(sigmoidSize-(weightIntSize+inputIntSize), 0)
    step = 2**-fractBits
    # Smallest input going to the sigmoid LUT from the neuron.
    x = -2**(weightIntSize+inputIntSize-1)
    # `with` guarantees the file is closed even if a conversion raises.
    with open("sigContent.mif","w") as f:
        for _ in range(2**sigmoidSize):
            y = sigmoid(x)
            z = DtoB(y,dataWidth,dataWidth-inputIntSize)
            if verbose:
                print(y,z)
            f.write(z+'\n')
            x = x+step

def DtoB(num,dataWidth,fracBits):
    """Return `num` as a two's-complement fixed-point binary string.

    The magnitude is scaled by 2**fracBits and truncated toward zero.
    Non-negative values (and negative values that truncate to zero) are
    returned as the plain binary digits without leading zeros; other negative
    values are returned as the binary digits of 2**dataWidth - magnitude.

    Note: other cells of this notebook define a DtoB that returns an int
    instead of a string; whichever definition ran last is the one in effect.
    """
    magnitude = int(abs(num) * (2**fracBits))
    if num >= 0 or magnitude == 0:
        return bin(magnitude)[2:]
    # Negative value: wrap around the dataWidth-bit range.
    return bin(2**dataWidth - magnitude)[2:]


def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x).

    For very negative x, math.exp(-x) exceeds the float range and raises
    OverflowError; the true result is then indistinguishable from zero, so 0.0
    is returned. Any other error (for example a non-numeric argument) is
    propagated instead of being silently turned into 0.
    """
    try:
        return 1 / (1+math.exp(-x))
    except OverflowError:
        return 0.0


# Generate the sigmoid table: 16-bit words, 2**10 entries, 4 integer bits for
# the weights and 1 integer bit for the inputs.
if __name__ == "__main__":
    genSigContent(dataWidth=16,sigmoidSize=10,weightIntSize=4,inputIntSize=1)
#output sigContent.mif is loaded for the sigmoid ROM

1.12535162055095e-07 0
1.1610741114742279e-07 0
1.1979305557162662e-07 0
1.2359569488020953e-07 0
1.275190428876251e-07 0
1.3156693129733423e-07 0
1.3574331344399593e-07 0
1.4005226815444813e-07 0
1.4449800373124837e-07 0
1.4908486206266502e-07 0
1.5381732286313315e-07 0
1.587000080483163e-07 0
1.63737686249047e-07 0
1.6893527746855403e-07 0
1.7429785788752586e-07 0
1.7983066482170178e-07 0
1.8553910183683314e-07 0
1.914287440260102e-07 0
1.9750534345450816e-07 0
2.0377483477747004e-07 0
2.1024334103591293e-07 0
2.1691717963671816e-07 0
2.238028685224453e-07 0
2.3090713253699577e-07 0
2.38236909993343e-07 0
2.4579935944974304e-07 0
2.5360186670104374e-07 0
2.6165205199192004e-07 0
2.6995777745908016e-07 0
2.7852715480971087e-07 0
2.8736855324366057e-07 0
2.964906076270974e-07 0
3.059022269256247e-07 0
3.156126029050898e-07 0
3.256312191085833e-07 0
3.359678601183963e-07 0
3.466326211119807e-07 0
3.576359177212449e-07 0
3.6898849620481354e-07 0
3.8070144394318623e-07 0
3.927862002670442

In [None]:
# simple_mnist_nn.py
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Load MNIST dataset
# Every image is converted to a tensor and then standardised with the
# mean/std pair below, so the network is trained on normalized inputs.
transform = transforms.Compose([
    transforms.ToTensor(),                     # Convert images to PyTorch tensors
    transforms.Normalize((0.1307,), (0.3081,)) # Normalize using MNIST mean and std
])

# Only the training split passes download=True; the test split is read from
# the same ./data root.
train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
test_dataset  = datasets.MNIST(root="./data", train=False, transform=transform)

# Training uses shuffled batches of 64; evaluation uses fixed-order batches of 1000.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [None]:
# Show which device was selected by the data-loading cell.
print(device)

cuda


In [None]:
# 2. Define a simple neural network
class SimpleNN(nn.Module):
    """Fully connected classifier: 784 inputs -> 128 sigmoid units -> 10 logits.

    Both linear layers are re-initialised with Xavier-uniform weights and zero
    biases right after construction.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 10)

        # Xavier-uniform weights, zero biases, applied layer by layer in order.
        for linear in (self.fc1, self.fc2):
            nn.init.xavier_uniform_(linear.weight)
            nn.init.zeros_(linear.bias)

    def forward(self, x):
        """Flatten `x` into rows of 784 values and return the raw class logits."""
        flat = x.view(-1, 28*28)
        hidden = torch.sigmoid(self.fc1(flat))
        # No activation on the output layer: CrossEntropyLoss expects raw logits.
        return self.fc2(hidden)

# Instantiate the network and move its parameters to the selected device.
model = SimpleNN().to(device)

# 3. Define loss and optimizer
# CrossEntropyLoss consumes the raw logits returned by SimpleNN.forward.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Training loop with accuracy tracking
EPOCHS = 5

for epoch in range(EPOCHS):
    model.train()
    # Per-epoch accumulators: summed batch losses, correct predictions, samples seen.
    total_loss = 0
    correct = 0
    total = 0

    for data, target in train_loader:
        # Move the batch to the same device as the model.
        data, target = data.to(device), target.to(device)

        # Standard optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # compute training accuracy
        # (measured on the outputs produced before this batch's weight update)
        preds = output.argmax(dim=1)
        correct += preds.eq(target).sum().item()
        total += target.size(0)

    # Accuracy in percent over all samples; loss averaged over the number of batches.
    train_acc = 100. * correct / total
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {avg_loss:.4f} - Train Acc: {train_acc:.2f}%")

# 5. Test the model
# eval() switches the module to inference mode; no_grad() skips gradient tracking.
model.eval()
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Predicted class = index of the largest logit.
        pred = output.argmax(dim=1)
        correct += pred.eq(target).sum().item()

# Accuracy in percent over the whole test set.
test_acc = 100. * correct / len(test_loader.dataset)
print(f"\nFinal Test Accuracy: {test_acc:.2f}%")

# 6. Save model
# Only the parameters (state_dict) are stored, not the class definition.
torch.save(model.state_dict(), "mnist_simple_nn_with_accuracy.pth")
print("Model saved as mnist_simple_nn_with_accuracy.pth")

Epoch 1/5 - Loss: 0.3449 - Train Acc: 90.70%
Epoch 2/5 - Loss: 0.1711 - Train Acc: 95.13%
Epoch 3/5 - Loss: 0.1257 - Train Acc: 96.47%
Epoch 4/5 - Loss: 0.0978 - Train Acc: 97.29%
Epoch 5/5 - Loss: 0.0781 - Train Acc: 97.86%

Final Test Accuracy: 97.32%
Model saved as mnist_simple_nn_with_accuracy.pth


In [None]:
# 5. Test the model
# NOTE: this cell repeats the evaluation loop of the training cell on the same
# `model` and `test_loader`, then stores the same state_dict under a second
# file name.
model.eval()
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Predicted class = index of the largest logit.
        pred = output.argmax(dim=1)
        correct += pred.eq(target).sum().item()

# Accuracy in percent over the whole test set.
accuracy = 100. * correct / len(test_loader.dataset)
print(f"Test Accuracy: {accuracy:.2f}%")

# 6. Save the trained model
torch.save(model.state_dict(), "mnist_simple_nn.pth")
print("Model saved as mnist_simple_nn.pth")

Test Accuracy: 97.32%
Model saved as mnist_simple_nn.pth


In [None]:
#Extract weights and biases
import json
import torch
import torch.nn as nn
# Per-layer parameter lists, in the order the Linear layers appear in `model`.
weightList = []
biasList = []

# Extract weights and biases from all Linear layers
for layer in model.children():
    if isinstance(layer, nn.Linear):
        weights = layer.weight.detach().cpu().numpy().tolist()  # shape [out_features, in_features]
        biases  = layer.bias.detach().cpu().numpy().reshape(-1, 1).tolist()  # shape [out_features, 1]

        # Keep PyTorch's native [out_features][in_features] layout. The
        # converter cell (genWaitAndBias) writes one w_<layer>_<neuron>.mif per
        # entry of the first axis and one b_<layer>_<neuron>.mif per bias row,
        # so the first axis must index output neurons. Transposing here would
        # produce one weight file per *input* instead (e.g. 784 files of 128
        # values for the first layer), which no longer lines up with the biases.
        weightList.append(weights)
        biasList.append(biases)

# Combine into dictionary
data = {"weights": weightList, "biases": biasList}

# Save to text file for FPGA post-processing
with open("weightsandbiases_final_128_normalized.txt", "w") as f:
    json.dump(data, f)

print("✅ Weights and biases exported to weightsandbiases_final_128_normalized.txt")

✅ Weights and biases exported to weightsandbiases_final_128_normalized.txt


In [None]:
img, label = train_dataset[0]
print(f"Image shape: {img.shape}, Label: {label}")

Image shape: torch.Size([1, 28, 28]), Label: 5


In [None]:
#Weights and Biases conversion to .mif files
import json

# Fixed-point format: every word is dataWidth bits wide; the *IntWidth values
# give the number of integer bits and the remaining bits are fractional.
dataWidth = 16
dataIntWidth = 1
weightIntWidth = 4
# Must name the file written by the weight-export cell; that cell writes
# "weightsandbiases_final_128_normalized.txt".
inputFile = "weightsandbiases_final_128_normalized.txt" # output of the previous cell
dataFracWidth = dataWidth-dataIntWidth
weightFracWidth = dataWidth-weightIntWidth
# Biases get as many integer bits as the data and weight integer parts combined.
biasIntWidth = dataIntWidth+weightIntWidth
biasFracWidth = dataWidth-biasIntWidth
outputPath = "./w_b/" # location of weights and biases files. Make this folder manually
headerPath = "./"

def DtoB(num,dataWidth,fracBits):
	"""Return `num` as an unsigned int holding its two's-complement fixed-point code.

	The magnitude is scaled by 2**fracBits and truncated toward zero.
	Non-negative values (and negative values that truncate to zero) map to the
	scaled magnitude itself; other negative values map to
	2**dataWidth - magnitude.

	Note: another cell of this notebook defines a DtoB that returns a binary
	string instead of an int; whichever definition ran last is the one in effect.
	"""
	magnitude = int(abs(num) * (2**fracBits))
	if num >= 0 or magnitude == 0:
		return magnitude
	# Negative value: wrap around the dataWidth-bit range.
	return 2**dataWidth - magnitude

def genWaitAndBias(dataWidth,weightFracWidth,biasFracWidth,inputFile):
	"""Convert a JSON weight/bias dump into per-neuron .mif files and C headers.

	`inputFile` must contain a JSON object with the keys 'weights' (indexed
	[layer][neuron][weight]) and 'biases' (indexed [layer][neuron][0]).

	For every neuron one file w_<layer>_<neuron>.mif (one binary word per line)
	and one file b_<layer>_<neuron>.mif (a single binary word) are written into
	the module-level `outputPath`, with layers numbered from 1 and neurons from
	0. The same codes are also written as decimal integers, followed by a
	trailing 0 entry, into weightValues.h and biasValues.h under the
	module-level `headerPath`. The output folders must already exist.

	Parameters:
		dataWidth: total bit width of every word.
		weightFracWidth: fractional bits used for weights.
		biasFracWidth: fractional bits used for biases.
		inputFile: path of the JSON dump.
	"""
	def quantize(value,fracWidth):
		# Saturate to the representable signed range first. A value equal to
		# 2**(intWidth-1) would otherwise encode as the most negative code.
		intWidth = dataWidth-fracWidth
		upper = 2**(intWidth-1)-2**(-fracWidth)
		lower = -2**(intWidth-1)
		return DtoB(min(max(value,lower),upper),dataWidth,fracWidth)

	with open(inputFile,"r") as myDataFile:
		myDict = json.loads(myDataFile.read())
	myWeights = myDict['weights']
	myBiases = myDict['biases']

	with open(headerPath+"weightValues.h","w") as weightHeaderFile:
		weightHeaderFile.write("int weightValues[]={")
		for layer,layerWeights in enumerate(myWeights):
			for neuron,neuronWeights in enumerate(layerWeights):
				fi = 'w_'+str(layer+1)+'_'+str(neuron)+'.mif'
				with open(outputPath+fi,'w') as f:
					for weight in neuronWeights:
						# Every value is quantized directly; tiny values in
						# exponent notation truncate to 0 on their own, so the
						# header always receives the code of *this* weight.
						wInDec = quantize(weight,weightFracWidth)
						f.write(bin(wInDec)[2:]+'\n')
						weightHeaderFile.write(str(wInDec)+',')
		weightHeaderFile.write('0};\n')

	with open(headerPath+"biasValues.h","w") as biasHeaderFile:
		biasHeaderFile.write("int biasValues[]={")
		for layer,layerBiases in enumerate(myBiases):
			for neuron,bias in enumerate(layerBiases):
				fi = 'b_'+str(layer+1)+'_'+str(neuron)+'.mif'
				bInDec = quantize(bias[0],biasFracWidth)
				with open(outputPath+fi,'w') as f:
					f.write(bin(bInDec)[2:])
				biasHeaderFile.write(str(bInDec)+',')
		biasHeaderFile.write('0};\n')

# Run the conversion with the widths and input file configured at the top of this cell.
if __name__ == "__main__":
	genWaitAndBias(dataWidth,weightFracWidth,biasFracWidth,inputFile)

In [None]:
#Generating Test Data
import sys

# NOTE: `outputPath` is also the global read by genWaitAndBias in the
# weights/biases cell (set there to "./w_b/"); running this cell rebinds it.
outputPath = "./testData/" #Manually create this folder
headerFilePath = "./testData/"

try:
    import cPickle as pickle
except:
    import pickle
import gzip
import numpy as np

dataWidth = 16                    #specify the number of bits in test data
IntSize = 1 #Number of bits of integer portion including sign bit

# Optional command-line override for the sample index. The first argument may
# be missing or non-numeric (for example when run inside a notebook kernel);
# only those two cases fall back to sample 3, other errors are not hidden.
try:
    testDataNum = int(sys.argv[1])
except (IndexError, ValueError):
    testDataNum = 3

# PyTorch default MNIST normalization constants
MNIST_MEAN = 0.1307
MNIST_STD = 0.3081

def DtoB(num,dataWidth,fracBits):
    """Return `num` as an unsigned int holding its two's-complement fixed-point code.

    The magnitude is scaled by 2**fracBits and truncated toward zero.
    Non-negative values (and negative values that truncate to zero) map to the
    scaled magnitude itself; other negative values map to
    2**dataWidth - magnitude.

    Note: another cell of this notebook defines a DtoB that returns a binary
    string instead of an int; whichever definition ran last is the one in effect.
    """
    magnitude = int(abs(num) * (2**fracBits))
    if num >= 0 or magnitude == 0:
        return magnitude
    # Negative value: wrap around the dataWidth-bit range.
    return 2**dataWidth - magnitude


def load_data(path='mnist.pkl.gz'):
    """Load the pickled (training, validation, test) splits from a gzip archive.

    Parameters:
        path: location of the gzip-compressed pickle; defaults to
            'mnist.pkl.gz' in the working directory. Change this to wherever
            the MNIST dataset sits.

    Returns:
        The tuple (training_data, validation_data, test_data) stored in the file.

    Raises:
        FileNotFoundError: if `path` does not exist.

    SECURITY: pickle.load can execute arbitrary code contained in the file;
    only load archives from a trusted source.
    """
    # `with` closes the archive even when unpickling fails.
    with gzip.open(path, 'rb') as f:
        try:
            training_data, validation_data, test_data = pickle.load(f,encoding='latin1')
        except TypeError:
            # Fallback for a pickle.load that does not accept `encoding`.
            # Rewind first so the retry never starts from a partly read stream.
            f.seek(0)
            training_data, validation_data, test_data = pickle.load(f)
    return (training_data, validation_data, test_data)

def genTestData(dataWidth,IntSize,testDataNum):
    """Export one normalized test sample in several formats.

    Files written (the folders must already exist):
        headerFilePath + "dataValues.h": decimal codes of all 784 pixels, a
            trailing 0 entry, and `int result=<label>;`.
        outputPath + "test_data.txt": one binary word per pixel.
        outputPath + "visual_data<label>.txt": 28x28 grid of 1/0 marking
            pixels whose normalized value is above zero.
        "testData.txt": the normalized floating-point values, comma separated.

    Parameters:
        dataWidth: total bit width of every encoded pixel.
        IntSize: integer bits (including sign) of the encoded pixel.
        testDataNum: index of the test sample to export.

    Values outside the representable range [-2**(IntSize-1), 2**(IntSize-1))
    are saturated before encoding, as the weight/bias converter does, so every
    code fits in dataWidth bits.
    NOTE(review): if the raw pixels reach 1.0, normalization yields about 2.82,
    which IntSize=1 cannot represent; saturation keeps the words valid but
    distorts bright pixels. Confirm whether the design needs a wider integer
    field instead.
    """
    # Load first so a missing dataset does not leave half-written output files.
    tr_d, va_d, te_d = load_data()

    # Normalize like PyTorch
    normalized_inputs = (te_d[0] - MNIST_MEAN) / MNIST_STD
    sample = np.reshape(normalized_inputs[testDataNum], (1, 784))[0]
    label = te_d[1][testDataNum]

    fracBits = dataWidth-IntSize
    upper = 2.0**(IntSize-1) - 2.0**(-fracBits)
    lower = -2.0**(IntSize-1)
    saturated = np.clip(sample, lower, upper)

    with open(headerFilePath+"dataValues.h","w") as dataHeaderFile, \
         open(outputPath+'test_data.txt','w') as f, \
         open(outputPath+'visual_data'+str(label)+'.txt','w') as g, \
         open('testData.txt','w') as k:
        dataHeaderFile.write("int dataValues[]={")
        for count,(value,clipped) in enumerate(zip(sample,saturated),start=1):
            # The float dump and the visualisation use the unclipped value.
            k.write(str(value)+',')
            dInDec = DtoB(clipped,dataWidth,fracBits)
            dataHeaderFile.write(str(dInDec)+',')
            f.write(bin(dInDec)[2:]+'\n')
            g.write('1 ' if value > 0 else '0 ')
            # Break the visualisation into rows of 28 pixels.
            if count%28 == 0:
                g.write('\n')
        dataHeaderFile.write('0};\n')
        dataHeaderFile.write('int result='+str(label)+';\n')


def genAllTestData(dataWidth,IntSize):
    """Export every normalized test sample to its own file.

    For sample i the file outputPath + "test_data_<i zero-padded to 4
    digits>.txt" receives one binary word per pixel (784 lines) followed by
    the label encoded with 0 fractional bits. The output folder must already
    exist.

    Parameters:
        dataWidth: total bit width of every encoded word.
        IntSize: integer bits (including sign) of the encoded pixel.

    Values outside the representable range [-2**(IntSize-1), 2**(IntSize-1))
    are saturated before encoding, as the weight/bias converter does, so every
    code fits in dataWidth bits.
    NOTE(review): if the raw pixels reach 1.0, normalization yields about 2.82,
    which IntSize=1 cannot represent; saturation keeps the words valid but
    distorts bright pixels. Confirm whether the design needs a wider integer
    field instead.
    """
    tr_d, va_d, te_d = load_data()

    # Normalize like PyTorch
    normalized_inputs = (te_d[0] - MNIST_MEAN) / MNIST_STD

    fracBits = dataWidth-IntSize
    upper = 2.0**(IntSize-1) - 2.0**(-fracBits)
    lower = -2.0**(IntSize-1)
    saturated_inputs = np.clip(normalized_inputs, lower, upper)

    for i in range(len(saturated_inputs)):
        sample = np.reshape(saturated_inputs[i], (1, 784))[0]
        fileName = f'test_data_{i:04d}.txt'
        # `with` closes each file even if a conversion raises mid-sample.
        with open(outputPath+fileName,'w') as f:
            for value in sample:
                f.write(bin(DtoB(value,dataWidth,fracBits))[2:]+'\n')
            # Last line: the expected label as a plain integer code.
            f.write(bin(DtoB((te_d[1][i]),dataWidth,0))[2:])



# Export the whole test set (one file per sample); the single-sample export
# is kept below as a commented-out alternative.
if __name__ == "__main__":
    #genTestData(dataWidth,IntSize,testDataNum=1)
    genAllTestData(dataWidth,IntSize)

In [None]:
# Inspect one (image, label) training sample after the Normalize transform.
# NOTE: this displays the full 28x28 tensor, which is a large output.
train_dataset[1]

(tensor([[[-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242, -0.4242,
           -0.4242, -0.4242, -0.424

In [None]:
# ===============================================================
# Generate Verilog neuron instantiations automatically
# Example: python generate_neurons.py > neurons_inst.vh
# ===============================================================

num_neurons = 10  # change this to any number of neurons you need
layer_num = 2       # layer index (used in w_<layer>_<neuron>.mif)
indent = " " * 8    # indentation for readability

# Emit one `neuron` instantiation per index. Only the neuron number and the
# .mif file names are substituted; every other identifier (numWeight,
# layerNum, dataWidth, ...) is printed verbatim.
for i in range(num_neurons):
    parameters = (
        ".numWeight(numWeight),",
        ".layerNo(layerNum),",
        f".neuronNo({i}),",
        ".dataWidth(dataWidth),",
        ".sigmoidSize(sigmoidSize),",
        ".weightIntWidth(weightIntWidth),",
        ".actType(actType),",
        f'.weightFile("w_{layer_num}_{i}.mif"),',
        f'.biasFile("b_{layer_num}_{i}.mif")',
    )
    ports = (
        ".clk(clk),",
        ".rst(rst),",
        ".myinput(x_in),",
        ".weightValid(weightValid),",
        ".biasValid(biasValid),",
        ".weightValue(weightValue),",
        ".biasValue(biasValue),",
        ".config_layer_num(config_layer_num),",
        ".config_neuron_num(config_neuron_num),",
        ".myinputValid(x_valid),",
        f".out(x_out[{i}*dataWidth+:dataWidth]),",
        f".outvalid(o_valid[{i}])",
    )
    instantiation = ["neuron #("]
    instantiation.extend(indent + entry for entry in parameters)
    instantiation.append(f") n_{i}(")
    instantiation.extend(indent + entry for entry in ports)
    instantiation.append(");")
    print("\n".join(instantiation))

neuron #(
        .numWeight(numWeight),
        .layerNo(layerNum),
        .neuronNo(0),
        .dataWidth(dataWidth),
        .sigmoidSize(sigmoidSize),
        .weightIntWidth(weightIntWidth),
        .actType(actType),
        .weightFile("w_2_0.mif"),
        .biasFile("b_2_0.mif")
) n_0(
        .clk(clk),
        .rst(rst),
        .myinput(x_in),
        .weightValid(weightValid),
        .biasValid(biasValid),
        .weightValue(weightValue),
        .biasValue(biasValue),
        .config_layer_num(config_layer_num),
        .config_neuron_num(config_neuron_num),
        .myinputValid(x_valid),
        .out(x_out[0*dataWidth+:dataWidth]),
        .outvalid(o_valid[0])
);
neuron #(
        .numWeight(numWeight),
        .layerNo(layerNum),
        .neuronNo(1),
        .dataWidth(dataWidth),
        .sigmoidSize(sigmoidSize),
        .weightIntWidth(weightIntWidth),
        .actType(actType),
        .weightFile("w_2_1.mif"),
        .biasFile("b_2_1.mif")
) n_1(
        .c