In [1]:
import time

import pynq
from pynq import Overlay, PL

# Reset the PYNQ programmable-logic bookkeeping before loading the overlay.
# NOTE(review): presumably this clears state left by a previously loaded
# bitstream — confirm against the PYNQ documentation.
PL.reset()
# Load the accelerator bitstream; `ol` exposes its IP blocks as attributes.
ol = Overlay("CS411_team4_step4.bit")

# Array views over the four AXI BRAM controllers. Roles, as used by the
# matmul functions later in this notebook:
#   bram_sp_arr - control/status words (start flag at index 0, mode at 1,
#                 M/K/N at 2-4, done flag at index 25)
#   bram_a_arr  - first operand (mat1), written as a flattened padded tile
#   bram_w_arr  - second operand (mat2), written as a flattened padded tile
#   bram_o_arr  - result tile, read back after the done flag is set
bram_sp_arr = ol.axi_bram_ctrl_0.mmio.array
bram_a_arr = ol.axi_bram_ctrl_1.mmio.array
bram_w_arr = ol.axi_bram_ctrl_2.mmio.array
bram_o_arr = ol.axi_bram_ctrl_3.mmio.array

In [2]:
import numpy as np
# Edge length of the square tile handed to the accelerator; operands are
# zero-padded up to this size and M/K/N are always programmed with it.
_TILE_DIM = 8

# Word indices into the special (control/status) BRAM.
_SP_START = 0  # written with 1 to launch a run
_SP_MODE = 1   # dataflow selector: 1 = OS, 0 = WS
_SP_M = 2
_SP_K = 3
_SP_N = 4
# Polled until it reads 1, then cleared by software. The original author's
# note calls this "addr100", which matches word 25 if words are 4 bytes wide
# — TODO confirm against the RTL.
_SP_DONE = 25

# Mode values written to _SP_MODE.
# NOTE(review): OS/WS presumably stand for output-/weight-stationary — confirm.
_MODE_WS = 0
_MODE_OS = 1


def _to_padded_words(mat: np.ndarray, name: str) -> np.ndarray:
    """Zero-pad `mat` to a _TILE_DIM x _TILE_DIM tile and flatten it to uint32 words.

    Raises:
        ValueError: if `mat` does not fit in the tile.
    """
    rows, cols = mat.shape
    if rows > _TILE_DIM or cols > _TILE_DIM:
        raise ValueError(
            f"{name} has shape {mat.shape}; each dimension must be <= {_TILE_DIM}"
        )
    tile = np.zeros((_TILE_DIM, _TILE_DIM), dtype=np.uint32)
    # Cast through int32 so negative values become their two's-complement bit
    # pattern whatever the input dtype is (a direct float -> uint32 cast of a
    # negative value is not well defined). For integer inputs this yields the
    # same words as a plain astype(np.uint32).
    tile[:rows, :cols] = mat.astype(np.int32).view(np.uint32)
    return tile.ravel()


def _run_matmul(mat1, mat2, mode: int, timeout_s) -> np.ndarray:
    """Run one padded tile multiplication on the accelerator in the given mode.

    Shared implementation behind matmul_OS / matmul_WS; see those functions
    for the argument and return contract.
    """
    mat1 = np.asarray(mat1)
    mat2 = np.asarray(mat2)
    if mat1.ndim != 2 or mat2.ndim != 2:
        raise ValueError(
            f"expected two 2-D matrices, got shapes {mat1.shape} and {mat2.shape}"
        )
    assert mat1.shape[1] == mat2.shape[0], (
        f"inner dimensions differ: {mat1.shape} @ {mat2.shape}"
    )

    # Validate and pad both operands before touching any hardware memory.
    a_words = _to_padded_words(mat1, "mat1")
    w_words = _to_padded_words(mat2, "mat2")

    # Load the operands into their BRAMs.
    bram_a_arr[0:a_words.size] = a_words
    bram_w_arr[0:w_words.size] = w_words

    # Program mode and (padded) problem size, then raise the start flag last.
    bram_sp_arr[_SP_MODE] = mode
    bram_sp_arr[_SP_M] = _TILE_DIM
    bram_sp_arr[_SP_K] = _TILE_DIM
    bram_sp_arr[_SP_N] = _TILE_DIM
    bram_sp_arr[_SP_START] = 1

    # Spin until the done flag reads 1, but give up after `timeout_s` so a
    # stalled accelerator cannot hang the kernel indefinitely.
    deadline = None if timeout_s is None else time.monotonic() + timeout_s
    while bram_sp_arr[_SP_DONE] != 1:
        if deadline is not None and time.monotonic() > deadline:
            raise TimeoutError(
                f"accelerator did not set the done flag within {timeout_s} s"
            )
    # Clear the done flag so the next run starts from a clean state.
    bram_sp_arr[_SP_DONE] = 0

    # Read the full result tile back as signed words and crop off the padding.
    tile = (
        bram_o_arr[0:_TILE_DIM * _TILE_DIM]
        .astype(np.int32)
        .reshape(_TILE_DIM, _TILE_DIM)
    )
    return tile[0:mat1.shape[0], 0:mat2.shape[1]]


def matmul_OS(mat1: np.ndarray, mat2: np.ndarray, timeout_s: float = 5.0) -> np.ndarray:
    """Multiply two small integer matrices on the accelerator in OS mode (mode word 1).

    Args:
        mat1: 2-D array-like of shape (m, k) with m, k <= 8.
        mat2: 2-D array-like of shape (k, n) with n <= 8.
        timeout_s: Seconds to wait for the done flag; None waits forever.

    Returns:
        int32 array of shape (m, n) read back from the output BRAM.

    Raises:
        AssertionError: if the inner dimensions of mat1 and mat2 differ.
        ValueError: if an operand is not 2-D or exceeds 8 in any dimension.
        TimeoutError: if the done flag is not set within `timeout_s`.
    """
    return _run_matmul(mat1, mat2, _MODE_OS, timeout_s)


def matmul_WS(mat1: np.ndarray, mat2: np.ndarray, timeout_s: float = 5.0) -> np.ndarray:
    """Multiply two small integer matrices on the accelerator in WS mode (mode word 0).

    Args:
        mat1: 2-D array-like of shape (m, k) with m, k <= 8.
        mat2: 2-D array-like of shape (k, n) with n <= 8.
        timeout_s: Seconds to wait for the done flag; None waits forever.

    Returns:
        int32 array of shape (m, n) read back from the output BRAM.

    Raises:
        AssertionError: if the inner dimensions of mat1 and mat2 differ.
        ValueError: if an operand is not 2-D or exceeds 8 in any dimension.
        TimeoutError: if the done flag is not set within `timeout_s`.
    """
    return _run_matmul(mat1, mat2, _MODE_WS, timeout_s)

In [3]:
import random

# Bounds for the random operand values drawn by the tests below: the lower
# bound is inclusive, the upper bound is exclusive (randint semantics).
# With an inner dimension of at most 8, |sum of products| <= 8 * 2**24 = 2**27,
# which fits comfortably in the int32 words read back from the accelerator.
VAL_MIN = -(1 << 12)
VAL_MAX = 1 << 12

def test_OS():
    score = 0
    num_test = 0
    
    for _ in range (100):
        num_test = num_test +1
        
        M = random.randint(1,8)
        K = random.randint(1,8)
        N = random.randint(1,8)
        
        mat1 = np.random.randint(VAL_MIN, VAL_MAX, size=(K,M)) #np.ones((K,M))#
        mat2 = np.random.randint(VAL_MIN, VAL_MAX, size=(M,N))#np.ones((M,N))#
        
        FPGA = matmul_OS(mat1, mat2)
        CPU = mat1@mat2
        
        if(np.equal(FPGA, CPU).all()):
            score = score + 1
            
    return score/num_test*100

def test_WS():
    score = 0
    num_test = 0
    
    for _ in range (100):
        num_test = num_test +1
        
        M = random.randint(1,8)
        K = random.randint(1,8)
        N = random.randint(1,8)
        
        mat1 = np.random.randint(VAL_MIN, VAL_MAX, size=(K,M)) #np.ones((K,M))#
        mat2 = np.random.randint(VAL_MIN, VAL_MAX, size=(M,N))#np.ones((M,N))#
        
        FPGA = matmul_WS(mat1, mat2)
        CPU = mat1@mat2
        
        if(np.equal(FPGA, CPU).all()):
            score = score + 1
            
    return score/num_test*100

In [4]:
def test_STEP1():
    ws_res = test_WS()
    os_res = test_OS()
    
    print(f"WS: {ws_res} / 100, OS: {os_res} / 100")
    print(f"SCORE : {(ws_res + os_res) * 0.5} / 100")

In [5]:
# Run the WS and OS test suites against the loaded overlay and print the scores.
test_STEP1()

WS: 100.0 / 100, OS: 100.0 / 100
SCORE : 100.0 / 100
