# Count DNA Nucleotides accelerator
Count the frequency of adenine (A), cytosine(C), guanine(G) and thymine(T) nucleotides in the DNA reference string of length 1024 using (1) software and (2) hardware implementation and compare their timing performance


## **Count DNA using software implementation

#### Load DNA reference string

In [1]:
fname="dnaref1024.txt"
f = open(fname, 'r')
dnaref = f.readline().strip()

#### Copy DNA string to numpy array

In [2]:
import numpy as np
dna=np.fromstring(dnaref, dtype=np.uint8)
n=len(dna)

#### Verify DNA string length

In [3]:
print(n)

1024


#### Get the frequency count of A,C,G,T nucleotides

In [4]:
import time
start_time = time.time()
sw_a = 0
sw_c = 0
sw_g = 0
sw_t = 0
for i in dna:
    if i == 65:
        sw_a +=1
    elif i == 67:
        sw_c +=1
    elif i==71:
        sw_g +=1
    elif i == 84:
        sw_t +=1
stop_time = time.time()
sw_exec_time = stop_time - start_time
print('Software FIR execution time: ',sw_exec_time)
print("Frequency of A = {:,}".format(sw_a))
print("Frequency of C = {:,}".format(sw_c))
print("Frequency of G = {:,}".format(sw_g))
print("Frequency of T = {:,}".format(sw_t))
print("Total count = {:,}".format(sw_a+sw_c+sw_g+sw_t))

Software FIR execution time:  0.08093810081481934
Frequency of A = 258
Frequency of C = 264
Frequency of G = 237
Frequency of T = 265
Total count = 1,024


## **Hardware count DNA implementation

In [5]:
from pynq import Overlay
import pynq.lib.dma

# Load the overlay
overlay = Overlay('/home/xilinx/pynq/overlays/countdna/countdna.bit')

In [6]:
# Load the IP
dma = overlay.counting.dma
cnt=overlay.counting.countdna

In [7]:
# check initial value
print(cnt.read(0x10)) # frequency count of A
print(cnt.read(0x18)) # frequency count of C
print(cnt.read(0x20)) # frequency count of G
print(cnt.read(0x28)) # frequency count of T


0
0
0
0


In [8]:
from pynq import Xlnk
import numpy as np

# Allocate contiguious buffer for DMA transfer
xlnk = Xlnk()
in_buffer = xlnk.cma_array(shape=(n,), dtype=np.uint8)

# check buffer status
xlnk.cma_stats() 

{'Buffer Count': 1,
 'CMA Memory Available': 133910528,
 'CMA Memory Usage': 1024}

In [9]:
# Copy the DNA string to the in_buffer
np.copyto(in_buffer,dna)

# Trigger the DMA transfer and wait for the result
import time
start_time = time.time()
dma.sendchannel.start()
dma.sendchannel.transfer(in_buffer)
cnt.write(0x00,0x01)
dma.sendchannel.wait()
cnt.write(0x00,0)
stop_time = time.time()
hw_exec_time = stop_time-start_time
print('Hardware FIR execution time: ',hw_exec_time)
print('Hardware acceleration factor: ',sw_exec_time / hw_exec_time)
print("Frequency of A = {:,}".format(cnt.read(0x10)))
print("Frequency of C = {:,}".format(cnt.read(0x18)))
print("Frequency of G = {:,}".format(cnt.read(0x20)))
print("Frequency of T = {:,}".format(cnt.read(0x28)))
print("Total count = {:,}".format(cnt.read(0x10)+cnt.read(0x18)+cnt.read(0x20)+cnt.read(0x28)))


Hardware FIR execution time:  0.0022177696228027344
Hardware acceleration factor:  36.49526983444421
Frequency of A = 258
Frequency of C = 264
Frequency of G = 237
Frequency of T = 265
Total count = 1,024


In [10]:
# free up buffer
in_buffer.freebuffer()
xlnk.xlnk_reset()
xlnk.cma_stats()

{'Buffer Count': 0, 'CMA Memory Available': 133910528, 'CMA Memory Usage': 0}

## Driver for CountDNA accelerator

To hide the details of the DMA from the programmer, and make the accelerator easier to use, we create a driver for it. Below is the driver that will be associated with the counting hierarchy created in Vivado, consisting of the CountDNA IP and the DMA.


In [11]:
from pynq import DefaultHierarchy

class CountDriver(DefaultHierarchy):
    def __init__(self, description):
        super().__init__(description)

    def count(self, data):
        with xlnk.cma_array(shape=(len(data),), dtype=np.uint8) as in_buffer:
            np.copyto(in_buffer,data)
            self.dma.sendchannel.start()
            self.dma.sendchannel.transfer(in_buffer)
            self.countdna.write(0x00,0x01)
            self.dma.sendchannel.wait()
            self.countdna.write(0x00,0)
            resultA = self.countdna.read(0x10)
            resultC = self.countdna.read(0x18)
            resultG = self.countdna.read(0x20)
            resultT = self.countdna.read(0x28)
            in_buffer.freebuffer()
            xlnk.xlnk_reset()
        return resultA, resultC, resultG, resultT

    
    @staticmethod
    def checkhierarchy(description):
        if 'countdna' in description['ip']\
           and 'dma' in description['ip']:
            return True
        return False

In [12]:
from pynq import Overlay
import pynq.lib.dma

overlay = Overlay('/home/xilinx/pynq/overlays/countdna/countdna.bit')

# Run the hardware solution using driver and measure the runtime
start_time = time.time()
resA, resC, resG, resT = overlay.counting.count(dna)

stop_time = time.time()
print('Hardware FIR execution time (with driver): ',stop_time-start_time)
print('Hardware acceleration factor: ',sw_exec_time / hw_exec_time)
print("Frequency of A = {:,}".format(resA))
print("Frequency of C = {:,}".format(resC))
print("Frequency of G = {:,}".format(resG))
print("Frequency of T = {:,}".format(resT))
print("Total count = {:,}".format(resA+resC+resG+resT))


Hardware FIR execution time (with driver):  0.010500669479370117
Hardware acceleration factor:  36.49526983444421
Frequency of A = 258
Frequency of C = 264
Frequency of G = 237
Frequency of T = 265
Total count = 1,024


In [13]:
from IPython.display import Markdown as md
k = sw_exec_time / hw_exec_time
md("### Result shows that hardware implementation is {:.2f}x faster than software implementation".format(k))

### Result shows that hardware implementation is 36.50x faster than software implementation