In [1]:
import sys 
import numpy as np
import math
import numba
from numba import cuda, int64, jit
from numba.cuda import random as rnd
from timeit import default_timer as timer
from scipy.stats import binom, poisson
import pickle as pkl
import os
os.system("ml CUDA")

0

Here we have a little helper function for transforming our binary encoded genomes into a more human readable version.

In [2]:
@jit
def transformAlignment(genotypes):
    N=int(genotypes.shape[0])
    L=int(genotypes.shape[1])
    alignment=[]
    for pos in range(0,L*64,2):
        posInt = pos//64
        posInInt = pos%64
        variant=[]
        for i in range(N):
            if int(genotypes[i,posInt]) & 2**posInInt == 0 and int(genotypes[i,posInt]) & 2**(posInInt+1) == 0:
                 variant.append("A")
            elif int(genotypes[i,posInt]) & 2**posInInt == 0 and int(genotypes[i,posInt]) & 2**(posInInt+1) > 0:
                 variant.append("C")
            elif int(genotypes[i,posInt]) & 2**posInInt > 0 and int(genotypes[i,posInt]) & 2**(posInInt+1) == 0:
                 variant.append("G")
            else:
                 variant.append("T")
        alignment.append(variant)
    return(alignment)

In [3]:
@cuda.jit
def sumF(fitnessarr,F):
    F[0]=0
    for i in range(fitnessarr.shape[0]):
        F[0]+=fitnessarr[i]

Let's see how we can use ints and bit operations to store and manipulate large amounts of data on a GPU. Our genome is made up of letters of a 4 letter alphabet (ACTG). Most efficient storage is using two bits per letter. For mutations we would like to get a transition:transversion ratio of 1:3. 

In [4]:
@cuda.jit
def mutate(rng_states,gen,genotypes,reference,fitnessarr,F,s):
    cellID = cuda.grid(1)
    genoblocks = genotypes.shape[1]/2
    L=64*genoblocks
    N=genotypes.shape[0]
    targetoffset = 0
    if cellID < N:
        #add mutations
        numberMutations=5
        for i in range(numberMutations):
            #for simplicity and demonstration purposes we cluster all mutations in generation gen in the genth int (bitblock) and mutate the first five letters
            posInt = cellID
            posInInt = 2*(5*gen+i)%32
            #this will give a transition:transversion ratio of 1:3 on average
            r=((cellID+i*N/4)%N)/N
            if r < 1/4:
                #transverion (first bit with probability 1/2)
                if r < 1/8:
                    genotypes[cellID,posInt+targetoffset]=genotypes[cellID,posInt+targetoffset] ^ 2**posInInt
                #second bit has to be flipped
                genotypes[cellID,posInt+targetoffset]=genotypes[cellID,posInt+targetoffset] ^ 2**(posInInt+1)
            else:
                #transition (00<->10 or 01<->11)
                genotypes[cellID,posInt+targetoffset]=genotypes[cellID,posInt+targetoffset] ^ 2**posInInt
        #not we are gonna count the mutatations, i.e., mismatches with the reference
        snpCount=0
        for posInt in range(genoblocks):
            intAnd=(genotypes[cellID,posInt+targetoffset] ^ reference[posInt])
            for posInInt in range(0,64,2):
                if intAnd & 2**posInInt > 0 or intAnd & 2**(posInInt+1) > 0:
                    snpCount+=1
        fitnessarr[cellID]=s**snpCount
    cuda.syncthreads()

In [5]:
L=10000
n=64*L

threadsperblock = 20
blocks = 20

N=threadsperblock*blocks
s=0.999
SEED=1
NUMCYCLES=8*N

genotypes = np.array(np.reshape(np.array(list(np.random.randint(0,2**64, L*2,dtype=np.uint64))*2*N),(N,L*4)), dtype=np.uint64)
devgenotypes = cuda.to_device(genotypes)
reference = np.copy(genotypes[0,:])
devreference = cuda.to_device(reference)

fitnessarr = np.ones(N)
F=np.sum(fitnessarr)
devfitnessarr=cuda.to_device(fitnessarr)
rng_states = rnd.create_xoroshiro128p_states(threadsperblock * blocks, seed=SEED)
F=cuda.device_array((1))
sumF[1,1](devfitnessarr,F)

In [6]:
start = timer()
mutate[blocks, threadsperblock](rng_states,0,devgenotypes,devreference,devfitnessarr,F,s)
sumF[1,1](devfitnessarr,F)
evolve_time=timer()-start
print("first iteration: "+str(evolve_time))
fitnessarr = devfitnessarr.copy_to_host()
print(fitnessarr[:10])
genotypes = devgenotypes.copy_to_host()
alignment = transformAlignment(genotypes[:20,:3])
x=[print("".join(row)) for row in np.array(alignment).T]

first iteration: 0.35143856797367334
[0.99500999 0.99500999 0.99500999 0.99500999 0.99500999 0.99500999
 0.99500999 0.99500999 0.99500999 0.99500999]
GATTTACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCAGGTTTGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACAAGCACTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGG

In [7]:
start = timer()
mutate[blocks, threadsperblock](rng_states,1,devgenotypes,devreference,devfitnessarr,F,s)
sumF[1,1](devfitnessarr,F)
evolve_time=timer()-start
print("second iteration: "+str(evolve_time))
fitnessarr = devfitnessarr.copy_to_host()
print(fitnessarr[:10])
genotypes = devgenotypes.copy_to_host()
alignment = transformAlignment(genotypes[:20,:3])
x=[print("".join(row)) for row in np.array(alignment).T]

second iteration: 0.0010627079755067825
[0.99004488 0.99004488 0.99004488 0.99004488 0.99004488 0.99004488
 0.99004488 0.99004488 0.99004488 0.99004488]
GATTTTTGGATAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCAGGTTTCATGACAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACAAGCAGCAACCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCT

In [8]:
start = timer()
for i in range(2,102):
    mutate[blocks, threadsperblock](rng_states,i,devgenotypes,devreference,devfitnessarr,F,s)
    sumF[1,1](devfitnessarr,F)
evolve_time=timer()-start
print("100 more iterations: "+str(evolve_time))
fitnessarr = devfitnessarr.copy_to_host()
print(fitnessarr[:10])
genotypes = devgenotypes.copy_to_host()
alignment = transformAlignment(genotypes[:20,:3])
x=[print("".join(row)) for row in np.array(alignment).T]

100 more iterations: 0.04910828825086355
[0.99202794 0.99202794 0.99202794 0.99202794 0.99202794 0.99202794
 0.99202794 0.99202794 0.99202794 0.99202794]
AGCAACCACTGAACCAACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCAAACAATGCCTAAGCAATCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACGGAGTATGTGAGGCGAGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTCTGGGCGGACTGTCTTTCAGGACTAGA
CGCCAACAATTAAAGTACGCTAGTTCTAAGCACACCAGGCATCAGATTTCATAAGTGTGTGGACTGATTC