# Naive Implementation vs GPU code
This is for debugging purposes only, to see if the GPU implementation is correct or not.

In [1]:
# Imports
import sys
import os
import random
import time
import numpy as np
np.set_printoptions(precision=2, suppress=True)

# the underlying convRBM implementation
sys.path.append(os.path.abspath('../code'))
from convRBM import CRBM
import getData as dataRead

# biopython stuff
#import Bio.SeqIO as sio
#import Bio.motifs.matrix as mat
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
#from Bio import motifs
import math

ERROR (theano.sandbox.cuda): Failed to compile cuda_ndarray.cu: libcublas.so.7.0: cannot open shared object file: No such file or directory
ERROR:theano.sandbox.cuda:Failed to compile cuda_ndarray.cu: libcublas.so.7.0: cannot open shared object file: No such file or directory


Couldn't import dot_parser, loading of dot files will not be possible.


## Code for the naive cRBM
All formulas are the same as in the writing section of our git repo.
We implement all the basic functions: upward pass, downward pass, probabilistic max pooling, derivative calculation, Gibbs sampling, and a training procedure.

In [2]:
class NaiveCRBM:
    """Loop-based NumPy reference implementation of a convolutional RBM.

    The class exists to cross-check another implementation: every operation is
    written with explicit loops so it can be compared term by term. Speed is
    not a goal.

    Array layouts (fixed by the indexing used in the methods):
      * visible data : (samples, 1, 4, sequenceLength)
      * kernels      : (numberOfKernels, 1, 4, kernelLength)
      * hidden layer : (samples, numberOfKernels, 1, sequenceLength - kernelLength + 1)
    """

    def __init__ (self, motifLength=1, numMotifs=1, learningRate=0.1, poolingFactor=1):
        """Create the model and initialise kernels, hidden bias and visible bias.

        motifLength   -- number of columns of every kernel
        numMotifs     -- number of kernels
        learningRate  -- factor applied to the gradient difference in updateWeightsOnMinibatch
        poolingFactor -- number of neighbouring hidden units that share one pooling unit
        """
        self.numberOfKernels = numMotifs
        self.kernelLength = motifLength
        self.poolingFactor = poolingFactor
        self.learningRate = learningRate
        # zero initialisation keeps the comparison with other implementations deterministic
        self.setParamsToZero = True
        # when True, intermediate arrays are printed by the compute/update methods
        self.debug = True
        # when False, updateWeightsOnMinibatch only returns the updates without applying them
        self.updateWeights = True
        if self.setParamsToZero:
            self.kernels = np.zeros((self.numberOfKernels, 1, 4, self.kernelLength))
            self.bias = np.zeros(self.numberOfKernels)
            self.c = np.zeros(4)
        else:
            self.kernels = np.random.rand(self.numberOfKernels, 1, 4, self.kernelLength)
            self.bias = np.random.rand(self.numberOfKernels)
            self.c = np.random.rand(4)

    def setCustomKernels (self, kernels):
        """Replace the kernels by `kernels` (a float copy) and re-initialise the hidden bias.

        The number of kernels and the kernel length are taken from
        kernels.shape[0] and kernels.shape[3]. The visible bias `c` is left untouched.
        """
        self.numberOfKernels = kernels.shape[0]
        self.kernelLength = kernels.shape[3]
        self.kernels = kernels.astype(float)
        if self.setParamsToZero:
            self.bias = np.zeros(self.numberOfKernels)
        else:
            self.bias = np.random.rand(self.numberOfKernels)

    def initializeMotifs (self):
        """Placeholder; does nothing."""
        pass

    def complement (self, kernelSlice):
        """Return `kernelSlice` reversed along its first axis.

        NOTE(review): presumably this maps a column to its complementary strand
        (row order A,C,G,T reversed) -- confirm. It is currently only referenced
        from disabled code.
        """
        return kernelSlice[::-1]

    def computeHgivenV (self, data):
        """Upward pass followed by probabilistic max pooling.

        data -- visible layer, shape (samples, 1, 4, N_v)

        Returns [P, S], both of shape (samples, numberOfKernels, 1, N_v - kernelLength + 1):
          P -- activation probability of every hidden unit
          S -- sample of the hidden layer with at most one active unit per pooling unit

        Hidden positions beyond the last complete pooling unit (when the hidden
        length is not a multiple of poolingFactor) stay zero in both arrays.
        Strand handling (second, complemented kernel) is not performed.
        """
        numSamples = data.shape[0]
        N_h = data.shape[3]-self.kernelLength+1
        H = np.zeros((numSamples, self.numberOfKernels, 1, N_h))
        for sample in range(numSamples):
            for k in range(self.numberOfKernels):
                for n in range(N_h):
                    # cross-correlation of kernel k with the window starting at position n
                    activation = 0.0
                    for m in range(self.kernelLength):
                        activation += data[sample,0,:,n+m].dot(self.kernels[k,0,:,m])
                    # the bias belongs to the hidden unit, so it is added exactly once
                    # per unit (adding it per kernel column would scale it by kernelLength)
                    H[sample, k, 0, n] = activation + self.bias[k]

        if self.debug:
            print("Pre Sigmoid Hidden Layer:")
            print(H)

        # perform prob max pooling
        P = np.zeros(H.shape)
        S = np.zeros(H.shape)
        H_exp = np.exp(H)
        # floor division: only complete pooling units are processed
        numBins = N_h // self.poolingFactor
        for sample in range(numSamples):
            for k_pos in range(self.numberOfKernels):
                for unit in range(numBins):
                    start = unit * self.poolingFactor
                    stop = start + self.poolingFactor
                    unitExp = H_exp[sample, k_pos, 0, start:stop]
                    # the "+ 1" is the weight of the state in which no cell of the unit is on
                    denominator = unitExp.sum() + 1
                    unitProbs = unitExp / denominator
                    P[sample, k_pos, 0, start:stop] = unitProbs

                    # sample one state: one of the cells, or (last entry) "all off"
                    pvals = np.append(unitProbs, 1.0 / denominator)
                    draw = np.random.multinomial(n=1, pvals=pvals, size=1)
                    winner = np.argmax(draw)
                    if winner < self.poolingFactor:
                        S[sample, k_pos, 0, start + winner] = 1
        return [P,S]


    def computeVgivenH (self, H):
        """Downward pass: reconstruct and sample the visible layer from a hidden layer.

        H -- hidden layer, shape (samples, numberOfKernels, 1, N_h)

        Returns [P_V, V], both of shape (samples, 1, 4, N_h + kernelLength - 1):
          P_V -- softmax over the 4 rows for every column
          V   -- one-hot sample drawn column-wise from P_V

        NOTE(review): the visible bias `self.c` is not added to the pre-softmax
        activations here -- confirm that this is intended.
        """
        # calculate full convolution (not valid, therefore padding is applied with zeros)
        N_v = H.shape[3] + self.kernelLength - 1
        pad = self.kernelLength-1
        Y = np.zeros((H.shape[0],1,4,N_v))
        H_pad = np.pad(H,[(0,0),(0,0),(0,0),(pad, pad)], 'constant',constant_values=(0,0))
        for sample in range(H.shape[0]):
            for k in range(self.numberOfKernels):
                for n in range(N_v):
                    for m in range(self.kernelLength):
                        # H_pad[..., pad + j] holds H[..., j], so this reads H[..., n-m]
                        Y[sample,0,:,n] += self.kernels[k,0,:,m] * H_pad[sample,k,0,pad+n-m]

        # calculate softmax on convolved data
        P_V = self.softmax(Y)

        # sample the visible layer from probabilities, one column at a time
        V = np.zeros(P_V.shape)
        for sample in range(P_V.shape[0]):
            for col in range(P_V.shape[3]):
                V[sample,0,:,col] = np.random.multinomial(n=1,pvals=P_V[sample,0,:,col],size=1)[0]

        return [P_V, V]


    def collectUpdateStatistics (self, H, data):
        """Compute the statistics used for the parameter updates.

        H    -- hidden layer (probabilities or samples), shape (samples, numberOfKernels, 1, N_h)
        data -- visible layer, shape (samples, 1, 4, N_h + kernelLength - 1)

        Returns [G, der_bias, der_c]:
          G        -- same shape as the kernels; correlation of data and H, summed
                      over all samples and hidden positions (not averaged)
          der_bias -- mean of H over positions and samples, one value per kernel
          der_c    -- mean of data over positions and samples, one value per row
        """
        G = np.zeros(self.kernels.shape)
        for sample in range(data.shape[0]):
            for k in range(self.numberOfKernels):
                for n_h in range(H.shape[3]):
                    for m in range(self.kernelLength):
                        G[k,0,:,m] += data[sample,0,:,n_h+m] * H[sample,k,0,n_h]

        der_bias = np.mean(np.mean(H, axis=3), axis=0).reshape(-1)
        der_c = np.mean(np.mean(data, axis=3), axis=0).reshape(-1)
        return [G, der_bias, der_c]

    def updateWeightsOnMinibatch (self, D, numOfCDs):
        """Run contrastive divergence on one minibatch.

        D        -- minibatch of visible data, shape (samples, 1, 4, N_v)
        numOfCDs -- number of Gibbs steps (down + up pass); must be at least 1

        Returns the tuple (new_kernels, new_bias, new_c): the learning-rate
        scaled differences between data and model statistics. They are added to
        the parameters only when self.updateWeights is True.

        Raises ValueError if numOfCDs is smaller than 1, because no model
        statistics exist without at least one Gibbs step.
        """
        if numOfCDs < 1:
            raise ValueError("numOfCDs must be at least 1")

        # calculate the data gradient for weights (motifs) and bias
        [P_H_data, H_data] = self.computeHgivenV(D)
        if self.debug:
            print("Hidden Layer Probabilities:")
            print(P_H_data)
            print("Hidden Layer Sample:")
            print(H_data)

        # calculate data gradients
        [G_motif_data, G_bias_data, G_c_data] = self.collectUpdateStatistics(P_H_data, D)

        if self.debug:
            print("Data gradient for motifs")
            print(G_motif_data)

        # calculate model probs: the chain starts from the hidden sample of the data
        H = H_data
        for i in range(numOfCDs):
            [P_V, V] = self.computeVgivenH(H)
            if self.debug:
                print("Visible Sample for CD " + str(i))
                print(V)
            [P_H_model, H] = self.computeHgivenV(V)

        # compute the model gradients
        [G_motif_model, G_bias_model, G_c_model] = self.collectUpdateStatistics(P_H_model, V)

        if self.debug:
            print("Model gradient for motifs:")
            print(G_motif_model)

        # update the parameters
        new_kernels = self.learningRate * (G_motif_data - G_motif_model)
        new_bias = self.learningRate * (G_bias_data - G_bias_model)
        new_c = self.learningRate * (G_c_data - G_c_model)

        if self.updateWeights:
            self.kernels += new_kernels
            self.bias += new_bias
            self.c += new_c

        return (new_kernels, new_bias, new_c)


    def trainModel (self, trainData, epochs, batchSize, numOfCDs):
        """Train for `epochs` passes over `trainData`, in consecutive minibatches of `batchSize`.

        Samples that do not fill a complete minibatch at the end are not used.
        """
        # floor division keeps `iterations` an integer usable by range()
        iterations = trainData.shape[0] // batchSize
        for epoch in range(epochs):
            for batchIdx in range(iterations):
                self.updateWeightsOnMinibatch(trainData[batchIdx*batchSize:(batchIdx+1)*batchSize], numOfCDs)

    def softmax (self, x):
        """Softmax along axis 2 of `x`.

        The per-column maximum is subtracted before exponentiating; this leaves
        the result mathematically unchanged but avoids overflow for large inputs.
        """
        shifted = x - np.max(x, axis=2, keepdims=True)
        exponentials = np.exp(shifted)
        return exponentials / exponentials.sum(axis=2, keepdims=True)

## Construct toy data to test the code

In [3]:
# --- toy motifs ---------------------------------------------------------------
# Every kernel is a (1, 4, 3) array: 4 rows and 3 motif positions. The "_"
# variants are the same motif flipped along both axes (rotated by 180 degrees).
# NOTE(review): presumably that is the reverse complement for A,C,G,T row order -- confirm.
kernel1 = np.tile(np.array([[1,0,0],[0,1,0],[0,0,1],[0,0,0]]), [1,1,1])
kernel1_ = np.tile(np.flipud(np.fliplr(kernel1[0])),[1,1,1])
kernel2 = np.tile(np.array([[0,0,0],[0,0,0],[1,1,1],[0,0,0]]), [1,1,1])
kernel2_ = np.tile(np.flipud(np.fliplr(kernel2[0])), [1,1,1])
kernel3 = np.random.rand(1,4,3)
kernel3_ = np.tile(np.flipud(np.fliplr(kernel3[0])), [1,1,1])
# only the first motif pair is used; the others are kept for experiments
kernel = np.array([kernel1, kernel1_])#, kernel2, kernel2_])#, kernel3, kernel3_])
#kernel = np.array([kernel3, kernel3_])
print("Kernel: " + str(kernel))

# initialize the data
randSeq1 = dataRead.getOneHotSeq(Seq("ACGTGGGG", IUPAC.unambiguous_dna))
randSeq2 = dataRead.getOneHotSeq(Seq("ACGTACGT", IUPAC.unambiguous_dna))
# the minibatch consists of the first toy sequence only
data = np.array([randSeq1], dtype=np.float32)
print("Data shape: " + str(data.shape))
print(data)
print(kernel.shape)

#initialize the learner and set custom kernels
# NOTE(review): 'number_of_motifs' is 1 although `kernel` stacks two motifs;
# NaiveCRBM.setCustomKernels takes the count from the kernel array -- verify
# that the GPU model does the same.
hyper_params = {'number_of_motifs':1,
                'motif_length':3,
                'learning_rate':0.1,
                'pooling_factor':1,
                'epochs':100,
                'cd_k':1,
                'batch_size':1
}
naiveModel = NaiveCRBM(motifLength=hyper_params['motif_length'],
                       numMotifs=hyper_params['number_of_motifs'],
                       learningRate=hyper_params['learning_rate'],
                       poolingFactor=hyper_params['pooling_factor'])

gpuModel = CRBM(hyper_params)
gpuModel.setToZero = True
# set parameters
naiveModel.setCustomKernels(kernel)
gpuModel.setCustomKernels(kernel)
gpuModel.batchSize = 1
# printHyperParams() does its own printing and returns None, so it is called
# bare; wrapping it in print only emitted a stray "None" line
gpuModel.printHyperParams()
gpuModel.debug = False
naiveModel.debug = False

Kernel: [[[[1 0 0]
   [0 1 0]
   [0 0 1]
   [0 0 0]]]


 [[[0 0 0]
   [1 0 0]
   [0 1 0]
   [0 0 1]]]]
Data shape: (1, 1, 4, 8)
[[[[ 1.  0.  0.  0.  0.  0.  0.  0.]
   [ 0.  1.  0.  0.  0.  0.  0.  0.]
   [ 0.  0.  1.  0.  1.  1.  1.  1.]
   [ 0.  0.  0.  1.  0.  0.  0.  0.]]]]
(2, 1, 4, 3)
New motifs set. # Motifs: 1 K-mer-Length: 3
{'batch_size': 1,
 'cd_k': 1,
 'epochs': 100,
 'learning_rate': 0.1,
 'motif_length': 3,
 'number_of_motifs': 1,
 'pooling_factor': 1}
None


In [12]:
# Read the sequences from the DNase peak FASTA file.
fastaPath = '../data/wgEncodeAwgDnaseUwAg10803UniPk.fa'
seqReader = dataRead.SeqReader()
allSeqs = seqReader.readSequencesFromFile(fastaPath)

skip sequence containing N


In [13]:
# Draw 10 sequences at random (with replacement) to form a small real-data batch.
pickedSeqs = []
for draw in range(10):
    pickedSeqs.append(allSeqs[random.randrange(0, len(allSeqs))])
realData = np.array(pickedSeqs)
print(realData.shape)

(10, 1, 4, 150)


## Test both the GPU and the naive variant of the code
The test scenarios are the following:
* **Upward pass**
* **Downward pass**
* **Calculation of Derivatives**

In [10]:
import theano
import theano.tensor as T
import theano.tensor.nnet.conv as conv

# Build the symbolic graphs of the GPU model and compile them into callables.
print("Compiliing theano functions...")

def compileTheano(inputs, outputs):
    """Compile a theano function that accepts inputs of higher precision."""
    return theano.function(inputs, outputs, allow_input_downcast=True)

# forward: visible data -> hidden probabilities and hidden sample
D = T.tensor4('data')
P_H, H = gpuModel.computeHgivenV(D)
forward = compileTheano([D], [P_H, H])

# backward: hidden layer -> visible probabilities and visible sample
H = T.tensor4('Hidden')
P_V, V = gpuModel.computeVgivenH(H)
backward = compileTheano([H], [P_V, V])

# gradient: statistics from hidden probabilities and data
H = T.tensor4('Hidden Probabilities')
D = T.tensor4('Data')
G_m, G_b, G_c = gpuModel.collectUpdateStatistics(H, D)
gradient = compileTheano([H, D], [G_m, G_b, G_c])

# gibbs sampler (up, down, sample)
D = T.tensor4('data')
P_H, H = gpuModel.computeHgivenV(D)
P_V, V = gpuModel.computeVgivenH(H)
gibbs = compileTheano([D], V)

# Compare naive and GPU results stage by stage. The naive hidden sample and the
# naive hidden probabilities are fed to both implementations so that the later
# stages see identical inputs.
separator = "----------------------------"

print("Starting forward pass test:")
print(separator)
P_naive, S_n = naiveModel.computeHgivenV(data)
P_GPU, S_g = forward(data)
forwardError = np.abs(P_naive - P_GPU).mean()
print("ERROR MADE: " + str(forwardError))
print("")

print("Starting backward pass test:")
print(separator)
P_V_naive, V_naive = naiveModel.computeVgivenH(S_n)
P_V_gpu, V_gpu = backward(S_n)
backwardError = np.abs(P_V_naive - P_V_gpu).sum()
print("ERROR MADE: " + str(backwardError))
print("")

print("Starting Gradient pass test:")
print(separator)
G_M_naive, G_b_naive, G_c_naive = naiveModel.collectUpdateStatistics(P_naive, data)
G_M_gpu, G_b_gpu, G_c_gpu = gradient(P_naive, data)
print("ERROR MADE (Motifs): " + str(np.abs(G_M_naive - G_M_gpu).sum()))
print("ERROR MADE (Bias): " + str(np.abs(G_b_naive - G_b_gpu).sum()))
print("ERROR MADE (c): " + str(np.abs(G_c_naive - G_c_gpu).sum()))

Compiliing theano functions...
Starting forward pass test:
----------------------------
ERROR MADE: 1.22203293133e-08

Starting backward pass test:
----------------------------
ERROR MADE: 2.82151124555e-07

Starting Gradient pass test:
----------------------------
ERROR MADE (Motifs): 2.38418579102e-07
ERROR MADE (Bias): 4.43088736812e-08
ERROR MADE (c): 0.0


## Test the gibbs sampler

In [15]:
print("Starting Gibbs Sampling test:")
print("----------------------------")
#data = np.array([allSeqs[random.randrange(0,len(allSeqs))] for i in range(1)])

# Average many one-step Gibbs samples per implementation; the averages
# approximate the visible probabilities and can be compared directly.
precision = 100
V_naive_acc = np.zeros(realData.shape)
V_gpu_acc = np.zeros(realData.shape)
for step in range(precision):
    # naive: up pass (keep the sample), then down pass (keep the sample)
    hiddenSample = naiveModel.computeHgivenV(realData)[1]
    V_naive = naiveModel.computeVgivenH(hiddenSample)[1]
    V_gpu = gibbs(realData)
    V_naive_acc += V_naive
    V_gpu_acc += V_gpu
    if step > 0 and step % 100 == 0:
        print("100 iterations done")

V_naive_acc /= precision
V_gpu_acc /= precision
gibbsError = np.abs(V_naive_acc - V_gpu_acc).mean()
print("ERROR MADE: " + str(gibbsError))

Starting Gibbs Sampling test:
----------------------------
ERROR MADE: 0.04696


## Compare training procedures of both methods
First, train both models on the same data for multiple epochs and compare the learned motifs. Both should converge to the same result after enough epochs.

In [17]:
# Fresh naive model with the toy kernels and silent debugging.
naiveSettings = {
    'motifLength': hyper_params['motif_length'],
    'numMotifs': hyper_params['number_of_motifs'],
    'learningRate': hyper_params['learning_rate'],
    'poolingFactor': hyper_params['pooling_factor'],
}
naiveModel = NaiveCRBM(**naiveSettings)
naiveModel.setCustomKernels(kernel)
naiveModel.debug = False

# Fresh GPU model configured the same way.
gpuModel = CRBM(hyper_params)
gpuModel.setToZero = True
gpuModel.setCustomKernels(kernel)
gpuModel.batchSize = 1
gpuModel.printHyperParams()
gpuModel.debug = False

# Train both models on the same toy data, the naive one first.
naiveModel.trainModel(data,
                      hyper_params['epochs'],
                      hyper_params['batch_size'],
                      hyper_params['cd_k'])
print("DONE WITH NAIVE---------")
gpuModel.trainModel(data)

# Compare the learned motifs element-wise.
new_motifs_gpu = gpuModel.motifs.get_value()
new_motifs_naive = naiveModel.kernels
motifError = np.abs(new_motifs_gpu - new_motifs_naive).mean()
print("ERROR MADE (motifs): " + str(motifError))

New motifs set. # Motifs: 1 K-mer-Length: 3
{'batch_size': 1,
 'cd_k': 1,
 'epochs': 100,
 'learning_rate': 0.1,
 'motif_length': 3,
 'number_of_motifs': 1,
 'pooling_factor': 1}
DONE WITH NAIVE---------
BatchSize: 1
Num of iterations per epoch: 1
Start compiling Theano training function...
Compilation of Theano training function finished in 4.93964099884 seconds
Start training the model...
[Epoch 0] done!
[Epoch 1] done!
[Epoch 2] done!
[Epoch 3] done!
[Epoch 4] done!
[Epoch 5] done!
[Epoch 6] done!
[Epoch 7] done!
[Epoch 8] done!
[Epoch 9] done!
[Epoch 10] done!
[Epoch 11] done!
[Epoch 12] done!
[Epoch 13] done!
[Epoch 14] done!
[Epoch 15] done!
[Epoch 16] done!
[Epoch 17] done!
[Epoch 18] done!
[Epoch 19] done!
[Epoch 20] done!
[Epoch 21] done!
[Epoch 22] done!
[Epoch 23] done!
[Epoch 24] done!
[Epoch 25] done!
[Epoch 26] done!
[Epoch 27] done!
[Epoch 28] done!
[Epoch 29] done!
[Epoch 30] done!
[Epoch 31] done!
[Epoch 32] done!
[Epoch 33] done!
[Epoch 34] done!
[Epoch 35] done!
[Epo

### Now, test both training procedures in another way.
We no longer apply the gradients to the model; we only compare the gradients that both implementations compute for the data.
Averaged over enough iterations, these gradients should be more or less the same.
Because of the sampling, some error will of course remain.

In [19]:
#data = np.array([allSeqs[random.randrange(0,len(allSeqs))] for i in range(1)])

# Fresh naive model; it only reports its updates and never applies them.
naiveSettings = {
    'motifLength': hyper_params['motif_length'],
    'numMotifs': hyper_params['number_of_motifs'],
    'learningRate': hyper_params['learning_rate'],
    'poolingFactor': hyper_params['pooling_factor'],
}
naiveModel = NaiveCRBM(**naiveSettings)
naiveModel.setCustomKernels(kernel)
naiveModel.debug = False
naiveModel.updateWeights = False

# Fresh GPU model configured the same way.
gpuModel = CRBM(hyper_params)
gpuModel.setToZero = True
gpuModel.setCustomKernels(kernel)
gpuModel.batchSize = 1
gpuModel.printHyperParams()
gpuModel.debug = False

# compile theano function: each update entry is indexed as [0] = current value,
# [1] = new value, so their difference is the step taken for that parameter
D = T.tensor4('data')
updates = gpuModel.updateWeightsOnMinibatch(D, 1)
der_m, der_bias, der_c = [updates[idx][1] - updates[idx][0] for idx in range(3)]
train = theano.function([D], [der_m, der_bias, der_c], allow_input_downcast=True)

precision = 10000

# running sums of the per-iteration updates (motifs, bias, c) per implementation
der_m_naive = np.zeros(kernel.shape)
der_bias_naive = np.zeros(naiveModel.bias.shape)
der_c_naive = np.zeros(naiveModel.c.shape)
der_m_gpu = np.zeros(kernel.shape)
der_bias_gpu = np.zeros(gpuModel.bias.get_value().shape)
der_c_gpu = np.zeros(gpuModel.c.get_value().shape)
naiveSums = [der_m_naive, der_bias_naive, der_c_naive]
gpuSums = [der_m_gpu, der_bias_gpu, der_c_gpu]

for step in range(precision):
    # naive
    for total, part in zip(naiveSums, naiveModel.updateWeightsOnMinibatch(data, 1)):
        total += part
    # gpu
    for total, part in zip(gpuSums, train(data)):
        total += part

    if step > 0 and step % 100 == 0:
        print("100 iterations done")

# turn the sums into averages (in place, so the der_* names see the result)
for total in naiveSums + gpuSums:
    total /= precision

print("ERROR MADE (motifs): " + str(np.abs(der_m_naive - der_m_gpu).mean()))
print("ERROR MADE (bias): " + str(np.abs(der_bias_naive - der_bias_gpu).mean()))
print("ERROR MADE (c): " + str(np.abs(der_c_naive - der_c_gpu).mean()))

New motifs set. # Motifs: 1 K-mer-Length: 3
{'batch_size': 1,
 'cd_k': 1,
 'epochs': 100,
 'learning_rate': 0.1,
 'motif_length': 3,
 'number_of_motifs': 1,
 'pooling_factor': 1}
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
100 iterations done
1

## Test the sampling methods of both implementations
To do that, both implementations have to be sampled many times so that the averaged samples can be compared.

In [21]:
precision = 10000
naiveModel.debug = False

print("Start test of sampling for prob max pooling")
print("----------------------------")
P_naive, S_n = naiveModel.computeHgivenV(data)

# Average the hidden samples of both implementations; the averages estimate
# the hidden activation probabilities and should agree.
hiddenShape = (data.shape[0],
               naiveModel.numberOfKernels,
               1,
               data.shape[3] - naiveModel.kernelLength + 1)
S_naive = np.zeros(hiddenShape)
S_GPU = np.zeros(P_naive.shape)
for step in range(precision):
    P_n, S_n = naiveModel.computeHgivenV(data)
    P_g, S_g = forward(data)
    S_naive += S_n
    S_GPU += S_g
S_naive /= precision
S_GPU /= precision

samplingError = np.abs(S_naive - S_GPU).mean()
print("ERROR MADE: " + str(samplingError))

Start test of sampling for prob max pooling
----------------------------
ERROR MADE: 0.006525


In [24]:
# Stack every loaded sequence into one array so it can be sliced and averaged.
x = np.array(allSeqs)

In [25]:
# Display the dimensions of the stacked sequence array.
x.shape

(171274, 1, 4, 150)

In [37]:
# Average content of each of the 4 rows over the first two sequences.
firstTwo = x[:2]
channelSummed = np.sum(firstTwo, axis=1)      # drop the singleton axis 1
perSequence = np.mean(channelSummed, axis=2)  # average over sequence positions
print(np.mean(perSequence, axis=0))           # average over the two sequences

[ 0.19  0.24  0.31  0.26]


In [32]:
# Per-position mean of row 0 (axis 2) over the first 100 sequences.
# NOTE(review): presumably row 0 is the 'A' channel -- confirm against getOneHotSeq.
xa = x[:100, :, 0, :]
print(xa.mean(axis=0))

[[ 0.17  0.21  0.21  0.22  0.23  0.19  0.21  0.25  0.21  0.23  0.16  0.21
   0.24  0.17  0.16  0.22  0.19  0.22  0.27  0.18  0.22  0.21  0.19  0.16
   0.13  0.16  0.26  0.18  0.16  0.2   0.22  0.21  0.17  0.19  0.21  0.22
   0.22  0.19  0.16  0.2   0.17  0.21  0.24  0.19  0.19  0.18  0.2   0.29
   0.18  0.18  0.25  0.2   0.17  0.15  0.19  0.16  0.23  0.18  0.23  0.14
   0.21  0.15  0.19  0.22  0.23  0.2   0.22  0.16  0.23  0.23  0.14  0.23
   0.21  0.22  0.19  0.17  0.16  0.17  0.21  0.18  0.13  0.19  0.21  0.18
   0.16  0.18  0.21  0.19  0.17  0.13  0.21  0.19  0.19  0.22  0.15  0.25
   0.15  0.21  0.17  0.26  0.21  0.19  0.2   0.21  0.21  0.31  0.17  0.2
   0.17  0.19  0.21  0.2   0.28  0.2   0.14  0.18  0.2   0.22  0.21  0.26
   0.22  0.22  0.19  0.18  0.17  0.19  0.22  0.23  0.13  0.22  0.19  0.25
   0.23  0.16  0.25  0.17  0.16  0.12  0.22  0.2   0.17  0.16  0.15  0.16
   0.24  0.12  0.22  0.26  0.27  0.2 ]]
