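# Naive reference implementation of the convolutional RBM
This notebook implements a naive, loop-based version of the convolutional RBM. It is for debugging purposes only: its output serves as a reference to check whether the GPU implementation is correct.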

In [2]:
# Imports
import sys
import os
import random
import time
import numpy as np

# the underlying convRBM implementation
sys.path.append(os.path.abspath('../code'))
#from convRBM import CRBM
import getData as dataRead

# biopython stuff
#import Bio.SeqIO as sio
#import Bio.motifs.matrix as mat
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
#from Bio import motifs

In [52]:
class NaiveCRBM(object):
    """Naive, loop-based convolutional RBM used as a debugging reference.

    Kernels are stored as a 4-D array of shape
    (numberOfKernels, 1, 4, kernelLength); the bias holds one value per
    kernel. Only the forward pass is implemented; the remaining methods
    are placeholders.
    """

    def __init__(self, motifLength=1, numMotifs=1, learningRate=0.1,
                 poolingFactor=1, setParamsToZero=True):
        """Create the model parameters.

        motifLength     -- length of each kernel (last kernel axis)
        numMotifs       -- number of kernels
        learningRate    -- stored for later use; not used by the forward pass
        poolingFactor   -- stored for later use; not used by the forward pass
        setParamsToZero -- if True (default) kernels and bias start at zero,
                           otherwise they are drawn from np.random.rand
        """
        self.numberOfKernels = numMotifs
        self.kernelLength = motifLength
        self.learningRate = learningRate
        self.poolingFactor = poolingFactor
        self.setParamsToZero = setParamsToZero

        kernelShape = (self.numberOfKernels, 1, 4, self.kernelLength)
        if self.setParamsToZero:
            self.kernels = np.zeros(kernelShape)
        else:
            self.kernels = np.random.rand(*kernelShape)
        self.bias = self._initialBias()

    def _initialBias(self):
        """Return a fresh bias vector with one entry per kernel."""
        if self.setParamsToZero:
            return np.zeros(self.numberOfKernels)
        return np.random.rand(self.numberOfKernels)

    def setCustomKernels(self, kernels):
        """Replace the kernels and re-initialize the bias to match.

        kernels -- 4-D array; axis 0 is the number of kernels and axis 3
                   is the kernel length.
        """
        self.numberOfKernels = kernels.shape[0]
        self.kernelLength = kernels.shape[3]
        self.kernels = kernels
        # The number of kernels may have changed, so the bias is rebuilt.
        self.bias = self._initialBias()

    def initializeMotifs(self):
        """Placeholder; not implemented."""
        pass

    def complement(self, kernelSlice):
        """Return kernelSlice reversed along its first axis."""
        return kernelSlice[::-1]

    def forwardBatch(self, data, verbose=True):
        """Compute the hidden pre-activations with explicit loops.

        data    -- 4-D array indexed as data[sample, 0, :, position]
        verbose -- if True (default) print every intermediate product and
                   the final result, as the original debugging code did

        Returns H with shape
        (data.shape[0], numberOfKernels, 1, data.shape[3]-kernelLength+1),
        where H[sample, k, 0, n] is the sum over m of
        data[sample, 0, :, n+m] . kernels[k, 0, :, m], plus bias[k].

        Raises ValueError if the sequences are too short for the kernel.
        """
        numSamples = data.shape[0]
        numPositions = data.shape[3] - self.kernelLength + 1
        if numPositions < 0:
            raise ValueError("data length %d is too short for kernel length %d"
                             % (data.shape[3], self.kernelLength))

        H = np.zeros((numSamples, self.numberOfKernels, 1, numPositions))
        for sample in range(numSamples):
            for k in range(self.numberOfKernels):
                for n in range(numPositions):
                    activation = 0.0
                    for m in range(self.kernelLength):
                        column = data[sample, 0, :, n + m]
                        weights = self.kernels[k, 0, :, m]
                        # one term of the convolution (bias is added later)
                        x = column.dot(weights)
                        if verbose:
                            print("Sample: " + str(sample) + " k: " + str(k) + " n: " + str(n) + " m: " + str(m))
                            print("Data: " + str(column) + " -> Kernel: " + str(weights))
                            print("result = " + str(x))
                        activation += x
                    # The bias is added once per hidden unit, not once per
                    # kernel column.
                    # NOTE: the reverse-strand term, i.e. the same column
                    # dotted with
                    # self.complement(self.kernels[k,0,:,self.kernelLength-m-1]),
                    # is intentionally not included in H.
                    H[sample, k, 0, n] = activation + self.bias[k]
        if verbose:
            print(H)
        return H

    def backwardBatch(self, H_sample):
        """Placeholder; not implemented."""
        pass

    def expectedDerivative(self, hiddenProbs, data):
        """Placeholder; not implemented."""
        pass

    def trainMinibatch(self, trainData, epochs, batchSize, numOfCDs):
        """Placeholder training loop; currently performs no updates."""
        # Floor division keeps the batch count an integer on Python 2 and 3.
        iterations = trainData.shape[0] // batchSize
        for epoch in range(epochs):
            pass

## Construct the kernels and data, then run the forward pass

In [53]:
# Initialize the learner; its kernels are replaced by the custom ones below.
naiveModel = NaiveCRBM(motifLength=3, numMotifs=2, learningRate=0.1, poolingFactor=1)

# Design the kernels.
# np.tile(..., [1,1,1]) prepends a singleton axis, so every kernelN has
# shape (1, 4, 3). Each kernelN_ is kernelN flipped along both the row
# axis and the position axis.
kernel1 = np.tile(np.array([[1,0,0],[0,1,0],[0,0,1],[0,0,0]]), [1,1,1])
kernel1_ = np.tile(np.flipud(np.fliplr(kernel1[0])),[1,1,1])
kernel2 = np.tile(np.array([[0,0,0],[0,0,0],[1,1,1],[0,0,0]]), [1,1,1])
kernel2_ = np.tile(np.flipud(np.fliplr(kernel2[0])), [1,1,1])
kernel3 = np.random.rand(1,4,3)
kernel3_ = np.tile(np.flipud(np.fliplr(kernel3[0])), [1,1,1])

# Only kernel1 is active; the other kernels are kept for experimentation.
kernel = np.array([kernel1])#, kernel2, kernel2_, kernel3, kernel3_])
#kernel = np.array([kernel3, kernel3_])
print("Kernel: " + str(kernel))

# Initialize the data: one-hot encode the sequences and stack them into a batch.
randSeq1 = dataRead.getOneHotMatrixFromSeq(Seq("ACGTGGGG", IUPAC.unambiguous_dna))
randSeq2 = dataRead.getOneHotMatrixFromSeq(Seq("ACGTACGT", IUPAC.unambiguous_dna))
data = np.array([randSeq1], dtype=np.float32)
print("Data shape: " + str(data.shape))
print(data)

# Perform the forward pass with the custom kernels.
naiveModel.setCustomKernels(kernel)
naiveModel.forwardBatch(data)

Kernel: [[[[1 0 0]
   [0 1 0]
   [0 0 1]
   [0 0 0]]]]
Data shape: (1, 1, 4, 8)
[[[[ 1.  0.  0.  0.  0.  0.  0.  0.]
   [ 0.  1.  0.  0.  0.  0.  0.  0.]
   [ 0.  0.  1.  0.  1.  1.  1.  1.]
   [ 0.  0.  0.  1.  0.  0.  0.  0.]]]]
Sample: 0 k: 0 n: 0 m: 0
Data: [ 1.  0.  0.  0.] -> Kernel: [1 0 0 0]
result = 1.0
Sample: 0 k: 0 n: 0 m: 1
Data: [ 0.  1.  0.  0.] -> Kernel: [0 1 0 0]
result = 1.0
Sample: 0 k: 0 n: 0 m: 2
Data: [ 0.  0.  1.  0.] -> Kernel: [0 0 1 0]
result = 1.0
Sample: 0 k: 0 n: 1 m: 0
Data: [ 0.  1.  0.  0.] -> Kernel: [1 0 0 0]
result = 0.0
Sample: 0 k: 0 n: 1 m: 1
Data: [ 0.  0.  1.  0.] -> Kernel: [0 1 0 0]
result = 0.0
Sample: 0 k: 0 n: 1 m: 2
Data: [ 0.  0.  0.  1.] -> Kernel: [0 0 1 0]
result = 0.0
Sample: 0 k: 0 n: 2 m: 0
Data: [ 0.  0.  1.  0.] -> Kernel: [1 0 0 0]
result = 0.0
Sample: 0 k: 0 n: 2 m: 1
Data: [ 0.  0.  0.  1.] -> Kernel: [0 1 0 0]
result = 0.0
Sample: 0 k: 0 n: 2 m: 2
Data: [ 0.  0.  1.  0.] -> Kernel: [0 0 1 0]
result = 1.0
Sample: 0 k: 0 n: 3 m: