# This notebook will implement a naive implementation
This is for debugging purposes only, to see if the GPU implementation is correct or not.

In [229]:
# Imports
import sys
import os
import random
import time
import numpy as np
np.set_printoptions(precision=2, suppress=True)

# the underlying convRBM implementation
sys.path.append(os.path.abspath('../code'))
#from convRBM import CRBM
import getData as dataRead

# biopython stuff
#import Bio.SeqIO as sio
#import Bio.motifs.matrix as mat
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
#from Bio import motifs
import math

In [250]:
class NaiveCRBM:

    def __init__ (self, motifLength=1, numMotifs=1, learningRate=0.1, poolingFactor=1):
        self.numberOfKernels = numMotifs
        self.kernelLength = motifLength
        self.poolingFactor = poolingFactor
        self.learningRate = learningRate
        self.setParamsToZero = True
        self.debug = True
        if self.setParamsToZero:
            self.kernels = np.zeros((self.numberOfKernels, 1, 4, self.kernelLength))
            self.bias = np.zeros(self.numberOfKernels)
        else:
            self.kernels = np.random.rand(self.numberOfKernels, 1, 4, self.kernelLength)
            self.bias = np.random.rand(self.numberOfKernels)
    
    def setCustomKernels (self, kernels):
        self.numberOfKernels = kernels.shape[0]
        self.kernelLength = kernels.shape[3]
        self.kernels = kernels
        if self.setParamsToZero:
            self.bias = np.zeros(self.numberOfKernels)
        else:
            self.bias = np.random.rand(self.numberOfKernels)

    def initializeMotifs (self):
        pass
        
    def complement (self, kernelSlice):
        return kernelSlice[::-1]

    def forwardBatch (self, data):
        N_h = data.shape[3]-self.kernelLength+1
        H = np.zeros((data.shape[0], self.numberOfKernels, 1, N_h))
        for sample in range(data.shape[0]):
            for k in range(self.numberOfKernels):
                for n in range(N_h):
                    for m in range(self.kernelLength):
                        # calculate the x_i, that is the cross-correlation
                        x = data[sample,0,:,n+m].T.dot(self.kernels[k,0,:,m]) + self.bias[k]
                        #cKernel = self.complement(self.kernels[k,0,:,self.kernelLength-m-1])
                        #x_prime = data[sample,0,:,n+m].T.dot(cKernel) + self.bias[k]
                        H[sample, k, 0, n] += x # + x_prime      
        print "Pre Max Pooling H:"
        print H

        # perform prob max pooling
        P = np.zeros(H.shape)
        S = np.zeros(H.shape)
        H_exp = np.exp(H)
        numBins = N_h / self.poolingFactor
        #print "Number Of Bins: " + str(numBins)
        for sample in range(data.shape[0]):
            for k_pos in range(0, self.numberOfKernels, 2):
                print "K_pos: " + str(k_pos)
                for unit in range(numBins):
                    #print "Doing unit: " + str(unit)
                    # calculate sum within unit
                    sumInUnit = 0
                    for cell in range(self.poolingFactor):
                        curPos = unit*self.poolingFactor+cell
                        sumInUnit += H_exp[sample,k_pos,0,curPos] + H_exp[sample,k_pos+1,0,curPos]
                        
                    # now, calculate the single positions in P
                    arr = []
                    for cell in range(self.poolingFactor):
                        curPos = unit*self.poolingFactor+cell
                        P[sample,k_pos,0,curPos] = H_exp[sample,k_pos,0,curPos] / (sumInUnit + 1)
                        P[sample,k_pos+1,0,curPos] = H_exp[sample,k_pos+1,0,curPos] / (sumInUnit + 1)
                        arr.append(P[sample,k_pos,0,curPos])
                        arr.append(P[sample,k_pos+1,0,curPos])
                    
                    # finally, do the sampling step
                    arr.append(1 / (sumInUnit+1))
                    s = np.random.multinomial(n=1, pvals=np.array(arr),size=1)
                    am = np.argmax(s)
                    #print "Argmax: " + str(am)
                    if am < self.poolingFactor*2:
                        strand = am % 2
                        pos = unit * self.poolingFactor + (am // 2)
                        #print "Strand: " + str(strand) + " Pos: " + str(pos)
                        S[sample,k_pos+strand,0,pos] = 1
                    else:
                        print "5th element chosen"
        return [P,S]


    def backwardBatch (self, H):
        
        # calculate full convolution (not valid, therefore padding is applied with zeros)
        N_v = H.shape[3] + self.kernelLength - 1
        pad = self.kernelLength-1
        V = np.zeros((H.shape[0],1,4,N_v))
        Y = np.zeros(V.shape)
        H_pad = np.pad(H,[(0,0),(0,0),(0,0),(pad, pad)], 'constant',constant_values=(0,0))
        for sample in range(H.shape[0]):
            for k in range(self.numberOfKernels):
                for n in range(N_v):
                    for m in range(self.kernelLength):
                        Y[sample,0,:,n] += self.kernels[k,0,:,m] * H_pad[sample,k,0,pad+n-m]
                        
        # calculate softmax on convolved data
        P_V = self.softmax(Y)
        print P_V
        
        # sample the visible layer from that
        for sample in range(H.shape[0]):
            for col in range(P_V.shape[3]):
                V[sample,0,:,col] = np.random.multinomial(n=1,pvals=P_V[sample,0,:,col],size=1)
        return V
        


    def expectedDerivative (self, H, data):
        G = np.zeros(self.kernels.shape)
        for sample in range(data.shape[0]):
            for k in range(self.numberOfKernels):
                for n_h in range(H.shape[3]):
                    for m in range(self.kernelLength):
                        G[k,0,:,m] += data[sample,0,:,n_h+m] * H[sample,k,0,n_h]
        return G
    
    def train_model (self, D, numOfCDs):
        # calculate the data gradient for weights (motifs) and bias
        [H_data, S_data] = self.forwardBatch(D)
        if self.debug:
            print H_data

        # calculate data gradients
        G_motif_data = self.expectedDerivative(H_data, D)

        if self.debug:
            print G_motif_data

        # calculate model probs
        S_H = S_data
        for i in range(numOfCDs):
            V_model = self.backwardBatch(S_H)
            [H_model, S_H] = self.forwardBatch(V_model)
        
        # compute the model gradients
        G_motif_model = self.expectedDerivative(H_model, D)
        
        if self.debug:
            print G_motif_model
        
        # update the parameters
        new_motifs = self.kernels + self.learningRate * (G_motif_data - G_motif_model)
        self.kernels = new_motifs
        
    def trainMinibatch (self, trainData, epochs, batchSize, numOfCDs):
        iterations = trainData.shape[0] / batchSize
        for epoch in range(epochs):
            for batchIdx in range(iterations):
                self.train_model(trainData[batchIdx*batchSize:(batchIdx+1)*batchSize], numOfCDs)
        
    def softmax (self, x):
        return np.exp(x) / np.exp(x).sum(axis=2, keepdims=True)

## Construct the data

In [251]:
#initialize the learner and set custom kernels
naiveModel = NaiveCRBM(motifLength=3, numMotifs=4, learningRate=0.1, poolingFactor=2)

# design data
#print "Motifs:"
#print x.motifs.get_value()
kernel1 = np.tile(np.array([[1,0,0],[0,1,0],[0,0,1],[0,0,0]]), [1,1,1])
kernel1_ = np.tile(np.flipud(np.fliplr(kernel1[0])),[1,1,1])
kernel2 = np.tile(np.array([[0,0,0],[0,0,0],[1,1,1],[0,0,0]]), [1,1,1])
kernel2_ = np.tile(np.flipud(np.fliplr(kernel2[0])), [1,1,1])
kernel3 = np.random.rand(1,4,3)
kernel3_ = np.tile(np.flipud(np.fliplr(kernel3[0])), [1,1,1])
kernel = np.array([kernel1, kernel1_])#, kernel2, kernel2_])#, kernel3, kernel3_])
#kernel = np.array([kernel3, kernel3_])
print "Kernel: " + str(kernel)

# initialize the data
randSeq1 = dataRead.getOneHotMatrixFromSeq(Seq("ACGTGGGG", IUPAC.unambiguous_dna))
randSeq2 = dataRead.getOneHotMatrixFromSeq(Seq("ACGTACGT", IUPAC.unambiguous_dna))
data = np.array([randSeq1], dtype=np.float32)
print "Data shape: " + str(data.shape)
print data

# perform forward pass
naiveModel.setCustomKernels(kernel)
print "Start forward..."
[P,S] = naiveModel.forwardBatch(data)
print "Hidden Probs:"
print P
print "Hidden Sample:"
print S
print "Start backward..."
V = naiveModel.backwardBatch(S)
print "Start gradient calc..."
naiveModel.expectedDerivative(P, data)

naiveModel.debug = False
naiveModel.trainMinibatch(data, 2, 1, 1)
print "Finished Training:"
print naiveModel.kernels

Kernel: [[[[1 0 0]
   [0 1 0]
   [0 0 1]
   [0 0 0]]]


 [[[0 0 0]
   [1 0 0]
   [0 1 0]
   [0 0 1]]]]
Data shape: (1, 1, 4, 8)
[[[[ 1.  0.  0.  0.  0.  0.  0.  0.]
   [ 0.  1.  0.  0.  0.  0.  0.  0.]
   [ 0.  0.  1.  0.  1.  1.  1.  1.]
   [ 0.  0.  0.  1.  0.  0.  0.  0.]]]]
Start forward...
Pre Max Pooling H:
[[[[ 3.  0.  1.  1.  1.  1.]]

  [[ 0.  3.  0.  1.  1.  1.]]]]
K_pos: 0
Hidden Probs:
[[[[ 0.47  0.02  0.27  0.27  0.23  0.23]]

  [[ 0.02  0.47  0.1   0.27  0.23  0.23]]]]
Hidden Sample:
[[[[ 1.  0.  0.  1.  1.  0.]]

  [[ 0.  0.  0.  0.  0.  0.]]]]
Start backward...
[[[[ 0.48  0.17  0.17  0.48  0.37  0.13  0.17  0.25]
   [ 0.17  0.48  0.17  0.17  0.37  0.37  0.17  0.25]
   [ 0.17  0.17  0.48  0.17  0.13  0.37  0.48  0.25]
   [ 0.17  0.17  0.17  0.17  0.13  0.13  0.17  0.25]]]]
Start gradient calc...
Pre Max Pooling H:
[[[[ 3.  0.  1.  1.  1.  1.]]

  [[ 0.  3.  0.  1.  1.  1.]]]]
K_pos: 0
[[[[ 0.25  0.17  0.37  0.13  0.17  0.48  0.17  0.17]
   [ 0.25  0.48  0.13  0.37  0.17 

In [163]:
test = np.ones((2,1,1,6))
print test.shape
print np.pad(test,[(0,0),(0,0),(0,0),(2,2)],'constant', constant_values=(0,0))

(2, 1, 1, 6)
[[[[ 0.  0.  1.  1.  1.  1.  1.  1.  0.  0.]]]


 [[[ 0.  0.  1.  1.  1.  1.  1.  1.  0.  0.]]]]
