In [1]:
import numpy as np
from random import randint
#import framework
#import implementations as func
import matplotlib.pyplot as plt
import importlib
import pandas as pd
import scipy.signal as scp
import tensorflow as tf
import target_data_gen
from target_data_gen import get_sizes
from target_data_gen import target_gen
%load_ext autoreload
%autoreload 2
%matplotlib notebook

In [14]:
class Linear():  
  
    # initialization function
    def __init__(self, inputs, outputs):
        print("creating Linear layer")
        lim = np.sqrt(6/(inputs+outputs))
        self.W = np.random.uniform(-lim,lim,(inputs+1, outputs))
        self.W[0,:].fill(0)
        self.dW = np.zeros((inputs+1, outputs))
        self.Linear_updateW = Optimizer(self.W)
    # forward pass calculation
    # receives input tensor of shape : [nb_samples x insize]
    def forward(self,X): 
        # [nb_samples x insize] x [insize x outsize] + [nb_samples x outsize] => [nb_samples x outsize]
        H = X @ self.W
        return H
    
    # backward pass calculation
    # receives gradient of the next layer
    # returns gradient with respect to the input. 
    # returns derivative with respect to the weights.
    def backward(self,dH, X):
        # matrix multiplication [nb_samples, outsize]*[insize, outsize]^t => [nb_samples x insize]
        # sum over samples for parameters (normal Gradient Descent. If divided into batches -> SGD)
        dX = dH @ np.transpose(self.W)
        #print(dH, self.dW)
        self.dW = np.transpose(X) @ dH
        self.W = self.Linear_updateW.adam_update(self.W, self.dW)
        return dX, self.dW
    
  

In [15]:
class GRU():
    #Wz, Wr, Wh, Uz, Ur, Uh = np.array()
    #z, r, h, s = np.array()
    
    def __init__(self,sequences, timesteps, inputs, outputs):
        print("creating GRU layer")
        lim_w = np.sqrt(6/(inputs+outputs))
        lim_u = np.sqrt(6/(2*outputs))
        self.Wz, self.Wr, self.Wh = np.random.uniform(-lim_w,lim_w,(inputs+1, outputs)), np.random.uniform(-lim_w,lim_w,(inputs+1, outputs)), np.random.uniform(-lim_w,lim_w,(inputs+1, outputs))
        print(self.Wz.shape)
        self.Wr[0,:].fill(-1)
        self.Wz[0,:].fill(0)
        self.Wh[0,:].fill(0)
        self.Uz, self.Ur, self.Uh = np.random.uniform(-lim_u,lim_u,(outputs, outputs)), np.random.uniform(-lim_u,lim_u,(outputs, outputs)), np.random.uniform(-lim_u,lim_u,(outputs, outputs))
        self.z, self.r = np.zeros((sequences,timesteps,outputs)),np.zeros((sequences,timesteps,outputs))
        self.h, self.s = np.zeros((sequences,timesteps,outputs)),np.zeros((sequences,timesteps,outputs))
        print(self.s.shape)
        self.dWz, self.dWr, self.dWh = np.zeros((inputs+1, outputs)), np.zeros((inputs+1, outputs)), np.zeros((inputs+1, outputs))
        self.dUz, self.dUr, self.dUh = np.zeros((outputs, outputs)), np.zeros((outputs, outputs)), np.zeros((outputs, outputs))
        self.GRU_updateWz = Optimizer(self.Wz)
        self.GRU_updateWr = Optimizer(self.Wr)
        self.GRU_updateWh = Optimizer(self.Wh)
        self.GRU_updateUz = Optimizer(self.Uz)
        self.GRU_updateUr = Optimizer(self.Ur)
        self.GRU_updateUh = Optimizer(self.Uh)
    def forward(self,X):
        # initialize
        # first iteration
        #print(self.z.shape, X.shape)
        self.z[:,0,:] = sigmoid(X[:,0,:] @ self.Wz) #[seq * time * OUT] = [seq * time * IN] @ [IN * OUT]
        self.r[:,0,:] = sigmoid(X[:,0,:] @ self.Wr)
        self.h[:,0,:] = tanh(X[:,0,:] @ self.Wh)
        self.s[:,0,:] = self.z[:,0,:]*self.h[:,0,:]
        for t in range(1, X.shape[1]):
            self.z[:,t,:] = sigmoid(X[:,t,:] @ self.Wz + self.s[:,t-1,:] @ self.Uz)
            self.r[:,t,:] = sigmoid(X[:,t,:] @ self.Wr + self.s[:,t-1,:] @ self.Ur)
            self.h[:,t,:] = tanh(X[:,t,:] @ self.Wh + self.r[:,t,:] * (self.s[:,t-1,:] @ self.Uh))
            self.s[:,t,:] = self.z[:,t,:]*self.h[:,t,:] + (1-self.z[:,t,:])*self.s[:,t-1,:]
        return self.s
    
    def backward(self, ds, X):
        dsnext = np.zeros_like(self.s[:,0,:])
        dX = np.zeros((X.shape))
        for t in reversed(range(X.shape[1])):
            #print(ds.shape, self.z.shape)
            dh = ds*self.z[:,t,:]
            dh_l = dh*tanh(self.h[:,t,:], deriv=True)
            #print("GRU backward", dh_l.shape)
            self.dWh += np.transpose(X[:,t,:]) @ dh_l # [1 x IN].T @ [1 x OUT]
           
            #print(self.dWh.shape)
            self.dUh += np.transpose(self.r[:,t,:]*self.s[:,t-1,:]) @ dh_l # ([1 x OUT] * [1 x OUTin]).T @ [1 x OUT]
            
            #drsp = dh_l @ np.transpose(Uh)# [OUTin x OUT] @ [1 x OUT]
            drsp = dh_l @ np.transpose(self.Uh)
            dr = dh_l * (self.s[:,t-1,:] @ self.Uh) #[1 x OUT] = [ 1 x OUT]*([1 x OUTin] @ [OUTin x OUT])
            dr_l = dr * sigmoid(self.r[:,t,:], deriv=True)
            
            self.dWr += np.transpose(X[:,t,:]) @ dr_l # [ IN x OUT] = [1 x IN].T @ [1 x OUT]
            self.dUr += np.transpose(self.s[:,t-1,:]) @ dr_l # [OUTin x OUT ] = [ 1 x OUTin].T @ [ 1 x OUT]
            
            dz = (self.h[:,t,:] - self.s[:,t-1,:]) * dh  # [1 x OUT] = ( [1 x OUT] - [1 x OUTin] ) * [1 x OUT]
            dz_l = dz * sigmoid(self.z[:,t,:], deriv=True)
            
            self.dWz += np.transpose(X[:,t,:]) @ dz_l
            self.dUz += np.transpose(self.s[:,t-1,:]) @ dz_l
            
            # calculate gradient w.r.t s[t-1]
            ds_fz_inner = dz_l @ np.transpose(self.Uh) #  [1 x OUTin] =  [1 x OUT] @ [OUTin x OUT].T
            ds_fz = ds * (1-self.z[:,t,:]) # [1 x OUTin] = [1 x OUT] * [1 x OUT]
            ds_fh = drsp * self.r[:,t,:] # [1 x OUTin] = [1 x OUT] * [1 x OUT]
            ds_fr = dr_l @ np.transpose(self.Ur)
            
            dsnext = ds_fz_inner + ds_fz + ds_fh + ds_fr
            ds += dsnext
            dX[:,t,:]=dh_l @ np.transpose(self.Wh) + dr_l @ np.transpose(self.Wr) + dz_l @ np.transpose(self.Wz)
            
            #update weights
        self.Wz = self.GRU_updateWz.adam_update(self.Wz, self.dWz)
        self.Wr = self.GRU_updateWr.adam_update(self.Wr, self.dWr)
        self.Wh = self.GRU_updateWh.adam_update(self.Wh, self.dWh)
        self.Uz = self.GRU_updateUz.adam_update(self.Uz, self.dUz)
        self.Ur = self.GRU_updateUr.adam_update(self.Ur, self.dUr)
        self.Uh = self.GRU_updateUh.adam_update(self.Uh, self.dUh) 
        return ds, dX
    #def compute_gradients(self,X):
        
    def change_input_size(self, sequences, timesteps,outputs):
        self.z, self.r = np.zeros((sequences,timesteps,outputs)),np.zeros((sequences,timesteps,outputs))
        self.h, self.s = np.zeros((sequences,timesteps,outputs)),np.zeros((sequences,timesteps,outputs))
        
    def get_parameters(self):
        return self.Wz, self.dWz,self.Wr, self.dWr,self.Wh, self.dWh,self.Uz, self.dUz,self.Ur, self.dUr,self.Uh, self.dUh

In [239]:
X = np.array([np.arange(1,10), np.arange(1,10)])
X.reshape(1,2,9)
print(X)
GRU_layer_0 = GRU(sequences=1, timesteps=2, inputs=9, outputs=5)
s = GRU_layer_0.forward(X.reshape(1,2,9))

print(s)

[[1 2 3 4 5 6 7 8 9]
 [1 2 3 4 5 6 7 8 9]]
creating GRU layer
(9, 5)
(1, 2, 5) (1, 2, 9)
[[[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]]


In [4]:
def sigmoid(input, deriv=False):
    if deriv:
        return input*(1-input)
    else:
        return 1 / (1 + np.exp(-input))

def tanh(input, deriv=False):
    if deriv:
        return 1 - input ** 2
    else:
        return np.tanh(input)

def reLU(input, deriv=False):
    if deriv:
        output = np.copy(input)
        output[input > 0.0 ] = 1.0
        output[input <= 0.0] = 0.0
        return output
    else:
        output = np.copy(input)
        output[input < 0.0 ] = 0.0
        return output

In [191]:
#test
input_shape = (32,10,2)
X=np.zeros((input_shape))
print(X.shape)

(32, 10, 2)


In [5]:
class Conv2D():
    height = 0
    width = 0
    nb_seq = 0
    timesteps = 0
    new_height = 0
    new_width = 0
    K = 0
    M = 0
    N = 0
    def __init__(self, kernel_height, kernel_width, filters):
        self.W = np.random.uniform(-1,1,(kernel_height, kernel_width, filters))
        self.B = np.zeros((1, filters))
        self.dW = np.zeros((kernel_height, kernel_width, filters))
        self.dB = np.zeros((1, filters))
        print('Creating Conv2D layer')
        self.Conv2D_updateW = Optimizer(self.W)
        self.Conv2D_updateB = Optimizer(self.B)
    # W.shape : [kernel_height, kernel_width, nb_filters]
    # X.shape : [samples, timesteps, height, width, 1]
    # B.shape : [1, nb_filters]
    def forward(self, X):
        print("conv2d Xshape", X.shape)
        self.height = X.shape[2]
        self.width = X.shape[3]
        self.M = self.W.shape[0]
        self.N = self.W.shape[1]
        self.K = self.W.shape[2]
        self.nb_seq = X.shape[0]
        self.timesteps = X.shape[1]
        #print(self.timesteps)
        #compute new dimensions
        self.new_height = self.height - self.M + 1
        self.new_width = self.width - self.N + 1
        
       # print(new_height, new_width)
        h = np.zeros((self.nb_seq, self.timesteps, self.new_height, self.new_width, self.K))
        for k in range(self.K):
            for i in range(self.new_height):
                for j in range(self.new_width):
                    h[:,:,i,j,k]=np.sum(X[:,:,i:i+self.M, j:j+self.N,0]*self.W[:,:,k], axis =(2,3))+self.B[0,k]
        return h
    # dH has dimensions of H which means in case of X.shape=[3,3,1], W.shape=[2,2,1] => H.shape=[2,2,1]
    # dX should have the same shape as X, i.e dX.shape=[3,3,1] = dH conv2D flipped W 'FULL'
    def backward(self, dH, X):
        # dw is the same operation as in forward propagation. 
        # no need to compute dX in our case because the conv2D layer is the first layer and means the end of backprop. algorithm.
        for k in range(self.K):
            self.dB[:,k] = np.sum(dH[:,:,:,:,k])
            for i in range(self.M):
                for j in range(self.N):
                    #average over all the sequences and timesteps
                    self.dW[i,j,k]=np.sum(X[:,:,i:i+self.new_height, j:j+self.new_width,0]*dH[:,:,:,:,k])
        self.W = self.Conv2D_updateW.adam_update(self.W, self.dW)
        self.B = self.Conv2D_updateB.adam_update(self.B, self.dB)
        return 

In [6]:
#test for conv2d
X=np.array([[3.0,5,6], [1,2,4], [2,3,5]]).reshape(3,3,1)
W=np.array([[2,1.0],[1,4]]).reshape(-1,2,1)
#W = np.rot90(W,2)
print(W.shape)
B=np.array([0.5]).reshape(-1,1)
print(X.shape)
conv_layer1 = Conv2D()
H = conv_layer1.forward(X, W, B)
print(H, H.shape)
print(H[1,0,0])

# check against scp : kernel is flipped here.
X_scp = X.reshape(-1,3)
W_scp = W.reshape(-1,2)
H_scp = scp.convolve2d(X_scp, W_scp, mode = 'valid')
#print(H_scp)
#print(X[1,0,0])

#check against tensorflow

X_tf = X.reshape(1,3,3,1)
W_tf = W.reshape(2,2,1,1)
sess = tf.Session()
H_tf = sess.run(tf.nn.conv2d(X_tf, W_tf, strides = (1,1,1,1), padding = 'VALID'))
sess.run(tf.Print(H_tf, [H_tf]))


(2, 2, 1)
(3, 3, 1)


TypeError: __init__() missing 3 required positional arguments: 'kernel_height', 'kernel_width', and 'filters'

In [6]:
class MaxPool2D():
    height = 0
    width = 0
    M = 0
    N = 0
    K = 0
    nb_seq = 0
    timesteps = 0
    def __init__(self):
        print('creating 2D Max pooling layer')
    
    def forward(self, X, pool):
        self.height = X.shape[2]
        self.width = X.shape[3]
        self.M = pool[0]
        self.N = pool[1]
        self.K = X.shape[4]
        self.nb_seq = X.shape[0]
        self.nb_timesteps = X.shape[1]
        if self.height%2 == 1:
            X = np.delete(X, obj=self.height-1, axis=2)
            self.height = X.shape[2]
        if self.width%2 == 1:
            X = np.delete(X, obj=self.width-1, axis=3)
            self.width = X.shape[3]
        X_argmax = np.copy(X)
       
        #print(self.M, self.N)
        #compute new sizes
        new_height = int(self.height/self.M)
        new_width = int(self.width/self.N)
        H = np.zeros((self.nb_seq, self.nb_timesteps, new_height, new_width, self.K))
        #print(H.shape)
       
        #start pooling
        for k in range(self.K):
            for i in range(0,self.height,self.M):
                for j in range(0,self.width,self.N): #genericity loss here, only valid with kernel width = 2.
                    X_temp = X[:,:,i:i+self.M, j:j+self.N, k].reshape(self.nb_seq, self.nb_timesteps, self.M*self.N,1)
                    H[:,:,int(i/self.M), int(j/self.N), k] = np.amax(X[:,:,i:i+self.M, j:j+self.N, k], axis=(2,3)) 
                    X_argmax[:,:, i, j, k] = (np.argmax(X_temp, axis = 2)).reshape(self.nb_seq, self.nb_timesteps) 
                    X_argmax[:,:, i+self.M-1, j, k] = (np.argmax(X_temp, axis = 2)).reshape(self.nb_seq, self.nb_timesteps) 
                    X_argmax[:,:, i, j+self.N-1, k] = (np.argmax(X_temp, axis = 2)).reshape(self.nb_seq, self.nb_timesteps) 
                    X_argmax[:,:, i+self.M-1, j+self.N-1, k] = (np.argmax(X_temp, axis = 2)).reshape(self.nb_seq, self.nb_timesteps) 
        return H, X_argmax
        
    def backward(self, X_argmax, dH):
        dX = np.zeros((X_argmax.shape))
        for k in range(self.K):
            for i in range(0, self.height, self.M):
                for j in range(0,self.width, self.N):
                    dX[:,:, i:i+self.M,j:j+self.N,k] = self.norm_argmax(X_argmax[:,:,i:i+self.M, j:j+self.N, k], dH[:,:,int(i/self.M), int(j/self.N), k])       
        return dX
                                                                
    def norm_argmax(self, X, dH):
        #print("X_argmax shape maxpool = ", X.shape)
        h = X.shape[2]
        w = X.shape[3]
        I = np.array(np.arange(X.shape[2]+X.shape[3]))
        Ibig = np.zeros_like(X)
        #print("Maxpoolbackward:", X.shape, Ibig.shape, I.shape)
        Ibig = np.tile(I,(X.shape[0], X.shape[1],1))
        #print("ibig", Ibig.shape)
        Xresh = X.reshape(X.shape[0], X.shape[1], Ibig.shape[2]) # multidimensional
        diff = (Xresh-Ibig).astype(int)
        #duplicate dH x 4
        dH_dupl = np.zeros_like(X)
        for m in range(2):
            for n in range(2):
                dH_dupl[:,:,m,n] = dH
        # reshape to make boolean assignment
        Xresh = Xresh.flatten()
        diff = diff.flatten()
        dHflat = dH_dupl.flatten()
        #print("dH shape", dH.shape)
        Xresh[diff != 0] = -1
        #print("xresh shape", Xresh.shape, diff.shape)
        deriv = np.copy(Xresh)
        deriv[Xresh > -1.0] = dHflat[Xresh > -1.0]
        deriv[Xresh == -1.0] = 0.0
        deriv = deriv.reshape(X.shape[0],X.shape[1],h, w)
        #print("deriv shape max pool = ", deriv.shape)
        return deriv

In [328]:
# test 
A = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(A, A.shape)
print(A[0,1,0])
B=np.array([[1,2],[3,4]])
print(B, B.shape)
print(B[1,0])
int(98/2)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]] (2, 2, 2)
3
[[1 2]
 [3 4]] (2, 2)
3


49

In [7]:
# test Max Pool 2D
X = np.zeros((1,1,4,4,1))
for i in range(X.shape[2]):
    for j in range(X.shape[3]):
        X[:,:,i,j,:]= i -2*j 
x_dummy=X.reshape(4,4)
ddH = np.ones((5,5,1))
maxPool2D_layer2 = MaxPool2D()
pool = np.array([2,2])
H, X_arg =maxPool2D_layer2.forward(X, pool)
dXmaxpool_test = max, X_arg[1Pool2D_layer2.backward(X_arg, ddH)
print(X_arg)

SyntaxError: invalid syntax (<ipython-input-7-caa07f00056f>, line 11)

In [7]:
print(H.reshape(5,5))

[[ 2.  4.  6.  8. 10.]
 [ 4.  6.  8. 10. 12.]
 [ 6.  8. 10. 12. 14.]
 [ 8. 10. 12. 14. 16.]
 [10. 12. 14. 16. 18.]]


In [8]:
print(X.reshape(10,10))

[[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9.]
 [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
 [ 2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]
 [ 3.  4.  5.  6.  7.  8.  9. 10. 11. 12.]
 [ 4.  5.  6.  7.  8.  9. 10. 11. 12. 13.]
 [ 5.  6.  7.  8.  9. 10. 11. 12. 13. 14.]
 [ 6.  7.  8.  9. 10. 11. 12. 13. 14. 15.]
 [ 7.  8.  9. 10. 11. 12. 13. 14. 15. 16.]
 [ 8.  9. 10. 11. 12. 13. 14. 15. 16. 17.]
 [ 9. 10. 11. 12. 13. 14. 15. 16. 17. 18.]]


In [9]:
print(X_arg.reshape(10,10))

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 1. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 1. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 1. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 1. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 1. 0. 1. 0. 1.]]


In [51]:
#loss function
def CrossEntropy(yHat, y):
    length = yHat.shape[0]
    y = y.flatten()
    yHat = yHat.flatten()
    print(yHat.shape)
    output = np.copy(y)
    print(output.shape)
    output[y==1] = -10*np.log(yHat)[y==1]
    output[y==0] = -np.log(1 - yHat)[y==0]
    output = np.sum(output)/y.shape[0]
    return output
def derivCrossEntropy(yHat, y):
    dloss = np.zeros((yHat.shape[0]))
    for i in range(yHat.shape[0]):
        if yHat[i] < 1 and yHat[i] > 0:
            dloss[i] = -y[i]/yHat[i] + (1-y[i])/(1-yHat[i])
        elif yHat[i] == 1 and y[i] == 1:
            dloss[i] = -1
        elif yHat[i] == 0 and y[i] == 0:
            dloss[i] = 1
        elif yHat[i] == 1 and y[i] == 0:
            dloss[i] = 2*63
        elif yHat[i] == 0 and y[i] == 1:
            dloss[i] = -2*63
    return dloss
    

In [11]:
# TB done:
# write main function calling forward & backward
# write flattening (can be done in MAIN)
# try it out!

In [25]:
#weight update
class Optimizer():
    # initialize training parameters
    def __init__(self, W):
       # self.Wconv = np.zeros((nb_Wconv1, nb_Wconv2))
       # self.Bconv = np.zeros((nb_Bconv))
       # self.Wr, self.Wh, self.Wz = np.zeros((nb_GRUin, nb_GRUout)), np.zeros((nb_GRUin, nb_GRUout)), np.zeros((nb_GRUin, nb_GRUout))
       # self.Ur, self.Uh, self.Uz = np.zeros((nb_GRUout, nb_GRUout)), np.zeros((nb_GRUout, nb_GRUout)), np.zeros((nb_GRUout, nb_GRUout))
       # self.Wlin = np.zeros((nb_Wlin)) # only 1 output
        self.alpha = 0.01
        self.beta1 = 0.9
        self.beta2 = 0.99
        self.eps = 10**-8
        self.v = np.zeros((W.shape))
        self.m = np.zeros((W.shape))
    def adam_update(self, W, dW):
        np.clip(dW, -5, 5, out=dW)
        self.m = self.beta1*self.m + (1 - self.beta1)*dW
        self.v = self.beta2*self.v + (1 - self.beta2)*np.power(dW, 2)
        m_corr = self.m/(1-self.beta1)
        v_corr = self.v/(1-self.beta2)
        W = W  - self.alpha*m_corr/(np.sqrt(v_corr)+self.eps)   
        return W



In [60]:
#main

# get data
np.random.seed(0)
output = 1
#X=np.array(sigbufs
batch_size = 100 #(length)
batch_mod = 10000
dataset_size = 18
X, Y = list(), list()

#**********************CALLLING DATA GENERATOR FUNCTION ***********************
X, input_size, length = get_sizes(X, dataset_size)
print(X.shape)
#Defining sizes for input/target data
Y=np.zeros((dataset_size, batch_mod, output))
#yhat=np.zeros((dataset_size, batch_mod, output))


#***********************CALLING TARGET GENERATOR FUNCTION**********************
file = 'database/chb01-summary.txt'
Y = target_gen(output, batch_mod, dataset_size, batch_size, file)
#******************************************************************************
print(Y.shape)

input_size_new = 23
#elements of each frame
batch_size_new = 100
#time steps equivalent 
timesteps= 10 # meaning of this?
#number of sequences
seq_number = 100
#Re-defining dataset size for training
dataset_size = 16
max_iters = 40 #nb_epochs
#Defining sizes for input/target data
X_new=np.zeros((seq_number*dataset_size, timesteps, input_size_new, batch_size_new, 1))
#yhat=np.zeros((dataset_size, batch_mod, output))
Y_new=np.zeros((seq_number*dataset_size, output))

for m in range(0,dataset_size):
    if (m == 3-1 or  m == 4-1 or m == 15-1 or m == 16-1 or m == 18-1 ):
        if m == 3-1:
            initial_time = 700000
        if m == 4-1:
            initial_time = 300000
        if m == 15-1:
            initial_time = 400000
        if m == 16-1:
            initial_time = 200000
        if m == 18-1:
            initial_time = 400000
    else:
        initial_time = 0
    print('initial time:', initial_time)
    for i in range(0,seq_number):
        for j in range(0,timesteps):
            initial = initial_time+(i*batch_size_new*timesteps)+j*batch_size_new
            final = initial_time+(i*batch_size_new*timesteps)+((j+1)*batch_size_new)
            #print(initial_time+(i*batch_size_new*timesteps)+j*batch_size_new)
            #print(initial_time+(i*batch_size_new*timesteps)+((j+1)*batch_size_new))
            X_new[seq_number*m+i,j,:,0:batch_size_new,0] =  X[m,0,0,0:input_size_new,initial:final,0] 

#standardizing data
max_value = np.amax(abs(X_new))
min_value = np.amin(abs(X_new))
# print('MAX VALUE', max_value)
X_new = X_new/max_value # standardization by max value (all values between 0 and 1)
#size of data: (800, 10, 23, 100, 1)
Y_new[seq_number*2+33*2:seq_number*2+39*2,output-1] = 1
Y_new[seq_number*3+37*2:seq_number*3+42*2,output-1] = 1
Y_new[seq_number*14+21*2:seq_number*14+27*2,output-1] = 1
Y_new[seq_number*15+29*2:seq_number*15+37*2,output-1] = 1


print(X_new.shape, Y_new.shape)

# sequences, timesteps, features, batches. (100 x nb_files, 10,23, 100,,1)
# define model
# initialize all parameters
sgd_batch_size = 1600
layer_0 = Conv2D(2,2,2)
# add activation
layer_1 = MaxPool2D()
#layer_2 = GRU(dataset_size*seq_number,timesteps,1078,100) #sequences, timesteps, features, outputs
layer_2 = GRU(sgd_batch_size, timesteps, 1078, 100)
layer_3 = Linear(100,1)
X_GRU_flat_augmented = np.ones((sgd_batch_size,10,1079))
#Lin_bias_col = np.ones((800)) # create column of 1's for the bias.
S_GRU_augmented = np.ones((sgd_batch_size,101))
# add activation
#training phase

#forward pass
for n_iters in range(20):
    #pick sequence randomly:
    i = randint(0,dataset_size*seq_number/sgd_batch_size-1)
    #conv layer
    HConv = layer_0.forward(X_new[i*sgd_batch_size:(i+1)*sgd_batch_size,:,:,:,:]) # 5D data for train_data, 3D for Wconv 2D for Bconv
    #print(HConv.shape)
    YConv = reLU(HConv, deriv=False) # no requirement on shape
    #pooling layer
    pool_kernel = np.array([2,2])
    YPool, XArgmax = layer_1.forward(YConv,  pool_kernel) #5D data for YConv
    #print("YPOOOL", YPool.shape)
    #flattening
    X_GRU_flat = YPool.reshape(YPool.shape[0],10,-1) # check size here should be 3D (100*nb_files, 10, 1078)
    #print(X_GRU_flat.shape)
    X_GRU_flat_augmented[:,:,1:1079]= X_GRU_flat
    #GRU
    S_GRU = layer_2.forward(X_GRU_flat_augmented)
    last = S_GRU.shape[1]-1 # timesteps
    S_GRU_augmented[:,1:101] =  S_GRU[:,last,:]
    HLinear = layer_3.forward(S_GRU_augmented)
    yhat = sigmoid(HLinear, deriv=False)
    #print(yhat)
    #calculate loss
    loss = CrossEntropy(yhat, Y_new[i*sgd_batch_size:(i+1)*sgd_batch_size]) # works only for y = 0 or 1
    print(loss)
    #backward pass
    #NOT NEEDED: dloss = derivCrossEntropy(yhat, y_train)
    dhlin = np.zeros((yhat.shape))
    #linear layer
    #because binary cross-entropy is associated to sigmoid we have:
    # dloss/dw = dloss/dyhat*dyhat/dwx*dwx/dw = (-y+yhat)*w
    # dloss/dx = (-y + yhat)*x
    # no need to explicitly compute dloss/dyhat, dyhat/dwx
    #print(yhat.shape, Y_new.shape)
    print(i, yhat.shape,Y_new.shape)
    for k in range(yhat.shape[0]):
        if Y_new[i*sgd_batch_size+k,:]==0:
            dhlin[k,:] = 2*(yhat[k,:] - Y_new[i*sgd_batch_size+k,:])
        else:
            dhlin[k,:] = 20*(yhat[k,:] - Y_new[i*sgd_batch_size+k,:])
    #only 0.5/-0.5
    dxlin, dwlin = layer_3.backward(dhlin, S_GRU_augmented)

    dxlin = np.delete(dxlin, 0, 1)
    dsGRU, dxGRU = layer_2.backward(dxlin, X_GRU_flat_augmented)
    #print(dxGRU.shape)
    dxGRU = np.delete(dxGRU, 0, 2)
    dyMaxPool = dxGRU.reshape(dxGRU.shape[0], dxGRU.shape[1],11,49,2)
    #print(dyMaxPool.shape)
    dxMaxPool = layer_1.backward(XArgmax, dyMaxPool) 
    #last column of convolution image
    dxMaxPool_augmented = np.zeros((dxMaxPool.shape[0], dxMaxPool.shape[1], dxMaxPool.shape[2], dxMaxPool.shape[3]+1, dxMaxPool.shape[4]))
    dxMaxPool_augmented[:,:,:,0:dxMaxPool.shape[3],:]=dxMaxPool
    dhConv2D = reLU(dxMaxPool_augmented, deriv=True)
    #print(dhConv2D.shape)
    layer_0.backward(dhConv2D, X_new[i*sgd_batch_size:(i+1)*sgd_batch_size,:,:,:,:])

    #wz, dwz,_,_,_,_,_,_,_,_,_,_ = layer_2.get_parameters()
    #print(wz[580:585,0:10], dwz[580:585,0:10])

SIZE sigbufs (23, 921600)
921600
(18, 1, 1, 23, 921600, 1)
(18, 10000, 1)
initial time: 0
initial time: 0
initial time: 700000
initial time: 300000
initial time: 0
initial time: 0
initial time: 0
initial time: 0
initial time: 0
initial time: 0
initial time: 0
initial time: 0
initial time: 0
initial time: 0
initial time: 400000
initial time: 200000
(1600, 10, 23, 100, 1) (1600, 1)
Creating Conv2D layer
creating 2D Max pooling layer
creating GRU layer
(1079, 100)
(1600, 10, 100)
creating Linear layer
conv2d Xshape (1600, 10, 23, 100, 1)
(1600,)
(1600,)
0.8898600047095209
0 (1600, 1) (1600, 1)
conv2d Xshape (1600, 10, 23, 100, 1)
(1600,)
(1600,)
1.15810805437319
0 (1600, 1) (1600, 1)
conv2d Xshape (1600, 10, 23, 100, 1)
(1600,)
(1600,)
0.8756739872830286
0 (1600, 1) (1600, 1)
conv2d Xshape (1600, 10, 23, 100, 1)
(1600,)
(1600,)
0.7336825699661891
0 (1600, 1) (1600, 1)
conv2d Xshape (1600, 10, 23, 100, 1)
(1600,)
(1600,)
0.6632243621757129
0 (1600, 1) (1600, 1)
conv2d Xshape (1600, 10, 23,

In [None]:
def compute_error(yhat,y):
    err = 0
    yhat = np.round(yhat)
    diff = yhat-y
    for i in range(diff.shape[0]):
        if diff != 0:
            err+=1
    err = err/diff.shape[0]
    return err


In [61]:

#*******************************test phase***********************************************
file = 18

#Defining sizes for input/target data
X_test=np.zeros((seq_number, timesteps, input_size_new, batch_size_new, 1))

layer_2.change_input_size(100,10,100)
X_GRU_flat_augmented_test = np.ones((100,10,1079))
S_GRU_augmented_test = np.ones((100,101))
initial_time = 400000

for i in range(0,seq_number):
    for j in range(0,timesteps):
        #print(initial, final, batch_size_new, file-1)
        initial = initial_time+(i*batch_size_new*timesteps)+j*batch_size_new
        final = initial_time+(i*batch_size_new*timesteps)+((j+1)*batch_size_new)
        #print(initial_time+(i*batch_size_new*timesteps)+j*batch_size_new)
        #print(initial_time+(i*batch_size_new*timesteps)+((j+1)*batch_size_new))
        X_test[i,j,:,0:batch_size_new,0] =  X[file-1,0,0,0:input_size_new,initial:final,0] 
   

   
max_value_2 = np.amax(abs(X_test))
min_value = np.amin(abs(X_test))
# print('MAX VALUE', max_value)
X_test = X_test/max_value_2

#generate predictions:
HConv = layer_0.forward(X_test) # 5D data for train_data, 3D for Wconv 2D for Bconv
YConv = reLU(HConv, deriv=False) # no requirement on shape
#pooling layer
pool_kernel = np.array([2,2])
YPool, XArgmax = layer_1.forward(YConv,  pool_kernel) #5D data for YConv
#flattening
X_GRU_flat = YPool.reshape(YPool.shape[0],10,-1) # check size here should be 3D (100*nb_files, 10, 1078)
#print(X_GRU_flat.shape)
X_GRU_flat_augmented_test[:,:,1:1079]=X_GRU_flat
#GRU
S_GRU = layer_2.forward(X_GRU_flat_augmented_test)
last = S_GRU.shape[1]-1 # timesteps
S_GRU_augmented_test[:,1:101] =  S_GRU[:,last,:]
HLinear = layer_3.forward(S_GRU_augmented_test)
yhat_test = sigmoid(HLinear, deriv=False)
file2 = open("CNN_GRU_testsetfile18_lowlevel", 'w')
np.savetxt(file2, yhat_test, delimiter="," )
file2.close()
#test_err = compute_error(yhat_test, Y_test)
#print(test_err)



conv2d Xshape (100, 10, 23, 100, 1)


In [23]:
np.round([1.5,1.4])

array([2., 1.])

In [None]:
A = np.array([[1,2],[3,4]])
I = np.tile(A, (2,2,2))
print(I)

In [117]:
print(layer_0.W)
print(layer_0.B)


[[[-0.01349721 -0.79802286]
  [-0.01274448 -1.14339606]]

 [[ 0.00696058 -0.66652883]
  [ 0.01983425 -0.17549277]]]
[[-0.29894161 -1.45337415]]


In [119]:
print(layer_3.W)

[[ 0.28619615]
 [ 0.67600959]
 [-0.299434  ]
 [ 0.28857047]
 [-0.69584952]
 [-0.04184838]
 [-0.95888501]
 [-0.76966287]
 [ 0.75901764]
 [-0.56461196]
 [-0.96454562]
 [ 0.02387505]
 [ 0.92176659]
 [-0.01527161]
 [-0.94178601]
 [ 0.49579442]
 [ 0.30305533]
 [-0.98547802]
 [ 0.54944936]
 [ 0.45959234]
 [ 0.86060038]
 [ 0.05158444]
 [-0.71606789]
 [-0.86182729]
 [-0.14422646]
 [ 0.76364925]
 [-0.41889778]
 [-0.63164139]
 [-0.97080978]
 [ 0.57151749]
 [ 0.98358668]
 [-0.51541532]
 [-0.17859179]
 [-0.35030104]
 [ 0.01420336]
 [ 0.3292313 ]
 [ 0.46236014]
 [ 0.66655303]
 [ 0.40879586]
 [ 0.63263982]
 [-0.27322773]
 [ 0.1328386 ]
 [-0.49677096]
 [-0.42392539]
 [-0.91191459]
 [-0.24041867]
 [-0.96867901]
 [ 0.35115342]
 [ 0.57394092]
 [ 0.62597609]
 [ 0.84378306]
 [ 0.05285628]
 [-0.99696805]
 [ 0.042918  ]
 [-0.23707146]
 [ 0.07739612]
 [ 0.05690764]
 [-0.20282921]
 [ 0.64222785]
 [ 0.19241508]
 [-0.25928168]
 [ 0.93267523]
 [ 0.20632538]
 [ 0.40934429]
 [-0.7589915 ]
 [-0.77674195]
 [-0.01203

In [50]:
nb_ones =0
for i in range(Y_new.shape[0]):
    if Y_new[i]==1:
        nb_ones +=1
nb_ones/=1600
print(nb_ones, Y_new.shape)

0.03125 (1600, 1)
