In [1]:
from autograd import grad as compute_grad   
import autograd.numpy as np
import numpy as npo
import copy
from datetime import datetime 

# this is needed to compensate for matplotlib notebook's tendency to blow up images when plotted inline
%matplotlib notebook
from matplotlib import rcParams
rcParams['figure.autolayout'] = True

# The most naive convolution code ever (?)

To construct a stack of feature maps of an input tensor of images we first try the most naive approach ever - we construct one feature map at a time by looping through all images, and for each image constructing a feature map for each convolution kernel (again by explicitly looping through the kernels).  

There is nothing mathematically wrong with taking the naive way out - all of the computations here will be correct, and we can use this as either a fixed convolution feature extractor or place it in a feedforward network and learn the kernels.  Computationally speaking, however, this will be extremely slow in ``Python`` due to all of the nested for-loops!  We will re-write this exact computation below using ``tensors`` instead of individual images / kernels, which will drastically improve computation speed.

In [2]:
class naive_conv_layer:  
    '''
    A simple convnet module.  Here we calculate feature maps exactly one at a time, using
    a host of nested for-loops.  This means computation will be quite slow!  However this
    can still be used in theory as a fixed convolutional feature extractor or as a convolutional
    layer in a conv net (where the kernels are learned).

    Pipeline applied to every (image, kernel) pair by make_feature_map:
    zero-pad -> stride-1 convolution -> ReLU -> max-pooling.

    Attributes
    ----------
    pool_window_size : int
        Side length of the square max-pooling window.
    pool_stride : int
        Step between consecutive pooling windows.
    kernel : array
        The kernel currently being applied; set by make_feature_map and read by conv_function.
    '''   

    # pooling / downsampling parameters (shared by every feature map)
    pool_window_size = 6
    pool_stride = 3

    # a convolution function
    def conv_function(self,window):
        '''Return the sum of the elementwise product of self.kernel and window (a scalar).'''
        return np.sum(self.kernel*window)

    # a pooling function
    def pool_function(self,window):
        '''Return the maximum entry of window (max-pooling of a single window).'''
        return np.max(window)

    # activation function
    def activation(self,window):
        '''Apply the rectified linear unit max(0, .) elementwise.'''
        return np.maximum(0,window)

    # pad image with appropriate number of zeros for convolution
    def pad_image(self,image,kernel_size):
        '''
        Surround a 2-d image with a border of zeros so that a stride-1 convolution with a
        kernel_size x kernel_size kernel produces an output of the same size as the input.

        Raises ValueError when kernel_size is not a positive odd integer, since only odd
        kernels have a symmetric border of (kernel_size - 1)/2 zeros on each side.
        '''
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError('kernel_size must be a positive odd integer, got {}'.format(kernel_size))
        pad_val = (kernel_size - 1)//2

        # build the padded shape explicitly: np.shape returns a tuple, which cannot be added to an int
        num_rows,num_cols = np.shape(image)
        image_padded = np.zeros((num_rows + 2*pad_val,num_cols + 2*pad_val))

        # explicit end indices (rather than -pad_val) keep the slice valid when pad_val == 0
        image_padded[pad_val:pad_val + num_rows,pad_val:pad_val + num_cols] = image
        return image_padded   

    # sliding window function, convolution or pooling done on each window
    def sliding_window_image(self,image,window_size,stride,func):
        '''
        Slide a window_size x window_size window over a square image with the given stride,
        apply func to every window, and return the results as a flat array ordered
        row-by-row (outer loop over rows, inner loop over columns).
        '''
        # grab image size (image is treated as square), set container for results
        image_size = np.shape(image)[0]
        results = []

        # slide window over input image with given window size / stride and function
        for i in range(0, image_size - window_size + 1, stride):
            for j in range(0, image_size - window_size + 1, stride):
                # now we have a window from our image, and use the desired 'func' to process it
                window = image[i:i+window_size,j:j+window_size]
                results.append(func(window))

        # return results in numpy array format
        return np.array(results)

    # our main function for constructing feature maps via convolution / pooling
    def make_feature_map(self,image,kernel):
        '''
        Build one pooled feature map from one image and one kernel.

        image  : any array holding a perfect-square number of pixels; it is reshaped
                 (row-major) into a square 2-d image.
        kernel : square 2-d array with odd side length.

        Returns a square 2-d array of max-pooled, ReLU-activated convolution responses.
        The image side must be at least pool_window_size for any pooling window to exist.
        '''
        # square up input
        self.kernel = kernel
        img_size = int(round((np.size(image))**(0.5)))
        image = np.reshape(image,(img_size,img_size))

        # pad image appropriately
        kernel_size = kernel.shape[0]
        padded_image = self.pad_image(image,kernel_size)

        # create feature map via convolution --> returns flattened convolution calculations
        conv_stride = 1
        feature_map = self.sliding_window_image(padded_image,kernel_size,conv_stride,self.conv_function)

        # reshape convolution feature map into array of same size as the (unpadded) input
        feature_map = np.reshape(feature_map,(np.shape(image)))

        # now shove result through nonlinear activation
        feature_map = self.activation(feature_map)

        #### now pool / downsample feature map, first window then pool on each window
        max_pool = self.sliding_window_image(feature_map,self.pool_window_size,self.pool_stride,self.pool_function)

        # reshape flat pooling results into a square array
        pool_size = int(round((np.size(max_pool))**(0.5)))
        max_pool = np.reshape(max_pool,(pool_size,pool_size))

        return max_pool

    # convolution layer function - here we collect all of the feature maps and package them appropriately
    def conv_layer(self,tensor,kernels):   
        '''
        Compute pooled feature maps for every image in tensor with every kernel in kernels.

        tensor  : iterable of images (each with a perfect-square number of pixels).
        kernels : iterable of square, odd-sized 2-d kernels.

        Returns a 2-d array with one row per image; each row is the column-major
        (order='F') flattening of that image's (num_kernels, p, p) stack of pooled maps.
        '''
        all_feature_maps = []
        for image in tensor:
            current_feat_maps = []
            for kernel in kernels:
                # compute feature map for current image using current convolution kernel
                feat_map = self.make_feature_map(image,kernel)

                # store feature map of current kernel
                current_feat_maps.append(feat_map)

            # append all feature maps of current image to running list
            all_feature_maps.append(current_feat_maps)

        # convert to array and re-shape properly
        all_feature_maps = np.array(all_feature_maps)
        all_feature_maps = np.reshape(all_feature_maps,(np.shape(all_feature_maps)[0],np.prod(np.shape(all_feature_maps)[1:])),order = 'F')
        return all_feature_maps

# A much more efficient tensor-based implementation

By carefully thinking about how convolutional feature maps are constructed on a set of images we can re-write the implementation above in a much more efficient manner by employing ``tensors`` - i.e., three (and higher) dimensional arrays.  Here the entire stack (``tensor``) of images is processed simultaneously, minimizing the number of explicit for-loops required.

In [3]:
class tensor_conv_layer:    
    '''
    A convolutional layer that processes the whole stack of images at once: for each
    window position, all images and all kernels are handled by a single matrix product.

    Pipeline (see conv_layer): reshape rows of the input into square images ->
    zero-pad -> stride-1 convolution -> ReLU -> max-pooling -> flatten per image.

    Attributes
    ----------
    pool_window_size : int
        Side length of the square max-pooling window.
    pool_stride : int
        Step between consecutive pooling windows.
    kernels : array, shape (num_kernels, kernel_size*kernel_size)
        Flattened kernels; set by conv_layer and read by conv_function.
    kernel_size : int
        Side length of each kernel; set by conv_layer.
    x, y : arrays
        Training inputs / labels stored by load_data and read by softmax and count.
    '''

    # pooling / downsampling parameters
    pool_window_size = 6
    pool_stride = 3

    # convolution function
    def conv_function(self,tensor_window):
        '''
        Convolve one window position of every image with every kernel.

        tensor_window has shape (num_images, k, k); the result has shape
        (num_kernels, num_images).
        '''
        window_shape = np.shape(tensor_window)
        flat_windows = np.reshape(tensor_window,(window_shape[0],window_shape[1]*window_shape[2]))
        return np.dot(self.kernels,flat_windows.T)

    # pooling function
    def pool_function(self,tensor_window):
        '''Max over the two spatial axes of a (num_images, w, w) window -> shape (num_images,).'''
        return np.max(tensor_window,axis = (1,2))

    # activation 
    def activation(self,tensor_window):
        '''Apply the rectified linear unit max(0, .) elementwise.'''
        return np.maximum(0,tensor_window)

    # pad image with appropriate number of zeros for convolution
    def pad_tensor(self,tensor,kernel_size):
        '''
        Zero-pad the two spatial axes of a (num_images, rows, cols) tensor so that a
        stride-1 convolution with a kernel_size x kernel_size kernel keeps the spatial size.

        Raises ValueError when kernel_size is not a positive odd integer.
        '''
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError('kernel_size must be a positive odd integer, got {}'.format(kernel_size))
        pad_val = (kernel_size - 1)//2

        num_images,num_rows,num_cols = np.shape(tensor)
        tensor_padded = np.zeros((num_images,num_rows + 2*pad_val,num_cols + 2*pad_val))

        # explicit end indices (rather than -pad_val) keep the slice valid when pad_val == 0
        tensor_padded[:,pad_val:pad_val + num_rows,pad_val:pad_val + num_cols] = tensor
        return tensor_padded    
    
    # sliding window over a whole tensor of images
    def sliding_window_tensor(self,tensor,window_size,stride,func):
        '''
        Slide a square window over the spatial axes of a (num_images, size, size) tensor
        and apply func to the windowed tensor at every position (row-major position order).

        Returns
        -------
        (num_kernels, num_images, num_positions) when func is self.conv_function,
        (num_images, num_positions) otherwise (e.g. for self.pool_function).
        '''
        # grab image size, set container for results
        image_size = np.shape(tensor)[1]
        results = []
        
        # slide window over input image with given window size / stride and function
        for i in range(0, image_size - window_size + 1, stride):
            for j in range(0, image_size - window_size + 1, stride):
                # take a window of input tensor
                tensor_window = tensor[:,i:i+window_size, j:j+window_size]
                
                # now process entire windowed tensor at once
                tensor_window = np.array(tensor_window)
                results.append(func(tensor_window))
        
        # re-shape properly: move the window-position axis behind the leading axis
        results = np.array(results)
        results = results.swapaxes(0,1)
        if func == self.conv_function:
            # convolution results carry an extra kernel axis; put positions last
            results = results.swapaxes(1,2)
        return results 

    # make feature map
    def make_feature_tensor(self,tensor):
        '''
        Convolve, activate and max-pool an already padded (num_images, size, size) tensor.

        Requires self.kernels and self.kernel_size to be set (conv_layer does this).
        Returns an array of shape (num_kernels, num_images, num_pooled_positions).
        '''
        # create feature map via convolution --> returns flattened convolution calculations
        conv_stride = 1
        feature_tensor = self.sliding_window_tensor(tensor,self.kernel_size,conv_stride,self.conv_function) 

        # re-shape convolution output ---> to square of same size as original input
        num_filters = np.shape(feature_tensor)[0]
        num_images = np.shape(feature_tensor)[1]
        square_dim = int(round((np.shape(feature_tensor)[2])**(0.5)))
        feature_tensor = np.reshape(feature_tensor,(num_filters,num_images,square_dim,square_dim))
        
        # shove feature map through nonlinearity
        feature_tensor = self.activation(feature_tensor)

        # pool feature map --- i.e., downsample it, one kernel's stack of maps at a time
        downsampled_feature_map = []
        for t in range(num_filters):
            temp_tens = feature_tensor[t,:,:,:]
            d = self.sliding_window_tensor(temp_tens,self.pool_window_size,self.pool_stride,self.pool_function)
            downsampled_feature_map.append(d)
        downsampled_feature_map = np.array(downsampled_feature_map)

        # return downsampled feature map --> flattened over pooled positions
        return downsampled_feature_map

    # our normalization function
    def normalize(self,data,data_mean,data_std):
        '''Standardize data; 10**(-5) is added to data_std to avoid division by zero.'''
        normalized_data = (data - data_mean)/(data_std + 10**(-5))
        return normalized_data

    # convolution layer
    def conv_layer(self,tensor,kernels):
        '''
        Compute pooled convolutional features for a stack of images.

        tensor  : 2-d array, one flattened image per row; each row must hold a
                  perfect-square number of pixels and is unflattened column-major (order='F').
        kernels : array of shape (num_kernels, k, k) with k odd.

        Returns a 2-d array with one row of features per image.
        '''
        #### prep input tensor #####
        # create tensor out of input images (each row unflattened column-major into a square)
        image_dim = int(round((np.shape(tensor)[1])**(0.5)))
        tensor = np.reshape(tensor,(np.shape(tensor)[0],image_dim,image_dim),order = 'F')

        # pad tensor
        self.kernel_size = np.shape(kernels[0])[0]
        padded_tensor = self.pad_tensor(tensor,self.kernel_size)

        #### prep kernels - reshape into array for more efficient computation ####
        self.kernels = np.reshape(kernels,(np.shape(kernels)[0],np.shape(kernels)[1]*np.shape(kernels)[2]))
        
        #### compute convolution feature maps / downsample via pooling over entire tensor #####
        feature_tensor = self.make_feature_tensor(padded_tensor)

        # put images first, then flatten (kernel, position) column-major into one feature row per image
        feature_tensor = feature_tensor.swapaxes(0,1)
        feature_tensor = np.reshape(feature_tensor, (np.shape(feature_tensor)[0],np.shape(feature_tensor)[1]*np.shape(feature_tensor)[2]),order = 'F')
        
        return feature_tensor
    
    ##### some supervised learning capabilities #####
    def load_data(self,x,y):
        '''Store inputs x and labels y for use by softmax and count.'''
        self.x = x
        self.y = y
        
    def predict(self,x,w):
        '''
        Linear model on top of the convolutional features.

        w[0] holds the kernels passed to conv_layer; w[1] holds the linear weights,
        with w[1][0] the bias and w[1][1:] the feature-touching weights.
        '''
        # pass input data through convolutional layer
        x_conv = self.conv_layer(x,w[0])
        
        # take inner product against output of conv layer
        value = w[1][0] + np.dot(x_conv,w[1][1:])
        return value
    
    # the softmax cost function 
    def softmax(self,w):
        '''
        Sum over the stored data of log(1 + exp(-y * predict(x, w))).

        Evaluated with logaddexp(0, z) = log(1 + exp(z)) so that large margins do not
        overflow exp() and turn the cost into inf.
        '''
        cost = np.sum(np.logaddexp(0,(-self.y)*(self.predict(self.x,w))))
        return cost
    
    def count(self,w):
        '''
        Return 0.25 * sum((sign(prediction) - y)**2) over the stored data.
        NOTE(review): this equals the number of misclassifications only if labels are +1/-1 - confirm.
        '''
        return 0.25*np.sum((np.sign(self.predict(self.x,w)) - self.y)**2)

# A speed test in evaluating the convolution layers implemented above

Let's compare the speeds of our naive versus tensor-based convolution layer implementations using the face detection image dataset and the set of edge detecting kernels given below.

In [4]:
# eight 3x3 edge-detecting kernels, stacked into an array of shape (8,3,3);
# every kernel has a zero centre and entries that sum to zero
edge_detection_kernels = np.array([
    # negative top row, positive bottom row
    [[-1, -1, -1], [ 0,  0,  0], [ 1,  1,  1]],
    # negative upper-left corner, positive lower-right corner
    [[-1, -1,  0], [-1,  0,  1], [ 0,  1,  1]],
    # negative left column, positive right column
    [[-1,  0,  1], [-1,  0,  1], [-1,  0,  1]],
    # positive upper-right corner, negative lower-left corner
    [[ 0,  1,  1], [-1,  0,  1], [-1, -1,  0]],
    # positive left column, negative right column
    [[ 1,  0, -1], [ 1,  0, -1], [ 1,  0, -1]],
    # negative upper-right corner, positive lower-left corner
    [[ 0, -1, -1], [ 1,  0, -1], [ 1,  1,  0]],
    # positive top row, negative bottom row
    [[ 1,  1,  1], [ 0,  0,  0], [-1, -1, -1]],
    # positive upper-left corner, negative lower-right corner
    [[ 1,  1,  0], [ 1,  0, -1], [ 0, -1, -1]],
])

Let's try it out.  Here we will use the ``datetime`` library to time each implementation.

In [5]:
# declare instances of each convolution function
# (one object per implementation, so both can be run and timed on the same input below)
naive_conv_test = naive_conv_layer()
tensor_conv_test = tensor_conv_layer()

In [6]:
# a single random 3x3 kernel, stored as a stack of shape (1,3,3)
# NOTE(review): no random seed is set, so the values differ on every run, and no
# later cell visible in this notebook reads `kernels` - confirm whether it is still needed
kernels = np.random.randn(1,3,3)

Load in the face detection dataset.

In [17]:
# load data
datapath = '../../mlrefined_datasets/convnet_datasets/feat_face_data.csv'
data = np.loadtxt(datapath,delimiter = ',')

# import data and reshape appropriately: every column but the last is an input
# feature, the last column is the label (sliced with -1: so it stays a column vector;
# the previous slice -1:1 selected no columns at all)
x = data[:,:-1]
y = data[:,-1:]

# take a small number of examples for the speed test
x = x[:100,:]
y = y[:100]

Now we create a set of fixed convolution features (using edge detecting kernels) - first using our naive implementation ``naive_conv_layer``.

In [19]:
# start timer
startTime = datetime.now() 

# run the naive layer on x, the small subset of examples taken in the data-loading
# cell (the name x_train used here before is not defined anywhere in this notebook)
feature_maps_1 = naive_conv_test.conv_layer(x,edge_detection_kernels)

# finish timing
timeElapsed = datetime.now() - startTime 
print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:00:22.732094


And our more efficient tensor-based implementation ``tensor_conv_layer``.

In [20]:
# start timer
startTime = datetime.now() 

# run the tensor-based layer on x, the small subset of examples taken in the data-loading
# cell (the name x_train used here before is not defined anywhere in this notebook)
feature_maps_2 = tensor_conv_test.conv_layer(x,edge_detection_kernels)

# finish timing
timeElapsed = datetime.now() - startTime 
print('Time elpased (hh:mm:ss.ms) {}'.format(timeElapsed))

Time elpased (hh:mm:ss.ms) 0:00:00.072597


Wow!  That's around 300 times faster (roughly 22.7 seconds versus 0.07 seconds on this run).