In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import torch

  from numpy.core.umath_tests import inner1d


In [68]:
class ConvRF(object):
    """Convolution-like layer whose "kernels" are random forests.

    Every image is cut into ``kernel_size`` x ``kernel_size`` patches taken
    every ``stride`` pixels.  One ``RandomForestClassifier`` is trained per
    patch position (and additionally per channel when the input is not a raw
    image) to predict the label of the image the patch was cut from.  The
    layer output is built from the leaf indices returned by ``forest.apply``.

    An input whose last axis has 1 or 3 entries is treated as a raw image: its
    patches are flattened and a single forest per position sees all channels.
    Any other channel count is treated as the output of a previous layer and
    gets one forest per position *and* channel.

    The forests are created without a ``random_state``, so two fits on the
    same data are not guaranteed to give identical outputs.

    Attributes:
        kernel_size: Side length of the square patches.
        stride: Step between neighbouring patches.
        kernel_forests: ``None`` until ``convolve_fit`` has run; afterwards a
            nested list of fitted forests indexed ``[i][j]`` (raw input) or
            ``[i][j][k]`` (per-channel input).
        num_outputs: Number of distinct labels seen by ``convolve_fit``; also
            used as the number of trees per forest.  Defaults to 10.
    """

    def __init__(self, kernel_size=5, stride=2):
        self.kernel_size = kernel_size
        self.stride = stride
        self.kernel_forests = None
        self.num_outputs = 10

    @staticmethod
    def _is_raw_image(num_channels):
        """Return True when the channel count marks a raw (grey/RGB) image."""
        return num_channels in (1, 3)

    def _output_width(self, flatten, num_channels):
        """Return the size of the last axis of the layer output.

        Raw input produces one value per tree (``num_outputs`` trees).  The
        per-channel path writes one value per input channel; the width never
        drops below ``num_outputs`` so shapes that worked before are kept, and
        it grows when the input has more channels than there are classes so
        that every channel has a slot to write to.
        """
        if flatten:
            return self.num_outputs
        return max(num_channels, self.num_outputs)

    def _convolve_chop(self, images, labels=None, flatten=False):
        """Cut ``images`` into patches and broadcast ``labels`` to each patch.

        Args:
            images: 4-D array ``(batch, dim, dim, channels)``.  Only the first
                spatial size is read; the second is assumed to be equal.
            labels: Optional per-image labels (one per batch entry).
            flatten: When True every patch is flattened to a 1-D vector.

        Returns:
            ``(patches, patch_labels)``.  ``patches`` has shape
            ``(batch, out_dim, out_dim, kernel_size, kernel_size, channels)``,
            or ``(batch, out_dim, out_dim, -1)`` when ``flatten`` is set.
            ``patch_labels`` is ``None`` if no labels were given, otherwise a
            float array of shape ``(batch, out_dim, out_dim)`` that repeats
            each image's label for all of its patches.

        Raises:
            ValueError: If ``stride`` is not positive or the kernel does not
                fit inside the image.
        """
        batch_size, in_dim, _, num_channels = images.shape
        size, step = self.kernel_size, self.stride

        if step < 1:
            raise ValueError("stride must be a positive integer, got %r" % (step,))
        if size > in_dim:
            # Truncating division used to report one output position here and
            # hand back an all-zero patch instead of failing.
            raise ValueError(
                "kernel_size (%r) is larger than the image dimension (%r)" % (size, in_dim))

        out_dim = (in_dim - size) // step + 1  # number of kernel positions per axis

        # matrix that holds the chopped images
        out_images = np.zeros((batch_size, out_dim, out_dim, size, size, num_channels))

        for out_y in range(out_dim):
            top = out_y * step
            for out_x in range(out_dim):
                left = out_x * step
                out_images[:, out_x, out_y] = images[:, left:left + size, top:top + size, :]

        if flatten:
            out_images = out_images.reshape(batch_size, out_dim, out_dim, -1)

        out_labels = None
        if labels is not None:
            out_labels = np.zeros((batch_size, out_dim, out_dim))
            # every patch inherits the label of the image it came from
            out_labels[:] = np.asarray(labels).reshape(-1, 1, 1)

        return out_images, out_labels

    def _apply_forests(self, sub_images, flatten):
        """Run the fitted forests over chopped patches and collect leaf indices.

        For raw input the full ``forest.apply`` row (one leaf index per tree)
        is stored at each position.  For per-channel input the largest leaf
        index across the trees of the channel's forest is stored.
        """
        batch_size, out_dim = sub_images.shape[:2]
        num_channels = None if flatten else sub_images.shape[-1]
        features = np.zeros(
            (batch_size, out_dim, out_dim, self._output_width(flatten, num_channels)))

        for i in range(out_dim):
            for j in range(out_dim):
                if flatten:
                    features[:, i, j] = self.kernel_forests[i][j].apply(sub_images[:, i, j])
                    continue
                for k in range(num_channels):
                    patch = sub_images[:, i, j, :, :, k].reshape(batch_size, -1)
                    features[:, i, j, k] = np.max(
                        self.kernel_forests[i][j][k].apply(patch), axis=1)
        return features

    def convolve_fit(self, images, labels):
        """Fit one forest per kernel position and return the training output.

        Args:
            images: 4-D array ``(batch, dim, dim, channels)``.
            labels: One label per image.

        Returns:
            Array of shape ``(batch, out_dim, out_dim, width)`` holding the
            leaf-index features of the training images.
        """
        num_channels = images.shape[-1]
        self.num_outputs = len(np.unique(labels))

        # raw input images are flattened; feature maps are handled per channel
        flatten = self._is_raw_image(num_channels)
        sub_images, sub_labels = self._convolve_chop(images, labels=labels, flatten=flatten)
        batch_size, out_dim = sub_images.shape[:2]

        # Plain nested lists as containers for the forests (np.int, used to
        # build them previously, no longer exists in current NumPy).
        if flatten:
            self.kernel_forests = [[None] * out_dim for _ in range(out_dim)]
        else:
            self.kernel_forests = [[[None] * num_channels for _ in range(out_dim)]
                                   for _ in range(out_dim)]

        for i in range(out_dim):
            for j in range(out_dim):
                targets = sub_labels[:, i, j]
                if flatten:
                    forest = RandomForestClassifier(n_estimators=self.num_outputs)
                    forest.fit(sub_images[:, i, j], targets)
                    self.kernel_forests[i][j] = forest
                    continue
                for k in range(num_channels):
                    patch = sub_images[:, i, j, :, :, k].reshape(batch_size, -1)
                    forest = RandomForestClassifier(n_estimators=self.num_outputs)
                    forest.fit(patch, targets)
                    self.kernel_forests[i][j][k] = forest

        return self._apply_forests(sub_images, flatten)

    def convolve_predict(self, images):
        """Transform ``images`` with the forests trained by ``convolve_fit``.

        Args:
            images: 4-D array with the same spatial size and channel count as
                the data passed to ``convolve_fit``.

        Returns:
            Array of shape ``(batch, out_dim, out_dim, width)``.

        Raises:
            RuntimeError: If ``convolve_fit`` has not been called yet.
        """
        if not self.kernel_forests:
            raise RuntimeError("Should fit training data before predicting")

        # raw input images are flattened; feature maps are handled per channel
        flatten = self._is_raw_image(images.shape[-1])
        sub_images, _ = self._convolve_chop(images, flatten=flatten)
        return self._apply_forests(sub_images, flatten)

In [66]:
# prepare MNIST data
import torchvision.datasets as datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

def _mnist_to_numpy(dataset):
    """Return ``(images, labels)`` NumPy arrays for a torchvision MNIST dataset.

    Reads the dataset's ``data`` / ``targets`` tensors (the current names of
    the deprecated ``train_data`` / ``test_data`` and ``train_labels`` /
    ``test_labels`` aliases).  A trailing axis of length 1 is appended to the
    images so that they are 4-D, which is what ``ConvRF`` unpacks from
    ``images.shape``.
    """
    images = dataset.data.numpy()[..., np.newaxis]
    labels = dataset.targets.numpy()
    return images, labels


# training split (downloaded into ./data on first use)
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=None)
mnist_train_images, mnist_train_labels = _mnist_to_numpy(mnist_trainset)

# held-out test split
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=None)
mnist_test_images, mnist_test_labels = _mnist_to_numpy(mnist_testset)

In [57]:
def run_experiment(mnist_train_images, mnist_train_labels, mnist_test_images, mnist_test_labels,
                   layer_specs=((3, 2), (3, 2), (3, 1))):
    """Train a growing stack of ConvRF layers and report test metrics per depth.

    Layers are added one at a time.  After each new layer is fitted on the
    output of the previous one, a fresh ``RandomForestClassifier`` with
    default settings is trained on the flattened training feature map and
    evaluated on the flattened test feature map.  Accuracy and the confusion
    matrix are printed for every depth; nothing is returned.

    Args:
        mnist_train_images: Training images, 4-D ``(batch, dim, dim, channels)``.
        mnist_train_labels: One label per training image.
        mnist_test_images: Test images, same layout as the training images.
        mnist_test_labels: One label per test image.
        layer_specs: Sequence of ``(kernel_size, stride)`` pairs, one per
            ConvRF layer, applied in order.  The default reproduces the
            original three-layer setup.
    """
    num_train = len(mnist_train_images)
    num_test = len(mnist_test_images)

    # feature maps fed to the next layer; start from the raw images
    train_map, test_map = mnist_train_images, mnist_test_images

    for depth, (kernel_size, stride) in enumerate(layer_specs, start=1):
        ##########################################################
        print("Num. of Convolution Layers: " + str(depth))

        # new conv layer, fitted on the previous layer's training output
        conv = ConvRF(kernel_size=kernel_size, stride=stride)
        train_map = conv.convolve_fit(train_map, mnist_train_labels)

        # full RF on the flattened feature map of this depth
        full_rf = RandomForestClassifier()
        full_rf.fit(train_map.reshape(num_train, -1), mnist_train_labels)

        # push the test set through the same layer and classify it
        test_map = conv.convolve_predict(test_map)
        mnist_test_preds = full_rf.predict(test_map.reshape(num_test, -1))

        print("Test Accuracy: " + str(accuracy_score(mnist_test_labels, mnist_test_preds)))
        print("Validation Confusion Matrix: \n" + str(confusion_matrix(mnist_test_labels, mnist_test_preds)))

In [61]:
# full depth until min_split = 2
# Runs the experiment on the complete MNIST train and test arrays.
# NOTE(review): no RandomForestClassifier call in the visible code sets a
# minimum-split or depth parameter, so the label above presumably records the
# forest settings that were in effect when this cell was executed. The
# execution counts show the ConvRF cell (In [68]) was re-run after this one
# (In [61]), so the printed output may stem from an earlier class version.
# Confirm before relying on the numbers.
run_experiment(mnist_train_images, mnist_train_labels, mnist_test_images, mnist_test_labels)

Num. of Convolution Layers: 1
Test Accuracy: 0.9512
Validation Confusion Matrix: 
[[ 967    1    1    0    0    2    4    1    4    0]
 [   0 1119    4    3    1    1    1    1    5    0]
 [   9    0  985    4    6    0    2   11   13    2]
 [   3    1   19  952    0   12    0    9    7    7]
 [   2    1    2    1  930    1    6    5    7   27]
 [  11    3    4   32    2  820    8    2    5    5]
 [   6    4    2    1    4   15  920    0    6    0]
 [   3    4   19    4    4    1    0  978    4   11]
 [   8    2    7    9   11    9    7    5  907    9]
 [   9    5    5   11   21    6    0   13    5  934]]
Num. of Convolution Layers: 2
Test Accuracy: 0.9376
Validation Confusion Matrix: 
[[ 967    1    0    0    1    2    2    2    4    1]
 [   1 1116    6    3    1    1    2    0    5    0]
 [   9    1  986    5    6    2    3   10   10    0]
 [   0    2   24  926    1   16    0   16   19    6]
 [   3    2    4    1  921    2    7    0   12   30]
 [  11    2    3   43    8  787   12    

In [67]:
# max_depth = 6
# Quick run on a subset: the first 1000 training images and the first 100
# test images.
# NOTE(review): max_depth is not passed to any RandomForestClassifier in the
# visible code; the label above presumably describes a temporary edit made
# when this cell was executed (In [67], before the ConvRF cell was re-run as
# In [68]). Confirm before comparing against the full-depth run.
run_experiment(mnist_train_images[:1000], mnist_train_labels[:1000], mnist_test_images[:100], mnist_test_labels[:100])

Num. of Convolution Layers: 1
Test Accuracy: 0.81
Validation Confusion Matrix: 
[[ 8  0  0  0  0  0  0  0  0  0]
 [ 0 14  0  0  0  0  0  0  0  0]
 [ 1  0  5  0  0  0  1  1  0  0]
 [ 0  0  1  9  0  1  0  0  0  0]
 [ 0  0  0  0 12  1  0  0  0  1]
 [ 0  0  0  0  0  4  1  1  0  1]
 [ 0  0  2  0  2  0  6  0  0  0]
 [ 0  0  0  0  0  0  0 14  0  1]
 [ 0  0  1  0  0  0  0  0  1  0]
 [ 0  0  0  0  1  1  0  1  0  8]]
Num. of Convolution Layers: 2
Test Accuracy: 0.85
Validation Confusion Matrix: 
[[ 8  0  0  0  0  0  0  0  0  0]
 [ 0 14  0  0  0  0  0  0  0  0]
 [ 1  0  6  0  0  0  0  1  0  0]
 [ 0  0  0 10  1  0  0  0  0  0]
 [ 0  0  1  0  9  0  0  1  0  3]
 [ 0  0  0  0  0  7  0  0  0  0]
 [ 1  0  0  0  3  0  6  0  0  0]
 [ 0  0  0  0  1  0  0 14  0  0]
 [ 0  0  1  0  0  0  0  0  1  0]
 [ 0  0  0  0  1  0  0  0  0 10]]
Num. of Convolution Layers: 3
Test Accuracy: 0.79
Validation Confusion Matrix: 
[[ 8  0  0  0  0  0  0  0  0  0]
 [ 0 12  0  1  0  0  0  0  0  1]
 [ 0  0  6  0  0  1  0  1  0  0]