In [6]:
import copy

import numpy as np
import torch.utils.data as Data
import torchvision.datasets as datasets
import torchvision.transforms as transforms


def normalize(dataset_name, x):
    if dataset_name == "CIFAR10":
        mean = np.array([0.4914, 0.4822, 0.4465])
        std = np.array([0.2023, 0.1994, 0.2010])
    elif dataset_name == "MNIST" or dataset_name == "FashionMNIST":
        mean = np.array([0.1307, ])
        std = np.array([0.3081, ])
    else:
        mean = np.array([0.5, 0.5, 0.5])
        std = np.array([0.5, 0.5, 0.5])

    if not isinstance(x, np.ndarray):
        x = x.numpy()

    if len(x.shape) != 4:
        x = x[..., np.newaxis]

    return (x / 255.0 - mean) / std


def get_dataset(data_path, dataset_name="CIFAR10", is_numpy=True):
    if is_numpy:
        transformer = None
    else:
        if dataset_name == "CIFAR10":
            transformer = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ])
        elif dataset_name == "MNIST" or dataset_name == "FashionMNIST":
            transformer = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ])
        else:
            transformer = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])

    if dataset_name == "MNIST":
        trainset = datasets.MNIST(root=data_path, train=True, download=True, transform=transformer)
        testset = datasets.MNIST(root=data_path, train=False, download=True, transform=transformer)

    elif dataset_name == "FashionMNIST":
        trainset = datasets.FashionMNIST(root=data_path, train=True, download=True, transform=transformer)
        testset = datasets.FashionMNIST(root=data_path, train=False, download=True, transform=transformer)

    elif dataset_name == "SVHN":
        trainset = datasets.SVHN(root=data_path, split='train', download=True, transform=transformer)
        trainset.train_data = np.transpose(trainset.data, (0, 2, 3, 1))
        trainset.train_labels = trainset.labels

        testset = datasets.SVHN(root=data_path, split='test', download=True, transform=transformer)
        testset.test_data = np.transpose(testset.data, (0, 2, 3, 1))
        testset.test_labels = testset.labels

    else:
        trainset = datasets.CIFAR10(root=data_path, train=True, download=True, transform=transformer)
        testset = datasets.CIFAR10(root=data_path, train=False, download=True, transform=transformer)

    if is_numpy:
        train_images = normalize(dataset_name, trainset.train_data)
        test_images = normalize(dataset_name, testset.test_data)

        train_labels = np.array(trainset.train_labels)
        test_labels = np.array(testset.test_labels)

        return (train_images, train_labels), (test_images, test_labels)

    else:
        return trainset, testset


def get_subset_data(dataset_name, data, choosen_classes, sub_train_indices, is_numpy=True, batch_size=None):
    if is_numpy:
        train_images = data["train_images"][sub_train_indices]
        train_labels = data["train_labels"][sub_train_indices]
        for i, class_index in enumerate(choosen_classes):
            train_labels[train_labels == class_index] = i

        test_images = np.concatenate(
            [data["test_images"][data["test_labels"] == class_index] for class_index in choosen_classes])
        test_labels = np.concatenate([np.repeat(i, np.sum(data["test_labels"] == class_index))
                                      for i, class_index in enumerate(choosen_classes)])

        return (train_images, train_labels), (test_images, test_labels)
    else:
        if dataset_name == "SVHN":
            train_labels = data["trainset"].labels
            test_labels = data["testset"].labels
        else:
            train_labels = data["trainset"].train_labels
            test_labels = data["testset"].test_labels

        # get all train class indices
        train_indices = sub_train_indices

        # prepare subset trainset
        trainset_sub = copy.deepcopy(data["trainset"])
        if dataset_name == "SVHN":
            trainset_sub.data = trainset_sub.data[train_indices, ...]
            trainset_sub.labels = np.asarray(trainset_sub.labels)[train_indices]
            for i, class_index in enumerate(choosen_classes):
                trainset_sub.labels[trainset_sub.labels == class_index] = i
        else:
            trainset_sub.train_data = trainset_sub.train_data[train_indices, ...]
            trainset_sub.train_labels = np.asarray(trainset_sub.train_labels)[train_indices]
            for i, class_index in enumerate(choosen_classes):
                trainset_sub.train_labels[trainset_sub.train_labels == class_index] = i

        # get all test class indices
        test_indices = list()
        for class_index in choosen_classes:
            indx = np.argwhere(np.asarray(test_labels) == class_index).flatten()
            test_indices.append(indx)
        test_indices = np.concatenate(test_indices)

        # prepare subset testset
        testset_sub = copy.deepcopy(data["testset"])
        if dataset_name == "SVHN":
            testset_sub.data = testset_sub.data[test_indices, ...]
            testset_sub.labels = np.asarray(testset_sub.labels)[test_indices]
            for i, class_index in enumerate(choosen_classes):
                testset_sub.labels[testset_sub.labels == class_index] = i
        else:
            testset_sub.test_data = testset_sub.test_data[test_indices, ...]
            testset_sub.test_labels = np.asarray(testset_sub.test_labels)[test_indices]
            for i, class_index in enumerate(choosen_classes):
                testset_sub.test_labels[testset_sub.test_labels == class_index] = i

        train_loader = Data.DataLoader(dataset=trainset_sub, batch_size=batch_size, shuffle=True)
        test_loader = Data.DataLoader(dataset=testset_sub, batch_size=batch_size, shuffle=False)

        return train_loader, test_loader


In [18]:
import time
from multiprocessing import cpu_count

import numpy as np
from sklearn.ensemble import RandomForestClassifier

from rerf.rerfClassifier import rerfClassifier

class ConvMF(object):
    def __init__(self, type = 'native', kernel_size = 5, stride = 2, num_trees = 1000, num_split_trees = 100, tree_type = 'S-RerF', patch_height_min = 1, patch_width_min = 1, patch_height_max = 5, patch_width_max = 5):
        self.kernel_size = kernel_size
        self.stride = stride
        self.num_trees = num_trees;
        self.tree_type = tree_type;
        self.type = type;
        self.patch_height_min = patch_height_min
        self.patch_height_max = patch_height_max
        self.patch_width_max = patch_width_max
        self.patch_width_min = patch_width_min
        self.num_split_trees = num_split_trees
        self.time_taken = {"load": 0, "test_chop": 0, "train": 0, "fit": 0, "train_post": 0, "test": 0, "test_post": 0}

    def _convolve_chop(self, images, labels=None, flatten=False):

        batch_size, in_dim, _, num_channels = images.shape

        #20 x 20


        out_dim = int((in_dim - self.kernel_size) / self.stride) + 1  # calculate output dimensions

        # create matrix to hold the chopped images
        out_images = np.zeros((batch_size, out_dim, out_dim,
                               self.kernel_size, self.kernel_size, num_channels))
        out_labels = None

        curr_y = out_y = 0
        # move kernel vertically across the image
        while curr_y + self.kernel_size <= in_dim:
            curr_x = out_x = 0
            # move kernel horizontally across the image
            while curr_x + self.kernel_size <= in_dim:
                # chop images
                out_images[:, out_x, out_y] = images[:, curr_x:curr_x +
                                                     self.kernel_size, curr_y:curr_y+self.kernel_size, :]
                curr_x += self.stride
                out_x += 1
            curr_y += self.stride
            out_y += 1

        if flatten:
            out_images = out_images.reshape(batch_size, out_dim, out_dim, -1)

        if labels is not None:
            out_labels = np.zeros((batch_size, out_dim, out_dim))
            out_labels[:, ] = labels.reshape(-1, 1, 1)

        return out_images, out_labels



    def fit(self, images, labels):
        MF_image = np.zeros(5)
        self.num_classes = len(np.unique(labels))
        if self.type == 'native':
            batch_size, length, width,_ = images.shape


            reshaped_images = images.reshape(batch_size, length*width)


            self.forest = rerfClassifier(projection_matrix=self.tree_type,
                                             n_estimators=self.num_trees,
                                             n_jobs=cpu_count() - 1,
                                             image_height=length,
                                             image_width=width,
                                             patch_height_min=self.patch_height_min,
                                             patch_width_min=self.patch_width_min,
                                             patch_height_max=self.patch_height_max,
                                             patch_width_max=self.patch_height_min)
            self.forest.fit(reshaped_images, labels)
            #Is this necessary
            #for i in range(length):
            #    for j in range(width):
            #        x = 1
            #        MF_image[:, i, j] = np.array([approx_predict_proba_sample_wise(
            #            sample) for sample in images[:, i, j]])[..., np.newaxis]

            MF_image = self.forest.predict_proba(reshaped_images)

        elif self.type == 'kernel_patches':
            sub_images, sub_labels = self._convolve_chop(images, labels=labels, flatten=True)
            batch_size, out_dim, _, _ = sub_images.shape
            MF_image = np.zeros((images.shape[0], out_dim, out_dim, self.num_classes))
            self.forest = np.zeros((out_dim, out_dim), dtype=np.int).tolist()

            for i in range(out_dim):
                for j in range(out_dim):
                    self.forest[i][j] = rerfClassifier(projection_matrix=self.tree_type,
                                                     n_estimators=self.num_trees,
                                                     n_jobs=cpu_count() - 1,
                                                     image_height=self.kernel_size,
                                                     image_width=self.kernel_size,
                                                     patch_height_min=self.patch_height_min,
                                                     patch_width_min=self.patch_width_min,
                                                     patch_height_max=self.patch_height_max,
                                                     patch_width_max=self.patch_height_min)

                    self.forest[i][j].fit(sub_images[:, i, j], sub_labels[:, i, j])
                    MF_image[:, i, j] = self.forest[i][j].predict_proba(
                        sub_images[:, i, j])[..., 1][..., np.newaxis]

        elif self.type == 'split_forest':
            self.forest = []

            batch_size, length, width,_ = images.shape
            reshaped_images = images.reshape(batch_size, length*width)

            MF_image = np.zeros((batch_size, self.num_trees, self.num_classes))

            for n in range(self.num_trees):
                self.forest.append(rerfClassifier(projection_matrix=self.tree_type,
                                             n_estimators=self.num_split_trees,
                                             n_jobs=cpu_count() - 1,
                                             image_height=length,
                                             image_width=width,
                                             patch_height_min=self.patch_height_min,
                                             patch_width_min=self.patch_width_min,
                                             patch_height_max=self.patch_height_max,
                                             patch_width_max=self.patch_height_min));
                self.forest[n].fit(reshaped_images, labels);
                MF_image[:,n] = self.forest[n].predict_proba(reshaped_images)

        return MF_image


    def predict(self, images):
        kernel_predictions = []
        if not self.forest:
            raise Exception("Should fit training data before  predicting")

        if self.type == 'native':
            batch_size, length, width, _ = images.shape
            reshaped_images = images.reshape(batch_size, length*width)
            kernel_predictions = np.zeros((images.shape[0], length, width, 1))
            kernel_predictions = self.forest.predict_proba(reshaped_images)

        elif self.type == 'kernel_patches':
            sub_images, _ = self._convolve_chop(images, flatten = True)
            batch_size, out_dim, _, _ = sub_images.shape
            kernel_predictions = np.zeros((images.shape[0], out_dim, out_dim, self.num_classes))
            for i in range(out_dim):
                for j in range(out_dim):
                    kernel_predictions[:, i, j] = self.forest[i][j].predict_proba(
                            sub_images[:, i, j])

        elif self.type == 'split_forest':
            batch_size, length, width, _ = images.shape
            reshaped_images = images.reshape(batch_size, length*width)
            kernel_predictions = np.zeros((batch_size, self.num_trees, self.num_classes))
            for n in range(self.num_trees):
                kernel_predictions[:,n] = self.forest[n].predict_proba(reshaped_images)

        return kernel_predictions


    def final_predict(self, images):
        if not self.forest:
            raise Exception("Should fit training data before  predicting")

        kernel_predictions = []

        if self.type == 'native':
            batch_size, length, width, _ = images.shape
            reshaped_images = images.reshape(batch_size, length*width)
            kernel_predictions = np.zeros((images.shape[0], length, width, 1))
            kernel_predictions = self.forest.predict(reshaped_images)

        if self.type == 'kernel_patches':
            sub_images, _ = self._convolve_chop(images, flatten = True)
            batch_size, out_dim, _, _ = sub_images.shape
            predictions = np.zeros((images.shape[0], self.num_classes))
            for i in range(out_dim):
                for j in range(out_dim):
                        predictions[:,] = predictions[:,] + self.forest[i][j].predict_proba(
                            sub_images[:, i, j])
            kernel_predictions = np.argmax(predictions, axis = 1)

        if self.type == 'split_forest':
            batch_size, length, width, _ = images.shape
            reshaped_images = images.reshape(batch_size, length*width)
            predictions = np.zeros((batch_size, self.num_classes))
            for n in range(self.num_trees):
                predictions = predictions + self.forest[n].predict_proba(reshaped_images)
            kernel_predictions = np.argmax(predictions, axis = 1)

        return kernel_predictions


In [7]:
trainset, testset = get_dataset("./data", "FashionMNIST", is_numpy=True)



In [8]:
testset[0].shape

(10000, 28, 28, 1)

In [19]:
test = ConvMF(type = 'split_forest', num_trees = 1, num_split_trees = 1)
x  = test.fit(trainset[0], trainset[1])
x.shape



(60000, 1, 10)

In [23]:

import torch.nn as nn
import torch.nn.functional as F


class CustomNet(nn.Module):
    def __init__(self):
        super(CustomNet, self).__init__()
        self.fc1 = nn.Linear(25, 10, 250)
        self.fc2 = nn.Linear(250, 80)
        self.fc3 = nn.Linear(80, 10)

    def forward(self, b):
        b = F.relu(self.fc1(b))
        b = F.relu(self.fc2(b))
        b = self.fc3(b)
        return b
    
t = CustomNet()

t.type

<bound method Module.type of CustomNet(
  (fc1): Linear(in_features=25, out_features=10, bias=True)
  (fc2): Linear(in_features=250, out_features=80, bias=True)
  (fc3): Linear(in_features=80, out_features=10, bias=True)
)>

In [17]:
test.final_predict(testset[0])

array([9, 2, 1, ..., 8, 1, 5])

In [35]:
import time
from multiprocessing import cpu_count

import numpy as np
from sklearn.ensemble import RandomForestClassifier

from rerf.rerfClassifier import rerfClassifier

class ConvMF(object):
    def __init__(self, type = 'native', kernel_size = 5, stride = 2, num_trees = 1000, tree_type = 'S-RerF', patch_height_min = 1, patch_width_min = 1, patch_height_max = 5, patch_width_max = 5):
        self.kernel_size = kernel_size
        self.stride = stride
        self.num_trees = num_trees;
        self.tree_type = tree_type;
        self.type = type;
        self.patch_height_min = patch_height_min
        self.patch_height_max = patch_height_max
        self.patch_width_max = patch_width_max
        self.patch_width_min = patch_width_min
        self.time_taken = {"load": 0, "test_chop": 0, "train": 0, "fit": 0, "train_post": 0, "test": 0, "test_post": 0}

    def _convolve_chop(self, images, labels=None, flatten=False):

        batch_size, in_dim, _, num_channels = images.shape

        #20 x 20


        out_dim = int((in_dim - self.kernel_size) / self.stride) + 1  # calculate output dimensions

        # create matrix to hold the chopped images
        out_images = np.zeros((batch_size, out_dim, out_dim,
                               self.kernel_size, self.kernel_size, num_channels))
        out_labels = None

        curr_y = out_y = 0
        # move kernel vertically across the image
        while curr_y + self.kernel_size <= in_dim:
            curr_x = out_x = 0
            # move kernel horizontally across the image
            while curr_x + self.kernel_size <= in_dim:
                # chop images
                out_images[:, out_x, out_y] = images[:, curr_x:curr_x +
                                                     self.kernel_size, curr_y:curr_y+self.kernel_size, :]
                curr_x += self.stride
                out_x += 1
            curr_y += self.stride
            out_y += 1

        if flatten:
            out_images = out_images.reshape(batch_size, out_dim, out_dim, -1)

        if labels is not None:
            out_labels = np.zeros((batch_size, out_dim, out_dim))
            out_labels[:, ] = labels.reshape(-1, 1, 1)

        return out_images, out_labels



    def fit(self, images, labels):
        MF_image = np.zeros(5)
        self.num_classes = len(np.unique(labels))
        if self.type == 'native':
            batch_size, length, width,_ = images.shape


            reshaped_images = images.reshape(batch_size, length*width)


            self.forest = rerfClassifier(projection_matrix="S-RerF",
                                             n_estimators=self.num_trees,
                                             n_jobs=cpu_count() - 1,
                                             image_height=length,
                                             image_width=width,
                                             patch_height_min=self.patch_height_min,
                                             patch_width_min=self.patch_width_min,
                                             patch_height_max=self.patch_height_max,
                                             patch_width_max=self.patch_height_min)
            self.forest.fit(reshaped_images, labels)
            #Is this necessary
            #for i in range(length):
            #    for j in range(width):
            #        x = 1
            #        MF_image[:, i, j] = np.array([approx_predict_proba_sample_wise(
            #            sample) for sample in images[:, i, j]])[..., np.newaxis]

            MF_image = self.forest.predict_proba(reshaped_images)

        elif self.type == 'kernel_patches':
            sub_images, sub_labels = self._convolve_chop(images, labels=labels, flatten=True)
            batch_size, out_dim, _, _ = sub_images.shape
            MF_image = np.zeros((images.shape[0], out_dim, out_dim, 1))
            self.forest = np.zeros((out_dim, out_dim), dtype=np.int).tolist()

            for i in range(out_dim):
                for j in range(out_dim):
                    self.forest[i][j] = rerfClassifier(projection_matrix="S-RerF",
                                                     n_estimators=self.num_trees,
                                                     n_jobs=cpu_count() - 1,
                                                     image_height=self.kernel_size,
                                                     image_width=self.kernel_size,
                                                     patch_height_min=self.patch_height_min,
                                                     patch_width_min=self.patch_width_min,
                                                     patch_height_max=self.patch_height_max,
                                                     patch_width_max=self.patch_height_min)

                    self.forest[i][j].fit(sub_images[:, i, j], sub_labels[:, i, j])
                    MF_image[:, i, j] = self.forest[i][j].predict_proba(
                        sub_images[:, i, j])[..., 1][..., np.newaxis]

        return MF_image


    def predict(self, images):
        kernel_predictions = []
        if not self.forest:
            raise Exception("Should fit training data before  predicting")

        if self.type == 'native':
            batch_size, length, width, _ = images.shape
            reshaped_images = images.reshape(batch_size, length*width)
            kernel_predictions = np.zeros((images.shape[0], length, width, 1))
            kernel_predictions = self.forest.predict_proba(reshaped_images)

        elif self.type == 'kernel_patches':
            sub_images, _ = self._convolve_chop(images, flatten = True)
            batch_size, out_dim, _, _ = sub_images.reshape
            kernel_predictions = np.zeros((images.shape[0], out_dim, out_dim, self.num_classes))
            for i in range(out_dim):
                for j in range(out_dim):
                    kernel_predictions[:, i, j] = self.forest[i][j].predict_proba(
                            sub_images[:, i, j])
        return kernel_predictions


    def final_predict(self, images):
        if not self.forest:
            raise Exception("Should fit training data before  predicting")

        kernel_predictions = []

        if self.type == 'native':
            batch_size, length, width, _ = images.shape
            reshaped_images = images.reshape(batch_size, length*width)
            kernel_predictions = np.zeros((images.shape[0], length, width, 1))
            kernel_predictions = self.forest.predict(reshaped_images)

        if self.type == 'kernel_patches':
            sub_images, _ = self._convolve_chop(images, flatten = True)
            batch_size, out_dim, _, _ = sub_images.reshape
            predictions = np.zeros((images.shape[0], self.num_classes))
            for i in range(out_dim):
                for j in range(out_dim):
                        predictions[:,] = predictions[:,] + self.forest[i][j].predict_proba(
                            sub_images[:, i, j])
            kernel_predictions = np.argmax(predictions, axis = 1)


        return kernel_predictions


In [27]:
test= ConvMF(type = 'kernel_patches')

In [28]:
test.fit(trainset[0], trainset[1])

KeyboardInterrupt: 

In [15]:
x.shape

(10000, 12, 12, 5, 5, 1)

In [29]:
x = np.zeros((10,5))

In [30]:
x.shape

(10, 5)

In [31]:
y = x[..., 1][..., np.newaxis]

In [32]:
y.shape

(10, 1)

In [52]:
predictions = np.zeros((100, 5))
predictions[:,] = [1, 1, 3, 1, 1]

predictions = np.argmax(predictions, 1)

In [53]:
print (predictions)

[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [54]:
int(11.5)

11