In [8]:
from IPython.display import clear_output
import numpy as np

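Regressor class: a two-layer linear model (input dimension → reduced dimension → input dimension) trained by gradient descent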

In [61]:
class Regressor:
    """Two-layer linear model ``A_DIM -> R_DIM -> A_DIM`` trained by gradient descent.

    ``W[0]`` maps inputs to the reduced (hidden) representation and ``W[1]``
    maps that representation back to ``A_DIM`` outputs.  There are no biases
    and no non-linearities, so ``predict`` is ``data.dot(W[0]).dot(W[1])``.

    Attributes:
        A_DIM: Number of input/output features.
        R_DIM: Number of hidden (reduced) features.
        W: ``[W0, W1]`` weight matrices, initialised uniformly in ``[0, 1)``.
        variance, max_error, accuracy: Metrics from the latest ``validate``
            call; ``None`` until ``validate`` (or ``fit``) has run.
        metrics: The same three values as a tuple, or ``None``.
    """

    def __init__(self, A_DIM, R_DIM):
        self.A_DIM = A_DIM
        self.R_DIM = R_DIM
        self.W = [np.random.random((self.A_DIM, self.R_DIM)),
                  np.random.random((self.R_DIM, self.A_DIM))]
        self.label_norm = 1
        # Initialise the metrics so str()/repr() work on an instance that has
        # not been validated yet (previously this raised AttributeError).
        self.variance = self.max_error = self.accuracy = None
        self.metrics = None

    def validate(self, a, b):
        """Compare predictions ``a`` with targets ``b`` and store the metrics."""
        self.metrics = self.compare_vector(a, b)
        self.variance, self.max_error, self.accuracy = self.metrics

    def compare_vector(self, a, b):
        """Return ``(variance, max_error, accuracy)`` for the error ``a - b``.

        * ``variance``  -- ``np.var`` of the element-wise error.
        * ``max_error`` -- largest absolute element-wise error.
        * ``accuracy``  -- ``-2 * log10(variance)``; grows as the error
          variance shrinks and is ``inf`` when the variance is exactly 0.
        """
        diff = np.squeeze(a - b)
        variance = np.var(diff)
        # Use the magnitude: a signed max would understate the error whenever
        # the largest deviations are negative.
        max_error = np.max(np.abs(diff))
        accuracy = -np.log10(variance) * 2
        return variance, max_error, accuracy

    def dropout_matrix(self, percent, shape):
        """Return a random 0/1 mask of ``shape``; each entry is 1 with probability ``percent``."""
        return np.random.choice(2, shape, p=[1 - percent, percent])

    def fitUtil(self, training_data, training_label, alpha=0.0001, retain=1):
        """Run one full-batch gradient-descent step on the squared error.

        Args:
            training_data: Array of shape ``(n_samples, A_DIM)``.
            training_label: Array of shape ``(n_samples, A_DIM)``.
            alpha: Learning rate.
            retain: Probability of keeping each input / hidden feature.  One
                mask per layer is drawn per step and shared by all samples.
                ``retain=1`` disables dropout.
        """
        # Forward pass; the input mask is drawn before the hidden mask.
        input_mask = self.dropout_matrix(retain, training_data.shape[1])
        masked_input = training_data * input_mask
        hidden_layer = masked_input.dot(self.W[0])

        hidden_mask = self.dropout_matrix(retain, hidden_layer.shape[1])
        masked_hidden = hidden_layer * hidden_mask
        predicted_label = masked_hidden.dot(self.W[1])

        error = training_label - predicted_label

        # Backward pass.  The gradients use the same masked activations as the
        # forward pass, so dropped features receive no update.  With retain=1
        # both masks are all ones and this reduces to the unmasked gradients.
        scale = training_data.size
        grad_hidden = (2 * error.dot(self.W[1].T)) * hidden_mask
        dW0 = -masked_input.T.dot(grad_hidden) / scale
        dW1 = -masked_hidden.T.dot(2 * error) / scale

        self.W[0] -= dW0 * alpha
        self.W[1] -= dW1 * alpha

    def fit(self, vecs, vecsy, epochs, folds=10, **kwargs):
        """Train with k-fold cross validation and return the last fold's metrics.

        The data is cut into ``folds`` consecutive parts.  For each part in
        turn the model is trained for ``epochs // folds`` steps on the other
        parts and then validated on the held-out part.  The weights are NOT
        reset between folds, so later folds are validated on samples that
        were used for training in earlier folds.

        Args:
            vecs: Training inputs, shape ``(n_samples, A_DIM)``.
            vecsy: Training targets, same number of rows as ``vecs``.
            epochs: Total number of gradient steps across all folds.
            folds: Number of cross-validation folds (default 10).  The sample
                count does not need to be divisible by it.
            **kwargs: Forwarded to ``fitUtil`` (``alpha``, ``retain``).

        Returns:
            ``(variance, max_error, accuracy)`` of the final fold.

        Raises:
            ValueError: If ``folds < 2``, if the inputs and targets differ in
                length, or if there are fewer samples than folds.
        """
        vecs = np.asarray(vecs)
        vecsy = np.asarray(vecsy)

        if folds < 2:
            raise ValueError("folds must be at least 2")
        if len(vecs) != len(vecsy):
            raise ValueError("vecs and vecsy must have the same number of rows")
        if len(vecs) < folds:
            raise ValueError("need at least as many samples as folds")

        data_folds = np.array_split(vecs, folds)
        label_folds = np.array_split(vecsy, folds)
        epochs_per_fold = epochs // folds

        for i in range(folds):
            c_v_training_data = np.vstack(data_folds[:i] + data_folds[i + 1:])
            c_v_training_label = np.vstack(label_folds[:i] + label_folds[i + 1:])
            c_v_testing_data, c_v_testing_label = data_folds[i], label_folds[i]

            for _ in range(epochs_per_fold):
                self.fitUtil(c_v_training_data, c_v_training_label, **kwargs)

            self.validate(self.predict(c_v_testing_data), c_v_testing_label)

        # Return only after every fold has been visited; returning inside the
        # loop stopped training after the first fold.
        return self.metrics

    def predict(self, data):
        """Map ``data`` through both layers (encode, then decode)."""
        return data.dot(self.W[0]).dot(self.W[1])

    def encode(self, data):
        """Project ``data`` onto the ``R_DIM``-dimensional hidden representation."""
        return data.dot(self.W[0])

    def decode(self, data):
        """Map a hidden representation back to ``A_DIM`` features."""
        return data.dot(self.W[1])

    def __str__(self):
        return '\n'.join(["Error variance " + str(self.variance),
                          "Error amplitude " + str(self.max_error),
                          "Accuracy " + str(self.accuracy),
                          "Weights" + str(self.W)])

    def __repr__(self):
        # Same text as str(); kept as one implementation to avoid drift.
        return self.__str__()


In [94]:
class DataGenerator:
    """Generate random vectors confined to a rotated low-dimensional subspace.

    Samples have ``dim_a`` coordinates.  Only the first ``dim_b`` carry the
    signal (uniform in ``[-1, 1)``); the rest hold optional noise.  Every
    sample is then multiplied by ``self.r``, the product of
    ``rotation_count`` random plane rotations (identity when the count is 0).
    """

    def __init__(self, dim_a, dim_b, rotation_count=0):
        self.actual_dim = dim_a
        self.reduced_dim = dim_b
        self.r = np.identity(self.actual_dim)
        # Accumulate the rotations one plane at a time.
        for _ in range(rotation_count):
            self.r = np.dot(self.r, self.set_rotational_matrix())

    def generate(self, data_count=1000, noise=0):
        """Return ``data_count`` samples as an array of shape ``(data_count, actual_dim)``.

        ``noise`` scales the uniform ``[0, 1)`` values placed in the
        ``actual_dim - reduced_dim`` trailing coordinates before rotation.
        """
        signal = np.random.random((data_count, self.reduced_dim)) * 2 - 1
        filler_width = self.actual_dim - self.reduced_dim
        filler = np.random.random((data_count, filler_width)) * noise
        samples = np.concatenate((signal, filler), axis=1)
        return samples.dot(self.r)

    def set_rotational_matrix(self):
        """Build and return a rotation by a random angle in a random coordinate plane.

        Despite the name, nothing is stored on the instance; the caller
        receives an ``actual_dim x actual_dim`` matrix.
        """
        angle = 2 * np.pi * np.random.random()
        # Two distinct axes span the plane of rotation.
        i, j = np.random.choice(self.actual_dim, 2, replace=False)

        cos_t, sin_t = np.cos(angle), np.sin(angle)
        rotation = np.identity(self.actual_dim)
        rotation[i, i] = cos_t
        rotation[j, j] = cos_t
        rotation[i, j] = sin_t
        rotation[j, i] = -sin_t
        return rotation

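Try reducing the data to every dimension from the full dimension down to 1, keep the smallest dimension whose reconstruction accuracy stays above 15% of the full-dimension accuracy, and then retrain at that dimension with ten times as many epochs.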

In [165]:
def get_reduction_scores(training_data, testing_data, **kwargs):
    """Pick a reduced dimension for the data and return a regressor trained at it.

    For every hidden size from the full feature count down to 1, a fresh
    ``Regressor`` is fitted to reproduce ``training_data`` from itself and
    scored on ``testing_data`` with the accuracy value returned by
    ``compare_vector``.  The score at the full size is the baseline; the
    smallest hidden size whose score exceeds 15% of that baseline is kept
    (1 if none qualifies).  A final regressor is then trained at that size
    with ten times the requested epochs.

    Args:
        training_data: Array of shape ``(n_samples, dim)`` used for fitting.
        testing_data: Array of shape ``(m_samples, dim)`` used for scoring.
        **kwargs: Forwarded to ``Regressor.fit``; must include ``epochs``.

    Returns:
        The final trained ``Regressor`` (not the scores, despite the name).
    """
    full_dim = testing_data.shape[1]
    chosen_dim = 1
    baseline_score = 0

    for hidden_dim in reversed(range(1, full_dim + 1)):
        candidate = Regressor(full_dim, hidden_dim)
        candidate.fit(training_data, training_data, **kwargs)

        reconstruction = candidate.predict(testing_data)
        score = candidate.compare_vector(reconstruction, testing_data)[2]

        # The first iteration (no reduction) provides the reference score.
        if hidden_dim == full_dim:
            baseline_score = score

        # Later (smaller) sizes overwrite earlier ones, so the smallest
        # qualifying size wins.
        if score / baseline_score * 100 > 15:
            chosen_dim = hidden_dim

    # Train the final model for ten times longer than the search candidates.
    final_kwargs = {**kwargs, 'epochs': kwargs['epochs'] * 10}
    final_regressor = Regressor(full_dim, chosen_dim)
    final_regressor.fit(training_data, training_data, **final_kwargs)

    return final_regressor

Generate random data confined to a subspace whose dimension (`dim2`) is chosen at random

In [168]:
# Number of coordinates in every generated vector.
dim = 13
# Dimension of the subspace holding the data, drawn from 1..dim-1
# (the upper bound of randint is exclusive).
dim2 = np.random.randint(1, dim)
# Compose 2442 random plane rotations so the subspace is not axis-aligned.
datagen = DataGenerator(dim, dim2, rotation_count = 2442)
# noise=0 keeps the remaining dim - dim2 coordinates at exactly zero before rotation.
training_data = datagen.generate(10000, noise=0)
testing_data = datagen.generate(3000, noise=0)
# NOTE(review): no random seed is set, so dim2 and both data sets change on every run.
dim2

7

In [169]:
# Search for the reduced dimension and train the final model at it;
# epochs, alpha and retain are forwarded to Regressor.fit.
regressor = get_reduction_scores(training_data, testing_data, epochs = 10000, alpha = 1, retain = 1)


In [173]:
# Round-trip one fresh sample through encode and decode; the cell displays the
# original vector next to its reconstruction so they can be compared.
v = datagen.generate(1, noise=0)
v, regressor.decode(regressor.encode(v))

(array([[ 0.21093727,  0.57704979,  0.17386529, -0.37373057,  0.19706294,
          0.00530981,  0.88305193, -0.20863718,  0.03767595,  0.54557498,
         -0.04501163, -0.10076567,  0.36036176]]),
 array([[ 0.21093727,  0.57704979,  0.17386529, -0.37373057,  0.19706294,
          0.00530981,  0.88305193, -0.20863718,  0.03767595,  0.54557498,
         -0.04501163, -0.10076567,  0.36036176]]))