In [2]:
from IPython.display import clear_output
import numpy as np

Regressor class

In [4]:
class Regressor:
    """Linear two-layer regressor mapping ``A_DIM -> R_DIM -> A_DIM``.

    The model is ``data.dot(W[0]).dot(W[1])`` with no bias and no
    activation. ``W[0]`` acts as the encoder and ``W[1]`` as the decoder.
    Weights are initialised uniformly in ``[0, 1)`` via ``np.random.random``.

    After ``validate`` (or ``fit``) has run, ``variance``, ``max_error``,
    ``accuracy`` and ``metrics`` hold the most recent validation results;
    before that they are ``None``.
    """

    # Number of cross-validation folds used by ``fit``.
    N_FOLDS = 10

    def __init__(self, A_DIM, R_DIM):
        """Create a regressor with random weights.

        Args:
            A_DIM: width of the input/output vectors.
            R_DIM: width of the hidden (reduced) representation.
        """
        self.A_DIM = A_DIM
        self.R_DIM = R_DIM
        self.W = [np.random.random((self.A_DIM, self.R_DIM)),
                  np.random.random((self.R_DIM, self.A_DIM))]
        self.label_norm = 1
        # Defined up front so that str()/repr() work before any validation.
        self.variance = self.max_error = self.accuracy = None
        self.metrics = None

    def validate(self, a, b):
        """Compare ``a`` with ``b`` and store the resulting metrics on the instance."""
        self.metrics = self.compare_vector(a, b)
        self.variance, self.max_error, self.accuracy = self.metrics

    def compare_vector(self, a, b):
        """Return ``(variance, max_error, accuracy)`` of the difference ``a - b``.

        ``variance`` is ``np.var`` of the element-wise error, ``max_error`` is
        the largest absolute element-wise error, and ``accuracy`` is
        ``-2 * log10(variance)`` (so it grows as the error variance shrinks;
        it is ``inf`` when the variance is exactly zero).
        """
        diff = np.squeeze(a - b)
        variance = np.var(diff)
        # Use the magnitude: a large negative error is still a large error.
        max_error = np.max(np.abs(diff))
        accuracy = -np.log10(variance) * 2
        return variance, max_error, accuracy

    def dropout_matrix(self, percent, shape):
        """Return a random 0/1 integer mask of the given shape.

        Each entry is 1 with probability ``percent`` and 0 otherwise.
        """
        return np.random.choice(2, shape, p=[1 - percent, percent])

    def fitUtil(self, training_data, training_label, alpha=0.0001, retain=1):
        """Run one full-batch gradient-descent step on the squared error.

        Args:
            training_data: 2-D array of inputs, one sample per row.
            training_label: 2-D array of targets, one sample per row.
            alpha: learning rate.
            retain: probability of keeping each input feature / hidden unit;
                one mask per layer is drawn and shared by all samples.
        """
        input_mask = self.dropout_matrix(retain, training_data.shape[1])
        masked_input = training_data * input_mask
        hidden_layer = masked_input.dot(self.W[0])

        hidden_mask = self.dropout_matrix(retain, hidden_layer.shape[1])
        masked_hidden = hidden_layer * hidden_mask
        predicted_label = masked_hidden.dot(self.W[1])

        error = training_label - predicted_label

        # Back-propagate through the same masks that were used in the forward
        # pass, so dropped features/units receive no gradient. With retain=1
        # both masks are all ones and this reduces to the unmasked gradient.
        hidden_grad = 2 * error.dot(self.W[1].T) * hidden_mask
        dW0 = -masked_input.T.dot(hidden_grad) / training_data.size
        dW1 = -masked_hidden.T.dot(2 * error) / training_data.size

        self.W[0] -= dW0 * alpha
        self.W[1] -= dW1 * alpha

    def fit(self, vecs, vecsy, epochs, **kwargs):
        """Train with ``N_FOLDS``-fold cross validation.

        The data is split into ``N_FOLDS`` consecutive folds (sizes may differ
        by one row when the row count is not a multiple of ``N_FOLDS``). For
        every fold in turn the model is trained for ``epochs // N_FOLDS``
        steps on the remaining folds and then validated on the held-out fold.
        The weights are carried over from fold to fold.

        Args:
            vecs: training inputs, one sample per row.
            vecsy: training targets, one sample per row.
            epochs: total number of gradient steps, shared across the folds.
            **kwargs: forwarded to ``fitUtil`` (``alpha``, ``retain``).

        Returns:
            The ``(variance, max_error, accuracy)`` tuple of the last fold.
        """
        data_folds = np.array_split(vecs, self.N_FOLDS)
        label_folds = np.array_split(vecsy, self.N_FOLDS)
        epochs_per_fold = epochs // self.N_FOLDS

        for held_out in range(self.N_FOLDS):
            train_data = np.vstack(
                [fold for j, fold in enumerate(data_folds) if j != held_out])
            train_label = np.vstack(
                [fold for j, fold in enumerate(label_folds) if j != held_out])

            for _ in range(epochs_per_fold):
                self.fitUtil(train_data, train_label, **kwargs)

            self.validate(self.predict(data_folds[held_out]),
                          label_folds[held_out])

        # Return only after every fold has been used for validation.
        return self.metrics

    def predict(self, data):
        """Return the reconstruction ``data.dot(W[0]).dot(W[1])``."""
        return data.dot(self.W[0]).dot(self.W[1])

    def encode(self, data):
        """Project ``data`` into the reduced space with ``W[0]``."""
        return data.dot(self.W[0])

    def decode(self, data):
        """Map reduced vectors back to the original space with ``W[1]``."""
        return data.dot(self.W[1])

    def __str__(self):
        """Return a multi-line summary of the latest metrics and the weights."""
        return '\n'.join(["Error variance " + str(self.variance),
                          "Error amplitude " + str(self.max_error),
                          "Accuracy " + str(self.accuracy),
                          "Weights" + str(self.W)])

    def __repr__(self):
        """Same text as ``__str__`` so notebook display shows the summary."""
        return self.__str__()


In [3]:
class DataGenerator:
    """Generate random vectors that occupy a rotated low-dimensional subspace.

    The first ``dim_b`` coordinates are drawn uniformly from ``[-1, 1)``, the
    remaining ``dim_a - dim_b`` coordinates are uniform noise scaled by
    ``noise``, and the result is multiplied by a fixed matrix ``self.r`` that
    is the product of ``rotation_count`` random plane rotations.
    """

    def __init__(self, dim_a, dim_b, rotation_count = 0):
        """Store the dimensions and compose ``rotation_count`` random rotations."""
        self.actual_dim = dim_a
        self.reduced_dim = dim_b
        self.r = np.identity(self.actual_dim)
        for _ in range(rotation_count):
            self.r = np.dot(self.r, self.set_rotational_matrix())

    def generate(self, data_count = 1000, noise = 0):
        """Return a ``(data_count, actual_dim)`` array of rotated samples."""
        signal = np.random.random((data_count, self.reduced_dim)) * 2 - 1
        filler_width = self.actual_dim - self.reduced_dim
        filler = np.random.random((data_count, filler_width)) * noise
        samples = np.concatenate((signal, filler), axis=1)
        return samples.dot(self.r)

    def set_rotational_matrix(self):
        """Return a rotation by a random angle in a random coordinate plane.

        Despite the name, nothing is stored on the instance; the matrix is
        only returned.
        """
        angle = 2 * np.pi * np.random.random()
        first, second = np.random.choice(self.actual_dim, 2, replace=False)
        cos_a, sin_a = np.cos(angle), np.sin(angle)

        rotation = np.identity(self.actual_dim)
        rotation[first, first] = cos_a
        rotation[second, second] = cos_a
        rotation[first, second] = sin_a
        rotation[second, first] = -sin_a
        return rotation

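Try reducing the data to every dimension from the full dimension down to 1, then retrain at the smallest dimension whose reconstruction score stays above 15% of the full-dimension score.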

In [21]:
def get_reduction_scores(training_data, testing_data, **kwargs):
    """Search for a reduced dimension and return a regressor trained at it.

    For every hidden size from the full input width down to 1, a fresh
    ``Regressor`` is fitted to reconstruct ``training_data`` and scored on
    ``testing_data`` using the accuracy component of ``compare_vector``.
    The score at the full width is the baseline; the last (smallest) size
    whose score exceeds 15% of that baseline is kept. A final regressor of
    that size is then fitted with ten times the requested ``epochs``.

    Args:
        training_data: 2-D array used as both input and target when fitting.
        testing_data: 2-D array used for scoring; its column count sets the
            full dimension.
        **kwargs: forwarded to ``Regressor.fit``; must contain ``epochs``.

    Returns:
        The ``Regressor`` fitted at the selected reduced dimension.
    """
    full_dim = testing_data.shape[1]
    best_dim = 1
    baseline_score = 0

    for candidate in reversed(range(1, full_dim + 1)):

        print("Reduction progress", (1 - candidate/full_dim) * 100)
        model = Regressor(full_dim, candidate)
        model.fit(training_data, training_data, **kwargs)

        reconstruction = model.predict(testing_data)
        _, _, score = model.compare_vector(reconstruction, testing_data)

        # The first iteration (no reduction) defines the reference score.
        if candidate == full_dim:
            baseline_score = score

        if score / baseline_score * 100 > 15:
            best_dim = candidate

    # The final model gets a ten times larger epoch budget than the probes.
    final_kwargs = dict(kwargs, epochs=kwargs['epochs'] * 10)
    final_model = Regressor(full_dim, best_dim)
    final_model.fit(training_data, training_data, **final_kwargs)

    return final_model

Create random data of dimension `dim` that lies, up to a small amount of noise, in a randomly rotated subspace of a randomly chosen dimension `dim2`.

In [33]:
# Draw the full dimension from 2..19 and a strictly smaller reduced dimension from 1..dim-1.
dim = np.random.randint(2, 20)
dim2 = np.random.randint(1, dim)
# Compose many random plane rotations so the informative coordinates are spread over all axes.
datagen = DataGenerator(dim, dim2, rotation_count = 2442)
# Both sets come from the same generator, so they share one rotation; the extra coordinates are noise scaled by 0.1.
training_data = datagen.generate(10000, noise=0.1)
testing_data = datagen.generate(3000, noise=0.1)
# Display (reduced dimension, full dimension) as the cell output.
dim2, dim

(7, 11)

In [None]:
# Run the dimension search on the generated data and keep the regressor it returns.
# epochs, alpha and retain are forwarded unchanged to Regressor.fit / fitUtil.
regressor = get_reduction_scores(training_data, testing_data, epochs = 10000, alpha = 1, retain = 1)

In [6]:
import tensorflow as tf

In [34]:
class AutoEncoder(tf.keras.Model):
    """Keras autoencoder built from two stacked ``Dense`` layers per side.

    The layer widths are read from the notebook-level variables ``dim``
    (full width) and ``dim2`` (bottleneck width) at construction time, so
    those must be defined before an instance is created. No activation
    argument is passed to any ``Dense`` layer.
    """

    def __init__(self):
        super().__init__()
        dense = tf.keras.layers.Dense
        # Encoder: dim -> dim2.
        self.encoder = tf.keras.models.Sequential([dense(dim), dense(dim2)])
        # Decoder: dim2 -> dim.
        self.decoder = tf.keras.models.Sequential([dense(dim2), dense(dim)])

    def call(self, x):
        """Encode ``x`` and decode the result back to the input width."""
        latent = self.encoder(x)
        return self.decoder(latent)


In [35]:
# Build the Keras autoencoder and train it to reconstruct its own input.
ae_model  = AutoEncoder()
# Reconstruction is a regression task, so classification accuracy carries no
# meaning here; report mean absolute error next to the MSE loss instead.
# Metrics are passed as a list, which is the form Model.compile documents.
ae_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
ae_model.fit(training_data, training_data, epochs = 40, validation_data=(testing_data, testing_data))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x25a70b98220>

In [36]:
# Draw a single noise-free sample and push it through the encoder and then the decoder.
val = datagen.generate(1, noise=0)
# Display the reconstruction next to the original sample for a visual comparison.
ae_model.decoder.predict(ae_model.encoder.predict(val)), val

(array([[-0.30741146, -0.59358096, -0.59292877,  0.26722306,  0.01966223,
          0.19557744, -0.15878285,  0.4760937 ,  0.25293785,  0.60018605,
          0.713426  ]], dtype=float32),
 array([[-0.28586104, -0.61407919, -0.56473913,  0.24293466,  0.02796364,
          0.22020268, -0.18305967,  0.46658211,  0.19325339,  0.58282988,
          0.7570655 ]]))