## Part 1. Creating a 8x3x8 autoencoder

In [119]:
import numpy as np
import os
import itertools

Note: for another class I'm taking concurrently (Stanford CS 230), I wrote some functions to build a simple neural net from scratch for an early homework assignment. I'm recycling those helper functions I've already made here below.

In [4]:
def layer_sizes(X, Y, n_h):
    """
    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)
    n_h -- the desired size of the hidden layer
    
    Returns:
    n_x -- the size of the input layer
    n_h -- the size of the hidden layer
    n_y -- the size of the output layer
    """
    n_x = X.shape[0] # size of input layer
    n_h = n_h #size of hidden layer--for the 8x3x8 encoder, I want this number to be 3.
    n_y = Y.shape[0] # size of output layer
    return (n_x, n_h, n_y)

In [5]:
def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer
    
    Returns:
    params -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    #I'm randomizing my weights to break symmetry but also multiplying it by 0.01 to avoid exploding gradients/make it run faster.
    np.random.seed(1)
    
    W1 = np.random.randn(n_h, n_x)*0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h)*0.01
    b2 = np.zeros((n_y, 1))
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [6]:
#activation function I will be using for all my neurons in my encoder

def sigmoid(z):
    """
    Compute the sigmoid of z
    Arguments:
    z -- A scalar or numpy array of any size.
    Return:
    s -- sigmoid(z)
    """
    s = (1+np.exp(-z))**(-1)
    
    return s

In [7]:
def forward_propagation(X, parameters):
    """
    Argument:
    X -- input data of size (n_x, m)
    parameters -- python dictionary containing your parameters (output of initialization function)
    
    Returns:
    A2 -- The sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Retrieve each parameter from the dictionary "parameters"
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    # Implement Forward Propagation
    Z1 = np.dot(W1, X) + b1
    A1 = sigmoid(Z1) #sigmoid activation function for Z1
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2) #sigmoid activation function for Z2
    
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    
    return A2, cache

I chose to use cross-entropy for this task because I want my values to be as close to 0 or 1 as possible, and cross-entropy tends to maximize the odds of values being pushed to either of these extremes, unlike MSE which would equally allow for a distribution between 0 and 1. In this way, I'm wanting my output to look more like a classification task (where the outputs are either 0 or 1) rather than a regression task (where my outputs can RANGE between 0 and 1).

In [247]:
def compute_cost(A2, Y):
    """
    Computes the cost function (cross-entropy)
    
    Arguments:
    A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    
    """
    
    m = Y.shape[1] # number of example

    # Compute the cost function
    
    #cost = np.square(np.subtract(A2,Y)).mean() -- I wrote this in case I decide to change to MSE later on
    logprobs = np.multiply(np.log(A2),Y) + np.multiply(np.log(1-A2),(1-Y))
    cost = -(1/m)*np.sum(logprobs) 
    
    cost = float(np.squeeze(cost))  # makes sure cost is the dimension we expect. 
                                     
    assert(isinstance(cost, float))
    
    return cost

In [248]:
def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation using the instructions above.
    
    Arguments:
    parameters -- python dictionary containing our parameters 
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (2, number of examples)
    
    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
    m = X.shape[1]
    
    # First, retrieve W1 and W2 from the dictionary "parameters".
    W1 = parameters["W1"]
    W2 = parameters["W2"]
        
    # Retrieve also A1 and A2 from dictionary "cache".
    A1 = cache["A1"]
    A2 = cache["A2"]
    
    # Backward propagation: calculate dW1, db1, dW2, db2. 
    dZ2 = A2 - Y
    dW2 = (1/m)*np.dot(dZ2, A1.T)
    db2 = (1/m)*np.sum(dZ2, axis = 1, keepdims = True)
    dZ1 = np.dot(W2.T,dZ2)*(A1*(1-A1)) #note: the (A1*(1-A1)) is the gradient/derivative for the sigmoid activation function
    dW1 = (1/m)*np.dot(dZ1,X.T)
    db1 = (1/m)*np.sum(dZ1, axis = 1, keepdims = True)
    
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    
    return grads

In [259]:
def update_parameters(parameters, grads, learning_rate = 0.15):
    """
    Updates parameters using the gradient descent update rule given above
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    grads -- python dictionary containing your gradients 
    
    Returns:
    parameters -- python dictionary containing your updated parameters 
    """
    # Retrieve each parameter from the dictionary "parameters"
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    # Retrieve each gradient from the dictionary "grads"
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    
    # Update rule for each parameter
    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [250]:
def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    np.random.seed(1)
    n_x = layer_sizes(X, Y, n_h)[0]
    n_y = layer_sizes(X, Y, n_h)[2]
    
    # Initialize parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    
    # Loop (gradient descent)

    for i in range(0, num_iterations):
         

        # Forward propagation
        A2, cache = forward_propagation(X, parameters)
        
        # Cost function.
        cost = compute_cost(A2, Y)
 
        # Backpropagation.
        grads = backward_propagation(parameters, cache, X, Y)
 
        # Gradient descent parameter update.
        parameters = update_parameters(parameters, grads)
        
        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))

    return parameters

In [251]:
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    X -- input data of size (n_x, m)
    
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    
    # Computes outputs using forward propagation
    A2, cache = forward_propagation(X, parameters)
    predictions = A2 #note that A2 equals the outpuit
    
    return predictions

In [234]:
np.random.seed(5)
autoencoder_X = np.random.rand(8, 1000)
autoencoder_X

array([[0.22199317, 0.87073231, 0.20671916, ..., 0.86960068, 0.31039373,
        0.79059309],
       [0.71624615, 0.64380983, 0.03245853, ..., 0.49029671, 0.52621009,
        0.40343823],
       [0.78396039, 0.45898864, 0.61811338, ..., 0.42298102, 0.73225143,
        0.56095315],
       ...,
       [0.43804885, 0.47199715, 0.61046446, ..., 0.8097037 , 0.48905981,
        0.32760345],
       [0.71350345, 0.71421727, 0.31524009, ..., 0.36112389, 0.899994  ,
        0.1123772 ],
       [0.85540685, 0.72970446, 0.99803679, ..., 0.4032995 , 0.87013078,
        0.06819341]])

In [252]:
np.random.seed(5)
autoencoder_X_int = np.random.randint(0, 2, size=(8, 1000))
autoencoder_X_int

array([[1, 0, 1, ..., 1, 0, 1],
       [0, 0, 1, ..., 1, 1, 0],
       [1, 0, 0, ..., 1, 0, 1],
       ...,
       [0, 0, 1, ..., 0, 1, 0],
       [1, 0, 0, ..., 0, 1, 1],
       [0, 0, 0, ..., 0, 1, 0]])

In [253]:
test_rand = autoencoder_X[:,0].reshape(8,1)
test_int = autoencoder_X_int[:,0].reshape(8,1)
print(test_int)

[[1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]]


In [260]:
optimized_parameters = nn_model(autoencoder_X_int, autoencoder_X_int, 3, 100000, print_cost=True)

Cost after iteration 0: 5.545267
Cost after iteration 1000: 3.930433
Cost after iteration 2000: 3.632015
Cost after iteration 3000: 3.501390
Cost after iteration 4000: 3.382141
Cost after iteration 5000: 3.304842
Cost after iteration 6000: 3.259207
Cost after iteration 7000: 3.226562
Cost after iteration 8000: 3.201052
Cost after iteration 9000: 3.180103
Cost after iteration 10000: 3.162339
Cost after iteration 11000: 3.146936
Cost after iteration 12000: 3.133357
Cost after iteration 13000: 3.121232
Cost after iteration 14000: 3.110295
Cost after iteration 15000: 3.100346
Cost after iteration 16000: 3.091231
Cost after iteration 17000: 3.082829
Cost after iteration 18000: 3.075044
Cost after iteration 19000: 3.067797
Cost after iteration 20000: 3.061024
Cost after iteration 21000: 3.054671
Cost after iteration 22000: 3.048692
Cost after iteration 23000: 3.043049
Cost after iteration 24000: 3.037710
Cost after iteration 25000: 3.032645
Cost after iteration 26000: 3.027830
Cost after ite

In [261]:
print(optimized_parameters)

{'W1': array([[ 7.07619441e+00, -9.13235516e-03, -6.47971534e-02,
         1.34390741e-02,  6.79145303e-02, -4.85088194e-03,
         9.09512203e-01, -7.37835063e+00],
       [-1.14691839e+00, -6.57791451e-03, -3.96337231e-02,
         2.37822147e-03,  2.62621953e-02,  4.55086420e-03,
         7.38520900e+00, -5.94275058e+00],
       [ 7.75069038e-02, -7.14096854e-03, -7.45956837e+00,
         9.87790109e-03,  7.02262898e+00,  7.85692834e-03,
         1.23038668e-01, -1.40365145e-01]]), 'b1': array([[-0.35918334],
       [-0.2174124 ],
       [-0.02831873]]), 'W2': array([[ 4.29575859e+01, -2.13729761e+01, -9.77356028e-01],
       [-1.58072524e-01, -1.21265461e-01, -4.83419114e-01],
       [ 2.44787762e+00,  1.65193668e+00, -5.00884927e+01],
       [ 2.94672384e-01,  1.38522485e-02,  1.40554973e-01],
       [ 6.10742488e-02, -3.90420042e-01,  5.65797499e+00],
       [-1.22415188e-01,  1.26887324e-01,  9.90605914e-02],
       [ 1.15298544e+01,  3.91675568e+01, -9.21760627e-01],
       [

In [262]:
answers = predict(optimized_parameters, test_int)
print(answers)
print(test_int)

[[0.99998944]
 [0.5334759 ]
 [1.        ]
 [0.52854655]
 [0.06021715]
 [0.47879817]
 [1.        ]
 [0.00527484]]
[[1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]]


Based on this single example where I'm just comparing by eye the "real" answer and the "autoencoder" answer, I am getting fairly OK answers (1s are indeed 1s and 0s are 0.54 or below) after 100,000 epochs and a learning rate of 0.15. However, to really determine how good this is, I need to do a more robust evaluation metric. As a result, I will now implement cross-validation.

Below are attempts to preprocess my positive training data

In [79]:
pathway = os.path.join('./data', 'rap1-lieb-positives.txt')
train = np.loadtxt(os.path.abspath(pathway), dtype=str)
train[0]

'ACATCCGTGCACCTCCG'

In [83]:
len(train)

137

In [127]:
###one-hot encoding all training examples and flattening into matrix of shape (length of sequence*length of onehot vector, m)


# define universe of possible input values
alphabet = 'ACTG'
# define a mapping of chars to integers
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))


#X = np.zeros((17*4, len(train)))
X = []
for sequence in train:
    # integer encode input data
    integer_encoding = [char_to_int[char] for char in sequence]
    # one hot encode
    onehot_encoding = []
    for value in integer_encoding:
        letter = [0 for _ in range(len(alphabet))]
        letter[value] = 1
        onehot_encoding.append(letter)
    final_onehot = list(itertools.chain(*onehot_encoding))
    X.append(final_onehot)
X = np.array(X)

In [128]:
print(X)

[[1 0 0 ... 0 0 1]
 [1 0 0 ... 1 0 0]
 [0 1 0 ... 1 0 0]
 ...
 [1 0 0 ... 0 0 0]
 [0 0 1 ... 0 1 0]
 [1 0 0 ... 1 0 0]]


In [136]:
print(X.shape)
print(X[:,5].shape)

(137, 68)
(137,)


In [129]:
Y = X #for an autoencoder

In [138]:
optimized_parameters = nn_model(X, Y, 3)

In [139]:
optimized_parameters

{'W1': array([[-1.19871719, -0.44568733,  1.2027864 , -3.33359988, -2.0197612 ,
         -1.23423178, -0.70492671, -0.73472676, -2.06623434, -1.18367359,
         -0.21156829, -1.17050076, -2.0448145 , -0.08808805, -1.84384248,
         -2.09977152,  1.47748143, -0.86352517,  0.56085262, -2.12373595,
         -1.12049616, -1.25999124, -2.54334989, -1.29816183, -1.89706703,
         -1.43421922, -0.6155915 , -1.5310607 , -2.27325765, -2.16520337,
         -1.89962733, -0.62104   , -2.02756589, -1.46971935, -1.41671677,
         -2.10429764, -1.61042139, -0.30264605, -1.36616325, -0.35317211,
         -0.61669504, -2.88817411, -1.4931996 ,  0.09320683, -2.1310597 ,
         -0.40612378, -2.27618685,  0.11616285, -1.03366195, -2.3141491 ,
          0.05184475,  0.72986834,  0.46329423, -0.60871242, -0.47629112,
          0.10928639, -1.20460324, -2.27824343, -1.32449251,  1.73513968,
         -2.369514  , -2.15191996, -0.1621025 , -1.6994323 , -1.51453577,
         -2.26549247, -2.5499838

In [140]:
predict(optimized_parameters, X[:,5]).astype(int)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0,

In [141]:
X[:,5]

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 1])