In [None]:
import numpy as np
import pandas as pd
from random import *
class NeuralNetwork:
    """Fully connected feed-forward network of sigmoid units trained by online backpropagation.

    The network state is kept in nested Python lists:

    * ``weights[layer][node]`` is a list of ``n_inputs + 1`` floats, the last
      entry being the node's bias weight.
    * ``outputs[layer][node]`` is the activation recorded by the most recent
      ``feedforward`` call.
    * ``change[layer][node]`` is the error term (delta) recorded by the most
      recent ``backprop`` call.

    Parameters
    ---------
    setup
        One entry per layer; ``entry[0]`` is the number of inputs and
        ``entry[1]`` the number of nodes. Further elements (such as the
        activation name) are ignored: every layer uses ``sigmoid``.
    lr
        Learning rate applied to the weight updates in ``backprop``.
    seed
        Seed of the private random generator that draws the initial weights.
    error_rate, bias, iters, lamb, simple
        Stored on the instance but not used by any method of this class.
    """
    def __init__(self, setup=[[68,25,"sigmoid",0],[25,1,"sigmoid",0]],lr=.05,seed=1,error_rate=0,bias=0,iters=500,lamb=.00001,simple=0):
        # Note - these parameters are examples, not the required init function parameters.
        # The default ``setup`` list is only read, never mutated.
        self._lr = lr
        self._seed = seed
        self._error_rate = error_rate
        self._bias = bias
        self._iters = iters
        self._lamb = lamb
        self._simple = simple

        # Private generator: equal seeds give equal initial weights without
        # touching the global ``random`` state.
        self._rng = Random(seed)

        # network is represented as a list of layers,
        # where layers are a list of nodes, where nodes
        # are a list of weights.
        # weights = [ [[w1,w2...], [w1,w2...]] <- layer1
        #             [[w1,w2...], [w1,w2...]] <- layer2
        #           ]
        self._weights = [
            self.make_weights(n_inputs=layer[0], n_nodes=layer[1]) for layer in setup
        ]
        # one activation slot and one error-term slot per node
        self._outputs = [[0] * layer[1] for layer in setup]
        self._change = [[0] * layer[1] for layer in setup]

    @property
    def lr(self):
        return self._lr

    @lr.setter
    def lr(self, lr):
        self._lr = lr

    @property
    def bias(self):
        return self._bias

    @bias.setter
    def bias(self, bias):
        self._bias = bias

    @property
    def seed(self):
        return self._seed

    @seed.setter
    def seed(self, seed):
        self._seed = seed
        # Restart the weight generator; weights that already exist are left untouched.
        self._rng = Random(seed)

    @property
    def outputs(self):
        return self._outputs

    @outputs.setter
    def outputs(self, outputs):
        self._outputs = outputs

    @property
    def change(self):
        return self._change

    @change.setter
    def change(self, change):
        self._change = change

    @property
    def weights(self):
        return self._weights

    @weights.setter
    def weights(self, weights):
        self._weights = weights

    @staticmethod
    def _sigmoid_slope(activation):
        """Derivative of the sigmoid expressed through its own output value."""
        return activation * (1.0 - activation)

    @staticmethod
    def _squared_error(expected, output):
        """Sum of squared differences over the first ``len(expected)`` outputs."""
        return sum((expected[i] - output[i]) ** 2 for i in range(len(expected)))

    def make_weights(self,n_inputs, n_nodes):
        """
        Generates random weights for the network initialization

        Parameters
        ---------
        n_inputs
            Number of input nodes to this layer
        n_nodes
            Number of nodes to generate weights for

        Returns
        ---------
        List of ``n_nodes`` nodes, each a list of ``n_inputs + 1`` floats
        drawn uniformly from [-1, 1]; the extra last weight is the bias.
        """
        return [
            [self._rng.uniform(-1, 1) for _ in range(n_inputs + 1)]
            for _ in range(n_nodes)
        ]

    def feedforward(self, data):
        """
        Takes in data and passes it through the NN

        Every node's activation is also stored in ``self.outputs`` so that a
        following ``backprop`` call can use it.

        Parameters
        ---------
        data
            One datapoint (sequence of input values)

        Returns
        ---------
        The output(s) of the final layer in the network
        """
        inputs = data

        # pass data through all layers
        for layer, layer_weights in enumerate(self.weights):
            activations = []
            for node, node_weights in enumerate(layer_weights):
                total = 0
                for i in range(len(inputs)): # multiply inputs by weights and accumulate
                    total += inputs[i] * node_weights[i]
                total += node_weights[-1] # add bias
                activation = sigmoid(total) # apply activation function
                self.outputs[layer][node] = activation
                activations.append(activation)
            inputs = activations

        # inputs is now the final layer output
        return inputs

    def backprop(self, true_values, data):
        """
        Computes the error term of every node for the example that was last
        passed through ``feedforward`` and applies one gradient step.

        Must be called after ``feedforward`` on the same example, because it
        reads the activations stored in ``self.outputs``.

        Parameters
        ---------
        true_values
            true output value(s) of the example, one per output node
        data
            training row; ``data[0]`` is the input vector of the example

        Returns
        ---------
        None, the change matrix is filled in and the weights are updated
        """
        last = len(self.outputs) - 1

        # Error terms, from the last layer back to the first.
        for layer in range(last, -1, -1):
            for node, activation in enumerate(self.outputs[layer]):
                if layer == last:
                    # output layer: error against the true value
                    error = true_values[node] - activation
                else:
                    # hidden layer: error terms of the following layer, weighted
                    # by the (not yet updated) connection to this node
                    error = sum(
                        next_weights[node] * next_delta
                        for next_weights, next_delta in zip(
                            self.weights[layer + 1], self.change[layer + 1]
                        )
                    )
                # outputs already hold sigmoid values, so the slope is o * (1 - o)
                self.change[layer][node] = error * self._sigmoid_slope(activation)

        # Update weights; each layer only needs its own error terms and inputs.
        for layer in range(len(self.weights)):
            # the first layer sees the training example, later layers the
            # activations of the layer before them
            layer_input = data[0] if layer == 0 else self.outputs[layer - 1]
            for node_weights, delta in zip(self.weights[layer], self.change[layer]):
                step = self.lr * delta
                for i, value in enumerate(layer_input):
                    node_weights[i] += step * value
                # update bias
                node_weights[-1] += step

    def fit(self, training, validation, n_epochs):
        """
        Trains the neural network one example at a time and records, for each
        epoch, the mean squared-error loss on the training and validation sets.

        The training loss of an example is measured before the weights are
        updated for that example.

        Parameters
        ---------
        training
            sized collection of rows; ``row[0]`` is the input vector and
            ``row[-1]`` the list of expected output values
        validation
            rows with the same layout, used for evaluation only
        n_epochs
            number of passes over ``training``

        Returns
        ---------
        DataFrame with columns 'Epoch', 'Train' and 'Validation', one row per
        epoch. The loss of an empty set is reported as NaN.
        """
        records = []
        n_train = len(training)
        n_val = len(validation)

        for epoch in range(n_epochs):
            train_loss = 0
            for row in training:
                output = self.feedforward(row[0])
                expected = row[-1] # Expected value should be last element of training row
                # Sum loss of all output nodes
                train_loss += self._squared_error(expected, output)
                self.backprop(true_values=expected, data=row)

            val_loss = 0
            for val in validation:
                val_loss += self._squared_error(val[-1], self.predict(val[0]))

            records.append({
                'Epoch': epoch,
                'Train': train_loss / n_train if n_train else float('nan'),
                'Validation': val_loss / n_val if n_val else float('nan'),
            })

        # Build the frame once instead of appending row by row.
        return pd.DataFrame(records, columns=['Epoch', 'Train', 'Validation'])

    def predict(self, data):
        """Returns the network output for one datapoint (same as ``feedforward``)."""
        return self.feedforward(data)

def activation(input, weights):
    """Unimplemented placeholder: accepts ``input`` and ``weights`` and returns None."""
    return None
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x); accepts whatever ``np.exp`` accepts."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def sigmoid_derivative(x):
    """Slope of the sigmoid evaluated at ``x``.

    ``x`` is the value fed *into* the sigmoid; the result is
    ``sigmoid(x) * (1 - sigmoid(x))``.
    """
    activated = sigmoid(x)
    return activated * (1 - activated)

In [31]:
def read_train_seqs(pos_file, neg_file):
    """
    Reads in positive and negative sequences from paths
    into two lists, pos and neg.

    The positive file is read as one sequence per line. The negative file
    is read as FASTA-style records: lines starting with ">" separate
    records, and all other lines of a record are stripped and joined into
    one sequence. Records without any sequence text are dropped.

    Parameters
    ---------
    pos_file
        path to positive examples
    neg_file
        path to negative examples

    Returns
    ---------
    two lists, where each element is a sequence from the file
    """
    with open(pos_file) as f:
        pos = f.read().splitlines()

    neg = []
    parts = []  # stripped lines of the record currently being read

    def flush():
        # store the current record, unless it has no sequence text
        seq = ''.join(parts)
        if seq:
            neg.append(seq)
        parts.clear()

    # the context manager closes the file even if reading fails
    with open(neg_file) as f:
        for line in f:
            if line.startswith(">"):
                flush()
            else:
                parts.append(line.strip())
    flush()  # the last record has no header after it

    return pos,neg

def encode_seq(sequence):
    """
    One-hot encodes a nucleotide sequence. Each of the characters
    'A', 'C', 'G', 'T' becomes a binary vector of length 4 with a
    single 1 at the position of that nucleotide in [A, C, G, T].
    Any other character produces no output vector.

    Parameters
    ---------
    sequence
        the sequence string to encode

    Returns
    ---------
    A list with one freshly created length-4 list per recognised
    nucleotide, in sequence order
    """
    one_hot = {
        'A': (1, 0, 0, 0),
        'C': (0, 1, 0, 0),
        'G': (0, 0, 1, 0),
        'T': (0, 0, 0, 1),
    }
    # list(...) gives every position its own vector object
    return [list(one_hot[base]) for base in sequence if base in one_hot]

def train_val_split():
    """
    Placeholder for splitting the positive and negative examples
    into training and validation sets at an 80/20 ratio.

    TODO: not implemented yet. The function takes no arguments,
    performs no work and returns None.

    Parameters
    ---------
    None

    Returns
    ---------
    None
    """
    return None

In [33]:

def test_make_weights():
    """An 8-3-8 network has two layers whose nodes carry n_inputs + 1 weights each."""
    net = NeuralNetwork(setup=[[8,3,"sigmoid",0],[3,8,"sigmoid",0]])
    layers = net.weights
    assert len(layers) == 2
    hidden, output = layers[0], layers[1]
    # node counts per layer
    assert len(hidden) == 3
    assert len(output) == 8
    # weights per node: inputs plus one bias
    assert len(hidden[0]) == 9
    assert len(output[0]) == 4

def test_feedforward():
    """A forward pass through a 2x1x2 network returns two values and records them as the last layer's outputs."""
    net = NeuralNetwork([[2,1, "sigmoid",0], [1,2, "sigmoid",0]])
    result = net.feedforward([1,1])
    assert len(result) == 2
    # the second (final) layer's stored activations equal the returned values
    assert net.outputs[1] == result
    
def test_encoder():
    """Placeholder test: always passes and exercises no code yet."""
    assert True

def test_encoder_relu():
    """Placeholder test: always passes and exercises no code yet."""
    assert True

def test_one_d_ouput():
    """Placeholder test: always passes and exercises no code yet."""
    assert True

def test_read_train_seqs():
    """Checks the sequence counts read from the two data files and that every positive sequence has 17 characters."""
    positives, negatives = read_train_seqs(
        pos_file = "./data/rap1-lieb-positives.txt",
        neg_file = "./data/yeast-upstream-1k-negative.fa",
    )
    print(len(positives),len(negatives))
    assert len(negatives) == 3164
    assert len(positives) == 137
    for site in positives:
        assert len(site) == 17

def test_encode_seq():
    """'ACTG' encodes to four one-hot vectors in [A, C, G, T] order."""
    expected = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]]
    result = encode_seq('ACTG')
    assert len(result) == 4
    assert result == expected

In [34]:
# Run the one-hot encoding test; a failed check raises AssertionError.
test_encode_seq()

In [None]:
nn = NeuralNetwork([[8,3, "sigmoid",0], [3,8, "sigmoid",0]])
# an 8x3x8 network: 8 inputs, 3 hidden nodes, 8 outputs
# forward pass of one 8-bit vector; the cell displays the 8 output activations
nn.feedforward([1,0,0,0,0,0,0,1])

In [None]:
# Run the network tests; a failed check raises AssertionError.
test_feedforward()
test_make_weights()


In [None]:
# Display the current weights of `nn` (per layer, per node; last entry of a node is its bias weight).
nn.weights

In [None]:
# One backprop step with the same vector as input and target; `data` is a row whose first element is the input.
nn.backprop(true_values = [1,0,0,0,0,0,0,1], data = [[1,0,0,0,0,0,0,1], [1,0,0,0,0,0,0,1]])

In [None]:
# Display the weights of `nn` again to compare with the earlier display.
nn.weights

In [None]:
# Forward pass of the same 8-bit vector with the current weights.
nn.feedforward([1,0,0,0,0,0,0,1])

In [None]:
# Display the weights of `nn` once more.
nn.weights


## Test autoencoder

In [None]:
# generate 1000 random 8 bit binary vectors (each a list of 8 values drawn from {0, 1})
# NOTE(review): no numpy seed is set here, so the data differ between runs
data = [list(np.random.randint(2, size=8)) for i in range(1000)]

In [None]:
# Autoencoder rows: each vector is paired with itself as [input, target].
new_data = [[vector,vector] for vector in data]

In [None]:
# split data into 80% train, 20% test (first 800 rows / last 200 rows, no shuffling);
# `test` is passed to fit() as the validation set further down
train = new_data[0:800]
test = new_data[800:1000]


In [None]:
# Fresh 8x3x8 network for the autoencoder experiment, with the learning rate overridden via the property setter.
nn = NeuralNetwork([[8,3, "sigmoid",0], [3,8, "sigmoid",0]])
nn.lr = 0.08

In [None]:
# Train for 100 epochs; the plotting cell below reads the 'Epoch', 'Train' and 'Validation' columns of `df`.
df = nn.fit(training = train, validation = test, n_epochs=100)

In [None]:
# Plot training and validation loss against the epoch number.
import matplotlib.pyplot as plt
fig,ax = plt.subplots()

# one line per loss column, labelled with the column name
for name in ['Train','Validation']:
    ax.plot(df['Epoch'],df[name], label=name)

ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.legend(loc='best')

In [None]:
# Squared reconstruction error of the network on the all-zero vector.
expected = [0,0,0,0,0,0,0,0]
output = nn.predict([0,0,0,0,0,0,0,0]) 
sum([(expected[i]-output[i])**2 for i in range(len(expected))])


In [None]:
# Tiny subset: two training rows and one validation row (overwrites the earlier `train` / `test`).
train = new_data[0:2]
test = new_data[2:3]

In [None]:
# Another fresh 8x3x8 network (replaces the trained `nn`) with the same learning rate as before.
nn = NeuralNetwork([[8,3, "sigmoid",0], [3,8, "sigmoid",0]])
nn.lr = 0.08

In [None]:
# Single epoch on the tiny subset; overwrites the earlier `df`.
df = nn.fit(training = train, validation = test, n_epochs=1)