## Import the necessary libraries

In [1]:
from nn import nn, preprocess, io
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Read in the positive and negative sequence files

In [2]:
# NOTE: `io` here is the project module imported from `nn` above, not the
# standard-library `io`.
# Positive examples, read with the project's plain-text reader.
pos_seqs = io.read_text_file("./data/rap1-lieb-positives.txt")
# Negative examples, read with the project's FASTA reader.
neg_seqs = io.read_fasta_file("./data/yeast-upstream-1k-negative.fa")

In [3]:
# Inspect the positive sequences. A bare last expression displays the whole
# object, so the entire list is rendered in the cell output.
pos_seqs

['ACATCCGTGCACCTCCG',
 'ACACCCAGACATCGGGC',
 'CCACCCGTACCCATGAC',
 'GCACCCATACATTACAT',
 'ACATCCATACACCCTCT',
 'ACACCCTTACACTTTTA',
 'GCATCCGTGCCTCCCAC',
 'AAACCCATGCACAGTGA',
 'ACATCCGTGCACCATTT',
 'ACACCCATACATACGGA',
 'ACACCCACACCCCGGGC',
 'ACCTCCGTACACCAATC',
 'ACACCCATACATGTTGA',
 'TGACCCATACATTTCCT',
 'ACATCCGTACATCAGAA',
 'AAACCCATACATATCTT',
 'TCACCCAGTCATCCAAC',
 'AGACCCACACACCGCAT',
 'TAGCCCATACACCGCAG',
 'ACACCCACACCCCTCAT',
 'ACACCCACGCCCCGCAA',
 'GAACCCACACCTCTCAC',
 'GCACCCACACATCGCAT',
 'AAATCCGTGCACCGCAT',
 'AAACCCATGCACCTCCA',
 'ACACCCATTCACCGCAC',
 'ACATCCGTGCACTGTGG',
 'ACATCCATACATTCGGT',
 'ACACCCATACATTTATA',
 'ACACCCAGACACCTCAA',
 'GCACCCGTACCCCACAA',
 'ACATCCGAACACCAAAT',
 'ACACCCATACCTCTCAG',
 'GCACCCGCACACCGCAG',
 'ACACCCACACATTTACA',
 'AAACCCATACAATATAT',
 'ACATCCGTACACTTTTG',
 'CCGCCCATACACCCCAT',
 'GCACCCACACACCGGAC',
 'ACACCCAAACATTAGGG',
 'CCATCCATACATTTTGG',
 'GCACCCATGCACCTCAC',
 'GAACCCATTCACCACAT',
 'CCATCCATACATGTTCA',
 'ACACCCACACATATCTA',
 'AAATCCGT

In [4]:
# Inspect the negative sequences. A bare last expression displays the whole
# object, so every (long) sequence is rendered in the cell output.
neg_seqs

['CTTCATGTCAGCCTGCACTTCTGGGTCGTTGAAGTTTCTACCGATCAAACGCTTAGCGTCGAAAACGGTATTCGAAGGATTCATAGCAGCTTGATTCTTAGCAGCATCACCAATCAATCTTTCAGTGTCAGTGAAAGCGACAAAAGATGGAGTGGTTCTGTTACCTTGATCGTTGGCAATAATGTCCACACGATCATTAGCAAAGTGAGCAACACACGAGTATGTTGTACCTAAATCAATACCGACAGCTTTTGACATATTATCTGTTATTTACTTGAATTTTTGTTTCTTGTAATACTTGATTACTTTTCTTTTGATGTGCTTATCTTACAAATAGAGAAAATAAAACAACTTAAGTAAGAATTGGGAAACGAAACTACAACTCAATCCCTTCTCGAAGATACATCAATCCACCCCTTATATAACCTTGAAGTCCTCGAAACGATCAGCTAATCTAAATGGCCCCCCTTCTTTTTGGGTTCTTTCTCTCCCTTTTGCCGCCGATGGAACGTTCTGGAAAAAGAAGAATAATTTAATTACTTTCTCAACTAAAATCTGGAGAAAAAACGCAAATGACAGCTTCTAAACGTTCCGTGTGCTTTCTTTCTAGAATGTTCTGGAAAGTTTACAACAATCCACAAGAACGAAAATGCCGTTGACAATGATGAAACCATCATCCACACACCGCGCACACGTGCTTTATTTCTTTTTCTGAATTTTTTTTTTCCGCCATTTTCAACCAAGGAAATTTTTTTTCTTAGGGCTCAGAACCTGCAGGTGAAGAAGCGCTTTAGAAATCAAAGCACAACGTAACAATTTGTCGACAACCGAGCCTTTGAAGAAAAAATTTTTCACATTGTCGCCTCTAAATAAATAGTTTAAGGTTATCTACCCACTATATTTAGTTGGTTCTTTTTTTTTTCCTTCTACTCTTTATCTTTTTACCTCATGCTTTCTACCTTTCAGCACTGAAGAGTCCAACCGAATATATACACACA

In [5]:
# Chop every negative sequence into consecutive, non-overlapping windows whose
# length matches the first positive sequence (17 nt for the data shown above).
# A trailing remainder shorter than one window is discarded.
target_length = len(pos_seqs[0])
negatives_split = []
for seq in neg_seqs:
    n_windows = len(seq) // target_length
    negatives_split.extend(
        seq[start:start + target_length]
        for start in range(0, n_windows * target_length, target_length)
    )

In [6]:
# Full dataset: all positives first, followed by the negative windows.
X = pos_seqs + negatives_split
# Matching labels as a plain list of floats: 1.0 per positive, 0.0 per negative.
y = [1.0] * len(pos_seqs) + [0.0] * len(negatives_split)

In [7]:
# Resample sequences and labels with the project's helper; presumably this
# balances the two classes — TODO confirm against preprocess.sample_seqs.
# NOTE: X and y are rebound here, so re-running this cell resamples the
# already-sampled data rather than the lists built in the previous cell.
X, y = preprocess.sample_seqs(X, y)

In [8]:
# One-hot encode the sampled sequences with the project's helper.
# NOTE: X is rebound to its encoded form, so re-running this cell would pass
# the already-encoded data back into the encoder; run it once per kernel.
X = preprocess.one_hot_encode_seqs(X)

In [9]:
# Hold out 30% of the samples (test_size=0.3); random_state=42 fixes the
# shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [10]:
# Convert both label sets to NumPy arrays before handing them to the network.
y_train, y_test = np.array(y_train), np.array(y_test)

In [11]:
# Classifier architecture: 68 inputs -> 34 hidden units (ReLU) -> 1 output
# (sigmoid). 68 is presumably 17 positions x 4 one-hot channels — TODO confirm
# against preprocess.one_hot_encode_seqs.
hidden_layer = {'input_dim': 68, 'output_dim': 34, 'activation': 'relu'}
output_layer = {'input_dim': 34, 'output_dim': 1, 'activation': 'sigmoid'}
layers_clf = [hidden_layer, output_layer]

# Training hyperparameters; `epochs` is kept as its own name so it can be
# referenced separately from the model object.
epochs = 10
clf = nn.NeuralNetwork(
    layers_clf,
    lr=0.001,
    seed=0,
    batch_size=1000,
    epochs=epochs,
    loss_function="BCE",
    convergence_thresh=0.0001,
)

In [None]:
# Train the classifier. The held-out split from train_test_split is passed as
# the validation data, so it doubles as both validation and test set here.
# Presumably the two return values are the training and validation loss
# histories — TODO confirm against nn.NeuralNetwork.fit.
train_loss, val_loss = clf.fit(X_train, y_train, X_test, y_test)

1
Forward pass
Forwaaaaaaaard
Forward function, going through layer 1
Switcheroo done
Forward function, going through layer 2
Switcheroo done
[0.25295199154599446]
Backprop pass
Updating parameters
Finished updating
Forwaaaaaaaard
Forward function, going through layer 1
Switcheroo done
Forward function, going through layer 2
Switcheroo done
Validation forward complete
[[0.28038552]
 [0.1730926 ]
 [0.19233288]
 ...
 [0.22688791]
 [0.24596516]
 [0.28912231]]
