### Loading the dataset and the model

In [None]:
from predpairutils import *

# Location of the raw dataset table (TSV).
dataset_file = "./data/predpair_dataset.tsv"

# Pickle files associated with the train / validation / test splits.
train_file = "./data/train_rfs.pk"
val_file = "./data/val_rfs.pk"
test_file = "./data/test_rfs.pk"

In [None]:
# defining a generator for working with sequences of different lengths

class TrainGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves one variable-length group per batch.

    Batch ``index`` is built from ``x[index]`` and ``y[index]``.  Groups with
    more than ``max_samples`` entries are randomly thinned: every entry is
    kept independently with probability ``max_samples / k`` (``k`` being the
    group size), so the batch holds about ``max_samples`` entries on average
    and its exact size varies from call to call.

    Args:
        x: Indexable collection; ``x[i]`` holds the inputs of group ``i``.
        y: Indexable collection aligned with ``x``; ``np.array(y[i])`` must be
            2-D because a singleton axis is inserted at position 1.
        max_samples: Group size above which random thinning kicks in.
            Defaults to 32 (the previously hard-coded value).  Pass ``None``
            to disable thinning and always return the full group.

    Raises:
        ValueError: If ``max_samples`` is given and is smaller than 1.
    """

    def __init__(self, x, y, max_samples=32):
        # Keras 3 expects subclasses to initialise the base class; on older
        # versions this is a harmless no-op.
        super().__init__()
        if max_samples is not None and max_samples < 1:
            raise ValueError("max_samples must be >= 1 or None")
        self.x = x
        self.y = y
        self.max_samples = max_samples
        # Kept for backward compatibility; not used by the class itself.
        self.n = 0
        self.max = self.__len__()

    def __len__(self):
        """Return the number of batches, i.e. the number of groups in ``x``."""
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(inputs, targets)`` for group ``index``.

        ``inputs`` is ``np.array`` of the (possibly thinned) ``x[index]``;
        ``targets`` is the matching ``y[index]`` reshaped from ``(n, d)`` to
        ``(n, 1, d)``.
        """
        xs, ys = self.x[index], self.y[index]
        k = len(ys)
        limit = self.max_samples
        if limit is not None and k > limit:
            keep_prob = limit / k
            keep = [i for i in range(k) if random.random() <= keep_prob]
            if not keep:
                # Extremely unlikely, but an empty selection would crash the
                # reshape below; fall back to a single random entry.
                keep = [random.randrange(k)]
            xs = [xs[i] for i in keep]
            ys = [ys[i] for i in keep]
        y_arr = np.array(ys)
        return np.array(xs), y_arr.reshape(y_arr.shape[0], 1, y_arr.shape[1])

In [None]:
# Load and parse the dataset of RNA sequences.
# parse_data() reads the TSV at `dataset_file` and returns two objects; judging
# by their names they are presumably an rf_id -> sequence -> data mapping and a
# sequence -> rf_id mapping (TODO confirm against predpairutils).

rf_id2seq2data, seq2rf_id = parse_data(dataset_file)
# prepare_data() returns query/answer collections for the train, validation
# and test splits. It is given the three split pickle paths; whether it reads
# or writes those files cannot be told from this notebook.
train_q, train_ans, val_q, val_ans, test_q, test_ans = prepare_data(rf_id2seq2data, train_file, val_file, test_file)

In [None]:
# Create the model through the predpair() factory (not defined in this
# notebook; presumably supplied by the predpairutils star import).
# NOTE(review): whether predpair() also loads pretrained weights is not
# visible here.
model = predpair()

In [None]:
# Drop the parsed dataset mapping: the cells below only use the prepared
# train/val/test splits (presumably done to free memory before training).
del rf_id2seq2data

### Training the model

In [None]:
import datetime

# Train in ten rounds of one epoch each, saving the weights after every round.
ep_now = 1          # epochs per call to model.fit
sn = len(train_q)   # number of training groups used per round
vn = len(val_q)     # number of validation groups used per round

for i in range(10):
    # Timestamp taken before fitting; it is printed and also names the
    # weights file written at the end of this round.
    s = str(datetime.datetime.now())
    print(s)

    train_gen = TrainGenerator(train_q[:sn], train_ans[:sn])
    val_gen = TrainGenerator(val_q[:vn], val_ans[:vn])
    history = model.fit(
        train_gen,
        epochs=ep_now,
        steps_per_epoch=sn,
        validation_data=val_gen,
        validation_steps=vn,
    )

    # NOTE(review): the file name embeds str(datetime.now()), which contains
    # spaces and colons; colons are not valid in Windows file names.
    fname = 'predpair_weights_{0}.h5'.format(s)
    model.save_weights(fname)
    print("saved weights")

### Evaluation

In [None]:
# Evaluate on the test split, fed through the same generator class used for
# training.
# NOTE(review): TrainGenerator randomly thins groups with more than 32
# entries, so repeated runs of this cell can report different metrics.
model.evaluate(TrainGenerator(test_q, test_ans))