In [1]:
# # get functions from OPART.ipynb
# %run OPART.ipynb

In [2]:
import numpy as np
from ipynb.fs.full.utility_functions import gen_data_dict, get_data, get_cumsum, error_count, write_to_csv, opart

In [3]:
# Experiment: for every sequence, run opart once with the penalty
# lambda = log(len(sequence) - 1) and record the label errors of the resulting
# changepoints on both folds, one CSV row per sequence.

# Input archives and output file for this experiment.
SIGNALS_PATH = 'sequence_label_data/genome/signals.gz'
LABELS_PATH  = 'sequence_label_data/genome/labels.gz'
OUTPUT_CSV   = '1.genome_learning_output/BIC_test.csv'

seqs   = gen_data_dict(SIGNALS_PATH)
labels = gen_data_dict(LABELS_PATH)

# Column names handed to write_to_csv together with every row.
header = ['sequenceID', 'fold_1_total_labels', 'fold_2_total_labels', 'fold_1_errs', 'fold_2_errs']

for i in range(len(seqs)):
    # Signal of sequence i plus the label boundaries for the two folds.
    # NOTE(review): going by the names, these are start/end positions of
    # negative and positive labels per fold -- confirm against get_data.
    (sequence,
     neg_start_1, neg_end_1, pos_start_1, pos_end_1,
     neg_start_2, neg_end_2, pos_start_2, pos_end_2) = get_data(i, seqs=seqs, labels=labels)

    # Penalty for this sequence: log of (number of entries - 1).
    # NOTE(review): the "- 1" presumably discounts a padding entry added by
    # get_data -- confirm. A sequence with a single entry yields log(0) = -inf.
    sequence_length = len(sequence) - 1
    lda = np.log(sequence_length)

    # Total number of labels per fold (negative + positive).
    fold1_total_labels = len(neg_start_1) + len(pos_start_1)
    fold2_total_labels = len(neg_start_2) + len(pos_start_2)

    # Single opart run at this penalty; the same changepoints are scored
    # against the labels of each fold.
    chpnt = opart(lda, sequence)
    err_1 = error_count(chpnt, neg_start_1, neg_end_1, pos_start_1, pos_end_1)
    err_2 = error_count(chpnt, neg_start_2, neg_end_2, pos_start_2, pos_end_2)

    # One output row per sequence, in the same column order as `header`.
    row = [i, fold1_total_labels, fold2_total_labels, sum(err_1), sum(err_2)]
    write_to_csv(OUTPUT_CSV, header, row)