In [9]:
def get_average_consensus(starts_all_programs, bp_lists_all_programs):
    """Average each predictor's pairwise consensus scores against all other predictors.

    Parameters
    ----------
    starts_all_programs : list of list
        One list of structure start locations per predictor.
    bp_lists_all_programs : list of list
        One list of base-pair lists per predictor, indexed in parallel with
        ``starts_all_programs`` (``bp_lists_all_programs[p][k]`` belongs to
        ``starts_all_programs[p][k]``).

    Returns
    -------
    list of list of float
        One list per predictor. Entry ``i`` of a predictor's list is the mean,
        over every *other* predictor, of the ``i``-th score returned by
        ``get_consensus_scores``. When there is no other predictor to compare
        against, that predictor's list is empty.

    Notes
    -----
    Scores are combined by position, so every per-comparison score list is
    expected to have the same length and ordering. The number of averaged
    positions is taken from the first comparison.
    """
    avg_consensus = []
    # "predictor A": the predictor whose structures are being scored
    for idx1, program1_starts in enumerate(starts_all_programs):
        program1_bp_lists = bp_lists_all_programs[idx1]

        # one score list per "predictor B" (every predictor except A itself)
        program1_consensus_scores = [
            get_consensus_scores(program1_starts, program1_bp_lists,
                                 program2_starts, bp_lists_all_programs[idx2])
            for idx2, program2_starts in enumerate(starts_all_programs)
            if idx1 != idx2
        ]

        # With fewer than two predictors there is nothing to compare against;
        # indexing [0] below would raise IndexError, so report "no scores".
        if not program1_consensus_scores:
            avg_consensus.append([])
            continue

        n_comparisons = len(program1_consensus_scores)
        n_structures = len(program1_consensus_scores[0])

        # average the i-th score across all comparisons into a single value
        program1_avgs = [
            sum(scores[i] for scores in program1_consensus_scores) / n_comparisons
            for i in range(n_structures)
        ]

        avg_consensus.append(program1_avgs)

    return avg_consensus

def get_consensus_scores(program1_starts, bp_lists1, program2_starts, bp_lists2):
    """Score each structure of predictor 1 against the structure predictor 2 reports at the same start.

    Parameters
    ----------
    program1_starts, program2_starts : list
        Start locations reported by each predictor.
    bp_lists1, bp_lists2 : list
        Base-pair lists, indexed in parallel with the corresponding starts.

    Returns
    -------
    list of float
        Exactly one score per entry of ``program1_starts``, in the same order.
        The score is ``compare_bp_lists`` of the two base-pair lists for the
        first entry of ``program2_starts`` equal to the start, or ``0.0`` when
        predictor 2 has no entry at that start.

    Notes
    -----
    Keeping one score per start preserves positional alignment with
    ``program1_starts``; callers that combine score lists by index rely on it.
    """
    scores = []
    for i, start1 in enumerate(program1_starts):
        for idx, start2 in enumerate(program2_starts):
            if start1 == start2:
                # retrieves consensus score for that structure
                scores.append(compare_bp_lists(bp_lists1[i], bp_lists2[idx]))
                # only the first match counts, so a repeated start in
                # predictor 2 cannot shift the positions of later scores
                break
        else:
            # predictor 2 has nothing at this location: no agreement
            scores.append(0.0)
    return scores

def compare_bp_lists(bp_list1, bp_list2):
    """Return how strongly ``bp_list1`` agrees with ``bp_list2``.

    The result is the number of equal ``(bp1, bp2)`` combinations, taking
    ``bp1`` from ``bp_list1`` and ``bp2`` from ``bp_list2``, divided by the
    number of base pairs in ``bp_list1``. An empty ``bp_list1`` is divided by
    1 instead, which yields ``0.0``.
    """
    # every matching combination counts once
    matches = sum(1 for pair1 in bp_list1 for pair2 in bp_list2 if pair1 == pair2)
    # normalize by the size of bp_list1, falling back to 1 when it is empty
    denominator = len(bp_list1) or 1
    return matches / denominator

In [3]:
import pandas as pd

# Hand-built fixture: three predictors (A, B, C), each reporting a structure
# at the same three start locations (0, 100, 200).
# Each *_bplist entry lists the [open, close] index pairs (0-based) of the
# brackets in the dot-bracket string at the same position of *_dotbracket.
predictorA_starts = [0, 100, 200]
predictorA_dotbracket = ['.((.[[.))..]]', '.((.[..))...]', '((.[[[..))]]]']
predictorA_bplist = [[[1,8],[2,7],[4,12],[5,11]], [[1,8],[2,7],[4,12]], [[0,9],[1,8],[3,12],[4,11],[5,10]]]

# Predictor B agrees fully with A on the first structure and shares no pairs
# with A on the other two.
predictorB_starts = [0, 100, 200]
predictorB_dotbracket = ['.((.[[.))..]]', '.(((...)))...', '((.(((.))).))']
predictorB_bplist = [[[1,8],[2,7],[4,12],[5,11]], [[1,9],[2,8],[3,7]], [[0,12],[1,11],[3,9],[4,8],[5,7]]]

# Predictor C's third structure has no base pairs at all (empty list).
predictorC_starts = [0, 100, 200]
predictorC_dotbracket = ['.((..[.))..].', '.(((...)))...', '.............']
predictorC_bplist = [[[1,8],[2,7],[5,11]], [[1,9],[2,8],[3,7]], []]

# Bundle the per-predictor lists into the parallel list-of-lists form that
# get_average_consensus takes; the dot-bracket strings are not bundled here.

test_starts_all_programs = [predictorA_starts, predictorB_starts, predictorC_starts]
test_bp_lists_all_programs = [predictorA_bplist, predictorB_bplist, predictorC_bplist]
print(test_starts_all_programs)
print(test_bp_lists_all_programs)

[[0, 100, 200], [0, 100, 200], [0, 100, 200]]
[[[[1, 8], [2, 7], [4, 12], [5, 11]], [[1, 8], [2, 7], [4, 12]], [[0, 9], [1, 8], [3, 12], [4, 11], [5, 10]]], [[[1, 8], [2, 7], [4, 12], [5, 11]], [[1, 9], [2, 8], [3, 7]], [[0, 12], [1, 11], [3, 9], [4, 8], [5, 7]]], [[[1, 8], [2, 7], [5, 11]], [[1, 9], [2, 8], [3, 7]], []]]


In [10]:
# Run the fixture through get_average_consensus. The result holds one list per
# predictor (A, B, C) with one averaged score per structure; for example A's
# first structure matches B fully (1.0) and C on 3 of 4 pairs (0.75) -> 0.875.
test_avg_consensus = get_average_consensus(test_starts_all_programs,test_bp_lists_all_programs)
print(test_avg_consensus)

[[0.875, 0.0, 0.0], [0.875, 0.5, 0.0], [1.0, 0.5, 0.0]]
