In [8]:
import pandas as pd
import arnie
from arnie.utils import *
from arnie.utils import _group_into_non_conflicting_bp

def get_groups(dotbracket):
    bp_list = convert_dotbracket_to_bp_list(dotbracket, allow_pseudoknots=True)
    groups = _group_into_non_conflicting_bp(bp_list)
    return groups

def get_bracket_count(struct):
    bracket_count = 0
    for char in struct:
        if char == '[':
            bracket_count+=1
    return bracket_count

def get_PK_rank(struct):
    PK_rank = 0
    if get_bracket_count(struct) > 1:
        for pos,char in enumerate(struct):
            if (char == '[' and struct[pos+1] == '['):
                PK_rank += 1
    for pos,char in enumerate(struct):
        if char == '[':
            if (pos == 0) and (struct[pos+1] != '[') or (struct[pos+1] != '.'):
                PK_rank -= 1
            elif (pos != 0) and (struct[pos-1] != '[') or (struct[pos-1] != '.'):
                PK_rank-= 1
            elif (pos != 0) and (struct[pos+1] != '[') or (struct[pos+1] != '.'):
                PK_rank -= 1
            elif (pos == len(pos)) and (struct[pos-1] != '[') or (struct[pos-1] != '.'):
                PK_rank -= 1
        elif char == ']':
            if (pos == 0) and ((struct[pos+1] != ']') or (struct[pos+1] != '.')):
                PK_rank -= 1
            elif (pos != 0) and (struct[pos-1] != ']') or (struct[pos-1] != '.'):
                PK_rank-= 1
            elif (pos != 0) and (struct[pos+1] != ']') or (struct[pos+1] != '.'):
                PK_rank -= 1
            elif (pos == len(pos)) and (struct[pos-1] != ']') or (struct[pos-1] != '.'):
                PK_rank -= 1
    return PK_rank

def is_probable_PK(groups):
    likely_bp = 0
    for i in groups[1:]:
        if len(i) > 2:
            likely_bp += 1
    if likely_bp >= 2:
        return True
    else:
        return False

In [9]:
def get_ranks(csv):
    df = pd.read_csv(csv)
    df_start = df['start']
    df_end = df['end']
    df_seqs = df['sequence']
    df_structs = df['struct']
    start = df_start.values.tolist()
    end = df_end.values.tolist()
    seqs = df_seqs.values.tolist()
    dotbrackets = df_structs.values.tolist()
    groups = []
    for struct in dotbrackets:
        group = get_groups(struct)
        groups.append(group)
    ranks = []
    for i, group in enumerate(groups):
        if is_probable_PK(group):
            dotbracket = dotbrackets[i]
            rank = get_PK_rank(dotbracket)
            ranks.append(rank)
    PK_list = zip(start, end, seqs, dotbrackets, ranks)
    ranked_df = pd.DataFrame(PK_list, columns=['start', 'end', 'seq', 'struct', 'rank'])
    ranked_df = ranked_df.sort_values('rank', ascending=False)
    return ranked_df

In [10]:
knotty_ranked = get_ranks('/home/gnye8/Desktop/PK_research/pipeline_results/knotty/knotty_output.csv')

In [11]:
knotty_ranked

Unnamed: 0,start,end,seq,struct,rank
282,12560,12680,AACAGGUUGUAGAUGCAGAUAGUAAAAUUGUUCAACUUAGUGAAAU...,...(((((((.[[[[[[[[(((...((((((((((((............,-3
166,7520,7640,CAAGAGUCGAAUGUACAACUAUUGUUAAUGGUGUUAGAAGGUCCUU...,...((((([[[[[[[..(((.((((((((((...(((((..[[[))...,-3
1,40,160,UUUCGAUCUCUUGUAGAUCUGUUCUCUAAACGAACUUUAAAAUCUG...,....((((.......)))).((((.......))))........(((...,-4
560,24240,24360,AUUACAAAUACCAUUUGCUAUGCAAAUGGCUUAUAGGUUUAAUGGU...,......(((((((((..((((.....[[[[[[))))....))))))...,-4
564,24440,24560,ACACGCUUGUUAAACAACUUAGCUCCAAUUUUGGUGCAAUUUCAAG...,..(((((((..(....[[[[[[[[.[[[[[[[[...[[[)..))))...,-4
...,...,...,...,...,...
133,6200,6320,UUAAGAAAGGAGCUAAAUUGUUACAUAAACCUAUUGUUUGGCAUGU...,...........((((((([[[[[[[[.........)))))))]]]]...,-38
314,13840,13960,UUGAUGAAGGUAAUUGUGACACAUUAAAAGAAAUACUUGUCACAUA...,.((((((..(((..((((((([[[[[...........)))))))))...,-38
454,19840,19960,AUUUGGGUGUGGACAUUGCUGCUAAUACUGUGAUCUGGGACUACAA...,........((((......))))......((((..(((((....[[[...,-39
358,15680,15800,AUAUUUGCGUAAACAUUUCUCAAUGAUGAUACUCUCUGACGAUGCU...,.............((((....)))).(((.(([[[.[[[.[[[[[....,-40


In [13]:
pknots_ranked = get_ranks('/home/gnye8/Desktop/PK_research/pipeline_results/pknots/pk_predictor_output.csv')

In [14]:
pknots_ranked

Unnamed: 0,start,end,seq,struct,rank
246,21960,22080,UCAAUUUUGUAAUGAUCCAUUUUUGGGUGUUUAUUACCACAAAAAC...,....((((((....[[[[[((((((((((.....)))).))))))....,-4
23,2440,2560,GAAACUGGCCUACUCAUGCCUCUAAAAGCCCCAAAAGAAAUUAUCU...,......(((...[[[...[[[[[[[..)))......(((......)...,-4
282,24880,25000,AGGAAUUUUUAUGAACCACAAAUCAUUACUACAGACAACACAUUUG...,.((.......[[[[.)).....]]]].....((((((............,-4
281,24840,24960,UGUCUUUGUUUCAAAUGGCACACACUGGUUUGUAACACAAAGGAAU...,(((((((.....))).))))((((.((((((((((..............,-4
204,17960,18080,AAUGUCUGAUAGAGACCUUUAUGACAAGUUGCAAUUUACAAGUCUU...,...((((.....))))(((......)))..(((((((...((((((...,-4
...,...,...,...,...,...
3,240,360,CGUCCGGGUGUGACCGAAAGGUAAGAUGGAGAGCCUUGUCCCUGGU...,...(((((....(((....)))..(((((......))))))))))(...,-23
105,9680,9800,GGAUAACAAUUGCUUAUAUCAUUUGUAUUUCCACAAAGCAUUUCUA...,.((((.....[[[[[.))))...(((......)))]]]]]...(((...,-23
85,8320,8440,CGUGACCUUGGUGCUUGUAUUGACUGUAGUGCGCGUCAUAUUAAUG...,.(((((((((((..........)))).))(((((((.......)))...,-24
43,4160,4280,UGGUUAUACCUACUAAAAAGGCUGGUGGCACUACUGAAAUGCUAGC...,(((((((((((.......)))(((.(((((((..[[....[[[[((...,-27
