# Analysis of multi Loop-Constructs randomly created in the wet Lab

In [2]:
## Packages needed to run the data clean up
using DataFrames
using CSV
using FASTX
using BioAlignments
using BioSequences
using Random
using StatsBase
using StatsPlots
using Combinatorics
using ViennaRNA
using Unitful
using Clustering
using PlotRNA
using DelimitedFiles
using RNAstructure

In [3]:
file_name = "rXLC_LoopRegio.fasta"

"rXLC_LoopRegio.fasta"

In [4]:
function isolate_hairpin(df::DataFrame, upstream::String, downstream::String)
    full_seq = []
    all_seq = df.Sequence

    for n = 1:size(all_seq, 1)
        if occursin(upstream, all_seq[n]) && occursin(downstream, all_seq[n]) # looks for sequences that have both sequences
            edge1 = findfirst(upstream, all_seq[n])[end] # finds the beginning of stem 1
            edge2 = findfirst(downstream, all_seq[n])[1] # finds the end of stem 1
            # the processing above must always be checked to ensure single base duplication or
            # deletion is not being accidentally introduced 
    
            new_seq = all_seq[n][edge1:edge2]
            if new_seq != ""
                push!(full_seq, new_seq) # pushes non-empty sequences into a set
            end
        end
    end
    df.Loop_regio= full_seq
    return df
end    

isolate_hairpin (generic function with 1 method)

In [5]:
## Loading the folding parameters for ViennaRNA
ViennaRNA.params_load_RNA_Langdon2018()

In [6]:
## generating structural data 
function rna_details(dataframe::DataFrame)
    Loop_structure = DataFrame([String[],String[], Float32[], Float32[]],["ID", "MFE_structure","MFE_energy","Mbp_dist"])
    new_loop = []
    for n = 1: size(dataframe, 1)
        new_loop = dataframe.Loop_regio[n]
        new_fold = FoldCompound(new_loop; temperature=37u"°C", uniq_ML=false, circular=false);
        ViennaRNA.partfn(new_fold)
        new_mfe = ViennaRNA.mfe(new_fold)
        new_bpdis = ViennaRNA.mean_bp_distance(new_fold)
        
        push!(Loop_structure, (dataframe.ID[n], new_mfe[1], ustrip(new_mfe[2]), new_bpdis))
        finalize(new_fold)
    end
     dataframe = innerjoin(dataframe, Loop_structure, on = :ID);
     insertcols!(dataframe, :Seq_Cluster => 0, :Str_Cluster => 0)
    return dataframe
end

rna_details (generic function with 1 method)

In [14]:
## Loops parameters
upstream = "CGTAATCTGCTGCTTGCAAA" #upstream Sequence of LoopI
downstream = "AAATACTGT" #downstream Sequence of LoopIII
wildtype = "ACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCC" #of LoopI
upstream1 = "CTTTTTTTCTGCGCGTAATCTGCTG"
downstream1 = "AGTGTAGCCGTAGTTAGGCCACCA"

"AGTGTAGCCGTAGTTAGGCCACCA"

In [15]:
WT = "gtgCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCA"
r2LC_1_1 = "TGTGGTGTAATTTGGACCTCTTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGA"
r2LC_1_3 = "TCTGGGGGAGGTGGACTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGGAGAATTTTTTCGTAATAACGCTAAAAATTTTAAAGGGGTTTTCTAAATCTCCGCAGGGGGCAAAAGTAAATTTTTGGGCCCGGCCTCCAGAAAAAAGATTGAGAACCCCCAAATAATTTCCTTTTTTGTCCCAAGGGTGTTTTTTAAGAAAGAATTATTTCCCCCCCCCCGGGGGGGGTGTTTTTTTTTGTGTGTGTGGGGAAAAAAAAAAAAAACCCCCCCCCAAAAAGAAGAGAGACCCCCCCCCTATGGGGGCCCCCTTCTCAAAAAAAAAAATATTTTTTTATATGGGGGGCGGCGCCCCCTCTTCCTCTCTTCTTTTTTTTGTTGTTAAAAAAAAAAAAAAAAAAAGAGAGGGGGAGGGAAAAAAACTCTCGGGGG"
r2LC_1_4 = "CAAAAAAGGTTGGCAAAAGGATACTTGCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAAATCGAAGGTTTAACAACCCGTAACTCGCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCTAGCCCTTGAGATGTTAGATAGGCACATACTCACTTTGCCCTTTAAAAGGGAAACTGGCAGAATTTTTTCGTATAACGCTAAAAGTTTTAAAGGGCTTTCTAATCTCCCGATGGGCAAAATACTTTTTGGTCCCGCCTCCAAAAAAAATTTGAAATTCCCAAATCATTACCTTTTTGTCCCCAGGTGTTCTTAGAAGGGTTTTTCCCCCCGCCGGGGGTGTTTTTTGTTGTGTTGGAAAATAAGAACAACCCCCAAAAAAGAAGAAACCTCCTGTTGTCGCCTTTCCACAACAAATTTTTTTTAGGGGGCGCCCCCCCCCCTTTTTTTTTGAAAAAAAAAAAAAGAAAAAAAAACTCTCTCTCCCCTTATCAAGGGTTGATAATAAATATTTTTCTTTCCCCCCCCCCCCCCCCGGGGCCGGCCCGAAAAAAAAAAACGCACCTAACTACACTCAACTACTTATCTTGGAGTCCCCCCTCAACTCACTCACATCACCCCTCCTCAGTCACCCCCATTCCCAAATCTCATCCACCCCGATCGGGGGGGTGTATGAAAAAATTAAAAATAAACAATAACT"
r2LC_15_1 = "CTAAGTTTGGACCTTCTTTACGTTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGGGGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTACCCCTTGAGATGTTAGATAGGCCCCTACTCACTTTTGCCCTTTAGAAGGGGAAACTGGGAGAATTTTTTTCGTATAACGCCCAAAATTTTTAAGGGCTTTCTAAACCTCCCGCAGGAGCAAAATAAATTTTGTGTCCCGCCTCGCAAAAAAAATTGAAACCCGCAAATAAATATCCTTTTTGTCGCCAAGGTGTTTTTTAAAAAAATTTTGTTCCCCCGCCCGGGGGGTTTTTTTTTTTTGTTTTGTTGGTAAAAAAAAACAACCTCCCCCAAAAGAAGAAAGAGACCCCCTCTATTGCGCCTTTTTCCCCCCTCCAAATATTATTTAATGGTGGCGCGCGCCCCCCCCCGCCTTCCTTTCTTTTAAAAAAAAAAAAAAAAAGAAAAGAAAAAACCCCTCGGCCCCCCCCCCCCCCGGGAGAGAATATATATAAAATAATATTGTTTTTTTTTCTTTCCCACACCTTCTCTTTTGCTACCGCCGCCCCACCAAAAAAAGGTAGATGGGTGAGGCCCTCCGCCCCCTGCACTCGTGTCCCTGCC"
r3LC_1_2 = "GATGGGTGAGGTTGGGACCTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCATTGAGATGTTAGATAGGCCCCTACTCACTTTTGCCCTTTAGAAGGGGAAGACTGGCAGAATTTTTTCGTAAAAACCCTAAAGATTTTAAAGGGGCTTTCTAAATCTTCGCAGGGGGAAAGAAATTTTTGGTCCCGGCCCCCAGAAAAAAGATGAAAATTCCAAAAAATTAACTTTTTTTCACAAGGTGTTCTTTAGAAAGAATTTTGTTCCCCCGCCGGGGGGTTTTTTTTTTGTGGTTTGGAAAAAAAGAAAAAACGCCTAAAAAAAAGGAACCCCCCCTTGTTTTTCCCCTTTTCCCCATCAAATATTATTTCAGGGGAGCCGCCCCTTCCCTTTTTTTTGTGTAGAAAAAAAAAAAAGGAGAAAAACCTTTTTTTCCTTTCTTTTTAAATTATAATAATTATTTTTCCCCCCAAACCCCCCCGCCCCCCCCCCCCCAGAGGAAAAATTACTATTCTGCGTTATGTCAACTACTTCTACTCCTCTATCCTTCTTCACCATTTCTAGTGTGAATAAGAACGTGGGGGGAG"
r3LC_4_1 = "TAGTGGGTGAGGTGGGACTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTTAAATCGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACCATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGCAAGATTTTTACGTATAACGCTAAAGTTTTAATGGGCTTTCTAAGTCATCGCGAGGGACAAAATACTTTTGGGACCCGGCCTACAGAAAAAAGTTTAGAATCTCCAAATAAATTTGCCTTTTTTGCCCCAAGGTTTTTCCAAGAGAAGGTTTATTGCCCACCGCGGGGGGGTTTTCTTTGTTGGGTTTGAGAAAAAAAGAAAAACCCCCAAAAAAAAGGAGAACCCCCTTTTTGTGTTCGCCTTTTTTAAAAATTAATTTTTATACGGGGAAACCCCCTTTTTTTCTTTTTTTTTTTGGGAAAAAAAAAAAAAAAGAGGGAAAGAAAACTCCCCTCTCCCCCTAAAAAAAAAAATTAAAATAAACATTATCTTTCTCCCCCCCCCCCCACACGCGCG"
r3LC_4_2 = "GAAGTGGTGGAGTTGGGACTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACCATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGCAGAATTTTTACGTAATAACGCTAAAAGTTTTAAAGGGCTTTCTAAATCCTCGCGAGGGGGCAAAGTACTTTTTGGTCCCGGCCTCCAGAAAAAAGATGAAAATCCCCAGAAAAATTTACCTTTTTTGCGCCAAGGTGTTTCTTAGAAAAGATTTTTTCCCCCGCGCGGGGGGTTTTCTTTTGTTTGGTTTTGGAGAAAAAAAACCCCTTCCCTCAAAAAAAAGAAGACCCCCCCCTTATGTGTGCCCCTTTTTAAAAAACTAAATATTTATTATTTTAGGGGGAAGGCCCCCTTCTTTCCTCCTTTTTTTTTGGTTGATTAAAAACCAAAAAAGAAGGGGGAGGGGAACCTTCTCCTGTCTCCTCTTTTTAAGGAGAGAAAAAATAAAAAAAATATATATTTTTCTTTCTTCCCCACAAAACGATCTGACAGAAAAGGAAGGG"
d3LC_18_1 = "GGACATTCGGGCCCTGGATGCGGGCTAATCCTAACTGCGGGAGGGATCCTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTAGAGTCAGGGTTGGTGGGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAAATCGAAGGTTTAACAACCCGTAAACTCGCCAGAAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTAGCCCTTGAGAAGTTAGATAGGCCCCTACTCACTTTTGCCCTTTTAAAGGGGAAGACTGGCAGAATTTTTTCGTATAACGCCAAAGATTTTAAAGGGGTTTTTAAACCTCCGCAGGGGCAAAGAGATTTTTTGTTCCCGCCCCCAAAAAAAGATTGAAACTCCCAAAAAATTAACTTTTTTGTCCCCAAGGGTGTTTTTTAAAAAGAATTATGTCCCCCCCCCGGGGGGTTTTTTTTTTGTTTTTGTGGAGATAAAAAAACACCCCCCAAAAAAAGAAGAAACCCCCCTGTTTTTCGCCTTTCCCAAACAAAATATATTATTAGGGGGGCCCCTTCCCCTCCCTTCTTTTGTTTGAAAAAAAAAAAAAAGAGGAAGAAAGAACCCCCCCCTCCCTCTCTCTCTTTGGGAAAAAGAAAAAACAAACATTCTTTTTCTCCCTCCCCCCCACTCCTCCCACACACACCGCAAAGAGGTGGGGGACAAAAACGCCTCCAATACTCG"
HP_6_1 = "TGGGGAGTTGTGCCTTCCCTTCGGGAAGGGGCGTGGATCTTCTTGGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCCAGAATGAATTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAATAAGCGGGCTTTGCTCGACGCCTAGCCATTGAGATGTTAGATAGGCACATACTCACTTTGCCCTTTAGAAGGGGAAAGCTGGCAGAATTTTTACGTATAACGCTAAAAGTTTTAGAGTGCTTACAAGTCACGCGAGGGGCAAAATACTTTAGGTCCCGCCTCCGAAAACAGTTGAAACCCCCAAATCATTTCCTTTTGTCCACCAGGTGTCCCTAGAGATGCTATGTCCCCCCCCCCGGGGGGTTTCTTTTTTGTTGGTTTGGAGAATAAAGACCCCCCCCCCAAAAAAAGAGACCACCCCCGGATTGGCCCCTTTCAAACAACAAAATTTTTTCCCGGGGCGCGCCCCTTCCCCCCCTTGTTCTTTGGTAGAAAAAAAAAAAAGAGGGGGGGGGAACCCTTCTCTTTTCCTCTTTTTTTTAAAAATTTTTTTTTTATATATTTCTTTTTTCTCTCTCCCCTAAATATATACTTTCCACCCAAACAGAGCCCCACGCGCGGAGGAAGAGAGGAGGACCCGCTCTCTTTTCTTCTTCTCGGCCGACCCCTCCTGCTTTAGTTGTCGATCATCGAATACTGTGCAGACTGTATCTCTATCATCGTAATACTCTTCACGACCTCCTCAAAGTAATCCGAACG"
HP_5_3 = "TCGGGGGGGAGTTGGGACCTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACAGCTACCAGCGGTGGTTTGTTTGCCGGATCATGTATTTATTTTCCGAAGGTAACTGGCTTCAGCGCGGTGGTTTGTTTGCCGGATCATTGCTGGATTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAGTCCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAGATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACCATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGCAGGATTTTTACGTAATAACGCTAAAAGTTTTAGATGTGCTTTACTAAGTCATCCCGATGGAGCAAAAGTACATTTAGGTCCCGGCCTACGAAAAAAAGAATGAAACCTCGCCAAACCATTAACCTTTTTTGCCCAAAGGGTTTTTCTAAAAGAAGGTTTTTTGTCCCCCCGCGCGGGGGGTTTTTTTTTTTTTTGTTTTGGAAAAAAAAACCACCCCTAAAAAAAGGAAACCCCATCCTAATTTTCCCCCTTTACAAAAAAATGTTTTTTTTTTTACGCGGAAAACCCCCCTTTTTTTTTCTGTTTATTTATAAAAAAAAAAAAGGAGGGGTGCCACCCCCTTTTTGCGTGTCTTTAAAGAGGGGATGTTTTTTC"
KPneumoniae = "CGAATTTGGTTACTGCGCTCCTCCAAGCCAGTTACCTCGGTTCAAAGAGCGCCAACTCCTCGAACCTTCGAAAAAACCGCCCGCCAAGGCGGTTTTTTCGT"

"CGAATTTGGTTACTGCGCTCCTCCAAGCCAGTTACCTCGGTTCAAAGAGCGCCAACTCCTCGAACCTTCGAAAAAACCGCCCGCCAAGGCGGTTTTTTCGT"

In [16]:
r2LC_Analysis = DataFrame(ID = ["WT", "r2LC_1_1", "r2LC_1_3", "r2LC_1_4", "r2LC_15_1", "r3LC_1_2", "r3LC_4_1", "r3LC_4_2", "d3LC_18_1", "HP_6_1", "HP_5_3"], Sequence = [WT, r2LC_1_1, r2LC_1_3, r2LC_1_4, r2LC_15_1, r3LC_1_2, r3LC_4_1, r3LC_4_2, d3LC_18_1, HP_6_1, HP_5_3])

Row,ID,Sequence
Unnamed: 0_level_1,String,String
1,WT,gtgCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCA
2,r2LC_1_1,TGTGGTGTAATTTGGACCTCTTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGA
3,r2LC_1_3,TCTGGGGGAGGTGGACTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGGAGAATTTTTTCGTAATAACGCTAAAAATTTTAAAGGGGTTTTCTAAATCTCCGCAGGGGGCAAAAGTAAATTTTTGGGCCCGGCCTCCAGAAAAAAGATTGAGAACCCCCAAATAATTTCCTTTTTTGTCCCAAGGGTGTTTTTTAAGAAAGAATTATTTCCCCCCCCCCGGGGGGGGTGTTTTTTTTTGTGTGTGTGGGGAAAAAAAAAAAAAACCCCCCCCCAAAAAGAAGAGAGACCCCCCCCCTATGGGGGCCCCCTTCTCAAAAAAAAAAATATTTTTTTATATGGGGGGCGGCGCCCCCTCTTCCTCTCTTCTTTTTTTTGTTGTTAAAAAAAAAAAAAAAAAAAGAGAGGGGGAGGGAAAAAAACTCTCGGGGG
4,r2LC_1_4,CAAAAAAGGTTGGCAAAAGGATACTTGCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAAATCGAAGGTTTAACAACCCGTAACTCGCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCTAGCCCTTGAGATGTTAGATAGGCACATACTCACTTTGCCCTTTAAAAGGGAAACTGGCAGAATTTTTTCGTATAACGCTAAAAGTTTTAAAGGGCTTTCTAATCTCCCGATGGGCAAAATACTTTTTGGTCCCGCCTCCAAAAAAAATTTGAAATTCCCAAATCATTACCTTTTTGTCCCCAGGTGTTCTTAGAAGGGTTTTTCCCCCCGCCGGGGGTGTTTTTTGTTGTGTTGGAAAATAAGAACAACCCCCAAAAAAGAAGAAACCTCCTGTTGTCGCCTTTCCACAACAAATTTTTTTTAGGGGGCGCCCCCCCCCCTTTTTTTTTGAAAAAAAAAAAAAGAAAAAAAAACTCTCTCTCCCCTTATCAAGGGTTGATAATAAATATTTTTCTTTCCCCCCCCCCCCCCCCGGGGCCGGCCCGAAAAAAAAAAACGCACCTAACTACACTCAACTACTTATCTTGGAGTCCCCCCTCAACTCACTCACATCACCCCTCCTCAGTCACCCCCATTCCCAAATCTCATCCACCCCGATCGGGGGGGTGTATGAAAAAATTAAAAATAAACAATAACT
5,r2LC_15_1,CTAAGTTTGGACCTTCTTTACGTTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGGGGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTACCCCTTGAGATGTTAGATAGGCCCCTACTCACTTTTGCCCTTTAGAAGGGGAAACTGGGAGAATTTTTTTCGTATAACGCCCAAAATTTTTAAGGGCTTTCTAAACCTCCCGCAGGAGCAAAATAAATTTTGTGTCCCGCCTCGCAAAAAAAATTGAAACCCGCAAATAAATATCCTTTTTGTCGCCAAGGTGTTTTTTAAAAAAATTTTGTTCCCCCGCCCGGGGGGTTTTTTTTTTTTGTTTTGTTGGTAAAAAAAAACAACCTCCCCCAAAAGAAGAAAGAGACCCCCTCTATTGCGCCTTTTTCCCCCCTCCAAATATTATTTAATGGTGGCGCGCGCCCCCCCCCGCCTTCCTTTCTTTTAAAAAAAAAAAAAAAAAGAAAAGAAAAAACCCCTCGGCCCCCCCCCCCCCCGGGAGAGAATATATATAAAATAATATTGTTTTTTTTTCTTTCCCACACCTTCTCTTTTGCTACCGCCGCCCCACCAAAAAAAGGTAGATGGGTGAGGCCCTCCGCCCCCTGCACTCGTGTCCCTGCC
6,r3LC_1_2,GATGGGTGAGGTTGGGACCTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCATTGAGATGTTAGATAGGCCCCTACTCACTTTTGCCCTTTAGAAGGGGAAGACTGGCAGAATTTTTTCGTAAAAACCCTAAAGATTTTAAAGGGGCTTTCTAAATCTTCGCAGGGGGAAAGAAATTTTTGGTCCCGGCCCCCAGAAAAAAGATGAAAATTCCAAAAAATTAACTTTTTTTCACAAGGTGTTCTTTAGAAAGAATTTTGTTCCCCCGCCGGGGGGTTTTTTTTTTGTGGTTTGGAAAAAAAGAAAAAACGCCTAAAAAAAAGGAACCCCCCCTTGTTTTTCCCCTTTTCCCCATCAAATATTATTTCAGGGGAGCCGCCCCTTCCCTTTTTTTTGTGTAGAAAAAAAAAAAAGGAGAAAAACCTTTTTTTCCTTTCTTTTTAAATTATAATAATTATTTTTCCCCCCAAACCCCCCCGCCCCCCCCCCCCCAGAGGAAAAATTACTATTCTGCGTTATGTCAACTACTTCTACTCCTCTATCCTTCTTCACCATTTCTAGTGTGAATAAGAACGTGGGGGGAG
7,r3LC_4_1,TAGTGGGTGAGGTGGGACTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTTAAATCGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACCATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGCAAGATTTTTACGTATAACGCTAAAGTTTTAATGGGCTTTCTAAGTCATCGCGAGGGACAAAATACTTTTGGGACCCGGCCTACAGAAAAAAGTTTAGAATCTCCAAATAAATTTGCCTTTTTTGCCCCAAGGTTTTTCCAAGAGAAGGTTTATTGCCCACCGCGGGGGGGTTTTCTTTGTTGGGTTTGAGAAAAAAAGAAAAACCCCCAAAAAAAAGGAGAACCCCCTTTTTGTGTTCGCCTTTTTTAAAAATTAATTTTTATACGGGGAAACCCCCTTTTTTTCTTTTTTTTTTTGGGAAAAAAAAAAAAAAAGAGGGAAAGAAAACTCCCCTCTCCCCCTAAAAAAAAAAATTAAAATAAACATTATCTTTCTCCCCCCCCCCCCACACGCGCG
8,r3LC_4_2,GAAGTGGTGGAGTTGGGACTCTTACGTGCCCGATCAACTCGAGTGCAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACCATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGCAGAATTTTTACGTAATAACGCTAAAAGTTTTAAAGGGCTTTCTAAATCCTCGCGAGGGGGCAAAGTACTTTTTGGTCCCGGCCTCCAGAAAAAAGATGAAAATCCCCAGAAAAATTTACCTTTTTTGCGCCAAGGTGTTTCTTAGAAAAGATTTTTTCCCCCGCGCGGGGGGTTTTCTTTTGTTTGGTTTTGGAGAAAAAAAACCCCTTCCCTCAAAAAAAAGAAGACCCCCCCCTTATGTGTGCCCCTTTTTAAAAAACTAAATATTTATTATTTTAGGGGGAAGGCCCCCTTCTTTCCTCCTTTTTTTTTGGTTGATTAAAAACCAAAAAAGAAGGGGGAGGGGAACCTTCTCCTGTCTCCTCTTTTTAAGGAGAGAAAAAATAAAAAAAATATATATTTTTCTTTCTTCCCCACAAAACGATCTGACAGAAAAGGAAGGG
9,d3LC_18_1,GGACATTCGGGCCCTGGATGCGGGCTAATCCTAACTGCGGGAGGGATCCTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTAGAGTCAGGGTTGGTGGGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAAATCGAAGGTTTAACAACCCGTAAACTCGCCAGAAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGCTTTGCTCGACGCCTAGCCCTTGAGAAGTTAGATAGGCCCCTACTCACTTTTGCCCTTTTAAAGGGGAAGACTGGCAGAATTTTTTCGTATAACGCCAAAGATTTTAAAGGGGTTTTTAAACCTCCGCAGGGGCAAAGAGATTTTTTGTTCCCGCCCCCAAAAAAAGATTGAAACTCCCAAAAAATTAACTTTTTTGTCCCCAAGGGTGTTTTTTAAAAAGAATTATGTCCCCCCCCCGGGGGGTTTTTTTTTTGTTTTTGTGGAGATAAAAAAACACCCCCCAAAAAAAGAAGAAACCCCCCTGTTTTTCGCCTTTCCCAAACAAAATATATTATTAGGGGGGCCCCTTCCCCTCCCTTCTTTTGTTTGAAAAAAAAAAAAAAGAGGAAGAAAGAACCCCCCCCTCCCTCTCTCTCTTTGGGAAAAAGAAAAAACAAACATTCTTTTTCTCCCTCCCCCCCACTCCTCCCACACACACCGCAAAGAGGTGGGGGACAAAAACGCCTCCAATACTCG
10,HP_6_1,TGGGGAGTTGTGCCTTCCCTTCGGGAAGGGGCGTGGATCTTCTTGGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCCAGAATGAATTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGACCCGACACCATCGAATGGCCAGATGATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAAATCTAGATAACGAGGGCAAAAAATGATGTCTCGTTTAGATAAAAGTAAAGTGATTAACAGCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAATAAGCGGGCTTTGCTCGACGCCTAGCCATTGAGATGTTAGATAGGCACATACTCACTTTGCCCTTTAGAAGGGGAAAGCTGGCAGAATTTTTACGTATAACGCTAAAAGTTTTAGAGTGCTTACAAGTCACGCGAGGGGCAAAATACTTTAGGTCCCGCCTCCGAAAACAGTTGAAACCCCCAAATCATTTCCTTTTGTCCACCAGGTGTCCCTAGAGATGCTATGTCCCCCCCCCCGGGGGGTTTCTTTTTTGTTGGTTTGGAGAATAAAGACCCCCCCCCCAAAAAAAGAGACCACCCCCGGATTGGCCCCTTTCAAACAACAAAATTTTTTCCCGGGGCGCGCCCCTTCCCCCCCTTGTTCTTTGGTAGAAAAAAAAAAAAGAGGGGGGGGGAACCCTTCTCTTTTCCTCTTTTTTTTAAAAATTTTTTTTTTATATATTTCTTTTTTCTCTCTCCCCTAAATATATACTTTCCACCCAAACAGAGCCCCACGCGCGGAGGAAGAGAGGAGGACCCGCTCTCTTTTCTTCTTCTCGGCCGACCCCTCCTGCTTTAGTTGTCGATCATCGAATACTGTGCAGACTGTATCTCTATCATCGTAATACTCTTCACGACCTCCTCAAAGTAATCCGAACG


In [17]:
r2LC_filtered = isolate_hairpin(r2LC_Analysis, upstream1, downstream1)
clean_r2LC_filtered = select!(r2LC_filtered, Not(:Sequence))

Row,ID,Loop_regio
Unnamed: 0_level_1,String,Any
1,WT,GCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTA
2,r2LC_1_1,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA
3,r2LC_1_3,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA
4,r2LC_1_4,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA
5,r2LC_15_1,GCTTGCAAACAAAAAAACCACCGGGGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA
6,r3LC_1_2,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTA
7,r3LC_4_1,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTTAAATCGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTA
8,r3LC_4_2,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTA
9,d3LC_18_1,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTAGAGTCAGGGTTGGTGGGAGCGCAGATACCAAATACTGTTCTTCTA
10,HP_6_1,GCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCCAGAATGAATTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTA


In [19]:
## Adding structural information to the dataframe
r2LC_analysis = rna_details(clean_r2LC_filtered)

Row,ID,Loop_regio,MFE_structure,MFE_energy,Mbp_dist,Seq_Cluster,Str_Cluster
Unnamed: 0_level_1,String,Any,String,Float32,Float32,Int64,Int64
1,WT,GCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTA,((..((((((((....((((((((.....))))))))))))))))((((..(((((.......)))))..))))..((((.(((((((.....)))).))).)))).......)).......,-44.5,9.22847,0,0
2,r2LC_1_1,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA,((..((((((((....((((((((...........))))))))))))))))((((..(((((((((...............))).))))))..))))..((((.(((((((.....)))).))).)))).......)).......,-46.3,15.2166,0,0
3,r2LC_1_3,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA,((..((((((((....((((((((...........))))))))))))))))((((..(((((.......)))))..))))..((((.(((((((.....)))).))).)))).......)).......,-42.3,12.6579,0,0
4,r2LC_1_4,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA,((..((((((((....((((((((...........))))))))))))))))((((..(((((..................)))))..))))..((((.(((((((.....)))).))).)))).......)).......,-43.7,13.4994,0,0
5,r2LC_15_1,GCTTGCAAACAAAAAAACCACCGGGGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTA,((..((((((((....((((((.....))))))))))))))((((..(((((.......)))))..))))..((((.(((((((.....)))).))).)))).......)).......,-39.0,13.0591,0,0
6,r3LC_1_2,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTA,((((((((((((....((((((((...........))))))))))))))))((((..(((((((((...............))).))))))..)))).......(((((.((.....)))))))..))))((((..........)))).......,-50.5,14.4823,0,0
7,r3LC_4_1,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTTAAATCGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTA,((((((((((((....((((((((...........))))))))))))))))((((..(((((..................)))))..)))).......(((((........)))))..))))((((..........)))).......,-44.6,19.7885,0,0
8,r3LC_4_2,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGGTATGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTCCGTCAAGGGTCAGCTGAGCGCAGATACCAAATACTGTTCTTCTA,((((((((((((....((((((((...........))))))))))))))))((((..(((((..................)))))..)))).......(((((.((.....)))))))..))))((((..........)))).......,-47.9,12.7681,0,0
9,d3LC_18_1,GCTTGCAAACAAAAAAACCACCGTACAAATCGCACGCGGTGGTTTGTTTGCCGGATCAAGAGTAGTGTTTCTGCCGTGTGAACTAACTCTTTTTCCGAAGGTAACTGGCTTAGAGTCAGGGTTGGTGGGAGCGCAGATACCAAATACTGTTCTTCTA,((((((((((((....((((((((...........))))))))))))))))((((..(((((((((...............))).))))))..))))......((((((((.......))))))))..))))((((..........)))).......,-48.3,31.0462,0,0
10,HP_6_1,GCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCCAGAATGAATTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTCCTTCTA,((..((((((((....((((((((.....))))))))))))))))((((..............))))..((((.(((((((.....)))).))).)))).......)).......,-38.8,9.07938,0,0


In [11]:
save_name = "clean_" * file_name[1:end-6] * ".csv"


## Saving results
CSV.write(save_name, r2LC_analysis)

## Opening saved results
#save_name = "StructureAnalysisReady_Old_large_Loop1_rc.csv"
r2LC_analysis = CSV.read(save_name, DataFrame);

### Structure Predictions with RNAFold

In [12]:
RNAstructure.mfe("ACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCA")

(-43.9 kcal mol⁻¹, ".((((.(((((((((((...))))))))))).)))).((((..(((((.......)))))..))))..((((.(((((((.....)))).))).)))).")

In [13]:
function calc_RNAStructure(dataframe::DataFrame)
   for  
end

LoadError: ParseError:
[90m# Error @ [0;0m]8;;file:///Users/marlenewerner/Desktop/Diplom/Labo Work/Code for Analysis/RNA_structure_loops/In[13]#3:1\[90mIn[13]:3:1[0;0m]8;;\
   for  
[48;2;120;70;70mend[0;0m
[90m└─┘ ── [0;0m[91minvalid identifier[0;0m

In [14]:
function rna_details(dataframe::DataFrame)
    Loop_structure = DataFrame([String[],String[], Float32[], Float32[]],["ID", "MFE_structure","MFE_energy","Mbp_dist"])
    new_loop = []
    for n = 1: size(dataframe, 1)
        new_loop = dataframe.Loop_regio[n]
        new_fold = FoldCompound(new_loop; temperature=37u"°C", uniq_ML=false, circular=false);
        ViennaRNA.partfn(new_fold)
        new_mfe = ViennaRNA.mfe(new_fold)
        new_bpdis = ViennaRNA.mean_bp_distance(new_fold)
        
        push!(Loop_structure, (dataframe.ID[n], new_mfe[1], ustrip(new_mfe[2]), new_bpdis))
        finalize(new_fold)
    end
     dataframe = innerjoin(dataframe, Loop_structure, on = :ID);
     insertcols!(dataframe, :Seq_Cluster => 0, :Str_Cluster => 0)
    return dataframe
end

rna_details (generic function with 1 method)