In [1]:
import csv
import numpy as np
import random
from nupack import *
import pandas as pd
import statistics
import matplotlib
import matplotlib.pyplot as plt
# Notebook-wide plotting default: set matplotlib's base font size to 22.
matplotlib.rcParams.update({'font.size': 22})

In [2]:
from rna_inverse import *

# Designing an Aptamer Library

First, predict the secondary structure of the EP23 aptamer with NUPACK. This structure is the design target for every sequence in the library.

In [3]:
# RNA sequence of the EP23 aptamer (19 nt); it is the reference the library is built around.
ep23_sequence = 'ACGUAUCCCUUUUCGCGUA'
# nupack_analyze_sequence comes from the project-local rna_inverse module.
# It returns a pair that is unpacked here as (structure, mfe); the structure is
# reused below as the target for sequence design.
# NOTE(review): the helper presumably also prints both values — confirm in rna_inverse.
ep23_structure, ep23_mfe = nupack_analyze_sequence(ep23_sequence)

((((..........)))).
-1.3716583251953125


In [9]:
# Fixed-base design constraints: sequence position -> required nucleotide.
# Defined in this cell because this is the first place they are used; before,
# the name only existed if a later cell had already been executed, so a fresh
# "Restart & Run All" failed here with NameError.
# NOTE(review): positions look 0-based, judging by the sampled sequences
# (base 3 is always U, base 14 is always G) — confirm against rna_inverse.
constraints = {3: 'U', 14: 'G'}

# Sample 10 candidate sequences for the EP23 target structure and keep the
# sequence the helper hands back as the starting point for optimisation.
# NOTE(review): the returned frame appears to be ranked by similarity to the
# target — confirm in initialize_sequences.
dataframe, initial_sequence = initialize_sequences(ep23_structure, 10, constraints)
dataframe

Unnamed: 0,random_sequences,nupack_structures,mfe,mismatch,similarity_scores
6,CCGUAAAGUCUAUCGCGGG,((((..........)))).,-3.420576,[],1.0
8,CCAUCCAGAAGGAGGUGGU,(((((........))))).,-4.597502,"[4, 13]",0.894737
9,GUCUUAUCCACUGGGGACC,(((((........))))).,-3.2988,"[4, 13]",0.894737
3,UAAUGAGCCUGAGCGUUAU,(((((........))))).,-1.147502,"[4, 13]",0.894737
7,CUCUAAAGGACGCCGGAGU,((((...((...)))))).,-1.647501,"[7, 8, 12, 13]",0.789474
0,UCAUAUCCGUUAAGGUGAC,(((...((.....))))).,-1.026573,"[3, 6, 7, 13]",0.789474
2,CGUUCCAGCCAGGAGACGA,((((((.....)))).)).,-4.395622,"[4, 5, 11, 12, 13, 15]",0.684211
5,UUGUGGCAAUUUGAGCAAU,.....((.......))...,-0.941872,"[0, 1, 2, 3, 5, 6, 16, 17]",0.578947
1,AUCUGGUAGUUAGAGGAUA,.((((.....)))).....,-1.316113,"[0, 4, 10, 11, 12, 13, 14, 15, 16, 17]",0.473684
4,GAUUAUAUCGGUACGAUCC,......((((...))))..,-1.134494,"[0, 1, 2, 3, 6, 7, 8, 9, 13, 17]",0.473684


In [10]:
# --- Design parameters -------------------------------------------------------
library_size = 100          # number of aptamer variants to design
N_CANDIDATES = 50           # random candidate sequences sampled per variant
MAX_MUTATION_STEPS = 100    # budget handed to mutate_sequence_iterate
                            # NOTE(review): presumably an iteration cap — confirm
RANDOM_SEED = 0             # fixed seed so the generated library is reproducible

# Seed both common global RNGs so that re-running this cell yields the same
# library.
# NOTE(review): assumes rna_inverse draws from `random` and/or `numpy.random`;
# if it uses its own generator, the seed must be passed there instead.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Fixed-base constraints: sequence position -> required nucleotide.
constraints = {3: 'U', 14: 'G'}

initial_sequences = []      # starting sequence chosen for each variant
final_sequences = []        # sequence after the mutation search
successes = []              # per-variant 'success' flag reported by the search

for _variant in range(library_size):
    # Pick a starting sequence from a fresh batch of random candidates.
    _candidates, initial_sequence = initialize_sequences(
        ep23_structure, N_CANDIDATES, constraints
    )
    initial_sequences.append(initial_sequence)

    # Fold the starting sequence, then mutate it towards the EP23 structure.
    nupack_structure, _mfe = nupack_analyze_sequence(initial_sequence)
    result = mutate_sequence_iterate(
        initial_sequence, ep23_structure, nupack_structure,
        MAX_MUTATION_STEPS, constraints,
    )

    # Note: the final sequence is kept even when the search reports failure;
    # `successes` records which entries actually reached the target.
    successes.append(result['success'])
    final_sequence = result['final_sequence']
    final_sequences.append(final_sequence)

print('Initial sequences:', initial_sequences)
print('Final sequences:', final_sequences)
print('Successful:', successes)

Initial sequences: ['GGGUAGAGGCUAGGGCCCA', 'GGGUGUACGUUUUGGCCCU', 'GAGUCUCGUUUCCAGCUCA', 'GGCUCCAAUCCACUGGCCA', 'GCGUUGCAGACCUUGCGCA', 'UGCUAAUUAGAACAGGCAA', 'GCCUCAGUAUCGCAGGGCG', 'GGCUCCUCGCUGAUGGCCU', 'GGCUUAUUUAGAGUGGCCA', 'GGCUUUCACAAUGCGGCCC', 'GGCUGGACGGACAAGGCCA', 'AGGUGACCUUUAGAGCCUG', 'GCCUCCUUGCUGUUGGGCG', 'GGCUGCGACUUGAGGGCCA', 'GGGUACUAGGGAUAGCCCU', 'GCCUAAGACUGGAGGGGCU', 'CGGUGUAAGCUCUGGCCGG', 'GGCUUCUAGACGCUGGCCG', 'GCCUAAACUGCGACGGGCA', 'CCGUAUUCCCUUGAGCGGA', 'GGCUCCAAACGCAAGGCCA', 'GGGUGCAUGCUGAAGCCCA', 'GGUUGGCAAAGAGAGACCC', 'CGGUGCUUUUCCUAGCCGG', 'CGCUCGUUUCAAAAGGCGA', 'GCGUCGAGGAAGUAGCGCA', 'CCGUGUGGUUCGUAGCGGA', 'CCUUGGUUCAUUAAGAGGG', 'GCGUAUCUGUACCCGCGCG', 'CGCUGGUCAACGAGGGCGC', 'GCAUUUAGGGCGUUGUGCU', 'GCCUGAGCACUAGGGGGCC', 'GCCUCUCUAAGUUAGGGCA', 'GGGUGUGGAUCAUGGCCCU', 'GCCUGUGUGAGAUGGGGCU', 'GGUUCUGCAACAUUGACCG', 'GGAUGUCCUGGUGAGUCCU', 'GGGUGGAUGGCAAAGCCCU', 'CGGUCACUCACUACGCCGA', 'CCGUAUCAUAGAAAGCGGG', 'GGCUUGCUAUCCUUGGCCU', 'CGGUGUUAGAACUGGCCGG', 'GGCUACGUGCGCA

In [11]:
# Per-sequence metrics for the designed library, in the same order as
# final_sequences: folding energy, fraction of positions whose predicted
# structure matches the EP23 target, and the sequence written back-to-front.
mfe_list, similarity_scores, reverse_sequences = [], [], []

for sequence in final_sequences:
    # Fold the designed sequence and record its energy.
    nupack_structure, mfe = nupack_analyze_sequence(sequence)
    mfe_list.append(mfe)

    # Similarity = share of positions NOT reported as differing from the target.
    mismatch = structure_differences(ep23_structure, nupack_structure)
    n_bases = len(sequence)
    similarity_scores.append((n_bases - len(mismatch)) / n_bases)

    # Same bases in reverse order (stored later as the '3to5_sequence' column).
    reverse_sequences.append(sequence[::-1])

In [12]:
# Assemble the library table, one row per designed sequence.
# Built from a column dict rather than zip(): zip() silently truncates to the
# shortest list, which would hide stale data if the cells above were run out
# of order. With a dict, pandas raises ValueError when column lengths differ.
aptamer_library = pd.DataFrame({
    'sequences': final_sequences,
    'mfe': mfe_list,
    'similarity_scores': similarity_scores,
    '3to5_sequence': reverse_sequences,
})

In [13]:
# Show the assembled library; pandas abbreviates long frames in the rendered output.
aptamer_library

Unnamed: 0,sequences,mfe,similarity_scores,3to5_sequence
0,GGGUAGAGGCUAGGGCCCA,-4.987886,1.0,ACCCGGGAUCGGAGAUGGG
1,GGGUGUACGUUUUGGCCCU,-5.032212,1.0,UCCCGGUUUUGCAUGUGGG
2,GAGUCUCGUUUCCAGCUCA,-2.487885,1.0,ACUCGACCUUUGCUCUGAG
3,GGCUCCAAUCCACUGGCCA,-4.087886,1.0,ACCGGUCACCUAACCUCGG
4,GCGUUGCAGACCUUGCGCA,-4.487885,1.0,ACGCGUUCCAGACGUUGCG
...,...,...,...,...
95,GCCUUCAUAACUACGGGCC,-3.398800,1.0,CCGGGCAUCAAUACUUCCG
96,GGGUAGCAACUAUGGCCCA,-4.987886,1.0,ACCCGGUAUCAACGAUGGG
97,GCGUAUGGUAGUUCGCGCA,-3.887886,1.0,ACGCGCUUGAUGGUAUGCG
98,UGCUAGGCACGGGGGGCAG,-2.248799,1.0,GACGGGGGGCACGGAUCGU


In [14]:
# Export the reversed (3'->5') sequences as plain text, one sequence per line.
with open('ep23_library_3to5_seq.txt', 'w') as filehandle:
    filehandle.writelines('%s\n' % seq for seq in reverse_sequences)