In [1]:
import csv
import numpy as np
import random
from nupack import *
import pandas as pd
import statistics
import matplotlib
import matplotlib.pyplot as plt
# Use a larger default font for every figure drawn in this notebook.
matplotlib.rcParams['font.size'] = 22

In [2]:
from rna_inverse import *

# Designing an Aptamer Library

First, find the secondary structure of the EP23 aptamer (and its free energy) using NUPACK.

In [3]:
# Fold the EP23 reference aptamer. nupack_analyze_sequence (star-imported
# from rna_inverse) is unpacked here as a (structure, mfe) pair; both values
# are reused by later cells as the design target.
ep23_sequence = 'ACGUAUCCCUUUUCGCGUA'
ep23_structure, ep23_mfe = nupack_analyze_sequence(ep23_sequence)

# One value per line: structure string first, then the energy.
print(ep23_structure, ep23_mfe, sep='\n')

((((..........)))).
-1.3716583251953125


In [13]:
# Generate `library_size` candidate sequences that should fold into the EP23
# target structure, recording the starting sequence, the sequence returned by
# the mutation routine, and whether that routine reported success.
library_size = 20

# Position -> nucleotide constraints handed to the rna_inverse helpers.
# NOTE(review): positions look 0-based (every printed sequence has 'U' at
# index 3 and 'G' at index 14) -- confirm against rna_inverse.
constraints = {3: 'U', 14: 'G'}

initial_sequences, final_sequences, successes = [], [], []

for i in range(library_size):
    # Only the second element returned by initialize_sequences is used here.
    _, initial_sequence = initialize_sequences(ep23_structure, 50, constraints)
    initial_sequences.append(initial_sequence)

    # Structure of the starting sequence; the accompanying energy is discarded.
    nupack_structure, _ = nupack_analyze_sequence(initial_sequence)

    result = mutate_sequence_iterate(
        initial_sequence,
        ep23_structure,
        nupack_structure,
        100,
        constraints,
    )
    successes.append(result['success'])
    final_sequence = result['final_sequence']
    final_sequences.append(final_sequence)

# Report the three collections, one labelled line each.
for label, values in (
    ('Initial sequences:', initial_sequences),
    ('Final sequences:', final_sequences),
    ('Successful:', successes),
):
    print(label, values)

Initial sequences: ['GCCUCGCGUAGAAUGGGCG', 'GAGUAAAUCACGCCGCUCG', 'GGAUGGUGCCCAGGGUCCG', 'GGGUAAUUUAUAACGCCCU', 'CCGUUAGGUCGGAUGCGGC', 'CGCUGAAUCAUACAGGCGC', 'CGGUUUCCGUCAUUGCCGC', 'CCCUUCAAGAAAAUGGGGA', 'GCCUGGAUUAAAUAGGGCU', 'GCCUACACGGUUCAGGGCU', 'GGGUGGUGAGGGAAGCCCU', 'GGGUUACACUUUGCGCCCA', 'GUCUGAAGGAGAAGGGACU', 'CUCUAAACCACUCAGGAGG', 'GGCUAACGGUACUCGGCCG', 'GCCUUGCAACCUUCGGGCA', 'GCCUAACGUGGAAGGGGCU', 'CGCUAGCUAUGUCAGGCGA', 'GCCUAGAAAUAAUAGGGCA', 'GAGUUGUGCAUUGUGCUCA']
Final sequences: ['GCCUCGCGUAGAAUGGGCG', 'GAGUAAAUCACGCCGCUCG', 'GGAUGGUGCCCAGGGUCCG', 'GGGUAAUUUAUAACGCCCU', 'CCGUUAGGUCGGAUGCGGC', 'CGCUGAAUCAUACAGGCGC', 'CGGUUUCCGUCAUUGCCGC', 'CCCUUCAAGAAAAUGGGGA', 'GCCUGGAUUAAAUAGGGCU', 'GCCUACACGGUUCAGGGCU', 'GGGUGGUGAGGGAAGCCCU', 'GGGUUACACUUUGCGCCCA', 'GUCUGAAGGAGAAGGGACU', 'CUCUAAACCACUCAGGAGG', 'GGCUAACGGUACUCGGCCG', 'GCCUUGCAACCUUCGGGCA', 'GCCUAACGUGGAAGGGGCU', 'CGCUAGCUAUGUCAGGCGA', 'GCCUAGAAAUAAUAGGGCA', 'GAGUUGUGCAUUGUGCUCA']
Successful: [True, True, True, True, True,

In [37]:
# Per-sequence metrics for the designed library, kept as three parallel lists
# aligned with `final_sequences`.
mfe_list, similarity_scores, reverse_sequences = [], [], []

for sequence in final_sequences:
    # Re-fold the designed sequence and keep its energy.
    nupack_structure, mfe = nupack_analyze_sequence(sequence)
    mfe_list.append(mfe)

    # Fraction of the sequence NOT reported by structure_differences when
    # comparing the folded structure against the EP23 target.
    mismatch = structure_differences(ep23_structure, nupack_structure)
    n_positions = len(sequence)
    similarity_score = (n_positions - len(mismatch)) / n_positions
    similarity_scores.append(similarity_score)

    # Same sequence with its characters in reverse order.
    reversedsequence = sequence[::-1]
    reverse_sequences.append(reversedsequence)

In [38]:
# Assemble the library table: one row per designed sequence, built from the
# four parallel lists produced by the earlier cells.
library_columns = ['sequences', 'mfe', 'similarity_scores', '3to5_sequence']
library_rows = list(
    zip(final_sequences, mfe_list, similarity_scores, reverse_sequences)
)
aptamer_library = pd.DataFrame(library_rows, columns=library_columns)

In [39]:
# Display the assembled library table as this cell's output.
aptamer_library

Unnamed: 0,sequences,mfe,similarity_scores,3to5_sequence
0,GCCUCGCGUAGAAUGGGCG,-4.087886,1.0,GCGGGUAAGAUGCGCUCCG
1,GAGUAAAUCACGCCGCUCG,-2.587885,1.0,GCUCGCCGCACUAAAUGAG
2,GGAUGGUGCCCAGGGUCCG,-3.487886,1.0,GCCUGGGACCCGUGGUAGG
3,GGGUAAUUUAUAACGCCCU,-4.232212,1.0,UCCCGCAAUAUUUAAUGGG
4,CCGUUAGGUCGGAUGCGGC,-3.309097,1.0,CGGCGUAGGCUGGAUUGCC
5,CGCUGAAUCAUACAGGCGC,-3.009097,1.0,CGCGGACAUACUAAGUCGC
6,CGGUUUCCGUCAUUGCCGC,-3.309097,1.0,CGCCGUUACUGCCUUUGGC
7,CCCUUCAAGAAAAUGGGGA,-4.345622,1.0,AGGGGUAAAAGAACUUCCC
8,GCCUGGAUUAAAUAGGGCU,-4.53221,1.0,UCGGGAUAAAUUAGGUCCG
9,GCCUACACGGUUCAGGGCU,-4.132211,1.0,UCGGGACUUGGCACAUCCG
