In [2]:
import csv
import numpy as np
import random
from nupack import *
import pandas as pd
import statistics
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rcParams.update({'font.size': 22})

In [3]:
from rna_inverse import *

# Designing an Aptamer Library

First, use NUPACK to find the structure of the EP23 aptamer, together with its MFE.

In [10]:
# Analyze the EP23 sequence once; the resulting structure is the target that
# later cells pass to the design and comparison helpers.
ep23_sequence = 'ACGUAUCCCUUUUCGCGUA'
ep23_structure, ep23_mfe = nupack_analyze_sequence(ep23_sequence)

# Structure on the first line, MFE on the second.
print(ep23_structure, ep23_mfe, sep='\n')

((((..........)))).
-1.3716583251953125


In [7]:
# Build a library of `library_size` sequences aimed at the EP23 structure.
# For every member we keep the starting sequence, the sequence returned by the
# mutation routine, and that routine's reported success flag.
library_size = 20
initial_sequences, final_sequences, successes = [], [], []

for i in range(library_size):
    # Only the second value returned by initialize_sequences is used here.
    # NOTE(review): 50 (here) and 100 (below) are presumably attempt/iteration
    # limits -- confirm against rna_inverse before tuning them.
    _, initial_sequence = initialize_sequences(ep23_structure, 50)
    initial_sequences.append(initial_sequence)

    # Structure of the starting sequence; the second return value is discarded.
    nupack_structure, _ = nupack_analyze_sequence(initial_sequence)

    result = mutate_sequence_iterate(
        initial_sequence,
        ep23_structure,
        nupack_structure,
        100,
    )
    successes.append(result['success'])
    final_sequence = result['final_sequence']
    final_sequences.append(final_sequence)

# Summarize the run.
print('Initial sequences:', initial_sequences)
print('Final sequences:', final_sequences)
print('Successful:', successes)

Initial sequences: ['GACGGCGCAAAAUACGUCG', 'GGGUGCGGGCAACAACCCA', 'CCCCUUCACAUCAUGGGGG', 'CAGGGAUAACUCCACCUGG', 'GGUCGUACUGGUUGGACCU', 'GGGCGAGGUGAGCGGCCCU', 'GGCGGCUUUAACGACGCCG', 'UCGCGACAAAUCAGGCGAA', 'GGCUGCCACGACCAAGCCA', 'CCGGACCGAACAACCCGGU', 'GGCGGUACGAAAUACGCCG', 'AGCCGAGUCACGUAGGCUG', 'GGCCGCUACACAUAGGCCA', 'GGCCACAGAUGACCGGCCC', 'GCUCAUCACCGCUCGAGCA', 'GGCGAGAAUGGAUACGCCG', 'GGGCCGACAUUAUAGCCCA', 'GAGGUCUCUCUUAUCCUCA', 'GGGGUUGAUUGGGUCCCCG', 'GGGCUUCGAUGACUGCCCG']
Final sequences: ['GACGGCGCAAAAUACGUCG', 'GGGUGCGGGCAACAACCCA', 'CCCCUUCACAUCAUGGGGG', 'CAGGGAUAACUCCACCUGG', 'GGUCGUACUGGUUGGACCU', 'GGGCGAGGUGAGCGGCCCU', 'GGCGGCUUUAACGACGCCG', 'UCGCGACAAAUCAGGCGAA', 'GGCUGCCACGACCAAGCCA', 'CCGGACCGAACAACCCGGU', 'GGCGGUACGAAAUACGCCG', 'AGCCGAGUCACGUAGGCUG', 'GGCCGCUACACAUAGGCCA', 'GGCCACAGAUGACCGGCCC', 'GCUCAUCACCGCUCGAGCA', 'GGCGAGAAUGGAUACGCCG', 'GGGCCGACAUUAUAGCCCA', 'GAGGUCUCUCUUAUCCUCA', 'GGGGUUGAUUGGGUCCCCG', 'GGGCUUCGAUGACUGCCCG']
Successful: [True, True, True, True, True,

In [15]:
# Re-analyze every designed sequence and compare its structure with the EP23
# target, collecting one MFE value and one similarity score per sequence.
mfe_list, similarity_scores = [], []

for sequence in final_sequences:
    nupack_structure, mfe = nupack_analyze_sequence(sequence)
    mfe_list.append(mfe)

    # similarity = (sequence length - number of entries returned by
    # structure_differences) / sequence length.
    # NOTE(review): presumably each entry is one mismatched position -- confirm.
    mismatch = structure_differences(ep23_structure, nupack_structure)
    similarity_score = (len(sequence) - len(mismatch)) / len(sequence)
    similarity_scores.append(similarity_score)

In [16]:
# Guard against stale kernel state: zip() stops at the shortest input, so if the
# three lists came from different runs, rows would be dropped without warning.
assert len(final_sequences) == len(mfe_list) == len(similarity_scores), (
    'final_sequences, mfe_list and similarity_scores must have the same length'
)

# One row per designed sequence: the sequence, its MFE, and its similarity score.
aptamer_library = pd.DataFrame(
    list(zip(final_sequences, mfe_list, similarity_scores)),
    columns=['sequences', 'mfe', 'similarity_scores'],
)

In [17]:
# Bare last expression: the notebook renders the DataFrame as this cell's output.
aptamer_library

Unnamed: 0,sequences,mfe,similarity_scores
0,GACGGCGCAAAAUACGUCG,-4.737886,1.0
1,GGGUGCGGGCAACAACCCA,-5.587886,1.0
2,CCCCUUCACAUCAUGGGGG,-6.870575,1.0
3,CAGGGAUAACUCCACCUGG,-4.870575,1.0
4,GGUCGUACUGGUUGGACCU,-5.082211,1.0
5,GGGCGAGGUGAGCGGCCCU,-7.182211,1.0
6,GGCGGCUUUAACGACGCCG,-6.837885,1.0
7,UCGCGACAAAUCAGGCGAA,-4.598799,1.0
8,GGCUGCCACGACCAAGCCA,-5.587886,1.0
9,CCGGACCGAACAACCCGGU,-4.797502,1.0
