# Introduction

In [None]:
from infrared import *
from infrared.rna import *

import matplotlib.pyplot as plt

In [None]:
target = "((((((((((...))))((((....))))))))))"
model = Model(len(target), 4)
model.add_constraints(BPComp(i,j) for (i,j) in parse(target))
sampler = Sampler(model)
samples = [sampler.sample() for _ in range(10)]

In [None]:
sequences = [ass_to_seq(x) for x in samples]
sequences

We are going to visualize the nucleotide frequencies of the sampled sequences 
if the module ```logomaker``` is available
(it can be installed, e.g., by ```conda install logomaker```).

In [None]:
def draw_logo(samples,name=None):
    """Draw a sequence logo of the nucleotide frequencies in the samples.

    Args:
        samples: sampled assignments; each one is converted to a
            sequence string by ``ass_to_seq``.
        name: optional file name; if given, the logo is written to this
            file with ``plt.savefig`` before it is shown.

    Returns:
        The list of sequences obtained from the samples.

    Raises:
        ModuleNotFoundError: if the module ``logomaker`` is not installed.
    """
    # imported here, such that only this function depends on logomaker
    import logomaker as lm

    sequences = [ass_to_seq(x) for x in samples]

    matrix = lm.alignment_to_matrix(sequences = sequences)
    logo = lm.Logo(matrix)
    logo.style_xticks(rotation=90, fmt='%d', anchor=0)
    logo.ax.xaxis.set_ticks_position('none')
    # all styling has to be applied before the figure is saved and shown;
    # otherwise it affects neither the file nor the displayed logo
    logo.ax.xaxis.set_tick_params(pad=-1)
    if name is not None:
        plt.savefig(name)
    plt.show()
    return sequences

def opt_draw_logo(samples,name=None):
    """Draw the logo of the samples if possible; always return sequences.

    Calls ``draw_logo(samples, name)``. If that raises a
    ModuleNotFoundError, the error is printed instead of propagated.

    Returns:
        The list of sequences obtained by applying ``ass_to_seq`` to
        every sample.
    """
    try:
        draw_logo(samples, name)
    except ModuleNotFoundError as missing_module:
        print(missing_module)
    return list(map(ass_to_seq, samples))


opt_draw_logo(samples)

### Multiple targets

In [None]:
#           01234567890123456789012345678901234
targets = ["((((((((((...))))((((....))))))))))",
           "((((((.((((((((....))))..))))))))))",
           ".((((((...)))))).(((((((....)))))))"]

In [None]:
for target in targets:
    model.add_constraints(BPComp(i,j) for (i,j) in parse(target))

In [None]:
sampler = Sampler(model)
designs = [sampler.sample() for _ in range(10)]

samples = [x for x in designs]
opt_draw_logo(samples)

# Methods

## Elementary use of Infrared

In [None]:
n = 35

In [None]:
model = Model(n,4)

In [None]:
target = "((((((((((...))))((((....))))))))))"
model.add_constraints(BPComp(i,j) for (i,j) in parse(target))

In [None]:
sampler = Sampler(model)
samples = [sampler.sample() for _ in range(10)]

opt_draw_logo(samples)

## Sequence constraints in IUPAC code

In [None]:
iupac_sequence = "SNNNNNNNNNRYYNNNNNNNNGNRANNNNNNNNNS"

In [None]:
for i, x in enumerate(iupac_sequence):
    model.add_constraints(ValueIn(i, iupacvalues(x)))

In [None]:
sampler = Sampler(model)
samples = [sampler.sample() for _ in range(20)]

opt_draw_logo(samples)

## Control of GC content

In [None]:
# add functions for GC control
model.add_functions([GCCont(i) for i in range(n)], 'gc')

In [None]:
# set a weight and sample
model.set_feature_weight(0.15, 'gc')

sampler = Sampler(model)
samples = [sampler.sample() for _ in range(1000)]
opt_draw_logo(samples)

In [None]:
## Code to produce the figures in the paper
# Set WRITEFIGS to True in order to write the logos and histograms as
# svg files; with False, the figures are only displayed.
WRITEFIGS = False
for name,weight in [('minus', -1), ('zero', 0), ('plus', 1)]:

    model.set_feature_weight(weight, 'gc')
    sampler = Sampler(model)
    samples = [sampler.sample() for _ in range(1000)]

    # write the logo file only on request, like the histogram below
    logo_file = f"gc_content_{name}-logo.svg" if WRITEFIGS else None
    sequences = opt_draw_logo(samples, logo_file)

    gc_contents = [100*sum(x in "GC" for x in sequence)/len(sequence) for sequence in sequences]
    h = plt.hist(gc_contents,bins=10,range=(0,100))
    if WRITEFIGS:
        plt.savefig(f"gc_content_{name}-hist.svg")
    # show each histogram on its own; otherwise the histograms of the
    # three weights are drawn on top of each other whenever no logo
    # figure is created in between (e.g. if logomaker is missing)
    plt.show()

Set a target of 75% GC content and then draw targeted samples

In [None]:
sampler = Sampler(model)

sampler.set_target( 0.75 * n, 0.01 * n, 'gc' )

samples = [sampler.targeted_sample() for _ in range(1000)]
sequences = opt_draw_logo(samples)

gc_contents = [100*sum(x in "GC" for x in sequence)/len(sequence) for sequence in sequences]
gc_content = sum(gc_contents) / len(gc_contents)
print(f"GC content in samples: {gc_content:0.2f}%")

# Controlling energy - Multiple features

In [None]:
# recall current model
model = Model(n,4) 
bps = parse(target)
model.add_constraints(BPComp(i,j) for (i,j) in bps)
model.add_functions([GCCont(i) for i in range(n)], 'gc')

In [None]:
# add (base pair) energy control
model.add_functions([BPEnergy(i, j, (i-1, j+1) not in bps)
                     for (i,j) in bps], 'energy')

In [None]:
# target specific GC and low energy 
model.set_feature_weight(-2, 'energy')
sampler = Sampler(model)
sampler.set_target(0.75*n, 0.01*n, 'gc')
samples = [sampler.targeted_sample() for _ in range(10)]

In [None]:
opt_draw_logo(samples)

In [None]:
# add stacking energy control 
# - this could be used in place of defining base pair energy
#   in the code above
model.add_functions([StackEnergy(i, j)
    for (i,j) in bps if (i+1,j-1) in bps], 'energy')

## Targeting Turner energy

NOTE: here we make use of the Vienna RNA library.
The code in this section won't work
if the library is not installed.

In [None]:
import RNA

In [None]:
# Restate current model
model = Model(n,4) 
bps = parse(target)
model.add_constraints(BPComp(i,j) for (i,j) in bps)
model.add_functions([GCCont(i) for i in range(n)], 'gc')
model.add_functions([BPEnergy(i, j, (i-1, j+1) not in bps)
                     for (i,j) in bps], 'energy')

In [None]:
# add the Turner energy feature
model.add_feature('Energy', 'energy',
    lambda sample, target=target:
        RNA.energy_of_struct(ass_to_seq(sample), target))

In [None]:
# specify targets and draw targeted samples
sampler = Sampler(model)
sampler.set_target(0.75*n, 0.05*n, 'gc')
sampler.set_target(-10, 0.5, 'Energy')
samples = [sampler.targeted_sample() for _ in range(10)]

sequences = opt_draw_logo(samples)

[(seq,RNA.energy_of_struct(seq,target)) for seq in sequences]

# Multiple target structures

In [None]:
# construct model
model = Model(n,4) 
model.add_functions([GCCont(i) for i in range(n)], 'gc')

for k, target in enumerate(targets):
    bps = parse(target)
    model.add_constraints(BPComp(i,j) for (i,j) in bps)
    model.add_functions([BPEnergy(i, j, (i-1, j+1) not in bps)
                         for (i,j) in bps], f'energy{k}')

### Target specific GC content and high affinity to all targets

In [None]:
# set weights for energy targets
for k,_ in enumerate(targets):
    model.set_feature_weight(-2, f'energy{k}')

# create sampler and set target
sampler = Sampler(model)
sampler.set_target(0.75*n, 0.05*n, 'gc')
samples = [sampler.targeted_sample() for _ in range(5)]

sequences = opt_draw_logo(samples)

# annotate sequences with energies (annotate with Turner energies only if RNA module is available)
try:
    import RNA
    sequences = ["".join([seq]+[f" {RNA.energy_of_struct(seq,target):5.1f}" for target in targets]) for seq in sequences]
except ModuleNotFoundError:
    pass

sequences

### Target specific GC content and specific Turner energies for all targets

Note: this will again require the Vienna RNA library

In [None]:
# add Turner energy features for all target targets
for k, target in enumerate(targets):
    model.add_feature(f'Energy{k}', f'energy{k}',
        lambda sample, target=target:
            RNA.energy_of_struct(ass_to_seq(sample), target))

sampler = Sampler(model)
sampler.set_target(0.75*n, 0.01*n, 'gc')

sampler.set_target( -15, 1, 'Energy0')
sampler.set_target( -20, 1, 'Energy1')
sampler.set_target( -20, 1, 'Energy2')

samples = [sampler.targeted_sample() for _ in range(5)]

sequences = opt_draw_logo(samples)

# annotate sequences with energies
["".join([seq]+[f" {RNA.energy_of_struct(seq,target):5.1f}" for target in targets]) for seq in sequences]

## Plot dependencies and tree decomposition

In [None]:
from IPython.display import Image

In [None]:
# Plot dependency graph

# import re explicitly; otherwise the name is available only if one of
# the star imports at the top of the notebook happens to provide it
import re

filename = 'dependency_graph.dot'
model.write_graph(filename, True)

dotfile_to_png(filename)
dotfile_to_pdf(filename)

# switch to the name of the png file (presumably the file written by
# dotfile_to_png above -- verify against the infrared documentation)
filename = re.sub(r"dot$","png",filename)

Image(filename=filename,width=600)

In [None]:
# Plot tree decomposition
sampler = Sampler(model)
print(f"Tree width: {sampler.treewidth()}")
filename="treedecomp"
sampler.plot_td(filename,'png')
sampler.plot_td(filename,'pdf')
sampler.plot_td(filename+".dot",'dot')
Image(filename=filename+".png",width=300)

# Negative design by sampling

In [None]:
target = targets[0]
n = len(target)

In [None]:
def is_mfe_design(sequence, target):
    """Test whether the target has minimum free energy for the sequence.

    Returns True if and only if the energy of the structure target,
    evaluated for sequence, equals the energy reported by ``fc.mfe()``.
    """
    fc = RNA.fold_compound(sequence)
    target_energy = fc.eval_structure(target)
    _, mfe_energy = fc.mfe()
    return target_energy == mfe_energy

In [None]:
def single_target_design_model(target):
    """Construct the model for the design of sequences for one target.

    The model has one variable with four values per position of the
    target, base pair complementarity constraints for the base pairs
    of the target, and the two features 'gc' and 'energy'; the weight
    of 'energy' is set to -1.5.

    Args:
        target: target structure as dot-bracket string.

    Returns:
        The constructed model.
    """
    length = len(target)
    base_pairs = parse(target)

    model = Model(length, 4)
    model.add_constraints(BPComp(i, j) for (i, j) in base_pairs)

    gc_functions = [GCCont(pos) for pos in range(length)]
    model.add_functions(gc_functions, 'gc')

    energy_functions = []
    for (i, j) in base_pairs:
        # the flag tells whether the pair (i,j) is not enclosed
        # directly by the pair (i-1,j+1) of the target
        no_outer_pair = (i-1, j+1) not in base_pairs
        energy_functions.append(BPEnergy(i, j, no_outer_pair))
    model.add_functions(energy_functions, 'energy')

    model.set_feature_weight(-1.5, 'energy')
    return model

In [None]:
# solve by direct sampling
sampler = Sampler(single_target_design_model(target))
sampler.set_target(0.7 * n, 0.1 * n, 'gc')
for i in range(50):
    seq = ass_to_seq(sampler.targeted_sample())
    if is_mfe_design(seq,target):
        print(f"{i} {seq}")

In [None]:
def target_frequency(sequence, target):
    """Return ``pr_structure(target)`` for the fold compound of sequence.

    The partition function is computed first by ``pf()``; then the
    value of ``pr_structure`` for the target structure is returned.
    """
    compound = RNA.fold_compound(sequence)
    compound.pf()
    frequency = compound.pr_structure(target)
    return frequency

In [None]:
sampler = Sampler(single_target_design_model(target))
sampler.set_target(0.7 * n, 0.1 * n, 'gc')
best = 0
for i in range(100):
    seq = ass_to_seq(sampler.targeted_sample())
    freq = target_frequency(seq,target)
    if freq > best:
        best = freq
        print(f"{i} {seq} {freq:.6f}")

## Disruptive base pairs - RNAPOND-like negative design

In [None]:
from collections import Counter

## A hard instance, eterna37
# target = "(((((.((((((.((((((.((((((.((((((.((((((....((((((......)))))).)))))).(((((...(((((((...)))))))))))).)))))).((((((((((((...)))))))...))))).))))))....))))))....))))))....)))))"

## a slightly harder instance
target = "..(((..((((.....)))).((...(((.....)))...))...))).."

n = len(target)
bps = parse(target)

In [None]:
def cg_design_iteration():
    """Run one round of the design with disruptive base pairs.

    Builds the single target model for the global ``target``, forbids
    complementarity for all pairs in the global list ``dbps`` and draws
    100 samples. The base pairs of the MFE structures of all samples
    are counted; the two most frequent ones that are not in the global
    ``bps`` are appended to ``dbps`` (in place) for the next round.

    Returns:
        The string "Not found" if the tree width of the model exceeds
        10 or the model is inconsistent. Otherwise, the last sampled
        sequence for which the energy of the target equals the MFE, or
        None if there was no such sample.
    """
    model = single_target_design_model(target)
    model.add_constraints(NotBPComp(i, j) for (i, j) in dbps)
    sampler = Sampler(model, lazy=True)
    if sampler.treewidth() > 10 or not sampler.is_consistent():
        return "Not found"
    ctr = Counter()
    sol = None
    for _ in range(100):
        seq = ass_to_seq(sampler.targeted_sample())
        fc = RNA.fold_compound(seq)
        mfe, mfe_e = fc.mfe()
        if fc.eval_structure(target) == mfe_e:
            sol = seq
        ctr.update(parse(mfe))
    # base pairs outside of the target, most frequent first
    ndbps = [x[0] for x in ctr.most_common() if x[0] not in bps]
    dbps.extend(ndbps[:2])
    return sol
dbps, seq = [], None
while seq is None: seq = cg_design_iteration()
print(seq)

# Negative design optimization with resampling

In [None]:
## define multi-target design model for resampling of subsets 
def multi_design_model(subset=None, solution=None):
    """Construct the multi-target model, optionally fixing positions.

    Args:
        subset: set of positions that remain variable; None stands for
            all positions.
        solution: assignment that provides the values of all positions
            outside of subset; it is accessed only if there is such a
            position.

    Returns:
        The model for the global ``targets`` with the features 'gc'
        (weight -0.3) and 'energy' (weight -0.8).
    """
    length = len(targets[0])
    positions = set(range(length))
    if subset is None:
        subset = positions

    model = Model(length, 4)

    # restrict each position outside of the subset to its value in solution
    for pos in positions - subset:
        fixed = solution.values()[pos]
        model.restrict_domains(pos, (fixed, fixed))

    model.add_functions([GCCont(pos) for pos in subset], 'gc')

    for structure in targets:
        pairs = parse(structure)
        # only base pairs with at least one end in the subset matter
        touched = [(i, j) for (i, j) in pairs
                   if i in subset or j in subset]
        model.add_constraints(BPComp(i, j) for (i, j) in touched)
        model.add_functions(
            [BPEnergy(i, j, (i-1, j+1) not in pairs)
             for (i, j) in touched],
            'energy')

    model.set_feature_weight(-0.8, 'energy')
    model.set_feature_weight(-0.3, 'gc')
    return model

In [None]:
def multi_defect(sequence, targets, xi=1):
    """Compute the multi-target defect of a sequence.

    The result is the sum of two terms: the average difference between
    the energy of a target and the ensemble energy (second component of
    ``fc.pf()``), and xi times the average absolute energy difference
    over all pairs of targets.

    Args:
        sequence: RNA sequence.
        targets: list of target structures.
        xi: weight of the second term.

    Returns:
        The defect value.
    """
    num_targets = len(targets)
    compound = RNA.fold_compound(sequence)
    ensemble_energy = compound.pf()[1]
    energies = [compound.eval_structure(structure) for structure in targets]

    # the factors stay inside of the sums, such that they are not
    # evaluated if there are no terms
    to_ensemble = sum(1/num_targets * (energy - ensemble_energy)
                      for energy in energies)
    between_targets = sum(
        2/(num_targets*(num_targets-1)) * abs(energies[a]-energies[b])
        for a in range(num_targets)
        for b in range(a+1, num_targets))

    return to_ensemble + xi * between_targets

In [None]:
from random import random, choices
from math import exp

In [None]:
def multi_design_optimize(steps, temp):
    """Minimize the multi-target defect by repeated partial resampling.

    In the first step, a complete assignment is sampled. In every later
    step, only the positions of one randomly chosen component (from
    ``model.connected_components()`` of the first model) are resampled;
    all other positions keep their values of the current solution.
    A new assignment replaces the current one if it is not worse or,
    otherwise, with probability ``exp(-(newval-curval)/temp)``.

    Args:
        steps: number of sampling steps; must be at least 1.
        temp: temperature of the acceptance criterion.

    Returns:
        Pair of the best sequence and its defect value.

    Raises:
        ValueError: if steps is less than 1.
    """
    if steps < 1:
        # without any sample, there is no best solution to report
        raise ValueError("steps must be at least 1")

    # float("inf") does not require the module math to be in scope
    inf = float("inf")
    cc, cur, curval = None, None, inf
    best, bestval = None, inf
    for i in range(steps):
        model = multi_design_model(cc, cur)
        new = Sampler(model).sample()
        newval = multi_defect(ass_to_seq(new),targets,1)
        if (newval <= curval
            or random() <= exp(-(newval-curval)/temp)):
            cur, curval = new, newval
            if curval < bestval:
                best, bestval = cur, curval
        if i==0:
            # the first model has no fixed positions; its components
            # define the subsets that are resampled in later steps
            ccs = model.connected_components()
            # smaller components are chosen more often
            weights = [1/len(component) for component in ccs]
        cc = choices(ccs,weights)[0]
    return (ass_to_seq(best), bestval)

In [None]:
multi_design_optimize(1000,0.015)

In [None]:
import concurrent.futures
def my_multi_design_optimize(i):
    """Worker for the process pool: one optimization run.

    The argument i (the index of the run) is not used. ``seed(None)`` is
    called first, presumably to seed each worker differently -- confirm
    against the documentation of infrared's seed().
    """
    seed(None)
    result = multi_design_optimize(1000, 0.015)
    return result
with concurrent.futures.ProcessPoolExecutor() as executor:
    res = executor.map(my_multi_design_optimize, range(24))
res = list(res)
plt.boxplot([r[1] for r in res])
plt.show()

In [None]:
def evaluate(seq):
    """Collect evaluation data of a designed sequence.

    Returns:
        Tuple of the sequence, its multi-target defect, the result of
        ``fc.mfe()``, the list of energies of the global ``targets``,
        and the GC content formatted as percentage string.
    """
    fc = RNA.fold_compound(seq)
    defect = multi_defect(seq, targets)
    mfe_result = fc.mfe()
    target_energies = [fc.eval_structure(t) for t in targets]
    gc_count = seq.count("G") + seq.count("C")
    gc_percent = f'{100*gc_count/len(seq):0.1f}%'
    return (seq, defect, mfe_result, target_energies, gc_percent)
best = min(res,key=lambda x:x[1])[0]
print(evaluate(best))

for seq,val in res:
    print(evaluate(seq))

In [None]:
# good solutions for the running 3-target example

seq = "AGGGUCCGGGGGGCCCGGGGGUUGACCCCGACCCU" # all mfe; GC 65.7% (1000 steps; ~6s)
seq = "GGGGCCCGGGGGGCCCGGGGGUUGACCCCGGCCCC"
seq = "CCCCUUGCCUCAAGGGCCCUCUUCAGAGGAAGGGG"
fc = RNA.fold_compound(seq)
print(fc.mfe())
print(fc.pf())
print([fc.eval_structure(t) for t in targets])
print([fc.pr_structure(t) for t in targets])
print(multi_defect(seq,targets,xi=1))

In [None]:
#01234567890123456789012345678901234
#GGGGCCCGGGGGGCCCGGGGGUUGACCCCGGCCCC

# Riboswitch tandem design

In [None]:
seqTheo0 = "AAGUGAUACCAGCAUCGUCUUGAUGCCCUUGGCAGCACUUCAGAAAUCUC"\
           "UGAAGUGCUGUUUUUUUU"
seqTet0  = "GGCCUAAAACAUACCAGAGAAAUCUGGAGAGGUGAAGAAUACGACCACCU"\
           "AGGCCGACAGUGGCCUAGGUGGUCGUUUUUUUUU"

In [None]:
seqTheo  = "NNNNGAUACCAGCAUCGUCUUGAUGCCCUUGGCAGCNNNNNNNNNNNNNN"\
           "NNNNNNNNNNUUUUUUUU"
aptTheo  = "(((((...((((((((.....)))))...)))...))))).........."\
           ".................."
termTheo = "...............................(((((((((((((....))"\
           ")))))))))))......."


seqTet   = "NNNNNAAAACAUACCAGAGAAAUCUGGAGAGGUGAAGAAUACGACCACCU"\
           "ANNNNNNNNNNNNNNNNNNNNNNNNUUUUUUUUU"
termTet  = "........................................(((((((((("\
           "(((((......)))))))))))))))........"
aptTet   = "((((((.......(((((....)))))...((((...........)))))"\
           ")))))............................."

spacerLen = 30

In [None]:
aptamers    = aptTheo  + "."*spacerLen + aptTet
terminators = termTheo + "."*spacerLen + termTet
sequence    = seqTheo  + "N"*spacerLen + seqTet

n = len(aptTheo) + spacerLen + len(aptTet)

In [None]:
# Structure constraints over the entire construct of length n; each
# element is padded by '.' according to its place in the concatenation
# (Theo part, spacer, Tet part).
variants = dict(
    empty = '.'*n,
    aptTheo = aptTheo + '.'*(n-len(aptTheo)),
    aptTet = '.'*(n-len(aptTet)) + aptTet,
    # pad the terminators by their own lengths; do not rely on the
    # terminator and aptamer strings having equal lengths
    termTheo = termTheo + '.'*(n-len(termTheo)),
    termTet = '.'*(n-len(termTet)) + termTet,
    # 'x' marks the spacer positions (presumably "unpaired" in the
    # constraint notation of hc_add_from_db -- confirm in ViennaRNA docs)
    spacer = '.'*len(aptTheo) + 'x'*spacerLen + '.'*len(aptTet)
)

In [None]:
def constrained_efe(sequence,c):
    """Return the ensemble energy of sequence under the constraint c.

    The constraint string c is added to the fold compound by
    ``hc_add_from_db``; the result is the second component of ``pf()``.
    """
    compound = RNA.fold_compound(sequence)
    compound.hc_add_from_db(c)
    pf_result = compound.pf()
    return pf_result[1]

def rstd_objective(sequence):
    """Compute the objective of the riboswitch tandem design.

    The constrained ensemble energies of all entries of the global
    ``variants`` are combined into the sum of three terms: the energies
    of both terminators relative to 'empty', the absolute deviations of
    the aptamer energies (relative to 'empty') from 7 and 10, and the
    energy of 'spacer' relative to 'empty'.
    """
    efe = {name: constrained_efe(sequence, constraint)
           for name, constraint in variants.items()}
    empty = efe['empty']

    term_stability = (efe['termTheo'] + efe['termTet']
                      - 2*empty)
    apt_target = (abs(efe['aptTheo'] - empty - 7)
                  + abs(efe['aptTet'] - empty - 10))
    spacer_unfolding = efe['spacer'] - empty

    return term_stability + apt_target + spacer_unfolding

In [None]:
def rstd_model(subset=None, solution=None):
    """Construct the riboswitch tandem model, optionally fixing positions.

    Args:
        subset: set of positions that remain variable; None stands for
            all positions.
        solution: assignment that provides the values of all positions
            outside of subset; it is accessed only if there is such a
            position.

    Returns:
        The model for the global structures ``aptamers`` (feature
        'energy0', weight -0.6) and ``terminators`` (feature 'energy1',
        weight -1) with the IUPAC constraints of the global ``sequence``
        and the feature 'gc' (weight -0.3).
    """
    structures = [aptamers, terminators]
    length = len(structures[0])
    positions = set(range(length))

    model = Model(length, 4)
    if subset is None:
        subset = positions

    # restrict each position outside of the subset to its value in solution
    for pos in positions - subset:
        fixed = solution.values()[pos]
        model.restrict_domains(pos, (fixed, fixed))

    # sequence constraints in IUPAC code
    for pos, symbol in enumerate(sequence):
        model.add_constraints(ValueIn(pos, iupacvalues(symbol)))

    model.add_functions([GCCont(pos) for pos in subset], 'gc')

    for index, structure in enumerate(structures):
        pairs = parse(structure)
        # only base pairs with at least one end in the subset matter
        touched = [(i, j) for (i, j) in pairs
                   if i in subset or j in subset]
        model.add_constraints(BPComp(i, j) for (i, j) in touched)
        model.add_functions(
            [BPEnergy(i, j, (i-1, j+1) not in pairs)
             for (i, j) in touched],
            f'energy{index}')

    model.set_feature_weight(-0.6, 'energy0')
    model.set_feature_weight(-1, 'energy1')
    model.set_feature_weight(-0.3, 'gc')
    return model

def rstd_optimize(steps, temp):
    """Minimize the riboswitch tandem objective by partial resampling.

    In the first step, a complete assignment is sampled. In every later
    step, only the positions of one randomly chosen component (from
    ``model.connected_components()`` of the first model) are resampled;
    all other positions keep their values of the current solution.
    A new assignment replaces the current one if it is not worse or,
    otherwise, with probability ``exp(-(newval-curval)/temp)``.

    Args:
        steps: number of sampling steps; must be at least 1.
        temp: temperature of the acceptance criterion.

    Returns:
        Pair of the best sequence and its objective value.

    Raises:
        ValueError: if steps is less than 1.
    """
    if steps < 1:
        # without any sample, there is no best solution to report
        raise ValueError("steps must be at least 1")

    # float("inf") does not require the module math to be in scope
    inf = float("inf")
    cc, cur, curval = None, None, inf
    best, bestval = None, inf
    for i in range(steps):
        model = rstd_model(cc, cur)
        new = Sampler(model).sample()
        newval = rstd_objective(ass_to_seq(new))
        if (newval <= curval
            or random() <= exp(-(newval-curval)/temp)):
            cur, curval = new, newval
            if curval < bestval:
                best, bestval = cur, curval
        if i==0:
            # the first model has no fixed positions; its components
            # define the subsets that are resampled in later steps
            ccs = model.connected_components()
            # smaller components are chosen more often
            weights = [1/len(component) for component in ccs]
        cc = choices(ccs,weights)[0]
    return (ass_to_seq(best), bestval)

In [None]:
rstd_optimize(100,0.03)

In [None]:
import concurrent.futures
def my_rstd_optimize(i):
    """Worker for the process pool: one riboswitch optimization run.

    The argument i (the index of the run) is not used. ``seed(None)`` is
    called first, presumably to seed each worker differently -- confirm
    against the documentation of infrared's seed().
    """
    seed(None)
    result = rstd_optimize(500, 0.03)
    return result

with concurrent.futures.ProcessPoolExecutor() as executor:
    res = executor.map(my_rstd_optimize, range(12))
res = list(res)

In [None]:
# Report each optimized sequence with its objective value; then, per
# structure constraint: the energy of the constraint string, the
# constrained ensemble energy, and its difference to the 'empty' case.
for seq, val in res:
    print(f"{seq} {val:.2f}")
    fc = RNA.fold_compound(seq)
    # the constraints are stored in the dict `variants`; there is no
    # name `constraints` in this notebook
    efe_empty = constrained_efe(seq, variants['empty'])
    for k,c in variants.items():
        efe = constrained_efe(seq, c)
        print(f"{k:20} {fc.eval_structure(c):8.2f} {efe:8.2f} {efe-efe_empty:8.2f}")

In [None]:
plt.boxplot([r[1] for r in res])
plt.show()

# APPENDIX

## Generate RNAPOND figures

In [None]:
from collections import Counter

## a slightly harder instance
target = "..(((..((((.....)))).((...(((.....)))...))...))).."

n = len(target)
bps = parse(target)
steps = 100

In [None]:
import matplotlib.patches as patches
import matplotlib.cm as cm
import numpy as np
import seaborn as sns

tick = list(range(0,n,5))
cmap = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)

# Function to draw base pair counts for each sampling
def draw_heatmap(ax, counter, bps, dbps, new_dbps, vmax, steps=steps, cbar=True):
    """Draw the base pair counts of one sampling round as heatmap.

    Args:
        ax: axes to draw on.
        counter: mapping from base pairs to their counts.
        bps: base pairs that are framed in blue.
        dbps: base pairs that are framed in red.
        new_dbps: base pairs that are framed in green.
        vmax: maximum count of the color scale (before normalization).
        steps: number by which the counts are divided; the default is
            the value of the global ``steps`` at definition time.
        cbar: whether to draw the color bar.
    """
    # count matrix, normalized by the number of steps
    counts = np.zeros((n, n))
    for bp, count in counter.items():
        counts[bp[0], bp[1]] = count
    frequencies = counts / steps

    # hide the entries below the diagonal
    lower_triangle = np.tri(n, k=-1)

    sns.heatmap(frequencies, vmax=vmax/steps, mask=lower_triangle,
                square=True, cmap=cmap, ax=ax, cbar=cbar)

    # frame the cells of the base pairs; later frames are drawn on top
    for pairs, color in ((bps, "blue"), (dbps, "red"), (new_dbps, "green")):
        for i, j in pairs:
            frame = patches.Rectangle((j,i), 1, 1, fc="none", ec=color, lw=1)
            ax.add_patch(frame)

    ax.xaxis.tick_top()
    ax.yaxis.tick_right()
    for axis_name in ("x", "y"):
        getattr(ax, f"set_{axis_name}ticks")(tick)
        getattr(ax, f"set_{axis_name}ticklabels")(tick)

In [None]:
def cg_design_iteration():
    """Run one recorded round of the design with disruptive base pairs.

    Builds the single target model for the global ``target``, forbids
    complementarity for all pairs in the global list ``dbps`` and draws
    ``steps`` samples. The base pairs of the MFE structures of all
    samples are counted; the two most frequent ones that are not in the
    global ``bps`` are appended to ``dbps`` (in place). The counter, a
    copy of ``dbps`` and the newly added pairs (empty if a solution was
    found) are appended to the global list ``records``.

    Returns:
        Pair ``(found, sol)``. ``found`` tells the calling loop to stop;
        ``sol`` is the last sampled sequence for which the energy of the
        target equals the MFE (None if there was none). If the tree
        width of the model exceeds 10 or the model is inconsistent, the
        pair ``(True, "Not found")`` is returned without sampling.
    """
    model = single_target_design_model(target)
    model.add_constraints(NotBPComp(i, j) for (i, j) in dbps)
    sampler = Sampler(model, lazy=True)
    if sampler.treewidth() > 10 or not sampler.is_consistent():
        # return a pair, since the caller unpacks two values; the True
        # stops the calling loop, which would otherwise never terminate
        return True, "Not found"
    ctr = Counter()
    found, sol = False, None
    for _ in range(steps):
        seq = ass_to_seq(sampler.targeted_sample())
        fc = RNA.fold_compound(seq)
        mfe, mfe_e = fc.mfe()
        if fc.eval_structure(target) == mfe_e:
            found, sol = True, seq
        ctr.update(parse(mfe))
    # base pairs outside of the target, most frequent first
    ndbps = [x[0] for x in ctr.most_common() if x[0] not in bps]
    dbps.extend(ndbps[:2])
    if found:
        records.append((ctr, dbps[:], []))
    else:
        records.append((ctr, dbps[:], ndbps[:2]))
    return found, sol

# One can use seed() provided by infrared to reproduce the result
seed(1000)
found, records, dbps, seq = False, [], [], None
while not found: found, seq = cg_design_iteration()
print(seq)

In [None]:
to_draw = [records[i] for i in [0, 1, -1]]
vmax = max(map(lambda t: max(t[0].values()), to_draw))

In [None]:
fig, axs = plt.subplots(1,3, figsize=(27,7))
fig.tight_layout()
for i in range(3):
    ax = axs[i]
    counter, disruptive, new_dbps = to_draw[i]
    draw_heatmap(ax, counter, bps, disruptive, new_dbps, vmax, steps=100, cbar=i==2)

axs[0].set_title('First Round', y=-0.01)
axs[1].set_title('Second Round', y=-0.01)
axs[2].set_title('Final Round', y=-0.01)

plt.savefig('count_matrix.pdf', dpi=200, bbox_inches='tight')
plt.show()

## Generate stochastic optimization figure

In [None]:
import concurrent.futures

def multi_design_optimize_allsteps(steps, temp):
    """Optimize the multi-target defect and report the best of each step.

    Performs the same partial resampling as multi_design_optimize: a
    complete assignment is sampled in the first step; afterwards, one
    randomly chosen component is resampled per step. A new assignment
    replaces the current one if it is not worse or, otherwise, with
    probability ``exp(-(new - current)/temp)``.

    Args:
        steps: number of sampling steps.
        temp: temperature of the acceptance criterion.

    Returns:
        List with one pair (best sequence, best value) per step.
    """
    history = []
    component, current = None, None
    current_val = math.inf
    best_val = math.inf
    for step in range(steps):
        model = multi_design_model(component, current)
        candidate = Sampler(model).sample()
        candidate_val = multi_defect(ass_to_seq(candidate), targets, 1)

        # accept improvements always; otherwise only by chance
        accept = candidate_val <= current_val
        if not accept:
            accept = random() <= exp(-(candidate_val-current_val)/temp)
        if accept:
            current, current_val = candidate, candidate_val
            if current_val < best_val:
                best, best_val = current, current_val

        if step == 0:
            # components of the first model (which has no fixed
            # positions); smaller ones are chosen more often
            components = model.connected_components()
            weights = [1/len(c) for c in components]
        component = choices(components, weights)[0]

        history.append((ass_to_seq(best), best_val))
    return history

def my_multi_design_optimize_allsteps(i):
    """Worker for the process pool: one run that reports all steps.

    The argument i (the index of the run) is not used. ``seed(None)`` is
    called first, presumably to seed each worker differently -- confirm
    against the documentation of infrared's seed().
    """
    seed(None)
    result = multi_design_optimize_allsteps(5120, 0.015)
    return result
with concurrent.futures.ProcessPoolExecutor() as executor:
    res = executor.map(my_multi_design_optimize_allsteps, range(48))
res = list(res)

In [None]:
the_steps = [1]+[20*(2**i) for i in range(9)]
res2 = [[r[steps-1][1] for r in res] for steps in the_steps]
#print(res2)
fig, ax = plt.subplots()
box = ax.boxplot(res2,
                 labels=the_steps,
                 patch_artist=True,
                 boxprops=dict(facecolor='lightgrey',linewidth=1.25),
                 medianprops=dict(color='blue', linewidth=1.25)
                )
ax.set_ylabel("Multi-defect")
ax.set_xlabel("Iterations")
ax.yaxis.grid(True)
plt.savefig("optimization.svg")

In [None]:
# res2 is a list of lists, which has no attribute T; transpose it
# explicitly (one row per run, one column per number of iterations)
[list(row) for row in zip(*res2)]

In [None]:
def evaluate(seq):
    """Collect evaluation data of a designed sequence.

    Returns:
        Tuple of the sequence, its multi-target defect, the result of
        ``fc.mfe()``, the list of energies of the global ``targets``,
        and the GC content formatted as percentage string.
    """
    fc = RNA.fold_compound(seq)
    defect = multi_defect(seq, targets)
    mfe_result = fc.mfe()
    target_energies = [fc.eval_structure(t) for t in targets]
    gc_count = seq.count("G") + seq.count("C")
    gc_percent = f'{100*gc_count/len(seq):0.1f}%'
    return (seq, defect, mfe_result, target_energies, gc_percent)

res_last = [r[-1] for r in res]

res_last = sorted(res_last,key=lambda x:x[1])

for seq,val in res_last:
    print(evaluate(seq))

In [None]:
## define multi-target design model for resampling of subsets 
def multi_design_model(subset=None, solution=None):
    """Construct the multi-target model, optionally fixing positions.

    Args:
        subset: set of positions that remain variable; None stands for
            all positions.
        solution: assignment that provides the values of all positions
            outside of subset; it is accessed only if there is such a
            position.

    Returns:
        The model for the global ``targets`` with the features 'gc'
        (weight -0.3) and 'energy' (weight -0.5).
    """
    length = len(targets[0])
    positions = set(range(length))
    if subset is None:
        subset = positions

    model = Model(length, 4)

    # restrict each position outside of the subset to its value in solution
    for pos in positions - subset:
        fixed = solution.values()[pos]
        model.restrict_domains(pos, (fixed, fixed))

    model.add_functions([GCCont(pos) for pos in subset], 'gc')

    for structure in targets:
        pairs = parse(structure)
        # only base pairs with at least one end in the subset matter
        touched = [(i, j) for (i, j) in pairs
                   if i in subset or j in subset]
        model.add_constraints(BPComp(i, j) for (i, j) in touched)
        model.add_functions(
            [BPEnergy(i, j, (i-1, j+1) not in pairs)
             for (i, j) in touched],
            'energy')

    model.set_feature_weight(-0.5, 'energy')
    model.set_feature_weight(-0.3, 'gc')
    return model

In [None]:
allbps = list()
for t in targets:
    allbps.extend(parse(t))
allbps = list(set(allbps))
steps = 25
n = len(targets[0])