In [None]:
#########################################################
####
#### Tutorial: RNA Design with Infrared (for Developers)
####
#########################################################

###############################################
## Start with simple sampling of RNA sequences

# -- _the_ main module of Infrared
import infrared as ir

In [None]:
# -- specify a constraint network model

# our first model is _very_ simple: 20 variables (= sequence positions)
# with 4 possible values each, and no dependencies between the variables
model = ir.Model(20,4)
# construct sampler for the Model (using default tree decomposition)
sampler = ir.Sampler(model)

In [None]:
def show_td_info(sampler,width=600):
    """Print statistics of a sampler's tree decomposition and return its plot.

    Prints tree width, bags and edges of ``sampler.td``, asks the sampler to
    plot the tree decomposition as png into the file 'tmp_out.png', and
    returns that file wrapped in an IPython ``Image`` that is displayed with
    the given ``width``.
    """
    decomposition = sampler.td
    print("tree width =", decomposition.treewidth())
    print("bags =", decomposition.bags)
    print("edges =", decomposition.edges)

    # the plot is written to a fixed file name in the working directory
    plot_file = "tmp_out.png"
    sampler.plot_td(plot_file, 'png')

    # imported only here, so IPython is needed only when the function is called
    from IPython.display import Image
    picture = Image(filename=plot_file, width=width)
    return picture

In [None]:
# print the tree decomposition info of the current sampler; the returned image is the cell output
show_td_info(sampler)

In [None]:
# -- evaluate the cluster tree
#   -- in this simple case, this will count the sequences
count = sampler.evaluate()
print("# =",int(count))
# btw, of course, 'count' is the number of all possible seqs 4**20

In [None]:
# -- generate 10 samples
# (for each sample, keep what its values() method returns)
samples = [sampler.sample().values() for i in range(10)]

# last expression of the cell: show the raw samples
samples

In [None]:
# -- and show them (pretty)
# rna.values_to_seq converts the values of one sample to a nucleotide string
from infrared import rna
[rna.values_to_seq(s) for s in samples]

In [None]:
##################################################
## add constraints from an RNA secondary structure
# target structure as a string of brackets and dots, one character per position
structure = "((((...))))(((...)))"

# parse the structure string; the result is iterated as (i,j) pairs further below
bps = rna.parse(structure)
print(bps)

In [None]:
# the nucleotide pairs that are accepted at the two ends of a base pair
complementary_nucleotides = ["AU","CG","GC","GU","UA","UG"]
# -- define complementarity constraints
# arguments of the call below:
#   1. name of the new constraint class; the name BPComp is used right after
#      this call, so the call presumably makes it available in this namespace
#   2. maps the constructor arguments (i,j) to a list of variables
#      (presumably the variables the constraint depends on -- confirm)
#   3. predicate on the values x,y: True iff the two nucleotides, written
#      as a string, are among complementary_nucleotides
ir.def_constraint_class( 
    'BPComp',
    lambda i,j: [i,j],
    lambda x,y: rna.values_to_seq([x,y]) 
                  in complementary_nucleotides
)    
    
## btw, there is already a pre-defined constraint rna.BPComp,
## which we could have used as well

# one complementarity constraint per pair (i,j) in bps
cons = [ BPComp( i , j ) for (i,j) in bps ]
# what vars() returns for each constraint
deps = [ x.vars() for x in cons ]

# last expression of the cell: show the dependencies
deps

In [None]:
# -- reinitialize constraint model, rebuild tree decomp and cluster tree
# (left disabled: an alternative, much smaller list of accepted pairs to experiment with)
#complementary_nucleotides = ["AU"]

seqlen = len(structure) # --> number of positions / variables in the CN
model = ir.Model(seqlen,4)
# add the base pair complementarity constraints defined before
model.add_constraints(cons)

sampler = ir.Sampler(model)

# print the tree decomposition info of the new sampler; the returned image is the cell output
show_td_info(sampler)

In [None]:
# generate samples (with complementarity constraints)
def spit_them_samples_out(sampler,num):
    """Draw ``num`` samples from ``sampler`` and return them as sequences.

    All samples are drawn first; afterwards each of them is converted with
    ``rna.ass_to_seq``. The converted samples are returned as a list, in the
    order in which they were drawn.
    """
    drawn = []
    for _ in range(num):
        drawn.append(sampler.sample())

    sequences = []
    for assignment in drawn:
        sequences.append(rna.ass_to_seq(assignment))
    return sequences

# evaluate the sampler of the constrained model and print the result as integer
count = sampler.evaluate()
print("# =",int(count))
# btw, count == 6**7 * 4**6, by simple combinatorics
# (7 base pairs with 6 accepted nucleotide pairs each,
#  6 unpaired positions with 4 nucleotides each)

# print the structure (indented by two spaces), then show 10 sampled sequences
print("  "+structure)
spit_them_samples_out(sampler,10)

In [None]:
##########################
## Control the GC content

# -- define function for GC Control
# arguments of the call below:
#   1. name of the new function class (GCCont is used in the next cell)
#   2. maps the constructor argument i to the one-element list [i]
#   3. evaluates to True iff the nucleotide for value x is G or C
ir.def_function_class(
    'GCCont',
    lambda i: [i],
    lambda x: rna.value_to_nucleotide( x ) in "GC"
)

## btw, there is predefined rna.GCControl

In [None]:
## -- setup functions
gc_weight = 1 ## <- try different weights: 0.1, 10, ...

# one GCCont function per sequence position
gc_funs = [ GCCont( i )
              for i in range( seqlen ) ]

# add the functions to the model as group 'gc'
model.add_functions(gc_funs, 'gc')
# apply the weight chosen above to 'gc'; without this call gc_weight is
# never used, so trying different weights would not change the samples
model.set_feature_weight(gc_weight, 'gc')

# -- reinitialize sampler
sampler = ir.Sampler( model )

# show 10 sequences sampled under the current gc weight
spit_them_samples_out( sampler, 10 )

In [None]:
##########################
## Control the BP energy

rna.set_bpenergy_table() # set bp energies to magic numbers

# one rna.BPEnergy function per pair (i,j) in bps
# NOTE(review): the meaning of the third argument (False) is not visible here -- confirm against rna.BPEnergy
bpe_funs = [ rna.BPEnergy( i, j, False ) for (i,j) in bps ] 

# add the functions to the model as group 'energy'
model.add_functions(bpe_funs, 'energy')

# weights of 'gc' and 'energy'; both are set to 0 here
# NOTE(review): presumably meant as a starting point for trying other values -- confirm
model.set_feature_weight(0, 'gc')
model.set_feature_weight(0, 'energy')

# rebuild the sampler from the modified model
sampler = ir.Sampler(model)


# print the structure (indented by two spaces), then show 10 sampled sequences
print("  "+structure)
spit_them_samples_out(sampler, 10)

In [None]:
### as example of additional hard constraints: avoid GG dinucleotides

# arguments of the call below:
#   1. name of the new constraint class
#   2. maps the constructor argument i to the neighboring positions [i, i+1]
#   3. predicate on the values x,y: True iff the two nucleotides are not "GG"
ir.def_constraint_class('AvoidGGConstraint',
                         lambda i: [i, i+1],
                         lambda x,y: rna.values_to_seq([x,y]) != "GG")

# one constraint for each i in 0..seqlen-2, i.e. for each pair of adjacent positions
gg_cons = [ AvoidGGConstraint( i ) for i in range(seqlen-1) ]

In [None]:
# add the GG-avoiding constraints to the current model and rebuild the sampler
model.add_constraints(gg_cons)

sampler = ir.Sampler(model)

# print the structure (indented by two spaces), then show 10 sampled sequences
print("  "+structure)
spit_them_samples_out(sampler, 10)

In [None]:
# IncARNation-like model and sampler

# start over with a new model that gets the complementarity constraints and
# the function groups 'energy' and 'gc' (gg_cons is not added to this model)
model =  ir.Model(seqlen, 4)
model.add_constraints(cons)
model.add_functions(bpe_funs, 'energy')
model.add_functions(gc_funs, 'gc')

model.set_feature_weight(-5,'energy')

sampler = ir.Sampler( model )

# NOTE(review): the arguments look like (target value, tolerance, feature name) -- confirm against Sampler.set_target
sampler.set_target( -12, 1, 'energy' )
sampler.set_target( 10, 2, 'gc' )

# -- and sample away

# per sample, print: sequence, value of feature 'energy', value of feature 'gc'
for i in range(10):
    sample = sampler.targeted_sample()
    print("{} {:.2f} {:.2f}".format(rna.ass_to_seq(sample), 
                                    model.eval_feature(sample,'energy'),
                                    model.eval_feature(sample,'gc')))

In [None]:
## similar but with control of Turner energy
##
## ATTENTION: this requires the Vienna RNA package with working Python bindings 
## (currently, this fails in Windows even after installing the package from binaries)
import RNA

model.add_feature( 'Energy', # feature name
                   'energy', # controlled group(s)
                   #
                   # function to evaluate the feature for a sample;
                   # NOTE how the current structure is bound as a default
                   # argument of the lambda
                   lambda sample, structure=structure:
                      RNA.energy_of_struct( rna.ass_to_seq( sample ),
                                            structure )
                 )

# rebuild the sampler, so that it is constructed from the model with the new feature
sampler = ir.Sampler(model)

# NOTE(review): the arguments look like (target value, tolerance, feature name) -- confirm against Sampler.set_target
sampler.set_target( -5, 1, 'Energy' )
sampler.set_target( 10, 2, 'gc' )

# -- and sample away

# per sample, print: sequence and the values of the features 'energy',
# 'Energy' and 'gc'; the sequences are collected in samples (drawn as logo below)
samples = list()
for i in range(20):
    sample = sampler.targeted_sample()
    print("{} {:5.2f} {:5.2f} {:5.2f}".format(rna.ass_to_seq(sample), 
                                    model.eval_feature(sample,'energy'),
                                    model.eval_feature(sample,'Energy'),
                                    model.eval_feature(sample,'gc')))
    samples.append(rna.ass_to_seq(sample))

In [None]:
# Sequence logos
def draw_logo(sequences):
    """Draw a sequence logo for the given sequences.

    Builds the alignment matrix of ``sequences`` with logomaker, creates the
    logo from it, rotates the x tick labels, hides the x tick marks, saves
    the current figure to 'test.svg' and finally shows it.

    logomaker and matplotlib are imported only when the function is called.
    """
    import logomaker as lm
    import matplotlib.pyplot as plt

    seq_logo = lm.Logo(lm.alignment_to_matrix(sequences=sequences))

    # x axis: integer labels rotated by 90 degrees, no tick marks
    seq_logo.style_xticks(anchor=0, fmt='%d', rotation=90)
    x_axis = seq_logo.ax.xaxis
    x_axis.set_ticks_position('none')

    plt.savefig('test.svg')
    plt.show()
#    logo.ax.xaxis.set_tick_params(pad=-1)

def opt_draw_logo(sequences):
    """Draw a sequence logo, unless a module needed for drawing is missing.

    Args:
        sequences: the sequences to draw; passed on to draw_logo.

    If draw_logo raises ModuleNotFoundError, no logo is drawn and a short
    notice is printed instead; other exceptions are not caught.
    """
    try:
        # draw the sequences handed in by the caller (not the global 'samples')
        draw_logo(sequences)
    except ModuleNotFoundError as err:
        # drawing is optional: say why it is skipped instead of failing
        print("skipping sequence logo:", err)

In [None]:
# draw the logo of the sequences collected in samples
opt_draw_logo(samples)

In [None]:
## add iupac sequence constraints and sample again
# one IUPAC code per sequence position
sequence = "RSSSUWWSSNNSNNNNMNYR"

# for each position i, add a ValueIn constraint built from the values that
# rna.iupacvalues returns for the code x at that position
for i,x in enumerate(sequence):
    model.add_constraints(ir.ValueIn(i,rna.iupacvalues(x)))

# rebuild the sampler from the extended model and set the same targets as before
sampler = ir.Sampler(model)

sampler.set_target( -5, 1, 'Energy' )
sampler.set_target( 10, 2, 'gc' )

# -- and sample away

# print 20 sampled sequences and collect them for the logo
samples = list()
for i in range(20):
    sample = rna.ass_to_seq(sampler.targeted_sample())
    print(sample)
    samples.append(sample)
    
opt_draw_logo(samples)

In [None]:
## Samplers from the same model can be used in parallel

## for demonstration, produce K samplers of the model
## with different target energies

K = 3

model.set_feature_weight( 0, 'Energy' )

# all K samplers are constructed from the same model
samplers = [ ir.Sampler(model) for _ in range(K) ]

# sampler number k gets -3*k as its target for 'Energy'
for k,sampler in enumerate(samplers):
    sampler.set_target( -3*k, 0.01, 'Energy' )

# 5 rounds; in each round, every sampler produces one targeted sample.
# printed per sample: target of the sampler, sequence, value of 'Energy'
for i in range(5):
    for k,sampler in enumerate(samplers):
        sample = sampler.targeted_sample()
        print("target={:2} {} {:5.2f}".format(
            sampler.model.features['Energy'].target,
            rna.ass_to_seq(sample),
            model.eval_feature(sample,'Energy'),
        ))

# finally, report the weight of 'Energy' in each sampler's model
for k,sampler in enumerate(samplers):
    print(f"weight_{k} = {sampler.model.features['Energy'].weight}")