In [30]:
#########################################################
####
#### Tutorial: Alignment with Infrared (for Developers)
####
#########################################################

import infrared as ir
from infrared import Model, BoltzmannSampler, def_function_class, def_constraint_class
import infrared.rna as rna

In [49]:
# start by defining classes for representing and scoring alignments

class Alignment:
    def __init__( self, assignment, a, s, b ):
        self._a = a
        self._s = s
        self._b = b
        self._assignment = assignment
        self._values = assignment.values()
        self._edges = self._values_to_edges(self.values)
    
    @property
    def values(self):
        return self._values

    @property
    def edges(self):
        return self._edges

    @staticmethod 
    def _values_to_edges(values):
        edges = list()
        for x,x1 in zip(values,values[1:]):
            edges.append(x1-1 if x<x1 else -1)
        edges = edges[:-1]
        return (edges)
    
    def _alignment_strings(self):
        a = list()
        s = list()
        b = list()
        
        
        for i,(x,x1) in enumerate(zip(self._values,self._values[1:])):
            if x<x1:
                for j in range(x,x1-1):
                    a.append('-')
                    s.append('-')
                    b.append(self._b[j])
                if i<len(self._a):
                    a.append(self._a[i])
                    s.append(self._s[i])
                    b.append(self._b[x1-1])
            if x==x1:
                if i<len(self._a):
                    a.append(self._a[i])
                    s.append(self._s[i])
                    b.append('-')
        
        return ["".join(s),"".join(a),"".join(b)]
    
    def __str__(self):
        alignment = self._alignment_strings()
        return "\n".join(alignment)

def is_compl(x,y):
    return (x+y) in ["AU","CG","GC","GU","UA", "UG"]
    
class AliScore():
    def __init__(self, a, b):
        self._a = a
        self._b = b
    def gamma( self, k = 1 ):
        return -2*k
    def sigma( self, x, y ):
        return 4 if self._a[x-1]==self._b[y-1] else -1
    def psi( self, xi, xj ):
        return 10 if is_compl(self._b[xi-1],self._b[xj-1]) else 0

In [59]:
## first, define the problem-specific types/classes of constraints and functions

## constraints and functions use var to translate named variables to internal indices

def_constraint_class( 'LeqConstraint', lambda i,var: var([('X',i-1),('X',i)]),
                      lambda x1,x: x1<=x )

def_constraint_class( 'EqConstraint', lambda i,c,var: var([('X',i)]), lambda x,c: x==c )

def_constraint_class( 'XYRelation', lambda i,var: var([('X',i-1),('X',i),('Y',i)]),
                      lambda x1,x,y: (x1<x and y==1) or (x1==x and y==0) )

## no weights in the functions definitions
def_function_class( 'SigmaFunction', lambda i,score,var: var([ ('X',i), ('Y',i) ]),
                     lambda x,y,i,score: score.sigma(i,x) if y==1 else 0 )

def_function_class( 'InsertFunction', lambda i,score,var: var([('X',i-1),('X',i)]),
                    lambda x1,x,score: score.gamma(x-x1) )

def_function_class( 'DeleteFunction', lambda i,score,var: var([('Y',i)]),
                    lambda y,score: score.gamma(1) if y==0 else 0 )

def_function_class( 'PsiFunction', lambda i,j,score,var: var([('X',i),('Y',i),('X',j),('Y',j)]),
                    lambda xi,yi,xj,yj,score: score.psi(xi,xj) if yi==1 and yj==1 else 0 )



In [70]:
## use constraints and functions to define the alignment model for a specific instance

## the instance
#a = "ACUUG"
#s = "([.)]"
#b = "AGGAUC"

a = "CGCCAAUAAUAGGGUUUAU"
s = "(.(([[.[{{.)))]]]}}"
b = "GCGCAAACAAGCGAAUUUUU"

n = len(a)
m = len(b)


bps = rna.parseRNAStructureBps(s)
print(bps)
score = AliScore( a, b )

model = Model()

# X_i's encode alignments, such that 
#  * X_i=j for alignment edges (i,j) (1-based pos indices!) and
#  * X_i==X_i-1 if X_i is deleted
#  * X_0 = 0
#  * X_n+1=m+1
model.add_variables( 1, 1, name = 'X' )
model.add_variables( n, m+1, name = 'X' )
model.add_variables( 1, m+2, name = 'X' )

# Y_i is 1 iff i is matched
model.add_variables( n+1, 2, name = 'Y' )

var = lambda vs: model.idx(vs)

model.add_constraints( LeqConstraint( i, var ) for i in range(1,n+2) )
model.add_constraints( XYRelation( i, var ) for i in range(1,n+1) )
model.add_constraints( EqConstraint( n+1, m+1, var ) )

model.add_functions( [ SigmaFunction( i, score, var )
                       for i in range( 1, n+1 ) ], group = 'sigma' )
model.add_functions( [ InsertFunction( i, score, var )
                       for i in range( 1, n+2 ) ], group = 'indels' )
model.add_functions( [ DeleteFunction( i, score, var )
                       for i in range( 1, n+1 ) ], group = 'indels')

model.add_functions( [ PsiFunction(i+1, j+1, score, var) for (i,j) in bps ], group = 'psi' )

#### not yet available, potential future features/syntax
## optimizer for the model
#alioptimizer = ArcticOptimizer( model )
#
#solution = alioptimizer.optimum()
#score = model.eval_feature( solution )
#print("Score", score )
#print( Alignment( solution, a, b ) )
#
# get suboptimal solutions from optimizer 
#suboptimals = alioptimizer.suboptimals( threshold = score - 2 )


# sampler for the model
alisampler = BoltzmannSampler( model )

## set weights
model.set_feature_weight(5,"sigma")
model.set_feature_weight(1,"indels")
model.set_feature_weight(5,"psi")

#### not yet available, potential future feature/syntax
# tell sampler to produce non-redundant samples
# alisampler.set_non_redundant( True )


print("Treewidth:",alisampler.treewidth())
alisampler.plot_td("treedecomp.pdf")

## and print samples
for _ in range(5):
    sample = alisampler.sample()
    print()
    sigma = model.eval_feature( sample, 'sigma' )
    indels = model.eval_feature( sample, 'indels' )
    psi = model.eval_feature( sample, 'psi' )
    print(f"Score {sigma + indels + psi} = {sigma} + {indels} + {psi}" )
    print(sample.values())
    print( Alignment( sample, a, s, b ) )

[(0, 13), (2, 12), (3, 11), (4, 16), (5, 15), (7, 14), (8, 18), (9, 17)]
Treewidth: 5

Score 76 = 44 + -48 + 80
[0, 2, 2, 3, 4, 5, 6, 6, 7, 9, 10, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-(.(([[.[-{{.)))--]]]}}
-CGCCAAUA-AUAGGG--UUUAU
GC-GCAA-ACAA-GCGAAUUUUU

Score 76 = 44 + -48 + 80
[0, 2, 2, 3, 4, 5, 6, 6, 7, 9, 10, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-(.(([[.[-{{.)))--]]]}}
-CGCCAAUA-AUAGGG--UUUAU
GC-GCAA-ACAA-GCGAAUUUUU

Score 76 = 44 + -48 + 80
[0, 2, 2, 3, 4, 5, 6, 6, 7, 9, 10, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-(.(([[.[-{{.)))--]]]}}
-CGCCAAUA-AUAGGG--UUUAU
GC-GCAA-ACAA-GCGAAUUUUU

Score 76 = 44 + -48 + 80
[0, 2, 2, 3, 4, 5, 6, 6, 7, 9, 10, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-(.(([[.[-{{.)))--]]]}}
-CGCCAAUA-AUAGGG--UUUAU
GC-GCA