# Affine protein bi-alignment

## Figures for the manuscript "Bi-Alignments with Affine Gap Costs"

In [None]:
import bialignment
import bialignment as ba
import timeit
import os

# Directory (relative to this notebook) that receives the SVG figures written
# through the `outname=` argument of ba.plot_alignment.
figuresdir = "../Figs"
# Directory (relative to this notebook) holding the example input files
# (the .cfssp files read for the DNA polymerase example).
examplesdir = "../Examples"

### Introductory Example

In [None]:
# Base parameter set handed to ba.BiAligner. Several entries are overridden
# further down, right before the aligner for this example is constructed.
args = dict(
    type='Protein',
    gap_cost=-50,
    gap_opening_cost=-200,
    shift_cost=-210,
    structure_weight=800,
    max_shift=1,
    simmatrix='BLOSUM62',
    nameA='Ecoli',
    nameB='Xanthomonas',
    nodescription=False,
    outmode='full',
)

# Toy input: two sequences (seqA/seqB), each with a per-position annotation
# string of the same length (strA/strB; presumably secondary structure --
# confirm against the bialignment documentation).
nameA, nameB = 'A', 'B'
seqA = "RAKLPLKEKKLTATANYHPGIRYIMTGYSAKYIYSSTYARFR"
strA = "CHHHHHHHHHHHHHCCCCTCEEEEEEECCTCEEEEEEEECCC"
seqB = "KAKLPLKEKKLTRTANYHPGIRYIMTGYSAKRIYSSTYAYFR"
strB = "HHHHHHHHHHHHCCCCCCTCEEEEEEECCCCCEEEEEEEECC"

# Keyword arguments shared by all three figures of this example.
fig1_plot_options = dict(name_offset=3, show_position_numbers=False)

# Figure 1A: the four ungapped rows as given above.
unaligned_rows = [(nameA, seqA), (nameB, seqB), ('', strA), ('', strB)]
ba.plot_alignment(unaligned_rows, 80,
                  outname=os.path.join(figuresdir, "fig1A.svg"),
                  **fig1_plot_options)

# Figure 1B: hand-written rows that contain '-' gap characters.
seqA1 = "RAKLPLKEKKLTATANYH-PGIRYIMTGYSAK-YIYSSTYARFR"
strA1 = "CHHHHHHHHHHHHHCCCC-TCEEEEEEECCTC-EEEEEEEECCC"
strB1 = "-HHHHHHHHHHHHCCCCCCTCEEEEEEECCCCCEEEEEEEECC-"
seqB1 = "-KAKLPLKEKKLTRTANYHPGIRYIMTGYSAKRIYSSTYAYFR-"

gapped_rows = [(nameA, seqA1), (nameB, seqB1), ('', strA1), ('', strB1)]
ba.plot_alignment(gapped_rows, 80,
                  outname=os.path.join(figuresdir, "fig1B.svg"),
                  **fig1_plot_options)

# Parameters actually used for the computed bi-alignment of this example;
# updating existing keys keeps the key order of `args` (and thus its printout).
args.update(
    nameA='A',
    nameB='B',
    max_shift=1,
    shift_cost=-150,
    structure_weight=800,
    gap_opening_cost=-150,
    gap_cost=-50,
)

print(args, end="\n\n")

bialigner = ba.BiAligner(seqA, seqB, strA, strB, **args)

score = bialigner.optimize()
print('SCORE', score, end="\n\n")

# Materialize the decoded alignment rows so they can be printed and plotted.
alilines = [*bialigner.decode_trace_full()]
for row_no, row in enumerate(alilines):
    # row[0] is printed as an 18-wide label column, row[1] follows it.
    print("{:2} {:18} {}".format(row_no, row[0], row[1]))

# Figure "fig1-shift": the bi-alignment computed above.
ba.plot_alignment(alilines, 80,
                  outname=os.path.join(figuresdir, "fig1-shift.svg"),
                  **fig1_plot_options)

### DNA Polymerase 1

In [None]:
import bialignment
import bialignment as ba
import timeit

# Parameter set for the DNA polymerase 1 runs; 'max_shift' is overwritten per
# run by the loop that builds the aligners.
args = dict(
    type='Protein',
    gap_cost=-50,
    gap_opening_cost=-200,
    shift_cost=-210,
    structure_weight=800,
    max_shift=1,
    simmatrix='BLOSUM62',
    nameA='Ecoli',
    nameB='Xanthomonas',
    nodescription=False,
    outmode='full',
)

# Full paths of the two example files inside `examplesdir`.
inputfiles = [
    os.path.join(examplesdir, filename)
    for filename in ('DNAPolymerase1_Escherichia.cfssp',
                     'DNAPolymerase1_Xanthomonas.cfssp')
]

# One entry per file. NOTE: the name `input` shadows the builtin, but it is
# kept because the alignment loop further down reads `input[...]`.
input = [ba.read_molecule_from_file(path, type="Protein") for path in inputfiles]

# Optional truncation hook: entries 0 and 1 of every molecule are replaced by
# a slice of themselves. The full slice `[:]` keeps everything; narrow it
# (e.g. `[:100]`) to work on a prefix only.
for molecule in input:
    for part in (0, 1):
        molecule[part] = molecule[part][:]

# Report the length of entry 0 of each molecule.
for molecule in input:
    print(len(molecule[0]))
# For debugging, the parsed molecules can be inspected with: print(input)

In [None]:
remake = False
%store -r stored_alilines
try:
    print(stored_alilines.keys())
except:
    stored_alilines = dict()


In [None]:
for ms in range(3):
    if not remake and (f'max_shift {ms}') in stored_alilines:
        continue
        
    args["max_shift"] = ms

    bialigner = ba.BiAligner(input[0][0],input[1][0],
                             input[0][1],input[1][1], 
                             **args)

    score = timeit.timeit(lambda:bialigner.optimize(),number=1)
    print(score)
    als = list(bialigner.decode_trace_full())
    for i,line in enumerate(alilines):
        print(f"{i:2} {line[0]:12} {line[1]}")

    stored_alilines[(f'max_shift {ms}')] = als
%store stored_alilines

In [None]:
# Text rendering of the cached max_shift=2 alignment: ba.breaklines splits
# the rows into blocks (called with 80; presumably the line width -- confirm),
# and each block is printed followed by one empty line.
alilines = stored_alilines['max_shift 2']
aliblocks = ba.breaklines(alilines, 80)

for block in aliblocks:
    for row_no, entry in enumerate(block):
        label, text = entry
        print("{:2} {:18} {}".format(row_no, label, text))
    print()

In [None]:
# Write one SVG per cached alignment (max_shift 0, 1 and 2) into `figuresdir`.
for shift in range(3):
    alilines = stored_alilines[f'max_shift {shift}']
    svg_path = os.path.join(figuresdir, f"dnapoly1-ms{shift}-sc-210-sw800.svg")
    ba.plot_alignment(alilines, 80, outname=svg_path)