In [92]:
from Bio import AlignIO
from Bio.Alphabet import single_letter_alphabet
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment

In [93]:
aln_file1 = '/home/sur/micropopgen/exp/2018/today9/filtered/dnaG.aln'
aln_file2 = '/home/sur/micropopgen/exp/2018/today9/filtered/nusA.aln'
aln1 = AlignIO.read(aln_file1, 'fasta')
aln2 = AlignIO.read(aln_file2, 'fasta')

In [94]:
print(aln1)
print(aln2)

SingleLetterAlphabet() alignment with 3 rows and 547 columns
MYKLSEDEEKIINNIDIVDLIGQYVDNKAVSYKGYSKSENTSHP...VLK Streptobacillus_moniliformis_61686.CDS_0
-VIDQQTRQRILDSAQILDVVSDFISRRQVSYVGLCHSDRNYNP...TII Porphyromonas_sp_57899.CDS_0
HFLPPEFLDELRQRITLSSLIGRTVKTRAREFRACCHNEKTYND...VDE Zymomonas_mobilis_57566.CDS_0
SingleLetterAlphabet() alignment with 2 rows and 398 columns
-----MKKTTSLIELLEYQELSNTLAVDSLRNVLSKMFSDDTFD...--- Porphyromonas_sp_57899.CDS_0
MASAISNALLAIADV---ARELAIVEAEAIQRAARARYAENDIR...EKA Zymomonas_mobilis_57566.CDS_0


In [95]:
alns = [aln1[:,0:10], aln2[:,0:10]]
species = []
species_per_aln = []
for a in alns:
    specs = [r.id for r in a]
    species.extend(specs)
    species_per_aln.append(specs)

species = list(set(species))
print(species)
print(species_per_aln)

['Streptobacillus_moniliformis_61686.CDS_0', 'Porphyromonas_sp_57899.CDS_0', 'Zymomonas_mobilis_57566.CDS_0']
[['Streptobacillus_moniliformis_61686.CDS_0', 'Porphyromonas_sp_57899.CDS_0', 'Zymomonas_mobilis_57566.CDS_0'], ['Porphyromonas_sp_57899.CDS_0', 'Zymomonas_mobilis_57566.CDS_0']]


In [98]:
def reorder_alignment(aln, specs, species, gap='-'):
    """Take an alignment and reorder it acording to species list.
    Add records as gapped if missing"""
    
    new_aln = []
    missing_seq = seq = ''.join([gap] * aln.get_alignment_length())
    for s in species:    
        # Check if species exist in alignment
        try:
            i = specs.index(s)
        except ValueError:
            i = -1
        except:
            raise ValueError

        if i >= 0:
            new_aln.append(aln[i])
        elif i == -1:
            new_aln.append(SeqRecord(Seq(missing_seq, single_letter_alphabet), id=s))

    new_aln = MultipleSeqAlignment(new_aln)
    
    return(new_aln)

SingleLetterAlphabet() alignment with 3 rows and 0 columns
 Streptobacillus_moniliformis_61686.CDS_0
 Porphyromonas_sp_57899.CDS_0
 Zymomonas_mobilis_57566.CDS_0


In [107]:
# Create empty alignmet
new_aln = MultipleSeqAlignment([SeqRecord(Seq('', single_letter_alphabet), id=s) for s in species])
# new_alns = []
for i in range(len(alns)):
    print("alginment", i)
    specs = species_per_aln[i]
    if specs != species:
        print("\treordering")
        # new_alns.append(reorder_alignment(aln=alns[i], specs=specs, species=species))
        new_aln = new_aln + reorder_alignment(aln=alns[i], specs=specs, species=species)
    else:
        print("matched")
        new_aln = new_aln + alns[i]

alginment 0
matched
alginment 1
	reordering


In [108]:
print(new_alns[0])
print(new_alns[1])

SingleLetterAlphabet() alignment with 3 rows and 10 columns
MYKLSEDEEK Streptobacillus_moniliformis_61686.CDS_0
-VIDQQTRQR Porphyromonas_sp_57899.CDS_0
HFLPPEFLDE Zymomonas_mobilis_57566.CDS_0
SingleLetterAlphabet() alignment with 3 rows and 10 columns
---------- Streptobacillus_moniliformis_61686.CDS_0
-----MKKTT Porphyromonas_sp_57899.CDS_0
MASAISNALL Zymomonas_mobilis_57566.CDS_0


In [109]:
print(new_alns[0] + new_alns[1])

SingleLetterAlphabet() alignment with 3 rows and 20 columns
MYKLSEDEEK---------- Streptobacillus_moniliformis_61686.CDS_0
-VIDQQTRQR-----MKKTT Porphyromonas_sp_57899.CDS_0
HFLPPEFLDEMASAISNALL Zymomonas_mobilis_57566.CDS_0


In [110]:
print(new_aln)

SingleLetterAlphabet() alignment with 3 rows and 20 columns
MYKLSEDEEK---------- Streptobacillus_moniliformis_61686.CDS_0
-VIDQQTRQR-----MKKTT Porphyromonas_sp_57899.CDS_0
HFLPPEFLDEMASAISNALL Zymomonas_mobilis_57566.CDS_0


In [72]:
new_aln

[SeqRecord(seq=Seq('MYKLSEDEEKIINNIDIVDLIGQYVDNKAVSYKGYSKSENTSHPVKIFKDSSKG...VLK', SingleLetterAlphabet()), id='Streptobacillus_moniliformis_61686.CDS_0', name='Streptobacillus_moniliformis_61686.CDS_0', description='Streptobacillus_moniliformis_61686.CDS_0 <unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('-VIDQQTRQRILDSAQILDVVSDFISRRQVSYVGLCHSDRNYNPANICKCSCEG...TII', SingleLetterAlphabet()), id='Porphyromonas_sp_57899.CDS_0', name='Porphyromonas_sp_57899.CDS_0', description='Porphyromonas_sp_57899.CDS_0 <unknown description>', dbxrefs=[]),
 SeqRecord(seq=Seq('HFLPPEFLDELRQRITLSSLIGRTVKTRAREFRACCHNEKTYNDEGFYHCGCAH...VDE', SingleLetterAlphabet()), id='Zymomonas_mobilis_57566.CDS_0', name='Zymomonas_mobilis_57566.CDS_0', description='Zymomonas_mobilis_57566.CDS_0 <unknown description>', dbxrefs=[])]

In [37]:
aln = aln1

# Create dictionary of records




for i in range(len(species)):
    r = aln[i]
    spec = r.id
    print(spec)
    
    if spec != species[i]
    
    

SyntaxError: invalid syntax (<ipython-input-37-7a428d7ff3c7>, line 13)

2