## AlignIO - The module for multiple sequence alignments

Read more about AlignIO:
http://biopython.org/wiki/AlignIO

In [None]:
from Bio import AlignIO

# Load the alignment stored in 'rab20_ncbi.aln' (FASTA format) into
# `alignment`; AlignIO.read is used here the same way as SeqIO.read.
alignment = AlignIO.read('rab20_ncbi.aln', 'fasta')

In [None]:
# Alignment length, i.e. the number of columns (shown as the cell's result)
alignment.get_alignment_length()

In [None]:
# Show the Python type of the loaded alignment object
type(alignment)

In [None]:
# Sub-alignment: the first six sequences, columns with 0-based indices 80-89
# (1-based positions 81-90); the slice end 90 is exclusive.
my_new_little_friend = alignment[:6, 80:90]

In [None]:
# Print the sub-alignment sliced out in the previous cell
print(my_new_little_friend)

In [None]:
# First column (index 0) restricted to the first 6 sequences
print(alignment[:6, 0])

In [None]:
# Column at 0-based index 80 (i.e. the 81st column) across all sequences
alignment[:,80]

In [None]:
# First 3 sequences, columns from 0-based index 5 (the 6th column) to the end
print(alignment[:3, 5:])

In [None]:
# Turn the alignment object into a plain Python list of its rows
x = list(alignment)

In [None]:
# First SeqRecord from the list
x[0]

In [None]:
# Sequence of the first SeqRecord from the list
print(x[0].seq)

In [None]:
# For every alignment column with 0-based index 100..109, print the three
# most frequent symbols in that column together with their counts.
from collections import Counter

for column_index in range(100, 110):
    # A column is iterable symbol by symbol, so Counter can tally it directly.
    residue_counts = Counter(alignment[:, column_index])
    top_three = residue_counts.most_common(3)
    print("column number: ", column_index, top_three)

In [None]:
# Using AlignIO to convert an alignment file from one format to another
# (Stockholm input -> relaxed PHYLIP output).

from Bio import AlignIO

# The `with` block closes both files even if parsing or writing fails.
with open("rab20_ncbi.sth", "r") as input_handle, \
        open("rab20_ncbi.phy", "w") as output_handle:
    # AlignIO.parse returns an iterator over the alignments in the file, so
    # it has to be consumed while the input file is still open. It gets its
    # own name so that `alignment` (the alignment object loaded earlier) is
    # not replaced by a one-shot iterator.
    alignments = AlignIO.parse(input_handle, "stockholm")
    AlignIO.write(alignments, output_handle, "phylip-relaxed")

In [None]:
# Fraction of alignment columns that are conserved: a single symbol that is
# not the gap character '-' occupies more than 95% of the sequences.
from collections import Counter
from Bio import AlignIO
import pandas as pd  # unused in this cell; kept because other cells may rely on `pd`

CONSERVED_FRACTION = 0.95  # a column counts as conserved above this share

alignment = AlignIO.read('rab20_ncbi.aln', 'fasta')
aln_length = alignment.get_alignment_length()
n_sequences = len(alignment)  # loop-invariant, so computed only once

total = 0
for i in range(aln_length):
    # most_common(1) yields the single most frequent symbol and its count,
    # which is all the conservation check needs.
    symbol, count = Counter(alignment[:, i]).most_common(1)[0]
    if count / n_sequences > CONSERVED_FRACTION and symbol != '-':
        total += 1

# Print the conserved share of columns rounded to two decimals; an alignment
# without any columns is reported as 0.0 instead of dividing by zero.
print(round(total / aln_length, 2) if aln_length else 0.0)