/
AA-count-nostop.py
29 lines (19 loc) · 947 Bytes
/
AA-count-nostop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/usr/local/bin/python
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import numpy
# set some definitions
seqLen = 11  # required sequence length; also the number of matrix columns
aaList = ["A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y","*"]
# Only sequences carrying this residue at this 0-based index are counted.
centerPos = 5
centerAA = "Y"


def buildFrequencyMatrix(sequences):
    """Count amino-acid occurrences per position over the accepted sequences.

    A sequence is accepted only if it is exactly ``seqLen`` residues long,
    has ``centerAA`` at 0-based index ``centerPos``, and contains no "*".
    All other sequences are skipped without error.

    Args:
        sequences: iterable of sequences; each item is converted with
            ``str()`` before it is inspected.

    Returns:
        A numpy array of shape ``(len(aaList), seqLen)``. Rows follow the
        order of ``aaList`` and columns are sequence positions. Because
        sequences containing "*" are rejected, the "*" row is always zero;
        it is kept so the output keeps one row per ``aaList`` entry.
    """
    # One dict lookup per residue instead of scanning aaList every time.
    rowOf = {aa: row for row, aa in enumerate(aaList)}
    frequencyMatrix = numpy.zeros(shape=(len(aaList), seqLen))
    for sequence in sequences:
        residues = str(sequence)
        # The length test comes first so indexing centerPos is always in range.
        if len(residues) != seqLen or residues[centerPos] != centerAA or "*" in residues:
            continue
        for pos, residue in enumerate(residues):
            row = rowOf.get(residue)
            # Characters that are not listed in aaList are ignored.
            if row is not None:
                frequencyMatrix[row, pos] += 1
    return frequencyMatrix


if __name__ == "__main__":
    # Import data. Records are streamed rather than loaded into a list,
    # since each one is only looked at once.
    records = SeqIO.parse("SampleName.translate.fasta", "fasta")
    frequencyMatrix = buildFrequencyMatrix(record.seq for record in records)
    # Save the array as a tab-delimited text file.
    numpy.savetxt("SampleName.AA-count-nostop.txt", frequencyMatrix, fmt = '%i', delimiter='\t')