In [1]:
from cogent3 import make_aligned_seqs

# Five aligned DNA sequences, each 36 positions long; s1 carries a
# two-position gap ("--") near its end.
data = {
    "s1": "GCTCATGCCAGCTCTTTACAGCATGAGAACA--AGT",
    "s2": "ACTCATGCCAACTCATTACAGCATGAGAACAGCAGT",
    "s3": "ACTCATGCCAGCTCATTACAGCATGAGAACAGCAGT",
    "s4": "ACTCATGCCAGCTCATTACAGCATGAGAACAGCAGT",
    "s5": "ACTCATGCCAGCTCAGTACAGCATGAGAACAGCAGT",
}

# Build the alignment from the name -> sequence mapping, declared as DNA.
nt_seqs = make_aligned_seqs(data=data, moltype="dna")
# Bare last expression so the notebook displays the alignment.
nt_seqs

0,1
,0
s2,ACTCATGCCAACTCATTACAGCATGAGAACAGCAGT
s1,G.........G...T................--...
s3,..........G.........................
s4,..........G.........................
s5,..........G....G....................


In [2]:
# Translate every sequence in the alignment using genetic code 1.
# NOTE(review): incomplete_ok=True presumably permits codons that are not
# fully resolved; in the displayed result s1's gapped codon ("A--") is
# shown as "?" -- confirm against the cogent3 docs.
nt_seqs.get_translation(gc=1, incomplete_ok=True)

0,1
,0
s2,THANSLQHENSS
s1,A..S......?.
s3,...S........
s4,...S........
s5,...S.V......


In [3]:
from cogent3 import get_code

# Fetch genetic code 1 and translate a plain nucleotide string with it:
# TTT GCA AAC -> "FAN".
standard_code = get_code(1)
standard_code.translate("TTTGCAAAC")

'FAN'

In [4]:
from cogent3 import get_code, make_seq

standard_code = get_code(1)
seq = make_seq("ATGCTAACATAAA", moltype="dna")
# sixframes() returns six translations of seq. In the printed result the
# first three are the forward frames starting at offsets 0, 1 and 2, and the
# last three are the same offsets on the reverse complement; "*" marks a
# stop codon.
translations = standard_code.sixframes(seq)
print(translations)

['MLT*', 'C*HK', 'ANI', 'FMLA', 'LC*H', 'YVS']


In [5]:
from cogent3 import get_code, make_seq

standard_code = get_code(1)
seq = make_seq("ATGCTAACATAAA", moltype="dna")
# Find stop codons in the reading frame that begins at index 0. The result,
# [9], is the index of the first base of the in-frame TAA; the out-of-frame
# TAA that starts at index 4 is not reported.
stops_frame1 = standard_code.get_stop_indices(seq, start=0)
stops_frame1

[9]

In [6]:
# Slice the three bases starting at the first reported index to show the
# stop codon itself.
stop_index = stops_frame1[0]
seq[stop_index : stop_index + 3]

0,1
,0
,TAA


In [7]:
from cogent3 import get_code, make_seq

standard_code = get_code(1)
# Indexing the genetic code with a codon gives its one-letter amino acid.
standard_code["TTT"]

'F'

In [8]:
# Indexing with a one-letter amino acid gives the list of codons encoding it.
standard_code["A"]

['GCT', 'GCC', 'GCA', 'GCG']

In [9]:
from cogent3 import get_code

standard_code = get_code(1)
# Codon -> amino acid lookup: "TTT" maps to "F".
standard_code["TTT"]

'F'

In [10]:
from cogent3 import get_code

standard_code = get_code(1)
# Amino acid -> codons lookup: "A" maps to the four GCN codons.
standard_code["A"]

['GCT', 'GCC', 'GCA', 'GCG']

In [11]:
# Look up the codon list for each target amino acid. The result is a list of
# lists: one inner list per amino acid, in the same order as `targets`.
targets = ["A", "C"]
codons = [standard_code[aa] for aa in targets]
codons

[['GCT', 'GCC', 'GCA', 'GCG'], ['TGT', 'TGC']]

In [12]:
# Flatten the per-amino-acid codon lists into a single list of codons.
# A nested comprehension walks every codon exactly once, whereas
# sum(codons, []) allocates a new, ever-longer intermediate list for each
# inner list it appends. The resulting list and its order are unchanged.
flat_list = [codon for aa_codons in codons for codon in aa_codons]
flat_list

['GCT', 'GCC', 'GCA', 'GCG', 'TGT', 'TGC']

In [13]:
from cogent3 import get_code

gc = get_code(1)
# Codon alphabet of this genetic code. The printed result lists 61 codons:
# the stop codons TAA, TAG and TGA are absent.
alphabet = gc.get_alphabet()
print(alphabet)

('TTT', 'TTC', 'TTA', 'TTG', 'TCT', 'TCC', 'TCA', 'TCG', 'TAT', 'TAC', 'TGT', 'TGC', 'TGG', 'CTT', 'CTC', 'CTA', 'CTG', 'CCT', 'CCC', 'CCA', 'CCG', 'CAT', 'CAC', 'CAA', 'CAG', 'CGT', 'CGC', 'CGA', 'CGG', 'ATT', 'ATC', 'ATA', 'ATG', 'ACT', 'ACC', 'ACA', 'ACG', 'AAT', 'AAC', 'AAA', 'AAG', 'AGT', 'AGC', 'AGA', 'AGG', 'GTT', 'GTC', 'GTA', 'GTG', 'GCT', 'GCC', 'GCA', 'GCG', 'GAT', 'GAC', 'GAA', 'GAG', 'GGT', 'GGC', 'GGA', 'GGG')


In [14]:
from cogent3 import make_seq

my_seq = make_seq("ATGCACTGGTAA", name="my_gene", moltype="dna")
# Split the sequence into consecutive, non-overlapping 3-base motifs.
# NOTE: this rebinds `codons`, which an earlier cell used for a list of
# per-amino-acid codon lists.
codons = my_seq.get_in_motif_size(3)
codons

['ATG', 'CAC', 'TGG', 'TAA']

In [15]:
# Translate with default arguments. The trailing TAA stop codon of my_seq
# does not appear in the displayed peptide (MHW).
pep = my_seq.get_translation()
pep

0,1
,0
my_gene,MHW


In [16]:
from cogent3.core.alphabet import AlphabetError

In [17]:
from cogent3 import make_seq

# Sequence with an internal stop codon (TGA at indices 3-5) as well as a
# trailing TAA: ATG TGA TGG TAA.
seq = make_seq("ATGTGATGGTAA", name="s1", moltype="dna")

In [18]:
# Demonstrate the failure mode: seq contains an internal stop codon (TGA), so
# translating with default arguments raises AlphabetError. The error is
# caught and printed (as "Name: message") so the demonstration is still
# visible while the notebook keeps running under Restart & Run All.
# `pep` is left unassigned by this cell when the error occurs, exactly as it
# would be if the exception propagated.
try:
    pep = seq.get_translation()
except AlphabetError as err:
    print(f"{type(err).__name__}: {err}")

AlphabetError: unresolvable codon 'TGA' in s1

In [19]:
# include_stop=True keeps stop codons in the translation, shown as "*", so
# the sequence with an internal TGA translates (to M*W*) rather than failing
# with AlphabetError.
pep = seq.get_translation(include_stop=True)
pep

0,1
,0
s1,M*W*
