Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Format; use google docstring format; fix docstring typos #2

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
221 changes: 194 additions & 27 deletions amino_acids.py
@@ -1,31 +1,198 @@
# Single-letter amino-acid codes. Position k here corresponds to the codon
# group at position k of `codons`; '|' lines up with the TAA/TAG/TGA group.
aa = list('FLIMVSPTAY|HQNKDECWRG')

# One whitespace-separated string per amino acid, split into a list of
# three-letter codons. The result is a list of lists of strings.
codons = [
    group.split()
    for group in (
        'TTT TTC',
        'TTA TTG CTT CTC CTA CTG',
        'ATT ATC ATA',
        'ATG',
        'GTT GTC GTA GTG',
        'TCT TCC TCA TCG AGT AGC',
        'CCT CCC CCA CCG',
        'ACT ACC ACA ACG',
        'GCT GCC GCA GCG',
        'TAT TAC',
        'TAA TAG TGA',
        'CAT CAC',
        'CAA CAG',
        'AAT AAC',
        'AAA AAG',
        'GAT GAC',
        'GAA GAG',
        'TGT TGC',
        'TGG',
        'CGT CGC CGA CGG AGA AGG',
        'GGT GGC GGA GGG',
    )
]
"""
This module maps codons to amino acids.

Attributes:
aa_table (dict(str, str)):
A map from codons to amino acids. Codons are represented as strings
in IUPAC nucleotide notation, e.g. 'TTT'. Amino acids are represented by
their single-letter IUPAC amino acid codes, e.g. 'F' for phenylalanine.
The stop codons 'TAA', 'TAG' and 'TGA' map to '|'.

Examples:

>>> aa_table['TTC']
'F'
>>> aa_table['TTA']
'L'
>>> aa_table['GGA']
'G'

References:
* DNA codon table: https://en.wikipedia.org/wiki/DNA_codon_table
* IUPAC notation: https://en.wikipedia.org/wiki/Nucleic_acid_notation

"""

# Amino acids, in the same order as the groups in `codons`. This is used to
# construct `aa_table`. '|' is paired with the TAA/TAG/TGA group.
amino_acids = [
    'F', 'L', 'I', 'M', 'V', 'S', 'P', 'T', 'A', 'Y', '|', 'H', 'Q', 'N', 'K',
    'D', 'E', 'C', 'W', 'R', 'G']

# A list of lists of codons, in the same order as `amino_acids`.
codons = [
    ['TTT', 'TTC'],
    ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
    ['ATT', 'ATC', 'ATA'],
    ['ATG'],
    ['GTT', 'GTC', 'GTA', 'GTG'],
    ['TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'],
    ['CCT', 'CCC', 'CCA', 'CCG'],
    ['ACT', 'ACC', 'ACA', 'ACG'],
    ['GCT', 'GCC', 'GCA', 'GCG'],
    ['TAT', 'TAC'],
    ['TAA', 'TAG', 'TGA'],
    ['CAT', 'CAC'],
    ['CAA', 'CAG'],
    ['AAT', 'AAC'],
    ['AAA', 'AAG'],
    ['GAT', 'GAC'],
    ['GAA', 'GAG'],
    ['TGT', 'TGC'],
    ['TGG'],
    ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
    ['GGT', 'GGC', 'GGA', 'GGG'],
]

# Create a dictionary lookup table for mapping codons into amino acids.
# See Think Python Ch. 11:
# http://greenteapress.com/thinkpython/html/thinkpython012.html
#
# This is the plain index-based version: `i` selects the matching entries of
# `codons` and `amino_acids`, which is why the two lists must stay aligned.
aa_table = {}
for i in range(len(amino_acids)):
    for codon in codons[i]:
        aa_table[codon] = amino_acids[i]

# Use `enumerate` to avoid having to compute the size of the list,
# and then select each of its items as a separate operation.
# `enumerate` produces a sequence of (index, item) pairs that
# `i` and `nucleotide_codons` bind to; `i` is still needed to pick the
# matching entry out of `amino_acids`.
# See: https://docs.python.org/3/library/functions.html#enumerate
aa_table = {}
for i, nucleotide_codons in enumerate(codons):
    for codon in nucleotide_codons:
        aa_table[codon] = amino_acids[i]

# `zip` produces a sequence of pairs: a pair that contains the first item from
# each of `codons` and `amino_acids`, then the second item from each, and so
# on. Iterating over the pairs removes the need for an index altogether.
# See: https://docs.python.org/3/library/functions.html#zip
aa_table = {}
for nucleotide_codons, amino_acid in zip(codons, amino_acids):
    for codon in nucleotide_codons:
        aa_table[codon] = amino_acid

# Rather than starting from an empty dictionary and filling it entry by
# entry, build the whole table with a single dictionary comprehension.
# The outer `for` walks the (codon group, amino acid) pairs; the inner `for`
# emits one key per codon in the group.
aa_table = {
    triplet: residue
    for group, residue in zip(codons, amino_acids)
    for triplet in group
}

# Instead of including `codons` and `amino_acids` as above, read
# the map from a CSV ("comma-separated values") file.
# The file looks like this:
#   AAA,K
#   AAC,N
#   AAG,K
#   etc.
# The `with` block closes the file once the table is built, and the file is
# iterated line by line instead of being copied into a list first.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped,
# because they would not split into a (codon, amino acid) pair.
with open('codon_nucleotides.csv') as codon_file:
    aa_table = dict(
        line.strip().split(',')
        for line in codon_file
        if line.strip()
    )



def fn(a, b):
    """Call ``test()`` and return ``None`` whatever its result is.

    ``a`` and ``b`` are accepted but not used by the body. ``test`` is not
    defined in the visible part of this file.
    """
    # The truthiness of test() is still evaluated, but both paths yield None.
    if test():
        return None
    return None


#!/usr/bin/env python3


# The Pandas library can read CSV and Excel tables, and manipulate tables
# within a Python program. It's overkill for this purpose, but
import doctest

# very powerful in general.
# See: http://pandas.pydata.org
import pandas as pd

# Column 0 of the CSV (the codon) becomes the index; column 1 (the amino
# acid) is selected as the values.
aa_table = pd.read_csv('codon_nucleotides.csv', header=None, index_col=0)[1]

# The previous solution, unlike all the others, doesn't actually create
# a Python dict. It creates a (Pandas) Series, which can be indexed by codon
# in the same way. To get an actual dict, do this instead:
aa_table = dict(pd.read_csv('codon_nucleotides.csv',
                            header=None, index_col=0)[1])

# Finally, we could just list out the key-value pairs in the dictionary.
# All 64 codons are present, in alphabetical order; '|' marks the three
# codons TAA, TAG and TGA.
aa_table = {
    'AAA': 'K',
    'AAC': 'N',
    'AAG': 'K',
    'AAT': 'N',
    'ACA': 'T',
    'ACC': 'T',
    'ACG': 'T',
    'ACT': 'T',
    'AGA': 'R',
    'AGC': 'S',
    'AGG': 'R',
    'AGT': 'S',
    'ATA': 'I',
    'ATC': 'I',
    'ATG': 'M',
    'ATT': 'I',
    'CAA': 'Q',
    'CAC': 'H',
    'CAG': 'Q',
    'CAT': 'H',
    'CCA': 'P',
    'CCC': 'P',
    'CCG': 'P',
    'CCT': 'P',
    'CGA': 'R',
    'CGC': 'R',
    'CGG': 'R',
    'CGT': 'R',
    'CTA': 'L',
    'CTC': 'L',
    'CTG': 'L',
    'CTT': 'L',
    'GAA': 'E',
    'GAC': 'D',
    'GAG': 'E',
    'GAT': 'D',
    'GCA': 'A',
    'GCC': 'A',
    'GCG': 'A',
    'GCT': 'A',
    'GGA': 'G',
    'GGC': 'G',
    'GGG': 'G',
    'GGT': 'G',
    'GTA': 'V',
    'GTC': 'V',
    'GTG': 'V',
    'GTT': 'V',
    'TAA': '|',
    'TAC': 'Y',
    'TAG': '|',
    'TAT': 'Y',
    'TCA': 'S',
    'TCC': 'S',
    'TCG': 'S',
    'TCT': 'S',
    'TGA': '|',
    'TGC': 'C',
    'TGG': 'W',
    'TGT': 'C',
    'TTA': 'L',
    'TTC': 'F',
    'TTG': 'L',
    'TTT': 'F',
}


# Run the doctests only when this file is executed as a script. Called
# without arguments, `doctest.testmod()` examines the `__main__` module, so
# running it on import would test the importing program instead of this one.
if __name__ == '__main__':
    doctest.testmod()