In [2]:
### Chapter 5

In [6]:
## Within chapter exercises

# Creating a get_at_content function
def get_at_content(dna):
    length = len(dna)
    a_count = dna.count('A')
    t_count = dna.count('T')
    at_content = (a_count + t_count) / length
    return at_content

In [7]:
# Running (calling) the function
get_at_content("ATGACTGGACCA")

0.5

In [9]:
# Storing the result of the function as a variable
at_content = get_at_content("ATGACTGGACCA")

# Printing the result of the function
print("AT content is " + str(get_at_content("ATGACTGGACCA")))

AT content is 0.5


In [10]:
# Improvement 1: added rounding and uppercase/lowercase conversion
def get_at_content(dna):
    length = len(dna)
    a_count = dna.count('A')
    t_count = dna.count('T')
    at_content = (a_count + t_count) / length
    return at_content

# Print results
my_at_content = get_at_content("ATGCGCGATCGATCGAATCG")
print(str(my_at_content)) # Works
print(get_at_content("ATGCATGCAACTGTAGC")) # Too many decimal places
print(get_at_content("aactgtagctagctagcagcgta")) # Doesn't recognize lowercase

0.45
0.5294117647058824
0.0


In [12]:
# Improvement 1: added rounding and uppercase/lowercase conversion
def get_at_content(dna):
    length = len(dna)
    a_count = dna.upper().count('A')
    t_count = dna.upper().count('T')
    at_content = (a_count + t_count) / length
    return round(at_content, 2)

# Print results
my_at_content = get_at_content("ATGCGCGATCGATCGAATCG")
print(str(my_at_content))
print(get_at_content("ATGCATGCAACTGTAGC"))
print(get_at_content("aactgtagctagctagcagcgta"))

0.45
0.53
0.52


In [13]:
# Improvement 2: added argument for significant figures
def get_at_content(dna, sig_figs):
    length = len(dna)
    a_count = dna.upper().count('A')
    t_count = dna.upper().count('T')
    at_content = (a_count + t_count) / length
    return round(at_content, sig_figs)

# Print results
test_dna = "ATGCATGCAACTGTAGC"
print(get_at_content(test_dna, 1))
print(get_at_content(test_dna, 2))
print(get_at_content(test_dna, 3))

0.5
0.53
0.529


In [14]:
# Functions don't always have to take an argument
def get_a_number():
    return 42

# Print results
get_a_number()

42

In [15]:
# Functions don't always have to take an argument
def get_at_content():
    dna = "ACTGATGCTAGCTA"
    length = len(dna)
    a_count = dna.upper().count('A')
    t_count = dna.upper().count('T')
    at_content = (a_count + t_count) / length
    return round(at_content, 2)

# Print results
get_at_content()

0.57

In [16]:
# Excluding the variable from the function (bad)
def get_at_content():
    length = len(dna)
    a_count = dna.upper().count('A')
    t_count = dna.upper().count('T')
    at_content = (a_count + t_count) / length
    return round(at_content, 2)

# Print results
dna = "ACTGATCGATCG"
print(get_at_content())

0.5


In [17]:
# Functions don't always have to return a value
def print_at_content(dna):
    length = len(dna)
    a_count = dna.upper().count('A')
    t_count = dna.upper().count('T')
    at_content = (a_count + t_count) / length
    print(str(round(at_content, 2)))

In [24]:
# Functions can be called with named arguments
# Order of the arugments is key
# Keyword arguments in python3 is akin to function arguments in R
def get_at_content(dna, sig_figs=2):
    length = len(dna)
    a_count = dna.upper().count('A')
    t_count = dna.upper().count('T')
    at_content = (a_count + t_count) / length
    return round(at_content, sig_figs)


print(get_at_content("ATCGTGACTCG"))
print(get_at_content("ATCGTGACTCG", 3))
print(get_at_content("ATCGTGACTCG", sig_figs=4))

0.45
0.455
0.4545


In [26]:
# Assert tool
# An assertion consists of the word assert, folllowed by a call to our function,
# then two equals signs, then the result that we expect

assert get_at_content("ATGC") == 0.5

assert get_at_content("ATGCNNNNNNNNNN") == 0.5

# True = no response
# False = AssertionError

AssertionError: 

In [28]:
# Improvement 3: remove non-ATCG characters
def get_at_content(dna, sig_figs=2):
    dna = dna.replace('N', '')
    length = len(dna)
    a_count = dna.upper().count('A')
    t_count = dna.upper().count('T')
    at_content = (a_count + t_count) / length
    return round(at_content, sig_figs)

# Test assertion
assert get_at_content("ATGCNNNNNNNNNN") == 0.5

In [29]:
# Testing different assertions
assert get_at_content("A") == 1
assert get_at_content("G") == 0
assert get_at_content("ATGC") == 0.5
assert get_at_content("AGG") == 0.33
assert get_at_content("AGG", 1) == 0.3
assert get_at_content("AGG", 5) == 0.33333

In [95]:
### End of chapter exercises
## Percentage of amino acid residues, part one

# Write a function that takes two arguments – a protein sequence and an amino acid residue code –
# and returns the percentage of the protein that the amino acid makes up.

# Save protein sequence and code for tests
protein = "MSRSLLLRFLLFLLLLPPLP"
code = "M"

# Count the number of amino acid occurences
code_count = protein.count(code)
print("The number of amino acid occurences is: " + str(code_count))

# Calculate the length of the protein sequence
protein_length = len(protein)
print("The protein sequence length is: " + str(protein_length))

# Calculate the percentage of amino acid occurences in the protein sequence
protein_per = code_count * 100 / protein_length
print("The percent of " + str(code) + " in the protein sequence " + str(protein) + " is: " + str(protein_per))

The number of amino acid occurences is: 1
The protein sequence length is: 20
The percent of M in the protein sequence MSRSLLLRFLLFLLLLPPLP is: 5.0


In [138]:
# Write function
def get_aa_residues(protein, code):
    protein = protein.upper()
    code = code.upper()
    code_count = protein.count(code)
    protein_length = len(protein)
    protein_per = code_count * 100 / protein_length
    return(protein_per)

# Test function
print(get_aa_residues(protein, code))

# Test assertions
assert get_aa_residues(protein, "M") == 5
assert get_aa_residues(protein, "r") == 10
assert get_aa_residues(protein, "L") == 50
assert get_aa_residues(protein, "Y") == 0

5.0


In [217]:
## Percentage of amino acid residues, part two

# Modify the function from part one so that it accepts a list of amino acid residues
# rather than a single one. If no list is given, the function should return the
# percentage of hydrophobic amino acid residues (A, I, L, M, F, W, Y and V).

# Save protein sequence, list of amino acids, and list of hydrophobic amino acid residues
protein = "MSRSLLLRFLLFLLLLPPLP"
code_list = ["M", "L", "F", "S"]
#code_list = ["M"]
hydro_list = ["A", "I", "L", "M", "F", "W", "Y", "V"]

# Convert protein sequence into a list
protein_list = list(protein_list)

# Test
#print(aa_list[0])
#print(protein_list[0])

# Convert list to uppercase
#protein_list = list(x.upper() for x in protein_list)

# Test
#print(protein_list[1])

# Count proteins in list
#code_count = protein_list.count(code_list[1])

# Test
#print(code_count)

# Test
#for x in protein_list:
#    if x in code_list:
#        print(x)

# Count proteins for list: version 2
#for x in protein_list:
#    code_count = protein_list.count(x)
#    print(code_count)

# Save protein sequence, list of amino acids, and list of hydrophobic amino acid residues
protein = "MSRSLLLRFLLFLLLLPPLP"
code_list = ["M", "L", "F", "S"]
#code_list = ["M"]
hydro_list = ["A", "I", "L", "M", "F", "W", "Y", "V"]

# Convert protein sequence into a list
protein_list = list(protein_list)

# Write function
def get_aa_residues2(protein_list, code_list):
    protein_list2 = list(x.upper() for x in protein_list)
    code_list2 = list(x.upper() for x in code_list)
    protein_length = len(protein_list2)
    for x in protein_list2:
        if len(code_list2) > 1:
            code_count = protein_list2.count(x)
            protein_per = code_count * 100 / protein_length
            print(protein_per)
        else:
            if x in code_list2:
                hydro_sum = protein_list2.count(x)
                hydro_per = hydro_sum * 100 / protein_length
                hydro_per = int(hydro_per)
                print(hydro_per)

## Test function
get_aa_residues2(protein_list, code_list)

5.0
10.0
10.0
10.0
50.0
50.0
50.0
10.0
10.0
50.0
50.0
10.0
50.0
50.0
50.0
50.0
15.0
15.0
50.0
15.0


In [215]:
# Test assertions
assert get_aa_residues2("MSRSLLLRFLLFLLLLPPLP", ["M"]) == 5
assert get_aa_residues2("MSRSLLLRFLLFLLLLPPLP", ['M', 'L']) == 55
assert get_aa_residues2("MSRSLLLRFLLFLLLLPPLP", ['F', 'S', 'L']) == 70
assert get_aa_residues2("MSRSLLLRFLLFLLLLPPLP") == 65

5


AssertionError: 