In [2]:
#Defining our own function
def get_at_content(dna):
    """Return the fraction of characters in ``dna`` that are 'A' or 'T'.

    Counting is case-sensitive: only uppercase 'A' and 'T' are matched,
    so an all-lowercase sequence yields 0.0.

    Raises:
        ZeroDivisionError: if ``dna`` is empty.
    """
    n_bases = len(dna)
    # Add up the occurrences of each base of interest.
    at_total = sum(dna.count(base) for base in ('A', 'T'))
    return at_total / n_bases

In [3]:
#call the newly defined function and print the result (print() converts the number to text by itself)
at_content = get_at_content("ATGACTGGACCA")
print(at_content)

0.5


In [4]:
#small program that uses our new function
my_at_content = get_at_content("ATGCGCGATCGATCGAATCG")
print(my_at_content)
print(get_at_content("ATGCATGCAACTGTAGC"))
print(get_at_content("aactgtagctagctagcagcgta"))

0.45
0.5294117647058824
0.0


In [5]:
#fixing the function to better suit our needs: rounding to 2 decimal places and accounting for lowercase input
def get_at_content(dna):
    """Return the case-insensitive AT fraction of ``dna``, rounded to 2 decimal places.

    The sequence is upper-cased before counting, so 'a'/'t' count the same
    as 'A'/'T'. The denominator is the length of ``dna`` as passed in.

    Raises:
        ZeroDivisionError: if ``dna`` is empty.
    """
    total = len(dna)
    # Upper-case once and reuse the result for both counts.
    normalized = dna.upper()
    at_bases = normalized.count('A') + normalized.count('T')
    return round(at_bases / total, 2)

In [6]:
#new results: values are rounded and the lowercase sequence is now counted
my_at_content = get_at_content("ATGCGCGATCGATCGAATCG")
print(my_at_content)
print(get_at_content("ATGCATGCAACTGTAGC"))
print(get_at_content("aactgtagctagctagcagcgta"))

0.45
0.53
0.52


In [7]:
#we can also let the caller choose the rounding precision by passing it as a second argument (round() treats it as a number of decimal places)
def get_at_content(dna, sig_figs):
    """Return the case-insensitive AT fraction of ``dna``, rounded.

    Args:
        dna: sequence string; it is upper-cased before counting.
        sig_figs: passed straight to ``round()`` as its second argument,
            i.e. the number of digits kept after the decimal point.

    Raises:
        ZeroDivisionError: if ``dna`` is empty.
    """
    n_bases = len(dna)
    bases = dna.upper()
    at_fraction = sum(bases.count(base) for base in 'AT') / n_bases
    return round(at_fraction, sig_figs)

In [8]:
#new output rounded to 1, 2, and 3 decimal places (the second argument is handed to round())
test_dna = "ATGCATGCAACTGTAGC"
print(get_at_content(test_dna, 1))
print(get_at_content(test_dna, 2))
print(get_at_content(test_dna, 3))

0.5
0.53
0.529


In [9]:
#Functions can be called with named (keyword) arguments: when you call a function you can name each parameter right in the call
get_at_content(dna="ATCGTGACTCG", sig_figs=2)
#this way it does not matter what order they go in (sig_figs=2, dna="ATCGTGACTCG" is exactly the same)

0.45

In [10]:
#you can mix and match, but positional arguments must come before keyword arguments (dna="ATCGTGACTCG", 2 is not allowed)
#only the value of the last expression in the cell is displayed, so a single result appears below
get_at_content("ATCGTGACTCG", 2)
get_at_content(dna="ATCGTGACTCG", sig_figs=2)
get_at_content("ATCGTGACTCG", sig_figs=2)

0.45

In [11]:
#we can set defaults that are used when an argument is not specified (sig_figs will default to 2 unless otherwise specified)
def get_at_content(dna, sig_figs=2):
    """Return the case-insensitive AT fraction of ``dna``, rounded.

    Args:
        dna: sequence string; it is upper-cased before counting.
        sig_figs: second argument given to ``round()`` (digits after the
            decimal point). Defaults to 2.

    Raises:
        ZeroDivisionError: if ``dna`` is empty.
    """
    seq_len = len(dna)
    upper_dna = dna.upper()
    adenine = upper_dna.count('A')
    thymine = upper_dna.count('T')
    return round((adenine + thymine) / seq_len, sig_figs)

In [12]:
#try it out: no second argument is given, so sig_figs falls back to its default of 2
get_at_content("ATCGTGACTCG")


0.45

In [13]:
#a positional second argument overrides the default precision
get_at_content("ATCGTGACTCG", 3)

0.455

In [14]:
#the default can also be overridden by keyword
get_at_content("ATCGTGACTCG", sig_figs=4)

0.4545

In [15]:
#Testing functions so you know they work properly: assert a result that you know must be true if the function is correct
assert get_at_content('ATGC') == 0.5

In [16]:
#you can test whether an unaccounted-for element in your input breaks the function (this assert is expected to fail at this point)
assert get_at_content('ATGCNNNNNNNNNNNNNNN') == 0.5

AssertionError: 

In [17]:
#it failed that test so we know we have to edit our function to account for unknown bases
def get_at_content(dna, sig_figs=2):
    """Return the AT fraction of ``dna``, ignoring unknown bases.

    Args:
        dna: sequence string. Case is ignored, and every 'N'/'n'
            (unknown base) is removed before anything is counted.
        sig_figs: second argument given to ``round()``, i.e. the number of
            digits kept after the decimal point. Defaults to 2.

    Returns:
        (A count + T count) / (number of non-N characters), rounded.
        0.0 when no characters remain after removing N (empty or all-N
        input), instead of dividing by zero.
    """
    # Upper-case first so lowercase 'n' is stripped together with 'N';
    # otherwise lowercase unknowns would still inflate the length.
    known_bases = dna.upper().replace('N', '')
    if not known_bases:
        # Nothing to measure: report 0.0 rather than raise ZeroDivisionError.
        return 0.0
    at_count = known_bases.count('A') + known_bases.count('T')
    return round(at_count / len(known_bases), sig_figs)

In [18]:
#now it should work: the N characters are removed before the length is measured
assert get_at_content('ATGCNNNNNNNNNNNNNNN') == 0.5

In [19]:
#you should test more than one behavior to make sure your function is working properly
#extremes first: all A/T, then no A/T
assert get_at_content('A') == 1
assert get_at_content('G') == 0
assert get_at_content('ATGC') == 0.5
#1/3 rounded with the default precision, then with explicit precisions
assert get_at_content('AGG') == 0.33
assert get_at_content('AGG', 1) == 0.3
assert get_at_content('AGG', 5) == 0.33333

In [21]:
#Exercise 1: Percentage of amino acid residues part one
def my_function(protein, aa):
    """Return the percentage of ``protein`` made up of the residue ``aa``.

    ``protein`` is upper-cased before counting, but ``aa`` is used exactly
    as given, so a lowercase letter in ``aa`` never matches.

    Raises:
        ZeroDivisionError: if ``protein`` is empty.
    """
    n_residues = len(protein)
    hits = protein.upper().count(aa)
    return (hits / n_residues) * 100

In [23]:
#one M in a 20-residue sequence is 5%
assert my_function("MSRSLLLRFLLFLLLLPPLP", 'M') == 5

In [24]:
#lowercase aa: expected to fail here because only protein is upper-cased, not aa
assert my_function("MSRSLLLRFLLFLLLLPPLP", 'r') == 10

AssertionError: 

In [25]:
def my_function(protein, aa):
    """Return the percentage of ``protein`` made up of the residue ``aa``.

    Both ``protein`` and ``aa`` are upper-cased, so the comparison is
    case-insensitive.

    Raises:
        ZeroDivisionError: if ``protein`` is empty.
    """
    n_residues = len(protein)
    target = aa.upper()
    hits = protein.upper().count(target)
    return (hits / n_residues) * 100

In [26]:
#lowercase aa now matches because aa is upper-cased as well
assert my_function("MSRSLLLRFLLFLLLLPPLP", 'r') == 10

In [27]:
#10 of the 20 residues are L
assert my_function("MSRSLLLRFLLFLLLLPPLP", 'L') == 50

In [28]:
#an amino acid that never occurs gives 0
assert my_function("MSRSLLLRFLLFLLLLPPLP", 'Y') == 0

In [33]:
#part 2
def my_function(protein, aa_list=('A', 'I', 'L', 'M', 'F', 'W', 'Y', 'V')):
    """Return the percentage of ``protein`` made up of the residues in ``aa_list``.

    Args:
        protein: amino-acid sequence string; case is ignored.
        aa_list: iterable of one-letter residue codes; case is ignored and
            repeated entries are counted only once. Defaults to
            A, I, L, M, F, W, Y and V.

    Returns:
        The combined count of the listed residues as a percentage of the
        sequence length. 0.0 for an empty ``protein``; an empty ``aa_list``
        also gives 0.0.
    """
    protein = protein.upper()
    length = len(protein)
    if length == 0:
        # Nothing to measure: avoid dividing by zero.
        return 0.0
    # A set removes duplicates such as ['m', 'M'], which would otherwise be
    # counted twice and could push the result past 100%.
    wanted = {aa.upper() for aa in aa_list}
    total = sum(protein.count(aa) for aa in wanted)
    # Computed once, after the loop, so it is defined even for an empty aa_list.
    return total * 100 / length

In [34]:
#a one-element list gives the same answer as the single-amino-acid version
assert my_function("MSRSLLLRFLLFLLLLPPLP", ['M']) == 5

In [35]:
#counts for several amino acids are added together (1 M + 10 L out of 20)
assert my_function("MSRSLLLRFLLFLLLLPPLP", ['M', 'L']) == 55

In [36]:
#2 F + 2 S + 10 L out of 20 residues
assert my_function("MSRSLLLRFLLFLLLLPPLP", ['F', 'S', 'L']) == 70

In [37]:
#no aa_list given, so the default list is used
assert my_function("MSRSLLLRFLLFLLLLPPLP") == 65