In [1]:
#writing functions
#defining a function to calculate AT content
#def starts the definition: def name_of_function(names of argument variables):
def get_at_content(dna):
    """Return the fraction of characters in dna that are 'A' or 'T'.

    The comparison is case-sensitive: only uppercase 'A' and 'T' are counted.
    """
    #measure the sequence first, then tally the two bases of interest
    total_bases=len(dna)
    at_total=sum(dna.count(base) for base in ('A','T'))
    #the value handed back to the caller when the function is called
    return at_total/total_bases
#a bare expression on the last line of a cell is displayed as the cell's output
get_at_content("ATGACTGGACCA")

0.5

In [2]:
#the returned value can be stored in a variable; an assignment displays no output
at_content=get_at_content("ATGACTGGACCA")

In [3]:
#str() converts the returned number to text so it can be joined to the message with +
print("AT content is "+str(get_at_content("ATGACTGGACCA")))

AT content is 0.5


In [5]:
#variables created inside a function are local to it and cannot be accessed from outside
#(running this line raises NameError: a_count is only assigned inside get_at_content)
print(a_count)

NameError: name 'a_count' is not defined

In [10]:
#program using the function
def get_at_content(dna):
    """Compute the proportion of dna made up of the bases 'A' and 'T'.

    Only uppercase 'A' and 'T' are matched, so a lowercase sequence gives 0.0.
    """
    sequence_length=len(dna)
    #str.count is case-sensitive
    at_bases=dna.count('A')+dna.count('T')
    return at_bases/sequence_length

#keep the result in a variable, then print it (print converts the number to text itself)
my_at_content=get_at_content("ATGCGCGATCGATCGAATCG")
print(my_at_content)
#the return value can also be passed straight to print()
print(get_at_content("ATGCATGCAACTGTAGC"))
#a lowercase sequence: no uppercase 'A' or 'T' is found in it
print(get_at_content("aactgtagctagctagcagcgta"))

0.45
0.5294117647058824
0.0


In [11]:
#fixing function to round to 2 decimal places always + to take upper and lowercase
def get_at_content(dna):
    """Return the AT fraction of dna, rounded to 2 decimal places.

    Upper- and lowercase letters are treated alike.
    """
    total_bases=len(dna)
    #uppercase the sequence so that lowercase input is counted as well
    upper_dna=dna.upper()
    at_total=upper_dna.count('A')+upper_dna.count('T')
    return round(at_total/total_bases,2)

#keep the result in a variable, then print it (print converts the number to text itself)
my_at_content=get_at_content("ATGCGCGATCGATCGAATCG")
print(my_at_content)
#results are now rounded to 2 decimal places
print(get_at_content("ATGCATGCAACTGTAGC"))
#lowercase input
print(get_at_content("aactgtagctagctagcagcgta"))

0.45
0.53
0.52


In [12]:
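#adding customization of the rounding to the function (the argument is called sig_figs, but round() treats it as a number of decimal places, not significant figures)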
def get_at_content(dna,sig_figs):
    """Return the AT fraction of dna, rounded with round(value, sig_figs).

    dna      -- the sequence; upper- and lowercase letters are treated alike
    sig_figs -- passed to round() as its second argument, i.e. the number of
                digits kept after the decimal point
    """
    total_bases=len(dna)
    normalized=dna.upper()
    at_fraction=sum(normalized.count(base) for base in 'AT')/total_bases
    return round(at_fraction,sig_figs)

test_dna="ATGCATGCAACTGTAGC"
#same sequence, rounded to 1, 2 and 3 decimal places in turn
for decimal_places in (1,2,3):
    print(get_at_content(test_dna,decimal_places))

0.5
0.53
0.529


In [15]:
#functions are very useful because they allow for encapsulation-- multiple independently working parts that can each be understood individually and then called together to perform complex actions
#functions do not always have to take arguments
def get_number():
    """Return the constant 42; the function takes no arguments."""
    answer=42
    return answer
#functions like this are not very useful though
#the next function seems useful at first, but the two code portions are no longer independent -- the function requires a variable to be defined before it is called
#it is more useful to identify which variables from outside a function are being used inside it, then turn them into arguments
def get_at_content():
    """Return the AT fraction of the variable dna, rounded to 2 decimal places.

    dna is not a parameter: it is looked up outside the function when the
    function is called, so it must already be defined at that point.
    """
    total_bases = len(dna)
    upper_dna = dna.upper()
    at_total = upper_dna.count('A') + upper_dna.count('T')
    return round(at_total / total_bases, 2)

#dna has to exist as a variable outside the function before the call below
dna = "ACTGATCGATCG"
print(get_at_content())

0.5


In [16]:
#functions do not have to 'return' a value
def print_at_content(dna):
    """Print the AT fraction of dna, rounded to 2 decimal places.

    Nothing is returned; the result is only written with print().
    Upper- and lowercase letters are treated alike.
    """
    total_bases = len(dna)
    upper_dna = dna.upper()
    at_fraction = (upper_dna.count('A') + upper_dna.count('T')) / total_bases
    #print() converts the rounded number to text itself
    print(round(at_fraction, 2))

#this function is less flexible than one which returns output though (would have to be modified to do anything other than print)



In [18]:
#functions can be called with named (keyword) arguments, which doesn't rely on the order of the arguments
def get_at_content(dna,sig_figs):
    """Return the AT fraction of dna, rounded with round(value, sig_figs).

    Both arguments can be passed by position or by name. Upper- and
    lowercase letters in dna are treated alike.
    """
    sequence_length=len(dna)
    upper_dna=dna.upper()
    at_bases=upper_dna.count('A')+upper_dna.count('T')
    fraction=at_bases/sequence_length
    return round(fraction,sig_figs)

#positional arguments: matched to the parameters by their order
get_at_content("ATCGTGACTCG",2)
#keyword (named) arguments
get_at_content(dna="ATCGTGACTCG",sig_figs=2)
#keyword arguments may be given in any order
get_at_content(sig_figs=2,dna="ATCGTGACTCG")
#positional first, then keyword; as the last expression in the cell, only this result is displayed
get_at_content("ATCGTGACTCG",sig_figs=2)

0.45

In [19]:
#but a positional argument cannot follow a keyword argument
#(this line is a deliberate example: it produces a SyntaxError)
get_at_content(dna="ATCGTGACTCG",2)

SyntaxError: positional argument follows keyword argument (<ipython-input-19-7c75c2aa9c1c>, line 2)

In [22]:
#default values can be specified with def(argument_name=default):
def get_at_content(dna,sig_figs=2):
    """Return the AT fraction of dna, rounded with round(value, sig_figs).

    dna      -- the sequence; upper- and lowercase letters are treated alike
    sig_figs -- second argument given to round(); defaults to 2 when omitted

    Every character of dna counts towards the length, whatever it is.
    """
    total_bases=len(dna)
    normalized=dna.upper()
    at_total=sum(normalized.count(base) for base in ('A','T'))
    return round(at_total/total_bases,sig_figs)

#sig_figs is omitted here, so its default value is used
get_at_content("ATCGTGACTCG")

0.45

In [23]:
#the default can be overridden with a keyword argument
get_at_content("ATCGTGACTCG", sig_figs=4)

0.4545

In [24]:
#the default can also be overridden with a positional argument
get_at_content("ATCGTGACTCG", 3)

0.455

In [25]:
#Python has an assert statement that can be used to test functions
#format is:
#assert name_of_function("argument")==expected_result
assert get_at_content("ATGC")==0.5
#an AssertionError is raised if the function's result does not match the expected result

In [26]:
#assertions can test whether a function is useful in a given situation or needs editing
#ex: calculating AT content while excluding unknown bases (N) from the calculation
#this assertion fails (see the AssertionError output) -- presumably because the function defined earlier still counts the N characters in the length
assert get_at_content("ATGCNNNN")==0.5

AssertionError: 

In [28]:
#to fix this, replace N characters (unknown bases) with nothing
def get_at_content(dna,sig_figs=2):
    """Return the AT fraction of the known bases in dna.

    dna      -- the sequence; upper- and lowercase letters are treated alike,
                and unknown bases ('N' or 'n') are excluded from the calculation
    sig_figs -- second argument given to round(), i.e. the number of digits
                kept after the decimal point; defaults to 2

    Raises ZeroDivisionError if nothing is left after removing the unknown
    bases (an empty sequence, or one made up only of N).
    """
    #normalise the case BEFORE removing N, otherwise lowercase 'n' would be
    #left in the sequence and still be counted in the length
    known_bases=dna.upper().replace('N','')
    length=len(known_bases)
    at_count=known_bases.count('A')+known_bases.count('T')
    return round(at_count/length,sig_figs)

#with the N characters removed, only ATCG is left: 2 of its 4 bases are A or T
assert get_at_content("ATCGNNNNNN")==0.5

In [29]:
#assertions can be grouped to test for correct behavior on different input types
#single-base sequences: the two extreme results
assert get_at_content("A") == 1
assert get_at_content("G") == 0
#a sequence with equal numbers of AT and GC bases
assert get_at_content("ATGC") == 0.5
#1/3, first with sig_figs left out and then with explicit values
assert get_at_content("AGG") == 0.33
assert get_at_content("AGG", 1) == 0.3
assert get_at_content("AGG", 5) == 0.33333