# This notebook will introduce you to some basic Python syntax
##   (Particularly, it will focus on the most common variable types and their built-in methods.)

# Comments!

In [None]:
# Everything that follows a '#' symbol on a line is a comment.
# Python skips comments entirely when it runs your code.
# Comment generously to document what your script is doing,
# or to tell others how your script could be adapted for other purposes.
# You cannot have too many comments!

"""It is often taught that if you want 
to create comments that span multiple
lines, you can enclose them in triple quotes.
However, the behaviour is not exactly the same.
"""

# Triple-quoted text is really a string, so it can be stored in a variable...
x = """These 'comments' are still interpreted as code,
as strings actually, and can be saved to a variable"""

# ...and that variable can be modified afterwards
x = x + ", and later manipulated."

# Print the contents of the variable "x" to the screen
print(x)

### Tip: You can execute the code within this cell by using "Control-Enter", "Shift-Enter",
###      or by pushing the Play button at the top of the notebook. At the bottom
###      of the cell, you will see the text that would appear in the terminal window
###      if this code were run within a script at the command line.

# Strings

In [None]:
# Create a string and assign it to a variable called 's'
s = 'This is a string'

# The string on the next line is NOT assigned to a variable, printed to the screen,
# or written to an output file, so it serves no purpose: it's code garbage.
'This is also a string'

# To send the contents of a string to the screen (i.e., the terminal window), use the 'print' function
print(s)

# Strings can also be printed directly, without assigning them to a variable first
print('This is also a string')

# Single and double quotes are interchangeable in Python; either can be used to create a string
print("This is a string in double quotes")

In [None]:
# A triple-quoted string may span several lines; the line breaks become part of the string
print("""This string spans
multiple lines!
""")

# The same result can be produced on a single line using the \n (newline) escape sequence
print("This string spans\nmultiple lines too!\n")


In [None]:
# Strings in Python are immutable: once created, a string cannot be altered
alpha = "abcdefghijklmnop"

# So the rest of the alphabet cannot be added directly onto the string currently assigned to alpha.
# What you CAN do is build a new, longer string and reassign the variable name to it.

alpha = alpha + "qrstuvwxyz"   # A subtle distinction, but it will become important later

# It may look odd to both use and reassign alpha in the same line.
# It works because the expression to the right of the "=" is evaluated BEFORE the variable is reassigned.

print(alpha)

In [None]:
# The += operator accomplishes the same thing with simpler syntax:
# it concatenates the right-hand string and reassigns the variable in one step

alphabet = "abcdefghijklmnop"
alphabet += "qrstuvwxyz"
print(alphabet)

# Assignment 1.1

## Within the next cell, do the following:
### 1. Create a variable called *first* that contains your first name as a string 
### 2. Create a variable called *last* that contains your last name as a string 
### 3. Use print to display the content of these variables
### 4. Create a new variable called *full* that contains your full name. Do NOT retype your first and last name, rather use your existing *first* and *last* variables. And don't forget to add a space between your first and last names!
### 5. Use print to display the content of the *full* variable

# String methods

In [None]:
# Many variable types in Python come with built-in methods that are called using "dot notation".
# The built-in function dir() lists the methods associated with a particular variable.

# Create a string variable
seq = "nnnnnnnacgtn-gggtcgattcta---nnnnntgatagnnnnnn"

# Use dir() to get a list of the built-in string methods
print(dir(seq))


In [None]:
# The built-in string method upper() converts lowercase characters to uppercase

# Remember, strings are NOT mutable, so this method does NOT alter your string in place
print("seq before:", seq)
seq.upper()   # The uppercase copy returned here is never assigned, so it is simply discarded
print("seq after:", seq, "\n")   # Comma-separated items passed to print are printed together, separated by spaces

# To keep the new version, save the output to a new variable (or reassign the starting variable)
seq_upper = seq.upper()
print("seq:", seq, "\nseq_upper:", seq_upper, "\n")


# Conveniently, characters that are already uppercase are left unaltered


In [None]:
# The strip() method quickly removes a given character from the beginning and end of a string.
# That makes it an easy way to trim Ns from both ends of a sequence,
# like we did in the first class with regular expressions.
noN_seq_upper = seq_upper.strip('N')
print("noN_seq_upper:", noN_seq_upper, "\n")


In [None]:
# If you don't provide an argument to strip(), it removes all leading and trailing whitespace,
# including newline characters. This is very useful when parsing through text files.
test_line = "   A line of text.  \n"
print("Start Length:", len(test_line))    ### len() returns the length of a string
print("Start String:", test_line)

stripped_line = test_line.strip()
print("Stripped Length:", len(stripped_line))
print("Stripped String:", stripped_line)


In [None]:
# Methods whose names start and end with "__" are "special" or "magic" methods; they are not meant to be called directly.
# For example, __getitem__ is the method that lets you extract a subset of characters from a string.

# This is how you are expected to use it. We will cover this syntax next week.
print(noN_seq_upper[0])

# Calling the magic method directly works in exactly the same way
print(noN_seq_upper.__getitem__(0))

# Note: the first character is extracted with the "index" 0, because all counting in Python starts at 0

# Assignment 1.2

## Within the next cell, I've assigned a long string of characters to the variable *longStr*. Please enter code within this cell to do the following:
### 1. Determine the length of this string. Make sure the value is printed to the screen. 
### 2. Replace the current contents of *longStr* with a version of this string with all uppercase characters.
### 3. Use a built-in method described in the book to count the number of N, A and U characters. Print these values to the screen.
### 4. Using another built-in method described in the book, replace all occurrences of S with N and then count the number of N characters again. Make sure this new count is also printed to the screen.


In [None]:
# Input string for Assignment 1.2: a mix of lowercase letters, uppercase letters, digits and ';' characters.
# Add your own code for the assignment below this line.
longStr = "ajdiowlandjslalsjAHSISlkjhsAJKHSOlkja298737sdOAKJSKJHPakljdsf9283kljhasfHOIUHlkjhhdGHJKJLKHtredhgmbnvO213908239087IUKJLHMHHGDHFGDas;kd;jnsa234763dflkjhsadfmnfivlmn786123"




# Integers and floats

In [None]:
# Assign the integer (i.e., whole number) 2 to the variable "integer"
integer = 2
# Assign the floating point number 2.0 to the variable "flt"
flt = 2.0
# This one is just a string that happens to contain digits
string = '2.0'

# The print function accepts all kinds of variables as input
print(integer, flt, string, "\n")

# Note how flt and string are indistinguishable after printing:
# print converts each of these variables to a string before displaying it

In [None]:
# The built-in type() function reports the type of any variable
print("Output from the type() function:")
print(type(integer), type(flt), type(string), "\n")


In [None]:
# There are also built-in functions for converting between variable types
print("Type conversion examples:")
# Integer -> floating point
print(float(integer), type(integer), type(float(integer)))
# String -> floating point
print(float(string), type(string), type(float(string)))
# Floating point -> integer
print(int(flt), type(flt), type(int(flt)))
# int(string) would raise an error, because '2.0' is not a valid integer literal;
# convert such a string with float() instead (e.g., int(float(string)) if you need an integer)
# A string containing only a whole number converts without any problem
print(int('2'), type('2'), type(int('2')))
print('\n')


In [None]:
# A method that belongs to one variable type may return a value of a different type.
# The string method .count() is one example: it is called on a string but returns an integer.
seq = "nnnnnnnacgtn-gggtcgattcta---nnnnntgatagnnnnnn"

print("Number of n characters in seq variable:", seq.count('n'))
print("seq variable type:", type(seq))
print("'Number of n characters in seq variable' type:", type(seq.count('n')))


In [None]:
# Integers and floats have built-in methods too, although most of them are special ("__magic__") methods

print(dir(integer))
print('\n')
print(dir(flt))


# Arithmetic

In [None]:
# Rather than built-in methods, integers and floats are most commonly manipulated with mathematical operators.

# Most mathematical operators in Python are intuitive, and integers and floats can be mixed within one statement.

# Addition of an integer and a float
print(integer + flt)
# Addition of two integers
print(integer + integer)

# Subtraction
print(flt - integer)

# Multiplication
print(flt * 3)

# Exponentiation (raising to a power)
print(10 ** integer)

# Division
print(integer / 3)
# Note that dividing one integer by another produces a floating point value
print(type(integer), type(3), type(integer / 3))
print("\n")

# Assignment 1.3

## The variable *zika* contains a complete Zika virus genome, though some of the nucleotide positions are ambiguous ('N').
## Calculate the following statistics for this sequence:
### 1. Percent non-ambiguous characters
### 2. Percent GC content (# G or C characters/# non-ambiguous characters\*100)

## Note: print results to the screen and be prepared for both upper case and lower case characters

In [None]:
zika = 'NNNNNNNNNNNNgTGTGAATCAGACTGCGACAGTTCGAGTTTGAAGCGAAAGCTAGCAACAGTATCAACAGGTTTTATTTTGGATTTGGAAACGAGAGTTTCTGGTCATGAAAAACCCAAAAAAGAAATCCGGAGGATTCCGGATTGTCAATATGCTAAAACGCGGAGTAGCCCGTGTGAGCCCCTTTGGGGGCTTGAAGAGGCTGCCAGCCGGACTTCTGCTGGGTCATGGGCCCATCAGGATGGTCTTGGCGATTCTAGCCTTTTTGAGATTCACGGCAATCAAGCCATCACTGGGTCTCATCAATAGATGGGGTTCAGTGGGGAAAAAAGAGGCTATGGAAATAATAAAGAAGTTCAAGAAAGATCTGGCTGCCATGCTGAGAATAATCAATGCTAGGAAGGAGAAGAAGAGACGAGGCGCAGATACTAGTGTCGGAATTGTTGGCCTCCTGCTGACCACAGCTATGGCAGCGGAGGTCACTAGACGTGGGAGTGCATACTATATGTACTTGGACAGAAACGATGCTGGGGAGGCCATATCTTTTCCAACCACATTGGGGATGAATAAGTGTTATATACAGATCATGGATCTTGGACACATGTGTGACGCCACCATGAGCTATGAATGCCCTATGCTGGATGAGGGGGTGGAACCAGATGACGTCGATTGTTGGTGCAACACGACGTCAACTTGGGTTGTGTACGGAACCTGCCATCACAAAAAAGGTGAAGCACGGAGATCTAGAAGAGCTGTGACGCTCCCCTCCCATTCCACTAGGAAGCTGCAAACGCGGTCGCAAACCTGGTTGGAATCAAGAGAATACACAAGGCACTTGATTAGAGTCGAAAATTGGATATTCAGGAACCCTGGCTTCGCGTTAGCAGCAGCTGCCATCGCTTGGCTTTTGGGAAGCTCAACGAGCCAAAAAGTCATATACTTGGTCATGATACTGCTGATTGCCCCGGCATACAGCATCAGGTGCATAGGAGTCAGCAATAGGGACTTTGTGGAAGGTATGTCAGGTGGGACTTGGGTTGATGTTGTCTTGGAACATGGAGGTTGTGTCACCGTAATGGCACAGGACAAACCGACTGTCGACATAGAGCTGGTTACAACAACAGTCAGCAACATGGCGGAGGTAAGATCCTACTGCTATGAGGCATCAATATCAGACATGGCTTCGGACAGCCGCTGCCCAACACAAGGTGAAGCCTACCTTGACAAGCAATCAGACACTCAATATGTCTGCAAAAGAACGTTGGTGGACAGAGGCTGGGGAAATGGATGTGGACTTTTTGGCAAAGGGAGCCTGGTGACATGCGCTAGGTTTGCATGCTCCAAGAAAATGACCGGGAAGAGCATCCAGCCAGAGAATCTGGAGTACCGGATAATGCTGTCAGTTCATGGCTCCCAGCACAGTGGGATGATCGTTAATGACACAGGACATGAAACTGATGAGAATAGAGCGAAGGTTGAGATAACGCCCAATTCACCAAGAGCCGAAGCCACCCTGGGGGGTTTTGGAAGCCTAGGACTTGATTGTGAACCGAGGACAGGCCTTGACTTTTCAGATTTGTATTACTTGACTATGAATAACAAGCACTGGTTGGTTCACAAGGAGTGGTTCCACGACATTCCATTACCTTGGCACGCTGGGGCAGACACCGGAACTCCACACTGGAACAACAAAGAAGCACTGGTAGAGTTCAAGGACGCACATGCCAAGAGGCAAACTGTCGTGGTTCTAGGGAGCCAAGAAGGAGCAGTTCACACGGCCCTTGCTGGAGCTCTGGAGGCTGAGATGGATGGTGCAAAGGGAAGGCTGTCCTCTGGCCACTTGAAATGTCGCCTGAAAATGGATAAACTTAGATTGAAGGGCGTGTCATACTCCTTGTGTACCGCAGCGTTCACATTCACCAAGATCCCGGCTGAAACACTGCACGGGACAGTCACAGTGGAGGTACAGTACGCAGGGACAGATGGACCTT
GCAAGGTTCCAGCTCAGATGGCGGTGGACATGCAAACTCTGACCCCAGTTGGGAGGTTGATAACCGCTAACCCCGTAATCACTGAAAGCACTGAGAACTCTAAGATGATGCTGGAACTTGATCCACCATTTGGGGACTCTTACATTGTCATAGGAGTCGGGGAGAAGAAGATCACCCACCACTGGCACAGGAGTGGCAGCACCATTGGAAAAGCATTTGAAGCCACTGTGAGAGGTGCCAAGAGAATGGCAGTCTTGGGAGACACAGCCTGGGACTTTGGATCAGTTGGAGGCGCTCTCAACTCATTGGGCAAGGGCATCCATCAAATTTTTGGAGCAGCTTTCAAATCATTGTTTGGAGGAATGTCCTGGTTCTCACAAATTCTCATTGGAACGTTGCTGATGTGGTTGGGTCTGAACACAAAGAATGGATCTATTTCCCTTATGTGCTTGGCCTTAGGGGGAGTGTTGATCTTCTTATCCACAGCCGTCTCTGCTGATGTGGGGTGCTCGGTGGACTTCTCAAAGAAGGAGACGAGATGCGGTACAGGGGTGTTCGTCTATAACGACGTTGAAGCCTGGAGGGACAGGTACAAGTACCATCCTGACTCCCCCCGTAGATTGGCAGCAGCAGTCAAGCAAGCCTGGGAAGATGGTATCTGCGGGATCTCCTCTGTTTCAAGAATGGAAAACATCATGTGGAGATCAGTAGAAGGGGAGCTCAACGCAATCCTGGAAGAGAATGGAGTTCAACTGACGGTCGTTGTGGGATCTGTAAAAAACCCCATGTGGAGAGGTCCACAGAGATTGCCCGTGCCTGTGAACGAGCTGCCCCACGGCTGGAAGGCTTGGGGGAAATCGTACTTCGTCAGAGCAGCAAAGACAAATAACAGCTTTGTCGTGGATGGTGACACACTGAAGGAATGCCCACTCAAACATAGAGCATGGAACAGCTTTCTTGTGGAGGATCATGGGTTCGGGGTATTTCACACTAGTGTCTGGCTCAAGGTTAGAGAAGATTATTCATTAGAGTGTGATCCAGCCGTTATTGGAACAGCTGTTAAGGGAAGGGAGGCTGTACACAGTGATCTAGGCTACTGGATTGAGAGTGAGAAGAATGACACATGGAGGCTGAAGAGGGCCCATCTGATCGAGATGAAAACATGTGAATGGCCAAAGTCCCACACATTGTGGACAGATGGAATAGAAGAGAGTGATCTGATCATACCCAAGTCTTTAGCTGGGCCACTCAGCCATCACAATACCAGAGAGGGCTACAGGACCCAAATGAAAGGGCCATGGCACAGTGAAGAGCTTGAAATTCGGTTTGAGGAATGCCCAGGCACTAAGGTCCACGTGGAGGAAACATGTGGAACAAGAGGACCATCTCTGAGATCAACCACTGCAAGCGGAAGGGTGATCGAGGAATGGTGCTGCAGGGAGTGCACAATGCCCCCACTGTCGTTCCGGGCTAAAGATGGCTGTTGGTATGGAATGGAGATAAGGCCCAGGAAGGAACCAGAAAGCAACTTGGTAAGGTCAATGGTGACTGCAGGATCAACTGATCACATGGATCACTTCTCCCTTGGAGTGCTTGTGATTCTGCTTATGGTGCAGGAAGGGCTGAAGAAGAGAATGACCACAAAGATCATCATAAGCACATCAATGGCAGTGCTGGTAGCCATGATCCTGGGAGGATTTTCAATGAGTGACCTGGCTAAGCTTGCAATTTTGATGGGTGCCACCTTCGCGGAAATGAACACTGGAGGAGATGTAGCTCATCTGGCGCTGATAGCGGCATTCAAAGTCAGACCAGCGTTGCTGGTATCTTTCATCTTCAGAGCTAATTGGACANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATCTCCGCCTTGGAAGGCGACCTGATGGTTCTCATCAATGGTTTTGCTTTGGCCTGGTTGGCAATACGAGCGATGGTTGTTCCACGCACTGATAAC
ATCACCTTGGCAATCCTGGCTGCTCTGACACCACTAGCCCGGGGCACACTGCTTGTGGCGTGGAGAGCAGGCCTTGCTACTTGCGGGGGGTTTATGCTCCTCTCTCTGAAGGGAAAAGGCAGTGTGAAGAAGAACTTACCATTTGTCATGGCCCTGGGACTAACCGCTGTGAGGCTGGTCGACCCCATCAACGTGGTGGGACTGCTGTTGCTCACAAGGAGTGGGAAGCGGAGCTGGCCCCCTAGCGAAGTACTCACAGCTGTTGGCCTGATATGCGCATTGGCTGGAGGGTTCGCCAAGGCAGATATAGAGATGGCTGGGCCCATGGCCGCGGTCGGTCTGCTAATTGTCAGTTACGTGGTCTCAGGAAAGAGTGTGGACATGTACATTGAAAGAGCAGGTGACATCACATGGGAAAAAGATGCGGAAGTCACTGGAAACAGTCCCCGGCTCGATGTGGCGCTAGATGAGAGTGGTGATTTCTCCCTGGTGGAGGATGACGGTCCCCCCATGAGAGAGATCATACTCAAGGTGGTCCTGATGACCATCTGTGGCATGAACCCAGTAGCCATACCCTTTGCAGCTGGAGCGTGGTACGTATACGTGAAGACTGGAAAAAGGAGTGGTGCTCTATGGGATGTGCCTGCTCCCAAGGAAGTAAAAAAGGGGGAGACCACAGATGGAGTGTACAGAGTAATGACTCGTAGACTGCTAGGTTCAACACAAGTTGGAGTGGGAGTTATGCAAGAGGGGGTCTTTCACACTATGTGGCACGTCACAAAAGGATCCGCGCTGAGAAGCGGTGAAGGGAGACTTGATCCATACTGGGGAGATGTCAAGCAGGATCTGGTGTCATACTGTGGTCCATGGAAGCTAGATGCCGCCTGGGACGGGCACAGCGAGGTGCAGCTCTTGGCCGTGCCCCCGGAGAGAGAGCGAGGAACATCCAGACTCTGCCCGGAATATTTAAGACAAAGGATGGGGACATTGGAGCGGTTGCGCTGGATTACCCAGCAGGAACTTCAGGATCTCCAATCCTAGACAAGTGTGGGAGAGTGATAGGACTTTATGGCAATGGGGTCGTGATCAAAAATGGGAGTTATGTTAGTGCCATCACCCAAGGGAGGAGGGAGGAAGAGACTCCTGTTGAGTGCTTCGAGCCTTCGATGCTGAAGAAGAAGCAGCTAACTGTCTTAGACTTGCATCCTGGAGCTGGGAAAACCAGGAGAGTTCTTCCTGAAATAGTCCGTGAAGCCATAAAAACAAGACTCCGTACCGTGATCTTAGCTCCAACCAGGGTTGTCGCTGCTGAAATGGAGGAAGCCCTTAGAGGGCTTCCAGTGCGTTATATGACAACAGCAGTCAATGTCACCCACTCTGGAACAGAAATCGTCGACTTAATGTGCCATGCCACCTTCACTTCACGTCTACTACAGCCAATCAGAGTCCCCAACTATAATCTGTATATTATGGATGAGGCCCACTTCACAGATCCCTCAAGTATAGCAGCAAGAGGATACATTTCAACAAGGGTTGAGATGGGCGAGGCGGCTGCCATCTTCATGACCGCCACGCCACCAGGAACCCGTGACGCATTTCCGGACTCCAACTCACCAATTATGGACACCGAAGTGGAAGTCCCAGAGAGAGCCTGGAGCTCAGGCTTTGATTGGGTGACGGATCATTCTGGAAAAACAGTTTGGTTTGTTCCAAGCGTGAGGAACGGCAATGAGATCGCAGCTTGTCTGACAAAGGCTGGAAAACGGGTCATACAGCTCAGCAGAAAGACTTTTGAGACAGAGTTCCAGAAAACAAAACATCAAGAGTGGGACTTTGTCGTGACAACTGACATTTCAGAGATGGGCGCCAACTTTAAAGCTGACCGTGTCATAGATTCCAGGAGATGCCTAAAGCCGGTCATACTTGATGGCGAGAGAGTCATTCTGGCTGGACCCATGCCTGTCACACATGCCAGCGCTGCCCAGAGGAGGGGGCGCATA
GGCAGGAATCCCAACAAACCTGGAGATGAGTATCTGTATGGAGGTGGGTGCGCAGAGACTGACGAAGACCATGCACACTGGCTTGAAGCAAGAATGCTCCTTGACAATATTTACCTCCAAGATGGCCTCATAGCCTCGCTCTATCGACCTGAGGCCGACAAAGTAGCAGCCATTGAGGGAGAGTTCAAGCTTAGGACGGAGCAAAGGAAGACCTTTGTGGAACTCATGAAAAGAGGAGATCTTCCTGTTTGGCTGGCCTATCAGGTTGCATCTGCCGGAATAACCTACACAGATAGAAGATGGTGCTTTGATGGCACGACCAACAACACCATAATGGAAGACAGTGTGCCGGCAGAGGTGTGGACCAGACACGGAGAGAAAAGAGTGCTCAAACCGAGGTGGATGGACGCCAGAGTTTGTTCAGATCATGCGGCCCTGAAGTCATTCAAGGAGTTTGCCGCTGGGAAAAGAGGGGCGGCTTTTGGAGTGATGGAAGCCCTGGGAACACTGCCAGGACACATGACAGAGAGATTCCAGGAAGCCATTGACAACCTCGCTGTGCTCATGCGGGCAGAGACTGGAAGCAGGCCTTACAAAGCCGCGGCGGCCCAATTGCCGGAGACCCTAGAGACCATTATGCTTTTGGGGTTGCTGGGAACAGTCTCGCTGGGAATCTTTTTCGTCTTGATGAGGAACAAGGGCATAGGGAAGATGGGCTTTGGAATGGTGACTCTTGGGGCCAGCGCATGGCTCATGTGGCTCTCGGAAATTGAGCCAGCCAGAATTGCATGTGTCCTCATTGTTGTGTTCCTATTGCTGGTGGTGCTCATACCTGAGCCAGAAAAGCAAAGATCTCCCCAGGACAACCAAATGGCAATCATCATCATGGTAGCAGTAGGTCTTCTGGGCTTGATTACCGCCAATGAACTCGGATGGTTGGAGAGAACAAAGAGTGACCTAAGCCATCTAATGGGAAGGAGAGAGGAGGGGGCAACCATAGGATTCTCAATGGACATTGACCTGCGGCCAGCCTCAGCTTGGGCCATCTATGCTGCCTTGACAACTTTCATTACCCCAGCCGTCCAACATGCAGTGACCACTTCATACAACAACTACTCCTTAATGGCGATGGCCACGCAAGCTGGAGTGTTGTTTGGTATGGGCAAAGGGATGCCATTCTACGCATGGGACTTTGGAGTCCCGCTGCTAATGATAGGTTGCTACTCACAATTAACACCCCTGACCCTAATAGTGGCCATCATTTTGCTCGTGGCGCACTACATGTACTTGATCCCAGGGCTGCAGGCAGCAGCTGCGCGTGCTGCCCAGAAGAGAACGGCAGCTGGCATCATGAAGAACCCTGTTGTGGATGGAATAGTGGTGACTGACATTGACACAATGACTATTGACCCCCAAGTGGAGAAAAAGATGGGACAGGTGCTACTCATAGCAGTAGCCGTCTCCAGCGCCATACTGTCGCGGACCGCCTGGGGGTGGGGGGAGGCTGGGGCCCTGATCACAGCGGCAACTTCCACTTTGTGGGAAGGCTCTCCGAACAAGTACTGGAACTCCTCTACAGCCACTTCACTGTGTAACATTTTTAGGGGAAGTTACTTGGCTGGAGCTTCTCTAATCTACACAGTAACAAGAAACGCTGGCTTGGTCAAGAGACGTGGGGGTGGAACAGGAGAGACCCTGGGAGAGAAATGGAAGGCCCGCTTGAACCAGATGTCGGCCCTGGAGTTCTACTCCTACAAAAAGTCAGGCATCACCGAGGTGTGCAGAGAAGAGGCCCGCCGCGCCCTCAAGGACGGTGTGGCAACGGGAGGCCATGCTGTGTCCCGAGGAAGTGCAAAGCTGAGATGGTTGGTGGAGCGGGGATNCCTGCAGCCCTATGGAAAAGTCATTGATCTTGGATGTGGCAGAGGGGGCTGGAGTTACTACGCCGCCACCATCCGCAAAGTTCAAGAAGTGAAAGGATACACAAAAGGAGGCCCTGG
TCATGAAGAACCCGTGTTGGTGCAAAGCTATGGGTGGAACATAGTCCGTCTTAAGAGTGGGGTGGACGTCTTTCATATGGCGGCTGAGCCGTGTGACACGTTGCTGTGTGACATAGGTGAGTCATCATCTAGTCCTGAAGTGGAAGAAGCACGGACGCTCAGAGTCCTCTCCATGGTGGGGGATTGGCTTGAAAAAAGACCAGGAGCCTTTTGTATAAAAGTGTTGTGCCCATACACCAGCACTATGATGGAAACCCTGGAGCGACTGCAGCGTAGGTATGGGGGAGGACTGGTCAGAGTGCCACTCTCCCGCAACTCTACACATGAGATGTACTGGGTCTCTGGAGCGAAAAGCAACACCATAAAAAGTGTGTCCACCACGAGCCAGCTCCTCTTGGGGCGCATGGACGGGCCTAGGAGGCCAGTGAAATATGAGGAGGATGTGAATCTCGGCTCTGGCACGCGGGCTGTGGTAAGCTGCGCTGAAGCTCCCAACATGAAGATCATTGGTAACCGCATTGAAAGGATCCGCAGTGAGCACGCGGAAACGTGGTTCTTTGACGAGAACCACCCATATAGGACATGGGCCTACCATGGAAGCTATGAGGCCCCCACACAAGGGTCAGCGTCCTCTCTAATAAACGGGGTTGTCAGGCTCCTGTCAAAACCCTGGGATGTGGTGACTGGAGTCACAGGAATAGCCATGACCGACACCACACCGTATGGTCAGCAAAGAGTTTTCAAGGAAAAAGTGGACACTAGGGTGCCAGACCCCCAAGAAGGCACTCGTCAGGTTATGAGCATGGTCTCTTCCTGGTTGTGGAAAGAGCTAGGCAAACACAAACGGCCACGAGTCTGTACCAAAGAAGAGTTCATCAACAAGGTTCGTAGCAATGCAGCATTAGGGGCAATATTTGAAGAGGAAAAAGAGTGGAAGACTGCAGTGGAAGCTGTGAACGATCCAAGGTTCTGGGCTCTAGTGGACAAGGAAAGAGAGCACCACCTGAGAGGAGAGTGCCAGAGTTGTGTGTACAACATGATGGGAAAAAGAGAAAAGAAACAAGGGGAATTTGGAAAGGCCAAGGGCAGTCGCGCCATCTGGTATATGTGGCTAGGGGCTAGATTTCTAGAGTTCGAAGCCCTTGGATTCTTGAACGAGGATCACTGGATGGGGAGAGAGAACTCAGGAGGTGGTGTTGAAGGGCTGGGATTACAAAGACTCGGATATGTCCTAGAAGAGATGAGTCGCATACCAGGAGGAAGGATGTATGCAGATGACACTGCTGGCTGGGACACCCGCATCAGCAGGTTTGATCTGGAGAATGAAGCTCTAATCACCAACCAAATGGAGAAAGGGCACAGGGCCTTGGCATTGGCCATAATCAAGTACACATACCAAAACAAAGTGGTAAAGGTCCTTAGACCAGCTGAAAAAGGGAAAACAGTTATGGACATTNNNNNNNNNNNNNNNCAAAGGGGGAGCGGACAAGTTGTCACTTACGCTCTTAACACATTTACCAACCTAGTGGTGCAACTCATTCGGAATATGGAGGCTGAGGAAGTTCTAGAGATGCAAGACTTGTGGCTGCTGCGGAGGTCAGAGAAAGTGACCAACTGGTTGCAGAGCAACGGATGGGATAGGCTCAAACGAATGGCAGTCAGTGGAGATGATTGCGTTGTGAAGCCAATTGATGATAGGTTTGCACATGCCCTCAGGTTCTTGAATGATATGGGAAAAGTTAGGAAGGACACACAAGAGTGGAAACCCTCAACTGGATGGGACAACTGGGAAGAAGTTCCGTTTTGCTCCCACCACTTCAACAAGCTCCATCTCAAGGACGGGAGGTCCATTGTGGTTCCCTGCCGCCACCAAGATGAACTGATTGGCCGGGCCCGCGTCTCTCCAGGGGCGGGATGGAGCANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGAAGGGACCTCCGAC
TGATGGCCAATGCCATTTGTTCATCTGTGCCAGTTGACTGGGTTCCAACTGGGAGAACTACCTGGTCAATCCATGGAAAGGGAGAATGGATGACCACTGAAGACATGCTTGTGGTGTGGAACAGAGTGTGGATTGAGGAGAACGACCACATGGAAGACAAGACCCCAGTTACGAAATGGACAGACATTCCCTACTTGGGAAAAAGGGAAGACTTGTGGTGTGGATCTCTCATAGGGCACAGACCGCGCACCACCTGGGCTGAGAACATTAAGAACACAGTCAACATGGTGCGCAGGATCATAGGTGATGAAGAAAAGTACATGGACTACCTATCCACCCAAGTTCGCTACTTGGGTGAAGAAGGGTCTACACCTGGAGTGCTGTANGCACCAATCTTAATGTTGTCAGGCCTGCTAGTCAGCCACAGCTTGGGGAAAGCTGTGCAGCCTGTGACCCCCCCAGGAGAAGCTGGGAAACCAAGCCTATAGTCAGGCCGAGAACGCCATGGCACGGAAGAAGCCATGCTGCCTGTGAGCCCCTCAGAGGACACTGAGTCAAAAAACCCCACGCGCTTGGAGGCGCAGGATGGGAAAAGAAGGTGGCGACCTTCCCCACCCTTCAATCTGGGGCCTGAACTGGAGATCAGCTGTGGATCTCCAGAAGAGGGACTAGTGGTTAGAGGAGACCCCCCGGAAAACGCAAAACAGCATATTGACGCTGGGAAAGACCAGAGACTCCATGAGTTTCCaccacgctggccgccaggcacagatcgccgaaTAGCGGCGGCCAGTGTGGGGAAANNNNNNNNNNNN'





# Lists

In [None]:
# A list is a variable type used to store an ordered collection of items.
# There are two equivalent ways to create an empty list:
emptylist1 = []       # with a pair of square brackets
emptylist2 = list()   # or by calling list()

print(type(emptylist1))
print(type(emptylist2))


In [None]:
# A list can hold items of any type: strings, integers, floats, even other lists or dictionaries.
# Items of different types can also be stored together within a single list.

# The list below contains a string, an integer, a float, another string,
# and finally a nested list holding two strings and two integers.
thisIsAList = ['This is a string', 3, 4.5, "askdjfh", ['3', '4', 3, 4]]

print("This is what it looks like when you print a list")
print(thisIsAList)

# The len() function returns the number of items in a list.
# It is the same function used to check the length of a string (i.e., its number of characters).
print("\nThis list contains this many items:", len(thisIsAList))


# List methods

In [None]:
# As with strings, the dir() function shows the built-in methods associated with a list
print(dir(thisIsAList))

In [None]:
# Unlike strings, lists are mutable.
# Many list methods therefore change the list itself, in place,
# so there is no need to save a copy in order to keep the changes.

# A simple list to play with
l = [1, 2, 3]
print("Starting list:", l, "\n")

# append() adds a single item to the end of a list; it is one of the most commonly used methods
l.append(4)
print("After appending the integer 4:", l, "\n")

# extend() adds several items to the end of a list at once
l.extend([5, 6, 7])
print("After extending list to include 5, 6 and 7:", l, "\n")

# insert() places an item in the middle of a list.
# The 1st argument is the 0-indexed position for the new item, the 2nd is the item to be inserted.
l.insert(1, 8)
l.insert(4, 9)
print("After inserting two integers:", l, "\n")

# sort() puts the items of a list into ascending order
l.sort()
print("After sorting the list, in place:", l, "\n")

# reverse() flips the current order of the items.
# Because the list was just sorted, the result here is descending order.
l.reverse()
print("After reverse sorting the list, in place:", l, "\n")

# There are also a couple of built-in list methods for removing items

# remove() deletes an item based on its identity (its value)
l.remove(8)
print("After removing the number 8:", l, "\n")

# pop() deletes an item based on its location, and also returns the item that was removed
removed = l.pop(1)
print("After removing 2nd item in the list:", l, "\n")
print("This is the item that was just removed:", removed, "\n")


# Assignment 1.4

## In the cell below, I've initialized a list with several integers. Using a combination of the methods mentioned above (with the exception of sort and reverse), modify this list so that it contains the numbers 1-10 sorted in ascending order.

## Print out final list



In [None]:
# Starting list for Assignment 1.4 -- add your code below this line
l = [1, 3, 11, 5, 6, 8, 10]



# Assignment 1.5

## Redo assignment 1.4 but this time, use sort to simplify the process


In [None]:
# Starting list for Assignment 1.5 -- add your code below this line
k = [1, 3, 11, 5, 6, 8, 10]



# Dictionaries

In [None]:
# A dictionary stores key:value pairs.
# There are two equivalent ways to create an empty dictionary:
emptydict1 = {}       # with a pair of curly braces
emptydict2 = dict()   # or by calling dict()

print(type(emptydict1))
print(type(emptydict2))


In [None]:
# A dictionary can also be defined together with its key:value pairs
d = {1: "one", "one": 1, 2: "two", "two": 2, 3: "three", "three": 3, 4: "four", "four": 4}

# Prior to Python 3.7, the contents of a dictionary were unordered, so the order shown when printing
# was not necessarily the order in which the items were inserted.
# From Python 3.7 onwards, insertion order is maintained. Whether the keys printed below appear in the
# same order as they are listed above therefore depends on the version of Python you are running.
print("This is what it looks like when you print a dictionary:\n", d, "\n")


In [None]:
# Values are retrieved from a dictionary by using their keys.
# For example, this looks up the value associated with the key "two":
print(d["two"])

In [None]:
# The same square-bracket syntax adds a new key:value pair
d[10] = "ten"

# It also replaces the value associated with a key that already exists
d[1] = 1.0

print("Here is the modified dictionary:\n", d, "\n")

# Note that these changes are made "in place" because dictionaries are MUTABLE

In [None]:
# Remember, keys can be any NON-MUTABLE variable type (e.g., strings, integers, floats).
# This means lists and dictionaries cannot be keys, so the assignment below fails with a TypeError.
# The error is caught and printed, rather than left uncaught, so that the failure is still shown
# but running the whole notebook ("Run All") does not stop at this cell.
try:
    d[["a", "b"]] = "letters"
except TypeError as err:
    print("TypeError:", err)


In [None]:
# The len() function already used on strings and lists works on dictionaries as well.
# For a dictionary, it returns the number of key:value pairs.

print("d contains this many key:value pairs:", len(d))

In [None]:
# Once again, the dir() function lists the built-in methods
print("\nHere are the built-in methods/functions associated with dictionaries in Python (many are __magic__):")
print(dir(d))

In [None]:
# update() is a handy dictionary method:
# it can be used to merge two dictionaries into one

# Initialize two separate dictionaries
d1 = {"a": 1, "b": 2, "c": 3}
d2 = {"d": 4, "e": 5, "f": 6}

# Add the key:value pairs from the 2nd dictionary into the 1st (the 2nd is left untouched)
d1.update(d2)
print("Updated 1st dictionary:", d1, "\n")
print("Unchanged 2nd dictionary:", d2, "\n")

In [None]:
# Several dictionary methods really shine inside for loops, which we will discuss next week.
# The items() method, for example, lets you step through each key:value pair in a dictionary.
# This goes beyond what I expect you to understand this week; it is just a little taste of what is to come.

for key, value in d1.items():
    # Each pass through the loop receives one key and its associated value
    print(key, value)

# Assignment 1.6

## codonTable is a dictionary that links DNA codons to amino acids. Let's use some of the functions and methods described above to explore and manipulate this table

### 1. How many key:value pairs are contained in codonTable? Please print this value to the screen.
### 2. Those familiar with their codons may notice that this table is missing 3 of the possible codons. The three missing codons are the stop codons. Create a separate dictionary called 'stopCodons' with these three codons as keys and '*' as the value for each. Then add these key:value pairs to codonTable using update().
### 3. abbrevTable links the single letter abbreviations for each amino acid to the long form name. There are currently two mistakes though. The "acid" part of the long form name has been left off for Aspartic Acid (D) and Glutamic Acid (E). Please fix these two mistakes. 
### 4. Compose a single expression that utilizes both codonTable and abbrevTable to print the long form name for the amino acid encoded by 'ATC'. This will require you to nest a call to one dictionary inside a call to the other.


In [None]:
# Maps each DNA codon (key) to the single-letter abbreviation of the amino acid it encodes (value).
# The three stop codons are deliberately absent; adding them is part of Assignment 1.6.
codonTable = {
    'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
    'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
    'TAT': 'Y', 'TAC': 'Y',
    'TGT': 'C', 'TGC': 'C', 'TGG': 'W',
    'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
    'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
    'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
    'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
    'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
}



# Maps the single-letter abbreviation of each amino acid (key) to its long form name (value).
# NOTE: the entries for "D" and "E" are intentionally missing the word "acid";
# correcting them is part of Assignment 1.6, so they are left as-is here.
abbrevTable = {
    "A": "Alanine",
    "R": "Arginine",
    "N": "Asparagine",
    "D": "Aspartic",
    "C": "Cysteine",
    "E": "Glutamic",
    "Q": "Glutamine",
    "G": "Glycine",
    "H": "Histidine",
    "I": "Isoleucine",
    "L": "Leucine",
    "K": "Lysine",
    "M": "Methionine",
    "F": "Phenylalanine",
    "P": "Proline",
    "S": "Serine",
    "T": "Threonine",
    "W": "Tryptophan",
    "Y": "Tyrosine",
    "V": "Valine",
    "*": "Stop",
}

