# PART 4

## **[Dictionaries](https://docs.python.org/3/library/stdtypes.html#dict)**

In [None]:
# An empty dictionary can be created with the dict() constructor or with the literal {}.
my_first_dict = dict()
print('dict: {}, type: {}'.format(my_first_dict, type(my_first_dict)))

### Initialization

In [None]:
# Two equivalent ways to build the same dictionary:
# a literal, and the dict() constructor with keyword arguments.
dict1 = {'key1': 2, 'key2': 4, 'key3': 6}
my_second_dict = dict(key1=2, key2=4, key3=6)

print(dict1)
print(my_second_dict)

# Dictionaries compare equal when they hold the same key/value pairs.
same_content = dict1 == my_second_dict
print('equal: {}'.format(same_content))
print('length: {}'.format(len(dict1)))

In [None]:
my_dict = {
    'Subaru' : 'Japanese',
    'Tesla' : 'American',
    'Mercedes' : 'German',
}

print(my_dict)

In [None]:
help(format)
help('FORMATTING')

## `dict.keys(), dict.values(), dict.items()`

In [None]:
print('keys: {}'.format(my_second_dict.keys()))
print('values: {}'.format(my_second_dict.values()))
print('items: {}'.format(my_second_dict.items()))

### How to access the values of a dictionary and how to set new values?

In [None]:
# Start from an empty dictionary and add entries by assigning to a key.
recipe_dict = {}
recipe_dict['ratatouille'] = ['eggplant', 'tomato', 'zucchini', 'onion', 'olive oil', 'garlic']
recipe_dict['homemade pasta'] = ['semolina flour', 'olive oil', 'sea salt']
# Assigning to a key that already exists replaces the stored value.
recipe_dict['homemade pasta'] = ['wheat', 'olive oil', 'sea salt']
print(type(recipe_dict))
print(recipe_dict)
# Square brackets with a key look up the value stored under that key.
pasta_ingredients = recipe_dict['homemade pasta']
print('ingredients for homemade pasta: {}'.format(pasta_ingredients))


Accessing a nonexistent key will raise `KeyError`:

In [None]:
print(recipe_dict['soup'][0])

### Deleting elements of dictionary

In [None]:
my_dict = {'key1': 3, 'key2': 9, 'key3': 15}
del my_dict['key3']
print(my_dict)

# `del` raises KeyError for a missing key, so check membership first
# (hint: pop() and popitem() are alternatives).
key_to_delete = 'key3'
if key_to_delete not in my_dict:
    print('{key} is not in {dictionary}'.format(key=key_to_delete, dictionary=my_dict))  # remember format
else:
    del my_dict[key_to_delete]

In [None]:
squares = {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
print('dictionary before modification: ', squares)

# pop(key) removes the given key and returns the value that was stored under it
print(squares.pop(1)) # print() shows the returned value (1)
print(squares)

# popitem() removes one item and returns it as a (key, value) tuple;
# since Python 3.7 this is the most recently inserted item, not a random one
print(squares.popitem()) # returns (key, value)
print(squares)

# clear() removes all elements but keeps the (now empty) dictionary
squares.clear() #result {}
print(squares)

# del removes the name `squares` itself
del squares
print(squares) # intentional: raises NameError because the name no longer exists

### Dictionaries are mutable

Which other data types are mutable?

In [None]:
grades = dict.fromkeys(['week 1', 'week 2', 'week 3'], 88)
print(grades)

# Plain assignment does NOT copy a dictionary: both names now refer to the
# same dict object, so every change made through `updated_grades` is also
# visible through `grades` (dictionaries are mutable).
updated_grades = grades
updated_grades['week 2'] = 90
updated_grades['week 3'] = 92
updated_grades['week 4'] = 82
print('grades: {}\nupdated_grades: {}'.format(grades, updated_grades))
print('equal: {}'.format(grades == updated_grades))

In [None]:
print(id(grades))
print(id(updated_grades))

# So how can we fix the above issue?

If we prefer a copy, let's make a new `dict`:

In [None]:
grades = dict.fromkeys(['week 1', 'week 2', 'week 3'], 88)
# dict(...) builds a new dictionary object with the same key/value pairs
updated_grades = dict(grades)
# changing `grades` afterwards leaves the copy untouched
grades.update({'week 2': 90, 'week 3': 92})
print('grades: {}\nupdated_grades: {}'.format(grades, updated_grades))
print('equal: {}'.format(grades == updated_grades))

In [None]:
help(dict.fromkeys)

### `dict.get()`

Returns `None` if `key` is not in `dict`. However, you can also specify default return value which will be returned if `key` is not present in the `dict`.

In [None]:
my_dict = {'a': 1, 'b': 2, 'c': 3}
d = my_dict.get('d')
print('d: {}'.format(d))

d = my_dict.get('d', 'temporary values for d')
print('d: {}'.format(d))

In [None]:
# check whether a given key exists in dictionary

def key_check(dict_x, key):
    """Print the value stored under `key` in `dict_x`, or a notice when the key is absent."""
    # `in` on a dictionary tests its keys
    if key not in dict_x:
        print('Key is not present')
        return
    print('Present and the value is: ', dict_x[key])

dict_a = {'key1' : 78, 'key2' : 89, 'key3' : 90, 'key4' : 72}
key_check(dict_a, 'key2')

In [None]:
help(dict.get)

## `dict.pop()`

In [None]:
# Enzyme names mapped to their patterns (written as raw strings).
enzymes = {
    'EcoRI': r'GAATTC',
    'AvaII': r'GG(A|T)CC',
    'BisI': r'GC[ATGC]GC',
}
print('enzymes dictionary before pop method: {}'.format(enzymes))

# pop() removes the key and hands back the value that was stored under it
EcoRI = enzymes.pop('EcoRI')
print('EcoRI: {}'.format(EcoRI))
print('enzymes dictionary after removing EcoRI: {}'.format(enzymes))


# [What is a python raw string?](https://www.journaldev.com/23598/python-raw-string)

In [None]:
raw_s = r'Hi\nHello'
print(raw_s)

str_a = 'Hi\nHello'
print(str_a)

### `dict.setdefault()`

Returns the `value` of `key` defined as first parameter. If the `key` does not exist in the dict, adds `key` with `default value` (second parameter).

In [None]:
new_dict = {'a': 1, 'b': 2, 'c': 3}
a = new_dict.setdefault('a', 'default value')
d = new_dict.setdefault('d', 'default value')
print('a: {}\nd: {}\nnew_dict: {}'.format(a, d, new_dict))

### `dict.update()`

    1) dictionary gets updated, 2) two `dictionaries` get combined.

In [None]:
d = {1: "one", 2: "three"}

# key 2 already exists in `d`, so update() overwrites its value
d1 = {2: "two"}
d.update(d1)
print(d)

# key 3 is new, so update() adds it
d1 = {3: "three"}
d.update(d1)
print(d)

## the keys in the `dict` are immutable

Therefore, we can not use mutable data types as keys of a dictionary. Examples: list or dictionary:

In [None]:
 bad_dict = {['my_list'], 'value'}  # will generate TypeError

Values are mutable

In [None]:
good_dict = {'my key': ['Python', ' can be', 'difficult']}
print(good_dict)

In [None]:
#Introduction to bioinformatics: Using dictionaries

dna = "AATGATGAACGAC"
# all 16 possible dinucleotides (4 bases x 4 bases); each must appear exactly once
dinucleotides = ['AA','AT','AG','AC',
                 'TA','TT','TG','TC',
                 'GA','GT','GG','GC',
                 'CA','CT','CG','CC']
all_counts = {}  # will map each dinucleotide (key) to its count (value)
for dinucleotide in dinucleotides:  # for each element in the list
    # NOTE: str.count() counts non-overlapping occurrences only,
    # e.g. 'AAA'.count('AA') == 1
    count = dna.count(dinucleotide)
    print("count is " + str(count) + " for " + dinucleotide)
    all_counts[dinucleotide] = count
print(all_counts)

In [None]:
help(str.count)

# PART 5

## **[`for` loops](https://docs.python.org/3/tutorial/controlflow.html#for-statements)**

### For loops in lists 

Example: Our goal is to convert each value given in cm to inch and print the output.

#### Info: 
    1 inch = 2.54 cms
    the list of length values we have: length_cm=[158, 165, 168, 172, 183, 190]

#### Algorithm:
    For each element in length_cm[index]
       inc = length_cm[index]/2.54
       print cm and inch values
       Move to the next element

Every statement that belongs to the body of the for loop must have the same indentation. 
Python does not require a particular width, but it is best to press `<TAB>` once or the space bar four times.

In [None]:
CM_PER_INCH = 2.54  # 1 inch = 2.54 cm
separator = "------------------"

print(separator)  # beginning of the output
length_cm = [158, 165, 168, 172, 183, 190]
for item in length_cm:
    # everything indented under `for` runs once per element
    inc = item / CM_PER_INCH  # convert to inch
    print('value in cm: ', item, ',', 'value in inch: ', inc)
# back at the outer indentation level: this runs once, after the loop has finished
print(separator)  # end of the output

### `break`
using break, we can stop the loop when a given condition is satisfied.

In [None]:
for item in length_cm:
    if item == 165:
        break
    inc = item/2.54
    print(inc)

### `continue`
Continue to the next item without executing the lines occurring after `continue` inside the loop:

In [None]:
for item in length_cm:
    # 165 is an element of length_cm, so this iteration is skipped: the two
    # lines below `continue` do not run for it, and the loop moves on to the
    # next element. (A value that is not in the list would never trigger it.)
    if item == 165:
        continue
    inc = item/2.54
    print(inc)

### `enumerate()` 
enumerate() class can give us both the value and the index for that given value

In [None]:
help(enumerate)

In [None]:
for idx, val in enumerate(length_cm):
    print('idx: {}, value: {}'.format(idx, val))

In [None]:
# list() splits a string into a list of its single characters
Turkish_consonants = list("bcçdfgğhjklmnprsştvyz")
print(Turkish_consonants)

# manual counter: starts at 0 and is increased once per loop iteration
count = 0
for item in Turkish_consonants:
    print(count, item)
    count = count + 1
#What happens if count+=1 is outside of the loop?

In [None]:
#We can rewrite the above for loop more neatly by using enumerate() function.

for count, item in enumerate(Turkish_consonants):
    print(count, item)


In [None]:
# Write a python program to get sum of all items in a dictionary

def get_sum(dict_x):
    """Return the sum of all values stored in `dict_x` (0 for an empty dictionary)."""
    # The built-in sum() does the accumulation. A local variable named `sum`
    # would shadow that built-in inside the function, and looking up
    # dict_x[key] while iterating over items() repeats work that
    # .values() already provides.
    return sum(dict_x.values())

dict_a = {'key1' : 78, 'key2' : 89, 'key3' : 90, 'key4' : 72} 
get_sum(dict_a)

In [None]:
#python
# Python program to illustrate
# enumerate function
l1 = ["Moderna","BioNTech","Sinovac"]
s1 = "available vaccines"

# creating enumerate objects
obj1 = enumerate(l1)
obj2 = enumerate(s1)

print("Return type:",type(obj1))
print(list(enumerate(l1)))

# changing start index to 2 from 0
print(list(enumerate(s1,2)))



## for loops for dictionaries

In [None]:
recipe_dict = {}
# keys are compared character by character, so a stray trailing space
# ('ratatouille ') would create a different key than 'ratatouille'
recipe_dict['ratatouille'] = ['eggplant', 'tomato', 'zucchini', 'onion', 'olive oil', 'garlic']
recipe_dict['homemade pasta'] = ['semolina flour', 'olive oil', 'sea salt']
print(recipe_dict)

# items() yields (key, value) pairs, unpacked here into two loop variables
for key, value in recipe_dict.items():
    print(key, value)
    


## `range()`

By default, the range() function yields integers starting from 0 and increasing by 1, up to but not including the given stop value.

In [None]:
help(range)

In [None]:
for number in range(5):
    print(number) #range(5) returns 0-4, not 0-5.

In [None]:
for number in range(2, 5):
    print(number) 

#range() function's default initial value is 0,
#but we can specify a different initial value:
#range(2, 5) returns values from 2 to 5 (exclusive of 5):

In [None]:
for number in range(0, 10, 2):  # the third argument is the step size (increment), not the number of steps
    print(number)

In [None]:
# generate a random sequence
import random
alphabet = 'ACGT'
# ten independent draws from the alphabet, joined into one string
seq = ''.join(random.choice(alphabet) for _ in range(10))
print(seq)
print(len(seq))

# What is happening here?

In [None]:
len(range(10))

In [None]:
for number in range(10): 
    print(number)

In [None]:
help(random.choice)

In [None]:
print(random.choice('ACGT'))

In [None]:
#Let's practice some

my_numbers = [1,3,5,7,9,12,19,21]

#which numbers in the `my_numbers` list are multiples of 3? Please answer with a for loop.

for element in my_numbers:
    # a non-zero remainder means `element` is not a multiple of 3: skip it
    if element % 3 != 0:
        continue
    print(element)

In [None]:
#The sum of values in my_numbers list?

my_numbers = [1,3,5,7,9,12,19,21]

# running total: start at zero and add every element in turn
total = 0
for item in my_numbers:
    total = total + item
print('total:', total)


In [None]:
for num in range(10, 20):  # candidates 10..19 (20 is excluded)
    for i in range(2, num):  # trial divisors 2..num-1
        if num % i != 0:
            continue  # not a divisor, try the next one
        j = num / i
        print('%d is equal to %d * %d' % (num, i, j))
        break  # a divisor was found: move on to the next number
    else:
        # the `else` of a for loop runs only when the loop ended without `break`,
        # i.e. no divisor was found
        print(num, ' is a prime number')

# PART 6

## Functions

In [None]:
def hello_my_first_function():
    """Print a fixed greeting; takes no arguments and returns None."""
    print('Hello trgn515!')

# A function is an object too. type() reports its class; without type() the
# line would print the function object's repr under the label "type".
print('type: {}'.format(type(hello_my_first_function)))

hello_my_first_function()  # Let's call the function

### Arguments

In [None]:
def key_Python_libraries(name1, name2, name3):  # all three arguments are required
    """Print one sentence that lists the three given library names."""
    template = 'These are great libraries for data visualization in Python:  {}, {}, {}'
    print(template.format(name1, name2, name3))

key_Python_libraries('matplotlib', 'seaborn', 'Bokeh')

In [None]:
# functions that return something we define 

def strip_and_lowercase(original):
    """Return `original` lowercased, with leading and trailing whitespace removed."""
    lowered = original.lower()
    return lowered.strip()

ugly_string = '  MixED CaSe '
pretty = strip_and_lowercase(ugly_string)
print('pretty: {}'.format(pretty))

#In general, a function takes arguments (if any), performs some operations, and returns a value (or object). 
#The value that a function returns to the caller is generally known as the function’s return value. 
#All Python functions have a return value, either explicit or implicit. 

In [None]:
strip_and_lowercase(980)

In [None]:
def word_lowercase(word):
    """Return the string `word` converted to lowercase."""
    return word.lower()

mixed = 'AAgcgctgagtcTGC'
word_lowercase(mixed)

In [None]:
# Introduction to bioinformatics: Example to the use of dictionaries

dna = "AATGATGAACGAC"
# all 16 possible dinucleotides (4 bases x 4 bases); each must appear exactly once
dinucleotides = ['AA','AT','AG','AC',
                 'TA','TT','TG','TC',
                 'GA','GT','GG','GC',
                 'CA','CT','CG','CC']
all_counts = {}  # will map each dinucleotide (key) to its count (value)
for dinucleotide in dinucleotides:  # for each element in the list
    # NOTE: str.count() counts non-overlapping occurrences only,
    # e.g. 'AAA'.count('AA') == 1
    count = dna.count(dinucleotide)
    print("count is " + str(count) + " for " + dinucleotide)
    all_counts[dinucleotide] = count
print(all_counts)


In [None]:
# all 16 possible dinucleotides (4 bases x 4 bases); each must appear exactly once
dinucleotides = ['AA','AT','AG','AC',
                 'TA','TT','TG','TC',
                 'GA','GT','GG','GC',
                 'CA','CT','CG','CC']

def dinucleotide_counting(seq):
    """Count every dinucleotide in a DNA sequence.

    The sequence is upper-cased and scanned with a sliding window of width 2,
    so overlapping occurrences are counted (e.g. 'AAA' contains 'AA' twice).

    seq: DNA sequence as a string, in any letter case.
    Returns a dict that maps each entry of `dinucleotides` to its count;
    windows that are not in `dinucleotides` (e.g. containing 'N') are ignored.
    """
    #convert sequence to upper letter format
    seq = seq.upper()
    # a dictionary comprehension creates one zero-initialised counter per dinucleotide
    counting = {k: 0 for k in dinucleotides}
    # A sequence of length n has n - 1 windows of width 2 (start positions
    # 0 .. n-2); range(len(seq) - 2) would skip the last one.
    for i in range(len(seq) - 1):
        pair = seq[i:i+2]
        if pair in counting:
            counting[pair] += 1
    return counting

In [None]:
# all 16 possible dinucleotides (4 bases x 4 bases); each must appear exactly once
dinucleotides = ['AA','AT','AG','AC',
                 'TA','TT','TG','TC',
                 'GA','GT','GG','GC',
                 'CA','CT','CG','CC']

def dinucleotide_counting(seq):
    """Count every dinucleotide in a DNA sequence.

    Same task as a sliding-window count of width 2; here the counter
    dictionary is initialised with an explicit for loop.

    seq: DNA sequence as a string, in any letter case.
    Returns a dict that maps each entry of `dinucleotides` to its count;
    windows that are not in `dinucleotides` (e.g. containing 'N') are ignored.
    """
    #convert sequence to upper letter format
    seq = seq.upper()
    counting = {}
    for k in dinucleotides:
        counting[k] = 0  # one zero-initialised counter per dinucleotide
    # A sequence of length n has n - 1 windows of width 2 (start positions
    # 0 .. n-2); range(len(seq) - 2) would skip the last one.
    last_start = len(seq) - 2
    for i in range(last_start + 1):
        window = seq[i:i+2]
        if window in counting:
            counting[window] += 1
    return counting

In [None]:
dinucleotide_counting('AAgcgctgagtcTGC')

In [None]:
seq2 = 'AACTG'
range(len(seq2)-2)

In [None]:
seq = 'AATGC'
seq[2:4]

In [None]:
dinucleotide_counting("AATGATGAACGAC")

### Keyword arguments

In [None]:
def first_algorithm(first, second, third):
    """Return (first + second) raised to the power `third`."""
    base = first + second
    return base ** third

print(first_algorithm(2, 3, 4))

print(first_algorithm(first=2, second=3, third=4))

# using keyword arguments, we can change the ordering
print(first_algorithm(third=4, first=2, second=3))

# positional arguments and keyword arguments can also change places but we should start with positional arguments
print(first_algorithm(2, third=4, second=3))  

print(first_algorithm(third=4, second=3, 2))  #It will error out


### Default arguments 

In [None]:
def greet(name, msg="good morning!"):
    """
    Print a greeting for `name`, followed by the message `msg`.

    If the message is not provided,
    it defaults to "good morning!"
    """
    greeting = name + ', ' + msg
    print("Hello", greeting)


greet("Alex")
greet("Krishna", "how are you?")

**Do not use mutable objects as default arguments!**

In [176]:
def compute_patterns(inputs=[]):
    """Deliberate demonstration of the mutable-default-argument pitfall.

    Appends "some stuff" to `inputs` and returns a new list made of
    "a list is based on" followed by the items of `inputs`.

    The default list is created once, when the `def` statement runs, and is
    shared by all calls that omit `inputs`; because it is mutated in place,
    every such call returns one more "some stuff" than the previous one.
    """
    inputs.append("some stuff")  # mutates the caller's list, or the shared default
    patterns = ["a list is based on"] + inputs
    return patterns

In [178]:
compute_patterns()

['a list is based on', 'some stuff', 'some stuff']

In [184]:
def append(element, seq=[]):
    """Append `element` to `seq` and return `seq` (deliberately buggy demo).

    The default list is created once, when the `def` statement runs. Calls
    that omit `seq` all append to, and return, that same list object, so
    elements accumulate from one call to the next.
    """
    seq.append(element)  # in-place change of the caller's list, or of the shared default
    return seq

In [186]:
append(1) # seq is assigned to []

# This returns a reference to the *same* list as the default for `seq`

[1, 1]

In [187]:
append(4) # `seq` is now given [1] as a default!

[1, 1, 4]

Let's fix the above problem like this:

In [188]:
def append(element, seq=None):
    """Append `element` to `seq` and return the list.

    When `seq` is omitted (or None), a brand-new list is created on every
    call, so nothing is shared between calls.
    """
    target = [] if seq is None else seq
    target.append(element)
    return target

In [192]:
append(4)

[4]

#### [Lambda functions](https://realpython.com/python-lambda/) and [list comprehensions](https://www.w3schools.com/python/python_lists_comprehension.asp)

    lambda input(s): function #lambda is the keyword
    
    #Normal python function
    def a_name(x):
        return x+x
        
    #Lambda function
    lambda x: x+x
    
    #List comprehension
    Provides shorter syntax when you want to create a new list based on the values of an existing list

In [193]:
def gets_cube(x):
    """Return x multiplied by itself three times (x * x * x)."""
    squared = x*x
    return squared*x

# the same computation written as an anonymous (lambda) function bound to a name
cube = lambda x: (x*x)*x

In [194]:
print('lambda function output is: ', cube(2))
print('regular function output is: ', gets_cube(2))

lambda function output is:  8
regular function output is:  8


In [203]:
my_list = [18, -3, 5, 0, -1, 12]
new_list = list(filter(lambda x: x > 0, my_list)) #what is happening here?
print(new_list) # [18, 5, 12]


[18, 5, 12]


In [201]:
def pos_values(list_x):
    """Return a new list holding the items of `list_x` that are greater than 0, in their original order."""
    return [i for i in list_x if i > 0]

my_list = [18, -3, 5, 0, -1, 12]
pos_values(my_list)

[18, 5, 12]

In [202]:
help(filter)

Help on class filter in module builtins:

class filter(object)
 |  filter(function or None, iterable) --> filter object
 |  
 |  Return an iterator yielding those items of iterable for which function(item)
 |  is true. If function is None, return the items that are true.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.



In [208]:
list(map(lambda x: x.capitalize(), ['cat', 'dog', 'cow']))

['Cat', 'Dog', 'Cow']

In [205]:
# or we can use list comprehension
[x.capitalize() for x in ['cat', 'dog', 'cow']]

['Cat', 'Dog', 'Cow']

In [206]:
help(map)

Help on class map in module builtins:

class map(object)
 |  map(func, *iterables) --> map object
 |  
 |  Make an iterator that computes the function using arguments from
 |  each of the iterables.  Stops when the shortest iterable is exhausted.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.



In [209]:
even = lambda x: x%2 == 0

list(filter(even, range(11)))

[0, 2, 4, 6, 8, 10]

In [None]:
#list comprehension
[x for x in range(11) if x%2 == 0]

### Docstrings
Python docstrings are the string literals that appear right after the definition of a function, method, class, or module. They are used to document our code.

In [210]:
filename = 'fastq_runid_687811ddee13bdfed6e08d8f5d403e432f41ebd8_0_0.fastq'

import bilge_pype as bpy

def load_file(filename, run_info=None):
    '''
    Checks the file type and loads the data

    The type is inferred from the dot-separated parts of `filename`:
    a 'fasta' or 'fa' part selects bpy.read_fasta, a 'fastq' or 'fq' part
    selects bpy.read_fastq.

    filename: name/path of the file to load
    run_info: accepted but not used by this function
    Returns whatever the selected bpy reader returns.
    Raises ValueError when the name has neither a fasta nor a fastq part.
    '''
    file_format = filename.split('.')
    # check if it is a fasta file
    if ('fasta' in file_format) or ('fa' in file_format):
        return bpy.read_fasta(filename)
    # check if it is a fastq file
    if ('fastq' in file_format) or ('fq' in file_format):
        return bpy.read_fastq(filename)
    # Without this explicit error, an unrecognised name fell through to
    # `return A` with A never assigned (UnboundLocalError).
    raise ValueError(
        'unsupported file format for {!r}: expected fasta/fa or fastq/fq'.format(filename)
    )

load_file(filename)

Unnamed: 0,sequence,quality,id
0,UCAGUAAGAUCGAAAGAUUGGCUAAAGUUGAACCAAUUGAGAAUCG...,"%+$-&$*4+$&9=8>>=82(&&/,.2/1;:76318>/=:<8=40+0...",6c73b496-0d9e-4d3d-9b05-ec7d29e8bb90 runid=687...
1,AGGUGGACUGAUCGAGUCAAGACUCCAUACGUUUUGCCCGUUCAAG...,$'#(&$($%%#0&'--%()-6/4&&..86640<5/.522176/0A8...,5a6ffec3-a807-47ae-b91b-8340b4f4344d runid=687...
2,UGUCGGUUUGAGAACUGGUCAAUCAAGACUGGUGCUCCAAGCUAGA...,"(%)-,*5)0/89C889;:<9:9-67EC<>;=;=:%))+%''$')*5...",da79375e-4420-4fd5-a0f1-195fd1f355fa runid=687...
3,UCCAGCUAGAUUCGAAAGAUUGGCUAAGUUGAACCAAUUGUUGAGA...,%/163.1&+(4%97=;><A;5=;>B:F=<>9CEA6:=4><A<:79=...,2d5c970a-5212-4e21-b074-70d6b64c6430 runid=687...
4,GAAGAUUGGCUAAGUGAACCAAUUGUGAAGAAAUCGAAGAAGAACU...,(-735:./-93542.92/9@B>891-%%0.;<&>87@KAGJ6<56;...,a6923647-414d-465e-ab68-1edab9b2b32a runid=687...
...,...,...,...
3898,GAUUAAAGUUGUCGGUUAAAGAACUGGUCAAUUCCAAGACUGGUGC...,"*89::7=<:==:464:.2278<768-):,33$$(6-B=9)3<;B>7...",80698b86-e0df-4d3f-ad79-0dbb2c60d70f runid=687...
3899,UGCUCCUUAUUGGUGCAUAGACCUUAAAAAAAU,46+5)6))+);6486*&%3308/;05875:<-8,42a1601f-2cdc-4c29-b4ac-0e69e28e09f1 runid=687...
3900,UCAUAUUUUUCAAAAAU,"'('(+,&(''*4644,(",2b332d08-7a63-4666-881c-c3173bceb43c runid=687...
3901,ACUCAUAUUUCAAAAAAAAUCUUCUCUCAUUUUCAUAUAUUUACAA...,"$$'.%(&**''48<<99&%&$&&$()*,,*-&)(%'*))'((#$14...",f8d114e4-8809-4d9e-806e-d96787a87c49 runid=687...


In [211]:
import bilge_pype as bpy
help(bpy.add_ONT_header)

Help on function add_ONT_header in module bilge_pype:

add_ONT_header(df)
    Parses the fastq header for ONT specific information and adds it to the dataframe



# PART 7: Tuples

    ordered
    immutable
    allows duplicate values
    
So maybe use when you do not want your data to be changed?
Can also use as dictionary keys. 

In [242]:
# parentheses with comma-separated items create a tuple directly
fav_fruits = ("strawberry", "durian", "tangerine")
print('tuple: {}, type: {}'.format(fav_fruits, type(fav_fruits)))
second_item = fav_fruits[1]  # indexing starts at 0, so index 1 is the second item
print(second_item)

tuple: ('strawberry', 'durian', 'tangerine'), type: <class 'tuple'>
durian


In [212]:
help(tuple)

Help on class tuple in module builtins:

class tuple(object)
 |  tuple() -> empty tuple
 |  tuple(iterable) -> tuple initialized from iterable's items
 |  
 |  If the argument is a tuple, the return value is the same object.
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __contains__(self, key, /)
 |      Return key in self.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __getitem__(self, key, /)
 |      Return self[key].
 |  
 |  __getnewargs__(...)
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __hash__(self, /)
 |      Return hash(self).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __le__(self, value, /)
 |      Return self<=value.
 |  
 |  __len__(self, /)
 |      Return len(self).
 |  
 |  __lt__(self, value, /)
 |      Return self

In [241]:
#tuple
# tuple() accepts a single iterable, so tuple("a", "b", ...) fails on
# construction; a tuple literal is used here so that the demo reaches the
# item assignment.
fav_fruits_t = ("strawberry", "durian", "tangerine", "strawberry")
try:
    fav_fruits_t[0] = "blueberry"  # tuples are immutable: item assignment is not allowed
except TypeError as error:
    # catch and display the expected error so the rest of the notebook keeps running
    print('TypeError: {}'.format(error))
fav_fruits_t

TypeError: 'tuple' object does not support item assignment

In [224]:
#list 
fav_fruits_l = ["strawberry", "durian", "tangerine", "strawberry"]
fav_fruits_l.count("strawberry")


2

In [223]:
tuple_sample = ("yellow", "green", "magenta", "cyan", "cyan")
tuple_sample.count("yellow")

1

In [225]:
fav_fruits_l[0] = "blueberry"
fav_fruits_l

['blueberry', 'durian', 'tangerine', 'strawberry']

# PART 8: Sets
    unordered, 
    mutable (items can be added or removed, but each item must itself be immutable/hashable), 
    do not allow duplicate values
    
So maybe use sets for when you need uniqueness for the elements?

In [235]:
fav_fruits_s = {"strawberry", "durian", "tangerine", "starfruit"}
print('set: {}, type: {}'.format(fav_fruits_s, type(fav_fruits_s)))


set: {'tangerine', 'strawberry', 'starfruit', 'durian'}, type: <class 'set'>


In [233]:
print(fav_fruits_s[1])

TypeError: 'set' object does not support indexing

In [236]:
fav_fruits_s = {"strawberry", "durian", "tangerine", "strawberry"}
fav_fruits_s.count("strawberry")

AttributeError: 'set' object has no attribute 'count'

In [237]:
help(set)

Help on class set in module builtins:

class set(object)
 |  set() -> new empty set object
 |  set(iterable) -> new set object
 |  
 |  Build an unordered collection of unique elements.
 |  
 |  Methods defined here:
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __iand__(self, value, /)
 |      Return self&=value.
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __ior__(self, value, /)
 |      Return self|=value.
 |  
 |  __isub__(self, value, /)
 |      Return self-=value.
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __ixor__(self, value, /)
 |      Re

In [238]:
set1 = {"abcd", 15, False, 40, "female"} #can contain string, integer, boolean all together
print(set1) #unordered, so can not be indexed

{False, 'abcd', 40, 'female', 15}


In [239]:
# we can use set constructor

set1 = set("abcd", 15, False, 40, "female") # will error out
print(set1)

TypeError: set expected at most 1 arguments, got 5

In [240]:
set1 = set(("abcd", 15, False, 40, "female"))
print(set1)

{False, 'abcd', 40, 'female', 15}


# PART 9: [Classes](https://docs.python.org/3/tutorial/classes.html)

A class is a structure in Python that can be used as a blueprint to create objects that have

   
    1- Methods (functions) that belong to objects of the class.
    2- Instance variables referring to data.
    3- A special method called a constructor. This method will be called automatically whenever a new object of the class is created, and must have the name __init__.



In [None]:
help(str)

In [250]:
# let's create a class in python

class Student():
    # class-level defaults; __init__ sets instance attributes with the same names
    name = None
    grade = None
    age = None

    def __init__(self, name, age, grade):
        '''initializes data in the student object'''
        self.name, self.age, self.grade = name, age, grade

    def print(self):
        '''prints info about self'''
        text = self.str()
        print(text)

    def str(self):
        '''return string format of the data in the object'''
        # inside a method, the bare name `str` is the built-in type, not this method
        lines = [
            'name:' + self.name,
            'age:' + str(self.age),
            'grade:' + str(self.grade),
        ]
        # one field per line, each line terminated by a newline
        return '\n'.join(lines) + '\n'

Sam = Student('Sam',13,'A++')

In [251]:
s = Sam.str()
s

'name:Sam\nage:13\ngrade:A++\n'

In [252]:
Sam.print()

name:Sam
age:13
grade:A++



In [253]:
help(Sam)

Help on Student in module __main__ object:

class Student(builtins.object)
 |  Methods defined here:
 |  
 |  __init__(self, name, age, grade)
 |      initializes data in the student object
 |  
 |  print(self)
 |      prints info about self
 |  
 |  str(self)
 |      return string format of the data in the object
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  age = None
 |  
 |  grade = None
 |  
 |  name = None



#### Let's define a class called Gene

each Gene object will have (1) an id (string) and (2) a sequence (also a string). 

We want to define a class called Gene (class names begin with a capital letter), where each Gene object will have (1) an id (string) and (2) a sequence (also a string). When creating a Gene object, we should define its id and sequence by passing them as parameters to the __init__ method.

    class Gene:
        def __init__(self, args[, ...])
        #more code

so it would look like this:

    class Gene:
        def __init__(self, gene_id, gene_seq):
            self.id  = gene_id
            self.sequence = gene_seq
        

Use the __init__() function to assign values for gene_id and gene_seq

The arguments of __init__ are required input when creating a new instance of this class, except for 'self'.

In [260]:
# create the Gene class below
class Gene:
    """A gene described by an identifier, a sequence and a name."""

    def __init__(self, gene_id, gene_seq, gene_name):
        """Store the identifier, sequence and name on the new object."""
        self.gene_id  = gene_id
        self.gene_seq = gene_seq
        self.gene_name = gene_name

    def print_id(self):
        """Print the gene identifier."""
        # the attribute set in __init__ is `gene_id`; `self.id` does not exist
        # and would raise AttributeError
        print('my ID is: ', str(self.gene_id))

    def print_length(self):
        """Print the length of the gene sequence."""
        print('my length is', str(len(self.gene_seq)))

a = Gene("gi_788876969750", "GCGCTGATGCGTGATGCTCC", "COI")
print(a.gene_id)
print(a.gene_seq)

gi_788876969750
GCGCTGATGCGTGATGCTCC


The __init__() function is called automatically every time the class is being used to create a new object.

In [263]:
gene1 = Gene("gi_788876969750", "gacttttacc", "COI")
print(gene1)
print(gene1.gene_id)
print(gene1.gene_seq)
print(gene1.gene_name)

<__main__.Gene object at 0x7f9ad065e710>
gi_788876969750
gacttttacc
COI


### [Class and instance variables](https://docs.python.org/3/tutorial/classes.html#class-and-instance-variables)

Instance variables are for data unique to each instance and class variables are for attributes and methods shared by all instances of the class:

![image.png](attachment:10fca30c-3a23-46c2-baf9-c5c51ee4996f.png)


### Object Methods

Objects can also contain methods. Methods in objects are functions that belong to the object.

For example, in the case of our 'Gene' class, we may want to check the base composition or GC content of a given sequence. Let us create a method in the Gene class:

Note that each method takes 'self' as an argument along with the arguments required when calling this method.


In [266]:
class Gene:
    """A gene with an identifier and a sequence, plus base-composition helpers."""

    def __init__(self, gene_id, gene_seq):
        """Store the identifier and the sequence on the new object."""
        self.gene_id = gene_id
        self.gene_seq = gene_seq

    def base_composition(self, base):
        """Return how many positions of the sequence are equal to `base` (case-sensitive)."""
        return sum(1 for base_i in self.gene_seq if base_i == base)

    def gc_content(self):
        """Return the fraction of positions that are G or C, in upper or lower case."""
        gc_count = sum(self.base_composition(letter) for letter in ("G", "g", "C", "c"))
        return gc_count / float(len(self.gene_seq))
        
# Create an object and compute its summary numbers before printing anything
geneA = Gene("AY342", "CATttTGAC")
geneA_t = sum(geneA.base_composition(letter) for letter in ("T", "t"))  # both letter cases
geneA_gc = geneA.gc_content()

print('geneA: {}, type; {}'.format(geneA, type(geneA)))
print('geneA T base content: ', geneA_t)
print('geneA GC content: ', geneA_gc)

geneA: <__main__.Gene object at 0x7f9ad0663c88>, type; <class '__main__.Gene'>
geneA T base content:  4
geneA GC content:  0.3333333333333333


The `self` parameter is a reference to the current instance of the class, and is used to access the variables and methods that belong to that instance. It does not have to be named `self`; you can call it anything you want, but it has to be the first parameter of every instance method in the class.

In [None]:
# Show the help page Python builds for the Gene class
help(Gene)

In [267]:
# Create another gene object, TH134 (COI_gene_midge), and summarise it
TH134 = Gene('TH134', 'gacttttacc')

geneTH134_gc = TH134.gc_content()
geneTH134_t = sum(TH134.base_composition(letter) for letter in ("T", "t"))  # both letter cases

print('geneTH134 GC content: ', geneTH134_gc)
print('geneTH134 T base content: ', geneTH134_t)

geneTH134 GC content:  0.4
geneTH134 T base content:  4


In [None]:
# Gene.__init__ stores the sequence as `gene_seq`; there is no `sequence` attribute
TH134.gene_seq

In [None]:
# Gene.__init__ stores the identifier as `gene_id`; there is no `id` attribute
TH134.gene_id

### When should you consider creating a class?

    - When you have many different types of data for a similar concept, and you would like to keep them organized into single objects as instance variables.
    - When you have many different functions for a similar concept, and you would like to keep them organized into single objects as methods.
    - When you have a concept that is simple now, but you suspect might increase in complexity in the future as you add to it. Like functions, classes enable code to be reused, and it is easy to add new methods and instance variables to classes when needed.


# PART 10: BONUS!

## The importance of pointers in Python: What is the point?

So far, we have seen that everything in Python is indeed an object. Each object contains at least three pieces of data:

    Reference point
    Type
    Value


A variable in Python does not hold a value directly; it refers to an object stored at some memory address. For example, after `x = 1` the variable `x` points to the memory address where the integer object `1` is stored.

# How do we find the memory address that the variable x points to?

    id() returns the object’s memory address.
    is returns True if and only if two objects have the same memory address.


In [268]:
# Build a dictionary that holds one list, and bind y to a plain integer
x = {'val1': list(range(1, 10))}
y = 1

# id() gives the address of the object each name currently refers to
print('address of x', id(x))
print('value of x', x)
print('address of y', id(y))
print('value of y', y)

address of x 140302900714952
value of x {'val1': [1, 2, 3, 4, 5, 6, 7, 8, 9]}
address of y 140308148680608
value of y 1


In [269]:
# Rebind y to the list stored under x['val1'].
# This is dangerous: no copy is made, so y and x['val1'] are two names for the same list
# and a change made through either name is visible through the other.

y = x['val1']
print('address of x', id(x))
print('value of x', x)
print('address of val1 in dict x', id(x['val1']))
print('address of y', id(y))
print('value of y', y)

address of x 140302900714952
value of x {'val1': [1, 2, 3, 4, 5, 6, 7, 8, 9]}
address of val1 in dict x 140302900904968
address of y 140302900904968
value of y [1, 2, 3, 4, 5, 6, 7, 8, 9]


In [271]:
y[2] = 99  # let's change the value: this mutates the list in place through the name y
print('address of x', id(x))
print('value of x', x)  # the list inside x shows the change as well
print('address of val1 in dict x', id(x['val1']))
print('address of y', id(y))  # what happened to y's reference point?
# `is` checks identity (same object); `==` would also be True for an independent copy
print(y is x['val1'])
# the address identifies the object a name refers to;
# it does not change unless the name is rebound to a different object

print('value of y', y)

address of x 140302900714952
value of x {'val1': [1, 2, 99, 4, 5, 6, 7, 8, 9]}
address of val1 in dict x 140302900904968
address of y 140302900904968
True
value of y [1, 2, 99, 4, 5, 6, 7, 8, 9]


In [277]:
# Put a fresh list into x, then give y its own shallow copy of it
# (x['val1'].copy() would build the same independent list)
x['val1'] = list(range(1, 10))
y = list(x['val1'])  # y is now a new list object and no longer depends on x
print('address of x', id(x))
print('value of x', x)
print('address of val1 in dict x', id(x['val1']))
print('address of y', id(y))  # now the address of y is independent of x
print('value of y', y)

address of x 140302900714952
value of x {'val1': [1, 2, 3, 4, 5, 6, 7, 8, 9]}
address of val1 in dict x 140302902870280
address of y 140302901957512
value of y [1, 2, 3, 4, 5, 6, 7, 8, 9]


In [278]:
y[2] = 99  # y was built as a separate list, so this change is not visible through x['val1']
print('address of x', id(x))
print('value of x', x)
print('address of val1 in dict x', id(x['val1']))
print('address of y', id(y))
print('value of y', y)

address of x 140302900714952
value of x {'val1': [1, 2, 3, 4, 5, 6, 7, 8, 9]}
address of val1 in dict x 140302902870280
address of y 140302901957512
value of y [1, 2, 99, 4, 5, 6, 7, 8, 9]


In [173]:
# We actually wanted david, brooke and bilge to each start out with the values of x

import pandas as pd
x = pd.DataFrame([[None, None]], columns=['gender', 'age'])
david, brooke, bilge = x, x, x

brooke.gender = 'female'
brooke.age = 21
bilge.gender = 'kadin'
bilge.age = 50
david.gender = 'male'
david.age = 105

# what is going on here?

In [174]:
# Print each person's name followed by the DataFrame bound to that name
y = ['david', 'brooke', 'bilge']
x = [david, brooke, bilge]
for i in range(len(y)):
    print(y[i], x[i], sep='\n')

david
  gender  age
0   male  105
brooke
  gender  age
0   male  105
bilge
  gender  age
0   male  105


In [None]:
# david, brooke and bilge were all bound to the same DataFrame x, so compare their ids
print('memory address of david', id(david))
print('memory address of brooke', id(brooke))
print('memory address of bilge', id(bilge))

In [None]:
import pandas as pd
# Template frame: a single row whose gender and age are not set yet
x = pd.DataFrame([[None, None]], columns=['gender', 'age'])
# initialize with copies: every name gets its own DataFrame
david, brooke, bilge = (x.copy() for _ in range(3))

david.gender = 'male'
david.age = 43
brooke.gender = 'female'
brooke.age = 28
bilge.gender = 'female'
bilge.age = 79

In [None]:
# Print each person's name followed by the DataFrame bound to that name
y = ['david', 'brooke', 'bilge']
x = [david, brooke, bilge]
for i in range(len(y)):
    print(y[i], x[i], sep='\n')

In [None]:
# Each name was bound to its own x.copy() this time, so compare their ids again
print('memory address of david', id(david))
print('memory address of brooke', id(brooke))
print('memory address of bilge', id(bilge))