## Study session 11 - classes
### BIOINF 575 - Fall 2020


SOLUTION

### Resources - classes and object oriented programming

https://docs.python.org/3/tutorial/classes.html   
https://www.python-course.eu/python3_properties.php   
https://www.tutorialspoint.com/python3/python_classes_objects.htm   
https://www.w3schools.com/python/python_classes.asp   
https://www.geeksforgeeks.org/python-classes-and-objects/  
https://www.datacamp.com/community/tutorials/property-getters-setters#property


____

#### Below is the Pileup class to provide functionality for a pileup object that stores information regarding alignment at a specific genomic position.


In [67]:
# Pileup uses Counter so we need to import before we can create an object of that type
# The class definition will run with no error, but we will not be able to create a Pileup object without importing Counter

from collections import Counter

In [68]:
# Pileup Object
class Pileup:
    """
    Contains a counter of nucleotides aligned at a genomic position and
    computes depth and consensus for a genomic position

    Attributes:
    counts (Counter):  counter of all nucleotides aligned at the genomic position
    depth (int): total number of reads aligned at the genomic position
    consensus (str): most common nucleotide, "" while the depth is 0
    """

    # method called when we initialize an object of type Pileup
    # p = Pileup() or p = Pileup(Counter("AAA")) will call this method
    def __init__(self, counts=None):
        # "is None" is the idiomatic way to test for a missing argument
        # the given Counter is stored as it is (it is not copied)
        self.counts = Counter() if counts is None else counts
        self.__refresh()

    # helper shared by __init__ and update so that depth and consensus
    # are always computed with the same rule
    # the __ prefix hides it outside of the class definition
    def __refresh(self):
        """
        Recompute depth and consensus from the current counts
        """
        self.depth = sum(self.counts.values())
        if self.depth == 0:
            # using __ at the beginning of the attribute name makes it
            # inaccessible (under that name) outside of the class definition
            self.__consensus = ""
        else:
            # most_common(1) returns a list with one (nucleotide, count) pair
            self.__consensus = self.counts.most_common(1)[0][0]

    # see more examples in the last link in RESOURCES
    # property (the @property above the function makes it so)
    # should be used without () when called for an object e.g. p.consensus
    # getter that allows us to display the value of a hidden attribute outside of the class definition
    @property
    def consensus(self):
        """
        Get the consensus nucleotide for the pileup
        """
        return self.__consensus

    # method that implements what the string representation of a Pileup object is
    # for a Pileup object p this method is called when print(p) or str(p) are used
    def __str__(self):
        return f"Pileup({self.counts})"

    # method that implements the representation of a Pileup object
    # that allows you to create another object identical to the one you are calling it for
    # it is called when you use the name of the object to see its value
    # (typing p in a cell) or when repr(p) is used
    def __repr__(self):
        return f"Pileup({self.counts})"

    # used to implement the addition operator
    # if p1 and p2 are Pileup objects this is called when we compute p1+p2
    # self will be assigned p1 and p will be assigned p2 in the __add__ method
    def __add__(self, p):
        c = self.counts.copy() # make a copy otherwise we are updating p1 when we do p1+p2
        c.update(p.counts)
        return Pileup(c)

    # the first argument of every method in a class (self) will be assigned the value of the object we call it from
    # e.g if variable p contains a pileup object the statement p.update("AAA") will call this method below and assign p to self and "AAA" to seq
    def update(self, seq):
        """
        Update the counts, depth and consensus for the pileup
        given a sequence of nucleotides to add to the pileup

        An empty sequence added to an empty pileup leaves the depth at 0
        and the consensus at "" instead of raising an IndexError
        """
        self.counts.update(seq)
        self.__refresh()



### We copied and ran all or part of the below code for each exercise to test we did not break something while trying to implement the exercise requirement

In [69]:
p = Pileup()

In [70]:
p

Pileup(Counter())

In [71]:
# hidden attribute, only accessible under this name inside the class definition
p.__consensus

AttributeError: 'Pileup' object has no attribute '__consensus'

In [72]:
#dir(p)

In [73]:
# Nothing in Python is truly private; internally the hidden attribute is stored under the name _Pileup__consensus
p._Pileup__consensus = 3

In [74]:
p.consensus

3

In [75]:
p.counts

Counter()

In [76]:
p.depth

0

In [77]:
# we have only implemented a getter (a way to retrieve the value)
# a setter (a way to set the value) was not implemented 
# we want this to be changed only internally when the counts change
p.consensus = "T"

AttributeError: can't set attribute

In [78]:
# we update the counts but the consensus and depth stay the same
# we are going to make counts a property so we can also update the depth and consensus when we assign a value to the counts
# see the class definition below
p.counts = Counter("ACGTTTT")

In [79]:
p.counts

Counter({'A': 1, 'C': 1, 'G': 1, 'T': 4})

In [80]:
p.depth

0

In [81]:
p.consensus

3

In [82]:
p.depth = 40

In [83]:
p.depth

40

In [84]:
p.update("ACGTTGGGG")

In [85]:
p.counts

Counter({'A': 2, 'C': 2, 'G': 6, 'T': 6})

In [86]:
p.consensus

'G'

In [87]:
p.depth

16

### <font color = "red">Exercise</font>
#### - Make the Pileup attribute depth read only
#### - Test the change

In [88]:
# Pileup Object
class Pileup:
    """
    Contains a counter of nucleotides aligned at a genomic position and
    computes depth and consensus for a genomic position

    Attributes:
    counts (Counter):  counter of all nucleotides aligned at the genomic position
    depth (int): total number of reads aligned at the genomic position (read only)
    consensus (str): most common nucleotide, "" while the depth is 0 (read only)
    """

    def __init__(self, counts=None):
        # "is None" is the idiomatic way to test for a missing argument
        # the given Counter is stored as it is (it is not copied)
        self.__counts = Counter() if counts is None else counts
        self.__refresh()

    # helper shared by __init__, the counts setter and update so that
    # depth and consensus are always computed with the same rule
    def __refresh(self):
        """
        Recompute the hidden depth and consensus from the hidden counts
        """
        self.__depth = sum(self.__counts.values())
        if self.__depth == 0:
            self.__consensus = ""
        else:
            # most_common(1) returns a list with one (nucleotide, count) pair
            self.__consensus = self.__counts.most_common(1)[0][0]

    @property # getter
    def consensus(self):
        """
        Get the consensus nucleotide for the pileup
        """
        return self.__consensus

    @property # getter
    def depth(self):
        """
        Get the depth for the pileup - number of reads that aligned at that position
        """
        return self.__depth

    @property # getter
    def counts(self):
        """
        Get the counts for the pileup - frequencies of the nucleotides that aligned at that position
        """
        return self.__counts

    @counts.setter # property setter used when we assign a value to the property: p.counts = Counter("AACG")
    def counts(self, counts_value):
        # assigning an empty Counter resets depth to 0 and consensus to ""
        # instead of raising an IndexError
        self.__counts = counts_value
        self.__refresh()

    def __str__(self):
        return f"Pileup({self.__counts})"

    def __repr__(self):
        return f"Pileup({self.__counts})"

    def __add__(self, p):
        c = self.__counts.copy() # make a copy otherwise we are updating p1 when we do p1+p2
        c.update(p.counts)
        return Pileup(c)

    def update(self, seq):
        """
        Update the counts, depth and consensus nucleotide for the pileup
        given a sequence of nucleotides to add to the pileup
        """
        self.__counts.update(seq)
        self.__refresh()




#### We also turned counts into a property in the class definition above, so that the consensus and depth are updated whenever we assign a value to counts

In [89]:
p = Pileup()

In [90]:
p

Pileup(Counter())

In [91]:
p.consensus

''

In [92]:
p.counts

Counter()

In [93]:
p.depth

0

In [94]:
p.consensus = "T"

AttributeError: can't set attribute

In [95]:
p.counts = Counter("ACGTTTT")

In [96]:
p.counts

Counter({'A': 1, 'C': 1, 'G': 1, 'T': 4})

In [97]:
p.depth

7

In [98]:
p.consensus

'T'

In [99]:
# depth is now read only as intended
# it can only be computed using the counts and gets updated every time the counts change

p.depth = 40

AttributeError: can't set attribute

In [100]:
p.depth

7

In [101]:
p.update("ACGTTGGGG")

In [102]:
p.counts

Counter({'A': 2, 'C': 2, 'G': 6, 'T': 6})

In [103]:
p.consensus

'G'

In [104]:
p.depth

16

### <font color = "red">Exercise</font>
#### - Implement a dunder method so that we can apply the `len()` function to the pileup object 
#### - Test the change

In [105]:
# we did not tell our class what to do when we apply the function len to our object
# we can do that by implementing the __len__ method; see the class definition below
len(p)

TypeError: object of type 'Pileup' has no len()

In [106]:
# Pileup Object
class Pileup:
    """
    Contains a counter of nucleotides aligned at a genomic position and
    computes depth and consensus for a genomic position

    Attributes:
    counts (Counter):  counter of all nucleotides aligned at the genomic position
    depth (int): total number of reads aligned at the genomic position (read only)
    consensus (str): most common nucleotide, "" while the depth is 0 (read only)
    """

    def __init__(self, counts=None):
        # "is None" is the idiomatic way to test for a missing argument
        # the given Counter is stored as it is (it is not copied)
        self.__counts = Counter() if counts is None else counts
        self.__refresh()

    # helper shared by __init__, the counts setter and update so that
    # depth and consensus are always computed with the same rule
    def __refresh(self):
        """
        Recompute the hidden depth and consensus from the hidden counts
        """
        self.__depth = sum(self.__counts.values())
        if self.__depth == 0:
            self.__consensus = ""
        else:
            # most_common(1) returns a list with one (nucleotide, count) pair
            self.__consensus = self.__counts.most_common(1)[0][0]

    @property # getter
    def consensus(self):
        """
        Get the consensus nucleotide for the pileup
        """
        return self.__consensus

    # no setter is defined for consensus on purpose, it is computed from the counts
    # if we wanted one it would look like this:
    # @consensus.setter # property setter
    # def consensus(self, cons):
    #    self.__consensus = cons

    @property # getter
    def depth(self):
        """
        Get the depth for the pileup - number of reads that aligned at that position
        """
        return self.__depth

    @property # getter
    def counts(self):
        """
        Get the counts for the pileup - frequencies of the nucleotides that aligned at that position
        """
        return self.__counts

    @counts.setter # property setter
    def counts(self, counts_value):
        # assigning an empty Counter resets depth to 0 and consensus to ""
        # instead of raising an IndexError
        self.__counts = counts_value
        self.__refresh()

    def __str__(self):
        return f"Pileup({self.__counts})"

    def __repr__(self):
        return f"Pileup({self.__counts})"

    def __add__(self, p):
        c = self.__counts.copy() # make a copy otherwise we are updating p1 when we do p1+p2
        c.update(p.counts)
        return Pileup(c)

    # called when the len function is applied to a Pileup object
    # e.g. len(p)
    def __len__(self):
        return self.__depth

    # this is a method to test that the first argument in the method is self
    # whatever its name is (arg1 here), it is assigned the object the method is called on
    def test(arg1, x):
        print(x)

    def update(self, seq):
        """
        Update the counts, depth and consensus nucleotide for the pileup
        given a sequence of nucleotides to add to the pileup
        """
        self.__counts.update(seq)
        self.__refresh()




In [107]:
p = Pileup()

In [108]:
p

Pileup(Counter())

In [109]:
p.consensus

''

In [110]:
p.counts

Counter()

In [111]:
p.depth

0

In [112]:
p.consensus = "T"

AttributeError: can't set attribute

In [113]:
p.counts = Counter("ACGTTTT")

In [114]:
p.counts

Counter({'A': 1, 'C': 1, 'G': 1, 'T': 4})

In [115]:
p.depth

7

In [116]:
p.consensus

'T'

In [117]:
p.depth = 40

AttributeError: can't set attribute

In [118]:
p.depth

7

In [119]:
p.update("ACGTTGGGG")

In [120]:
p.counts

Counter({'A': 2, 'C': 2, 'G': 6, 'T': 6})

In [121]:
p.consensus

'G'

In [122]:
p.depth

16

In [123]:
len(p)

16

In [124]:
len(5)

TypeError: object of type 'int' has no len()

In [125]:
# from the class we will apply the test method 
    ## this is a method to test that the first argument in the method is self
    #  def test(arg1, x):
    #    print(x)
# When it is called on an object, the test method takes only one argument (x)
# the first argument (arg1) is not counted 
# since it is automatically assigned the object the method is called on, in this case p
    
p.test()

TypeError: test() missing 1 required positional argument: 'x'

In [126]:
p.test(5)

5


In [127]:
p.test(p)

Pileup(Counter({'G': 6, 'T': 6, 'A': 2, 'C': 2}))


In [128]:
# When it is called on an object, the test method takes only one argument (x)
# the first argument (arg1) is not counted 
# since it is automatically assigned the object the method is called on

p.test(3,4)

TypeError: test() takes 2 positional arguments but 3 were given

### <font color = "red">Exercise</font>
#### - Create a list of 10 Pileup objects
#### - Update the pileup objects with the corresponding sequences from the following list.

In [129]:
seq_list = ["CCCCATTTG","CATTTAG","GGGATC","AACTGA", "GCCCTAA", "CCCCATTTG", "AAAAAC","TTTTTTG","GGGGAT", "TTTTA"]

In [130]:

pileup_list = [Pileup(Counter(seq_list[i])) for i in range(10)]

In [131]:
pileup_list

[Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'T': 3, 'A': 2, 'C': 1, 'G': 1})),
 Pileup(Counter({'G': 3, 'A': 1, 'T': 1, 'C': 1})),
 Pileup(Counter({'A': 3, 'C': 1, 'T': 1, 'G': 1})),
 Pileup(Counter({'C': 3, 'A': 2, 'G': 1, 'T': 1})),
 Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'A': 5, 'C': 1})),
 Pileup(Counter({'T': 6, 'G': 1})),
 Pileup(Counter({'G': 4, 'A': 1, 'T': 1})),
 Pileup(Counter({'T': 4, 'A': 1}))]

In [132]:
# creating a list of empty pileups
pileup_list = [Pileup() for i in range(10)]

In [133]:
pileup_list

[Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter())]

In [134]:
# updating each empty pileup in the list with the corresponding sequence in seq_list

for i in range(10):
    pileup_list[i].update(seq_list[i])

In [135]:
pileup_list

[Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'T': 3, 'A': 2, 'C': 1, 'G': 1})),
 Pileup(Counter({'G': 3, 'A': 1, 'T': 1, 'C': 1})),
 Pileup(Counter({'A': 3, 'C': 1, 'T': 1, 'G': 1})),
 Pileup(Counter({'C': 3, 'A': 2, 'G': 1, 'T': 1})),
 Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'A': 5, 'C': 1})),
 Pileup(Counter({'T': 6, 'G': 1})),
 Pileup(Counter({'G': 4, 'A': 1, 'T': 1})),
 Pileup(Counter({'T': 4, 'A': 1}))]

In [136]:
pileup_list = [Pileup() for i in range(10)]

In [137]:
pileup_list

[Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter()),
 Pileup(Counter())]

In [138]:
list(map(Pileup.update, pileup_list, seq_list))

[None, None, None, None, None, None, None, None, None, None]

In [139]:
pileup_list

[Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'T': 3, 'A': 2, 'C': 1, 'G': 1})),
 Pileup(Counter({'G': 3, 'A': 1, 'T': 1, 'C': 1})),
 Pileup(Counter({'A': 3, 'C': 1, 'T': 1, 'G': 1})),
 Pileup(Counter({'C': 3, 'A': 2, 'G': 1, 'T': 1})),
 Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'A': 5, 'C': 1})),
 Pileup(Counter({'T': 6, 'G': 1})),
 Pileup(Counter({'G': 4, 'A': 1, 'T': 1})),
 Pileup(Counter({'T': 4, 'A': 1}))]

### <font color = "red">Exercise</font>
#### - Implement a method that compares the pileup it is called for with another given pileup and returns a tuple with the consensus of each of the two pileups and the ratio of the frequencies of those consensus nucleotides (taken from the counts attribute of each pileup). 
#### - Test the change

In [140]:
# Pileup Object
class Pileup:
    """
    Contains a counter of nucleotides aligned at a genomic position and
    computes depth and consensus for a genomic position

    Attributes:
    counts (Counter):  counter of all nucleotides aligned at the genomic position
    depth (int): total number of reads aligned at the genomic position (read only)
    consensus (str): most common nucleotide, "" while the depth is 0 (read only)
    """

    def __init__(self, counts=None):
        # "is None" is the idiomatic way to test for a missing argument
        # the given Counter is stored as it is (it is not copied)
        self.__counts = Counter() if counts is None else counts
        self.__refresh()

    # helper shared by __init__, the counts setter and update so that
    # depth and consensus are always computed with the same rule
    def __refresh(self):
        """
        Recompute the hidden depth and consensus from the hidden counts
        """
        self.__depth = sum(self.__counts.values())
        if self.__depth == 0:
            self.__consensus = ""
        else:
            # most_common(1) returns a list with one (nucleotide, count) pair
            self.__consensus = self.__counts.most_common(1)[0][0]

    @property # getter
    def consensus(self):
        """
        Get the consensus nucleotide for the pileup
        """
        return self.__consensus

    # no setter is defined for consensus on purpose, it is computed from the counts
    # if we wanted one it would look like this:
    # @consensus.setter # property setter
    # def consensus(self, cons):
    #    self.__consensus = cons

    @property # getter
    def depth(self):
        """
        Get the depth for the pileup - number of reads that aligned at that position
        """
        return self.__depth

    @property # getter
    def counts(self):
        """
        Get the counts for the pileup - frequencies of the nucleotides that aligned at that position
        """
        return self.__counts

    @counts.setter # property setter
    def counts(self, counts_value):
        # assigning an empty Counter resets depth to 0 and consensus to ""
        # instead of raising an IndexError
        self.__counts = counts_value
        self.__refresh()

    def __str__(self):
        return f"Pileup({self.__counts})"

    def __repr__(self):
        return f"Pileup({self.__counts})"

    def __add__(self, p):
        c = self.__counts.copy() # make a copy otherwise we are updating p1 when we do p1+p2
        c.update(p.counts)
        return Pileup(c)

    def __len__(self):
        return self.__depth

    def update(self, seq):
        """
        Update the counts, depth and consensus for the pileup
        given a sequence of nucleotides to add to the pileup
        """
        self.__counts.update(seq)
        self.__refresh()

    def compare_pileup(self, p):
        """
        Creates a tuple with three elements:
        - consensus of the pileup we are calling this method for
        - consensus of the pileup p given as an argument
        - ratio of the frequencies of the consensus in the counts for each of the pileups
          (this pileup divided by p), 0 if the frequency for p is zero
        """
        f1 = self.__counts[self.__consensus]
        # a Counter returns 0 for a missing key, so an empty pileup p gives f2 = 0
        f2 = p.counts[p.consensus]
        try:
            ratio = f1/f2
        except ZeroDivisionError:
            ratio = 0
        return (self.__consensus, p.consensus, ratio)




In [141]:
3/0

ZeroDivisionError: division by zero

In [142]:
p = Pileup()

In [143]:
p

Pileup(Counter())

In [144]:
p.consensus

''

In [145]:
p.counts

Counter()

In [146]:
p.depth

0

In [147]:
p.consensus = "T"

AttributeError: can't set attribute

In [148]:
p.counts = Counter("ACGTTTT")

In [149]:
p.counts

Counter({'A': 1, 'C': 1, 'G': 1, 'T': 4})

In [150]:
p.depth

7

In [151]:
p.consensus

'T'

In [152]:
p.depth = 40

AttributeError: can't set attribute

In [153]:
p.depth

7

In [154]:
p.update("ACGTTGGGG")

In [155]:
p.counts

Counter({'A': 2, 'C': 2, 'G': 6, 'T': 6})

In [156]:
p.consensus

'G'

In [157]:
p.depth

16

In [158]:
len(p)

16

In [159]:
len(5)

TypeError: object of type 'int' has no len()

In [163]:
p1 = Pileup(Counter("AACTTTGAAAAA"))

In [164]:
p1

Pileup(Counter({'A': 7, 'T': 3, 'C': 1, 'G': 1}))

In [165]:
p.compare_pileup(p1)

('G', 'A', 0.8571428571428571)

In [166]:
p.compare_pileup(Pileup())

('G', '', 0)

In [167]:
pileup_list = [Pileup(Counter(seq_list[i])) for i in range(10)]

In [168]:
pileup_list

[Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'T': 3, 'A': 2, 'C': 1, 'G': 1})),
 Pileup(Counter({'G': 3, 'A': 1, 'T': 1, 'C': 1})),
 Pileup(Counter({'A': 3, 'C': 1, 'T': 1, 'G': 1})),
 Pileup(Counter({'C': 3, 'A': 2, 'G': 1, 'T': 1})),
 Pileup(Counter({'C': 4, 'T': 3, 'A': 1, 'G': 1})),
 Pileup(Counter({'A': 5, 'C': 1})),
 Pileup(Counter({'T': 6, 'G': 1})),
 Pileup(Counter({'G': 4, 'A': 1, 'T': 1})),
 Pileup(Counter({'T': 4, 'A': 1}))]

In [169]:
pileup_list[0].compare_pileup(pileup_list[1])

('C', 'T', 1.3333333333333333)

### An alternative is to compare the pileups and compute the tuple (variant) in a function outside of the class

In [170]:
# Pileup Object
class Pileup:
    """
    Contains a counter of nucleotides aligned at a genomic position and
    computes depth and consensus for a genomic position

    Attributes:
    counts (Counter):  counter of all nucleotides aligned at the genomic position
    depth (int): total number of reads aligned at the genomic position (read only)
    consensus (str): most common nucleotide, "" while the depth is 0 (read only)
    """

    def __init__(self, counts=None):
        # "is None" is the idiomatic way to test for a missing argument
        # the given Counter is stored as it is (it is not copied)
        self.__counts = Counter() if counts is None else counts
        self.__refresh()

    # helper shared by __init__, the counts setter and update so that
    # depth and consensus are always computed with the same rule
    def __refresh(self):
        """
        Recompute the hidden depth and consensus from the hidden counts
        """
        self.__depth = sum(self.__counts.values())
        if self.__depth == 0:
            self.__consensus = ""
        else:
            # most_common(1) returns a list with one (nucleotide, count) pair
            self.__consensus = self.__counts.most_common(1)[0][0]

    @property # getter
    def consensus(self):
        """
        Get the consensus nucleotide for the pileup
        """
        return self.__consensus

    # no setter is defined for consensus on purpose, it is computed from the counts
    # if we wanted one it would look like this:
    # @consensus.setter # property setter
    # def consensus(self, cons):
    #    self.__consensus = cons

    @property # getter
    def depth(self):
        """
        Get the depth for the pileup - number of reads that aligned at that position
        """
        return self.__depth

    @property # getter
    def counts(self):
        """
        Get the counts for the pileup - frequencies of the nucleotides that aligned at that position
        """
        return self.__counts

    @counts.setter # property setter
    def counts(self, counts_value):
        # assigning an empty Counter resets depth to 0 and consensus to ""
        # instead of raising an IndexError
        self.__counts = counts_value
        self.__refresh()

    def __str__(self):
        return f"Pileup({self.__counts})"

    def __repr__(self):
        return f"Pileup({self.__counts})"

    def __add__(self, p):
        c = self.__counts.copy() # make a copy otherwise we are updating p1 when we do p1+p2
        c.update(p.counts)
        return Pileup(c)

    def __len__(self):
        return self.__depth

    # prints the value it is given, self is assigned the object the method is called on
    def test(self, x):
        print(x)

    def update(self, seq):
        """
        Update the counts, depth and consensus for the pileup
        given a sequence of nucleotides to add to the pileup
        """
        self.__counts.update(seq)
        self.__refresh()





In [171]:
def create_variant(pn,pt):
    """
    Builds a tuple with three elements from two pileups:
    - consensus of the first pileup (pn)
    - consensus of the second pileup (pt)
    - ratio of the frequencies of the consensus in the counts of each pileup
      (pn divided by pt), 0 if the division by the pt frequency fails with
      a ZeroDivisionError
    """
    freq_n = pn.counts[pn.consensus]
    freq_t = pt.counts[pt.consensus]
    try:
        ratio = freq_n/freq_t
    except ZeroDivisionError:
        ratio = 0
    return (pn.consensus, pt.consensus, ratio)

In [172]:
p1 = Pileup(Counter("CCCGT"))

In [173]:
create_variant(p1, Pileup())

('C', '', 0)

In [174]:
p2 = Pileup(Counter("GGGGTACGGGG"))

In [175]:
create_variant(p2, p1)

('G', 'C', 2.6666666666666665)

## Unrelated extra example

### Passing arguments to a function by value or by reference

In [229]:
## By reference - using the variable name for a list, set, dict ...

In [219]:
def test(d):
    """
    Set the value for key "A" in the given dictionary d to 2120
    """
    # d refers to the dictionary given by the caller (no copy is made)
    # so the caller sees this change after the function returns
    d["A"] = 2120

In [220]:
d1 = {"A":4, "B":5}

In [221]:
d1

{'A': 4, 'B': 5}

In [222]:
test(d1)

In [223]:
d1

{'A': 2120, 'B': 5}

In [None]:
## By value - unpacking the variable values using * or ** for a list, set, dict ... (the function receives a new object, so the original variable is not changed)

In [224]:
def test(**d):
    """
    Change the value for key "A" only in the local dictionary d
    built from the keyword arguments; nothing is returned
    """
    # **d collects the keyword arguments into a new dictionary,
    # it is not the dictionary that was unpacked in the call
    x = d["A"]
    # x is a local variable, changing it does not change d
    x = x + 5
    
    # only the local dictionary d is changed here
    d["A"] = 4000

In [225]:
test(**d1)

In [226]:
d1

{'A': 2120, 'B': 5}