# Object-Oriented Programming

In [None]:
v1 = [1, 2]
v2 = [-1, 2]

In [None]:
v1 * 2

In [None]:
v1 + v2

## Classes = Data + Behavior

In [None]:
class Point2D(object):
    """A point in the two-dimensional plane.

    Instances only bundle the two coordinates with some behavior; no
    equality or representation logic is defined, so ``==`` falls back to
    identity comparison and the default ``object`` repr is used.
    """

    def __init__(self, x, y):
        """Store the horizontal coordinate ``x`` and the vertical one ``y``."""
        self.x, self.y = x, y

    def distance_to_origin(self):
        """Return the Euclidean distance between this point and (0, 0)."""
        squared_norm = self.x ** 2 + self.y ** 2
        return squared_norm ** 0.5

In [None]:
p1 = Point2D(3, 4)

In [None]:
p1.distance_to_origin()

In [None]:
p2 = Point2D(3, 4)

In [None]:
p2 is p1

In [None]:
p2 == p1

In [None]:
p1

In [None]:
class Point2D(object):
    """A point in the two-dimensional plane with value-based equality.

    Two points compare equal when both coordinates are equal. Because
    ``__eq__`` is defined without ``__hash__``, instances are unhashable,
    which fits a mutable object.
    """

    def __init__(self, x, y):
        """Store the horizontal coordinate ``x`` and the vertical one ``y``."""
        self.x = x
        self.y = y

    def distance_to_origin(self):
        """Return the Euclidean distance between this point and (0, 0)."""
        return (self.x ** 2 + self.y ** 2) ** .5

    def __eq__(self, other):
        """Compare coordinate-wise with another ``Point2D``.

        Returns ``NotImplemented`` for any other type, so that Python can
        try the reflected comparison of ``other``; if that is not
        implemented either, ``==`` still evaluates to ``False``.
        """
        if not isinstance(other, Point2D):
            return NotImplemented
        return self.x == other.x and self.y == other.y

    def __repr__(self):
        """Return a constructor-like representation, e.g. ``Point2D(3, 4)``."""
        # Using the runtime class name keeps the repr correct for subclasses.
        return f'{self.__class__.__name__}({self.x}, {self.y})'

In [None]:
p1 = Point2D(3, 4)
p2 = Point2D(3, 4)

In [None]:
p1

In [None]:
p1 == p2

In [None]:
from dataclasses import dataclass

[Dataclasses](https://docs.python.org/3/library/dataclasses.html) (since Py 3.7)

In [None]:
@dataclass
class Point2D:
    """A point in the two-dimensional plane, written as a data class.

    The ``@dataclass`` decorator generates ``__init__``, ``__repr__`` and
    ``__eq__`` from the annotated fields below.
    """

    x: float
    y: float

    def distance_to_origin(self) -> float:
        """Return the Euclidean distance between this point and (0, 0)."""
        squared_norm = self.x ** 2 + self.y ** 2
        return squared_norm ** 0.5

In [None]:
p1 = Point2D(3, 4)
p2 = Point2D(3, 4)

In [None]:
p1

In [None]:
p1 == p2

## Type Hints

In [None]:
def add_one(x: int) -> int:
    """Return ``x`` increased by one.

    The type hints are not enforced when the function is called; an
    argument that does not support ``+ 1`` fails inside the addition.
    """
    successor = x + 1
    return successor

In [None]:
add_one(42)

In [None]:
add_one('foo')

Type hints are optional (but recommended) for functions and variables, but required for the fields of data classes!

## Object Attributes

In [None]:
dir(p1)

In [None]:
p1.x

In [None]:
p1.distance_to_origin

## Composition

In [None]:
import math

In [None]:
@dataclass
class Circle:
    """A circle composed of a center point and a radius."""

    center: Point2D
    radius: float

    def circumference(self):
        """Return the circumference of the circle, ``2 * pi * radius``."""
        # The radius is instance state and must be read through ``self``;
        # a bare ``radius`` is not a name that is visible inside the method.
        return 2 * math.pi * self.radius

In [None]:
@dataclass
class Point2D:
    """A point in the two-dimensional plane, written as a data class."""

    x: float
    y: float

    def distance_to_origin(self) -> float:
        """Return the Euclidean distance between this point and (0, 0)."""
        return (self.x ** 2 + self.y ** 2) ** .5

    # The class name is not bound yet while the class body executes, so the
    # self-referencing annotation is written as a string (forward reference).
    def distance_from(self, other: 'Point2D') -> float:
        """Return the Euclidean distance between this point and ``other``."""
        return ((self.x - other.x) ** 2 + (self.y - other.y) ** 2) ** .5

@dataclass
class Circle:
    """A circle composed of a center point and a radius.

    Supports the ``in`` operator: ``point in circle`` is true when the
    point lies inside the circle or exactly on its boundary.
    """

    center: Point2D
    radius: float

    def circumference(self):
        """Return the circumference of the circle."""
        # math.tau is the constant 2 * pi.
        return math.tau * self.radius

    def __contains__(self, point: Point2D):
        """Return whether ``point`` is at most ``radius`` away from the center."""
        distance_to_center = self.center.distance_from(point)
        return distance_to_center <= self.radius

In [None]:
c = Circle(Point2D(3, 4), 1)
assert Point2D(3.5, 4) in c
assert Point2D(5, 4) not in c

## Composition of a Pandas DataFrame

In [None]:
import pandas as pd

In [None]:
transaction_df = pd.DataFrame({
    'amount': [42., 100., 999.],
    'from': ['bob', 'alice', 'bob'],
    'to': ['alice', 'bob', 'alice']
})
transaction_df

In [None]:
type(transaction_df)

In [None]:
transaction_df.columns

In [None]:
transaction_df.amount

In [None]:
transaction_df['amount']

In [None]:
type(transaction_df['amount'])

In [None]:
transaction_df['amount'][0], type(transaction_df['amount'][0])

In [None]:
dir(transaction_df)

## Index-based (or Label-based) Selection and Assignment

In [None]:
transaction_df = pd.DataFrame({
    'amount': [42., 100., 999.],
    'from': ['bob', 'alice', 'bob'],
    'to': ['alice', 'bob', 'alice']
})
transaction_df

In [None]:
transaction_df.index

In [None]:
transaction_df.loc[1]

In [None]:
transaction_df.loc[[0, 2]]

In [None]:
transaction_df = pd.DataFrame({
    'amount': [42., 100., 999.],
    'from': ['bob', 'alice', 'bob'],
    'to': ['alice', 'bob', 'alice']
}, index=[2, 4, 6])
transaction_df

In [None]:
transaction_df.loc[1]

In [None]:
transaction_messages = pd.Series(['foo', 'bar', 'baz'])
transaction_messages

In [None]:
transaction_df.assign(message=transaction_messages)

In [None]:
transaction_messages = pd.Series(['foo', 'bar', 'baz'], index=[2, 4, 6])
transaction_df.assign(message=transaction_messages)

In [None]:
transaction_df.reset_index()

In [None]:
transaction_df = pd.DataFrame({
    'amount': [42., 100., 999.],
    'from': ['bob', 'alice', 'bob'],
    'to': ['alice', 'bob', 'alice'],
    'tx_id': [101, 201, 301]
})
transaction_df

In [None]:
transaction_df.set_index('tx_id')

In [None]:
import numpy as np

In [None]:
df_size = 100_000

foo_df = pd.DataFrame({
    'a': np.arange(df_size),
    'b': np.random.permutation(df_size)
})
foo_df

Why indexing?

In [None]:
%%timeit
for n in np.random.choice(df_size, size=10):
    foo_df.loc[lambda df: df['b'] == n]

In [None]:
%timeit foo_df.loc[lambda df: df['b'] == 42]

In [None]:
idx_foo_df = foo_df.set_index('b')

In [None]:
%timeit big_foo_df = foo_df.set_index('b')

In [None]:
%%timeit
for n in np.random.choice(df_size, size=10):
    idx_big_foo_df.loc[n]

In [None]:
%timeit idx_foo_df.loc[42]

## Inheritance

In [None]:
@dataclass
class Square:
    """An axis-aligned square described by its center and side length.

    Supports the ``in`` operator: ``point in square`` is true when the
    point lies inside the square or exactly on its boundary.
    """

    center: Point2D
    side_length: float

    def circumference(self):
        """Return the total length of the four sides."""
        return 4 * self.side_length

    def __contains__(self, point: Point2D):
        """Return whether ``point`` lies within half a side of the center
        along both the x and the y axis (boundaries included)."""
        half_side = self.side_length / 2
        return (
            # Each coordinate is checked against the range of its own axis.
            self.center.x - half_side <= point.x <= self.center.x + half_side and
            self.center.y - half_side <= point.y <= self.center.y + half_side
        )

In [None]:
from abc import ABC, abstractmethod

In [None]:
@dataclass
class Shape2D(ABC):
    """Abstract base for two-dimensional shapes located by a center point.

    Subclasses have to implement ``circumference`` and ``__contains__``;
    ``distance_to_origin`` is shared by all shapes.
    """

    center: Point2D

    @abstractmethod
    def circumference(self):
        """Return the length of the shape's outline (subclass hook)."""

    @abstractmethod
    def __contains__(self, point: Point2D):
        """Return whether ``point`` lies within the shape (subclass hook)."""

    def distance_to_origin(self):
        """Return the distance between the shape's center and the origin."""
        center = self.center
        return center.distance_to_origin()

In [None]:
s = Shape2D()

In [None]:
@dataclass
class Circle(Shape2D):
    """A circle that extends ``Shape2D`` with a radius.

    Only ``circumference`` is overridden here; the abstract
    ``__contains__`` declared on ``Shape2D`` is left unimplemented.
    """

    radius: float

    def circumference(self):
        """Return the circumference of the circle."""
        # math.tau is the constant 2 * pi.
        return math.tau * self.radius

# Instantiating a subclass that still has an abstract method raises a TypeError.
c = Circle(center=Point2D(3, 4), radius=1)

In [None]:
@dataclass
class Circle(Shape2D):
    """A circle: a ``Shape2D`` whose extent is given by a radius."""

    radius: float

    def circumference(self):
        """Return the circumference of the circle."""
        # math.tau is the constant 2 * pi.
        return math.tau * self.radius

    def __contains__(self, point: Point2D):
        """Return whether ``point`` is at most ``radius`` away from the center."""
        distance_to_center = self.center.distance_from(point)
        return distance_to_center <= self.radius

@dataclass
class Square(Shape2D):
    """An axis-aligned square: a ``Shape2D`` with a side length."""

    side_length: float

    def circumference(self):
        """Return the total length of the four sides."""
        return 4 * self.side_length

    def __contains__(self, point: Point2D):
        """Return whether ``point`` lies within half a side of the center
        along both the x and the y axis (boundaries included)."""
        half_side = self.side_length / 2
        return (
            # Each coordinate is checked against the range of its own axis.
            self.center.x - half_side <= point.x <= self.center.x + half_side and
            self.center.y - half_side <= point.y <= self.center.y + half_side
        )

Using `abc` is not _required_ for inheritance; it is just convenient and makes the intended interface clear.

In [None]:
from typing import List

def total_size(shapes: List[Shape2D]) -> float:
    """Return the sum of the circumferences of all given shapes.

    Works for any mix of shapes, since only ``circumference()`` is called
    on each element.
    """
    circumferences = (shape.circumference() for shape in shapes)
    return sum(circumferences)

In [None]:
total_size([
    Circle(Point2D(3, 4), 1),
    Square(Point2D(0, 0), 2)
])

sklearn estimators?

## Iterating

In [None]:
@dataclass
class IntRange:
    """Iterate over the integers ``0, 1, ..., upper_bound - 1``.

    The object is its own iterator: ``__iter__`` resets the position kept
    in ``self.i`` and returns ``self``, so every new iteration over the
    same instance starts again at zero.
    """

    upper_bound: int

    def __iter__(self):
        """Reset the position to zero and return the instance as iterator."""
        self.i = 0
        return self

    def __next__(self):
        """Return the current integer and advance by one.

        Raises ``StopIteration`` once the position reaches ``upper_bound``.
        """
        if self.i >= self.upper_bound:
            raise StopIteration

        value = self.i
        self.i = value + 1
        return value

In [None]:
r = IntRange(3)
r_iter = r.__iter__()

In [None]:
r_iter.__next__()

In [None]:
l_iter = iter([1, 2, 3])

In [None]:
r_iter = iter(IntRange(3))
while True:
    try:
        print(next(r_iter))
    except StopIteration:
        print('Finished iterating!')
        break

In [None]:
for i in IntRange(3):
    print(i * 2)

In [None]:
[i ** 2 for i in IntRange(3)]

Compare this with the built-in `range()`!

In [None]:
from typing import List

In [None]:
Square(Point2D(0, 0), 2).__class__.__name__

In [None]:
@dataclass
class ShapeGrouper:
    """Iterate over a list of shapes grouped by the name of their class.

    Each iteration step yields a ``(class_name, shapes_of_that_class)``
    tuple. The object is its own iterator; ``__iter__`` restarts it.
    """

    shapes: List[Shape2D]

    def __iter__(self):
        """Collect the distinct class names and start iterating over them."""
        distinct_type_names = {shape.__class__.__name__ for shape in self.shapes}
        self.shape_type_iter = iter(distinct_type_names)
        return self

    def __next__(self):
        """Return the next class name together with all shapes of that class.

        ``StopIteration`` from the underlying name iterator propagates and
        ends the iteration.
        """
        shape_type = next(self.shape_type_iter)
        # Every step scans the complete list for the current class name.
        members = []
        for shape in self.shapes:
            if shape.__class__.__name__ == shape_type:
                members.append(shape)
        return shape_type, members

__Bonus Exercise__: What happens if our list of shapes is long, having many unique shape types? Can we make our implementation more efficient by using a dictionary for quickly looking up all shapes of a given type? 

_Hint_: [`collections.defaultdict()`](https://docs.python.org/3/library/collections.html#collections.defaultdict) from the standard library may be particularly useful in this case.

In [None]:
# Your solution:

In [None]:
# %load solutions/shape_grouper_efficient.py

In [None]:
grouper = ShapeGrouper([
    Square(Point2D(1, 1), 1),
    Circle(Point2D(3, 4), 1),
    Square(Point2D(0, 0), 2)
])

In [None]:
for shape_type, shape_list in grouper:
    print(f'{shape_type}: {shape_list}')

In [None]:
@dataclass
class ShapeGrouper:
    """Iterate over a list of shapes grouped by the name of their class.

    Each iteration step yields a ``(class_name, shapes_of_that_class)``
    tuple. The object is its own iterator; ``__iter__`` restarts it.
    """

    shapes: List[Shape2D]

    def __iter__(self):
        """Collect the distinct class names and start iterating over them."""
        distinct_type_names = {shape.__class__.__name__ for shape in self.shapes}
        self.shape_type_iter = iter(distinct_type_names)
        return self

    def __next__(self):
        """Return the next class name together with all shapes of that class.

        ``StopIteration`` from the underlying name iterator propagates and
        ends the iteration.
        """
        shape_type = next(self.shape_type_iter)
        # Every step scans the complete list for the current class name.
        members = []
        for shape in self.shapes:
            if shape.__class__.__name__ == shape_type:
                members.append(shape)
        return shape_type, members

    def total_size(self):
        """Return a list of ``(class_name, summed_circumference)`` tuples,
        one per group produced by iterating over this grouper."""
        totals = []
        for shape_type, members in self:
            circumferences = [member.circumference() for member in members]
            totals.append((shape_type, sum(circumferences)))
        return totals

In [None]:
grouper = ShapeGrouper([
    Square(Point2D(1, 1), 1),
    Circle(Point2D(3, 4), 1),
    Square(Point2D(0, 0), 2)
])

In [None]:
grouper.total_size()

In [None]:
transaction_df

In [None]:
type(transaction_df.groupby('to'))

In [None]:
for receiver, receiver_transactions in transaction_df.groupby('to'):
    print(f'{receiver} got a total amount of {receiver_transactions["amount"].sum()}')

In [None]:
transaction_df.groupby('to').sum()

## Operator (or Method) Chaining

__Exercise__: Create a class `Vector` that implements element-wise addition and scalar multiplication, i.e. the behavior one might have expected from the list operations `v1 + v2` and `v1 * 2` at the top of this module. Use the assertions below to verify the correctness of your solution.

_Hints_: 

1. There are [some magic methods](https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types) to provide an implementation of numeric operators on custom classes.
2. For the addition of vectors, [`zip()`](https://docs.python.org/3/library/functions.html#zip) can be a useful built-in function.

In [None]:
# Your solution:

In [None]:
# %load solutions/vector_basic.py

In [None]:
v1 = Vector([1., 2.])
v2 = Vector([2., 4.])
v3 = Vector([3.5, 4.5])
v4 = Vector([1, 2, 3])

assert v1 + v1 == v2
assert v1 * 2 == v2
assert v1 * 2 + v3 == Vector([5.5, 8.5])
assert v1 + v3 * 2 == Vector([8, 11])
assert v4 + v4 == Vector([2, 4, 6])

_Reflection_: Why is it convenient that our addition and multiplication methods return (new) `Vector` objects?

__Bonus Exercise__: Add methods to the `Vector` class such that it (1) also implements a lookup by dimension just as we index a list using `[]`, and (2) it can return its number of dimensions using the builtin function `len()`.

_Hint_: There are [some magic methods](https://docs.python.org/3/reference/datamodel.html#emulating-container-types) for implementing container-like behavior for custom classes.

In [None]:
# Your solution:

In [None]:
# %load solutions/vector_as_container.py

In [None]:
v5 = Vector([42, 99])
assert v5[0] == 42
assert len(v5) == 2
assert (v5 + Vector([1, 1]) * 2)[1] == 101

_Reflection_: Compare this with `.loc[]` and method chaining in pandas.

In [None]:
transaction_df.loc[transaction_df['to'] == 'alice']

In [None]:
type(transaction_df.loc[transaction_df['to'] == 'alice'])

In [None]:
transaction_df['to'] == 'alice'

In [None]:
transaction_df.loc[[0, 2]]

In [None]:
(
    transaction_df
    .loc[lambda df: df['to'] == 'alice']
    .assign(amount=lambda df: df['amount'] * 2)
)