In [73]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
!python --version

In [None]:
!python -m site

In [None]:
import site
site.getsitepackages()
site.getusersitepackages()

# Python changes
## Dictionary merge and update operators

In [None]:
[1, 2, 3] + [4, 5, 6]
(1, 2, 3) + (4, 5, 6)
value = [1, 2, 3]
value +=  [4, 5, 6]
value
value = (1, 2, 3)
value += (4, 5, 6)
value

In [None]:
# intersection operator
{1, 2, 3} & {1, 4}

# union operator 
{1, 2, 4} | {1, 4}

# difference operator 
{1, 2, 3} - {1, 4}

# symmetirc difference
{1, 2, 3} ^ {1, 4}


In [None]:
# merging two dictionaries 
# i.e. dict1 | dict2
{'a': 1} | {'a': 3, 'b': 2}

mydict = {'a': 1}
mydict |= {'a': 3, 'b': 2}
mydict # returns error becuase its implmented in python 3.9

In [None]:
mydict = {'a': 1}
mydict.update({'a': 3, 'b': 2})
mydict

In [None]:
# alternative - Dict. unpacking {**dict1, **dict2}
a = {'a': 1}
b = {'a':3, 'b':2}
{**a, **b}


In [None]:
# alternative - chainmap from collections module  new_map = ChainMap(dict2, dict1)
from collections import ChainMap

user_account = {"iban": "GB71BARC20031885581746", "type": "account"}
user_profile = {"display_name": "John Doe", "type": "profile"}

# when one key overlap ChainMap instance will return the value of leftmost mapping that has 
# the specific key

user = ChainMap(user_account, user_profile) 

user["iban"], user["type"], user["display_name"]

# ChainMap is a wrapper object. i.e. does not copy object but store them reference.
# this means when the object changes, them ChainMap will return modified data.
user["display_name"]
user_profile["display_name"] = "Emile Jones"
user["display_name"]

# Also, ChainMap is writable and populates changes back to the underlying mapping.
user_profile
user_account
user["display_name"] = "John Doe"
user["age"] = 33
user["type"] = "extension"
user_profile
user_account


dict(ChainMap(user_account, user_profile))

Usually, the most important reason for using ChainMap over dictionary unpacking or the union operator is backward compatibility. On Python versions older than 3.9, you won't be able to use the new dictionary merge operator syntax. So, if you have to write code for older versions of Python, use ChainMap . If you don't, it is better to use the merge operator


# Assignment Expression

### user of walrus operator (i.e. :=)


``` python
import os
import re
import sys

import_re = re.compile(r"^\s*import\s+\.{0,2}((\w+\.)*(\w+))\s*$")
import_from_re = re.compile(r"^\s*from\s+\.{0,2}((\w+\.)*(\w+))\s+import\s+(\w+|\*)+\s*$")

if __name__ == "__main__":
    if len(sys.argv) != 2:
        file_ = "__file__"
        print(f"usage: {os.path.basename(file_)} file-name")
        sys.exit(1)
    with open(sys.argv[1]) as file:
        for line in file:
            match = import_re.search(line)
            if match:
                print(match.groups()[0])
            match = import_from_re.search(line)
            if match:
                print(match.groups()[0])
```

since there is repeation of match we can use walrus operation as
``` python
if match := import_re.match(line):
    print(match.groups()[0])

if match := import_from_re.match(line):
    print(match.groups()[0])
```


In [None]:
first_name = "Emile"
last_name = "May"
height = 168
weight = 50

user = {"first_name": first_name,
        "last_name": last_name,
        "display_name" : f"{first_name} {last_name}",
        "height": height,
        "weight": weight,
        "bmi": weight / (height / 100) ** 2, }
user

# using the walrus operation 
user = {"first_name": (first_name_ := "Emile"),
        "last_name": (last_name_ := "May"),
        "display_name" : f"{first_name_} {last_name_}",
        "height": (height_ := 168),
        "weight":(weight_ := 70),
        "bmi": weight_ / (height_ / 100) ** 2, }
user


### typing-hinting generics

In [None]:
from typing import Any, Dict

def get_ci(d: dict, key: str) -> Any:
    for k, v in d.items():
        if key.lower == k.lower():
            return v
        
# further annotating the datatype of the dict type 
# def get_ci(d: dict[str, Any], key: str) -> Any: ... this works for python 3.9
def get_ci(d: Dict[str, Any], key:str) -> Any:
    for k, v in d.items():
        if key.lower == k.lower():
            return v


### postional-only params.

In [None]:
# concatentes the strings using a delimeter
def concatenate(first: str, second: str, delim: str):
    return delim.join([first, second])


concatenate("Emile", "Arthur", " ") # position args 
concatenate(first="Emile", second="Arthur", delim=" ") # keyword args
concatenate("Emile", "Arthur", delim=" ") # mix of keyword and position args

In [None]:
def concatenate(first: str, /, *, second: str, delim: str):
    return delim.join([first, second])

The way in which you read this definition is as follows:
* All arguments preceding the / mark are positional-only arguments
* All arguments following the * mark are keyword-only arguments

In [None]:
concatenate("Emile", second="Arthur", delim=" ") # this will run and pass
concatenate("Emile", "Arthur", " ")

Using args unpacking in concatenate function

In [None]:
def concatenate(*items, delim: str):
    return delim.join(items)

concatenate("Emile", "Arthur", delim=" ")
concatenate("Emile", "Frederick", "Emile", delim=" ")
concatenate("Emile", delim=" ")
concatenate(delim=" ")

### zoneinfo module

In [None]:
from datetime import datetime
from zoneinfo import ZoneInfo

dt = datetime(2020, 11, 28, tzinfo=ZoneInfo("Europe/Warsaw"))
dt

### graphlib

module for working on graph-like data structures. A graph is a data structure consisting of nodes connected by edges. Depending on edge type, we can distinguish two main types of graphs.

* An undirected graph is a graph where every edge is undirected.
* A directed  graph is a graph where every edge is directed.

* A cyclic graph is a graph that has at least one cycle -  a closed path that starts and ends at the same node.
* Acyclic graph is a graph that does not have any cycles.

Topological sorting (`TopologicalSorter` - class in graphlib module) is an operation of order nodes of Directed Acyclic Graph (DAG) in a specific way. The result of topological sorting is a list of all nodes where every nodes appears before all nodes that you can transverse to from that node
i.e.

* The first node will be the node that cannot be tranversed to from any other node.
* Every next node will be a node from which you cannot tranverse to previous nodes.
* The last node will be a node from which you cannot traverse to any node.


Understanding use of graph;
We have a complex operation to execute that consist of multiple dependent tasks. Eg. is a job of migrating multiple db table between two different database systems.

In relational database systems, rows in tables are often cross-referenced, and the
integrity of those references is guarded by foreign key constraints. If we would like
to ensure that, at any given point in time, the target database is referentially integral,
we would have to migrate our all the tables in specific order.

* A `customers` table, which holds personal information pertaining to customers.
* An `accounts` table, which holds information about user accounts, including
their balances. A single user can have multiple accounts (for instance, personal and business accounts), and the same account cannot be accessed by multiple users.
 *A `products` table, which holds information on the products available for sale in our system.
* An `orders` table, which holds individual orders of multiple products within a
single account made by a single user.
* An `order_products` table, which holds information regarding the quantities of individual products within a single order.


In [None]:
from graphlib import TopologicalSorter

table_references = {
    "customers": set(),
    "accounts": {"customers"},
    "products": set(),
    "orders": {"accounts", "customers"},
    "order_products": {"orders", "products"},
}
sorter = TopologicalSorter(table_references)
list(sorter.static_order())

### breakpoint() function 

In [None]:
# impyport pdb; pdb.set_trace()
import ipdb
#PYTHONBREAKPOINT=ipdb.set_trace()


In [None]:
import sys
sys.setrecursionlimit(1 << 30)

def crasher():
    return crasher()

# crasher()

### Module-level `__getattr__()` and `__dir__()` functions.

Every python class can define the custom `___getattr__()` and `__dir__()` methods to customize the dynamic attributes access of objects.
The `__getattr__()` function is invoked when a given attribute name is not found to capture a missing attribute lookup and possibly generate a value on the fly.

The `__dir__()` method is called when an object is passed to the dir() function and it should return a list of object attribute names.


In [None]:
from typing import Any
from warnings import warn

def ci_lookup(d: dict[str, Any], key: str) -> Any:
    ...

def __getattr__(name: str):
    if name == "get_ci":
        warn(f"{name} is deprecated", DeprecationWarning)
        return ci_lookup
    raise AttributeError(f"module {__name__} has no attribute {name}")

### Formatting strings with f-strings

In [None]:
import math 
f"approx. value of pi: {math.pi:f}"

In [None]:
for x in range(10):
    print(f"10^{x} == {10**x:10d}")

### secret module

Used to generate random values for security context.
The three most important functions are:

* `secrets.token_bytes(nbytes=None)` : This returns nbytes of random bytes. This function is used internally by `secrets.token_hex()` and `secrets.token_urlsafe()`. If nbytes is not specified, it will return a default number of bytes, which is documented as "reasonable."
* `secrets.token_hex(nbytes=None)` : This returns nbytes of random bytes
in the form of a hex-encoded string (not a bytes() object). As it takes two hexadecimal digits to encode one byte, the resulting string will consist of nbytes × 2 characters. If nbytes is not specified, it will return the same default number of bytes as `secrets.token_bytes()`.
* `secrets.token_urlsafe(nbytes=None)` : This returns nbytes of random bytes
in the form of a URL-safe, base64-encoded string. As a single byte takes
approximately 1.3 characters in base64 encoding, the resulting string will
consist of nbytes × 1.3 characters. If nbytes is not specified, it will return the
same default number of bytes as `secrets.token_bytes()`.

In [None]:
import secrets

secrets.token_bytes(nbytes=None)
secrets.token_hex(nbytes=None)
secrets.token_urlsafe(nbytes=None)

In [None]:
a = secrets.token_urlsafe(nbytes=None)
b = c = secrets.token_urlsafe(nbytes=None)
secrets.compare_digest(a, b) # compare two string 
secrets.compare_digest(b, c)

### Union types with the | operator

In [None]:
# redefining the get_ci function to get bytes as inputs

from typing import Union, Any

def get_ci(d: dict[Union[str, bytes], Any], key: Union[str, bytes]) -> Any:
    ...


# we can rewrite the function above as 
# def get_ci(d: dict[str | bytes, Any], key: str | bytes) -> Any:  # Works on python 3.10
#     ...

### Structural pattern matching

In [None]:
# working on python 3.10
# class Point:
#     x: int
#     y: int

#     def __init__(self, x: int, y: int) -> None:
#         self.x = x
#         self.y = y

#     def where_is(point):
#         match point:
#             case Point(x=0, y=0):
#                 print("Origin")
#             case Point(x=0, y=y):
#                 print(f"Y={y}")
#             case Point(x=x, y=0):
#                 print(f"X={x}")
#             case Point():
#                 print("Somewhere else")
#             case _:
#                 print("Not a point")


# class Point:
#     x: int
#     y: int
# __match_args__ = ["x", "y"]

# def __init__(self, x, y):
#     self.x = x
#     self.y = y

# def where_is(point):
#     match point:
#         case Point(0, 0):
#             print("Origin")
#         case Point(0, y):
#             print(f"Y={y}")
#         case Point(x, 0):
#             print(f"X={x}")
#         case Point():
#             print("Somewhere else")
#         case _:
#             print("Not a point")

### Accessing super-classes

Subclassing is a convenient way of reusing existing classes by enhancing or specializing their behaviour. Subclass often rely on the behavior of their base classes but extend them with additional methods or provide completely new implementations for existing methods by overriding their definition.
Overriding methods without access to thier original implementations within the subclass would not facilitaet code resuse at all. This reuslts to python use of the `super()` function, which return as proxy object to the method implementation in all base classes.


In [None]:
from collections import UserDict
from typing import Any

class CaseInsensitiveDict(UserDict):
    # allows access to store keys thorugh case-sentive key lookup 
    def __setitem__(self, key: str, value: Any) -> None:
        return super().__setitem__(key.lower(), value)

    def __getitem__(self, key: str) -> Any:
        return super().__getitem__(key.lower())

    def __delitem__(self, key: str) -> None:
        return super().__delitem__(key.lower())

Notes -> Code Above


Our implementation of CaseInsensitiveDict relies on `collections.UserDict` instead of the built-in `dict` type. Although inheriting from the dict type is possible, we would quickly run into inconsistencies as the built-in dict type doesn't always call `__setitem__()` to update its state. Most importantly, it won't be used on object initialization and on `update()` method calls. Similar problems can arise when subclassing the list type. That's why good practice dictates to use collections. `UserDict` classes for subclassing the dict type and `collections.UserList` for
subclassing the list type.

`__setitem__()` is responsible for storing and modifying dictionary values. It would
not make sense to subclass the base dictionary type and not leverage its internal key-value storage. That's why we use `super().__setitem__()` to invoke the original set-item implementation. But before we allow the value to be stored, we transform the
key to lowercase using the `str.lower()` method. That way we ensure that all keys
stored in the dictionary will always be lowercase.

The `__getitem__()` method is analogous to the `__setitem__()` implementation. We
know that every key is transformed to lowercase before being stored in a dictionary.
Thanks to this, when key lookup occurs, we can also transform it to lowercase as
well. If the super implementation of the `__getitem__()` method does not return the
result, we can be sure that there is no case-insensitive match in the dictionary.

Last but not least, the `__delitem__()` method deletes existing dictionary keys. It
uses the same technique to transform a key to lowercase and invoke super-class
implementation. Thanks to this, we will be able to remove dictionary keys using the
`del dictionary[key]` statement.


In [None]:
headers = CaseInsensitiveDict({
    "Content-Length":  30,
    "Content-Type": "application/json",
})

headers["Content-Length"]
headers["Content-Type"]

### Multiple inheritance and Method Resolution Order

Python MRO is based on C3 linearization, the deterministic MRO algorithm
originally created for the Dylan programming language. The C3 algorithm builds
the linearization of a class, also called precedence, which is an ordered list of the
ancestors. This list is used to seek an attribute in a class inheritance tree.


In [None]:
class Base1:
    pass 

class Base2:
    def method(self):
        print("Base2.method() called")

class NewClass(Base1, Base2):
    pass

In [None]:
class CommonBase:
    pass

class Base1(CommonBase):
    pass 

class Base2(CommonBase):
    def method(self):
        print("Base2.method() called")

class NewClass(Base1, Base2):
    pass

In [None]:
NewClass.__mro__

### Class instance initilization

An object in OOP is an entity that encapsulates data together with behavior. Python, data is contained as object attributes, which are simply object variables.
Behavior, on the other hand, is represented by methods. Python classes do not require
you to define attributes in the class body.

A variable comes into existence at the time
it is initialized. That's why the canonical way to declare object attributes is through
assigning their values during object initialization in the `__init__()` method.


In [None]:
class Point:
    def __init__(self, x:int, y:int) -> None:
        self.x = x 
        self.y = y

In [None]:
from typing import List, Any
class Aggregator:
    all_aggregated: List[Any]
    last_aggregated: Any

    def __init__(self) -> None:
        self.all_aggregated = []
        self.last_aggregated = None

    def aggregate(self, value):
        self.last_aggregated = value
        self.all_aggregated.append(value)

### Attribute access patterns

In [None]:
class NewClass:
    def __init__(self) -> None:
        self.__secret_value = 1

In [None]:
instance_of = NewClass()
instance_of.__secret_value

In [None]:
instance_of._NewClass__secret_value

instance_of._NewClass__secret_value = 2

instance_of._NewClass__secret_value

### Descriptors

A descriptor lets you customize what should be done when you refer to an attribute
of an object. Descriptors are the basis of complex attribute access in Python. They are
used internally to implement properties, methods, class methods, static methods,
and super . They are objects that define how attributes of another class can be
accessed. In other words, a class can delegate the management of an attribute to
another class.

The descriptor classes are based no three special methods that form the descriptor protocol;

* `__set__(self, obj, value)`: This is called whenever the attribute is set. `setter`
* `__get__(self, obj, owner=None)`: This is called whenever the attribute is read. `getter`
* `__delete__(self, obj)`: This is called when del is invoked on the attribute.

descriptor that implements `__get__()` and `__set__()` ==> data descriptor.

if implements only `__get__()` ==> non-data descriptor.
Method of descriptor protocol ==> called by object's special   `__getattribute__()` method on every attribute lookup.

Whenever such a lookup is performed, either by using a dotted notation in the form of instance.attribute or by using the `getattr(instance, 'attribute')` function call, the `__getattribute__()`
method is implicitly invoked and it looks for an attribute in the following order:

1. It verifies whether the attribute is a data descriptor on the class object of the instance
2. If not, it looks to see whether the attribute can be found in the `__dict__` lookup of the instance object
3. Finally, it looks to see whether the attribute is a non-data descriptor on the
class object of the instance

In other words, data descriptors take precedence over the `__dict__` lookup, and the `__dict__` lookup takes precedence over non-data descriptors.


In [None]:
class RevealAccess(object):
    """A data descriptor that sets and return values normally and prints a
       message logging their accesss. 

    Args:
        object ([type]): [description]
    """

    def __init__(self, initval=None, name='var') -> None:
        self.val = initval
        self.name = name

    def __get__(self, obj, objtype):
        print("Retrieving", self.name)
        return self.val

    def __set__(self, obj, val):
        print('Updating', self.name)
        self.val = val

    def __delete__(self, obj):
        print('Deleting', self.name)
        

class NewClass(object):
    x = RevealAccess(10, 'var "x"')
    y = 5

xp = NewClass()

xp.x
xp.y

In [None]:
m = NewClass()
m.x
m.y
m.x = 15
m.x

del m.x

Descriptors also power the mechniasm behind the classmethod and staticmethod decorators. This is becuase function objects are non-data descriptors too.

In [None]:
def function(): pass

hasattr(function, '__get__')
hasattr(function, '__set__')

In [None]:
# lazily evaluated attributes

class InitOnAccess:
    def __init__(self, init_func, *args, **kwargs):
        self.klass = init_func
        self.args = args 
        self.kwargs = kwargs
        self._initialized = None

    def __get__(self, instance, owner):
        if self._initialized is None:
            print("initialized!")
            self._initialized = self.klass(*self.args, **self.kwargs)
        else:
            print('cached!')
        return self._initialized


import random

class WithSortedRandoms:
    lazily_initialized = InitOnAccess(sorted, [random.random() for _ in range(5)])


m = WithSortedRandoms()
m.lazily_initialized

In [None]:
m.lazily_initialized

In [None]:
class lazy_property(object):
    def __init__(self, function):
        self.fget = function

    def __get__(self, obj, cls):
        value = self.fget(obj)  ## setter is on obj. thus new object will be created when code runs
        setattr(obj, self.fget.__name__, value)
        return value

class WithSoretedRandoms:
    @lazy_property
    def lazily_initialized(self):
        return sorted([[random.random() for _ in range(5)]])

In [None]:
# here everytime you run method a new values are initialized.
WithSoretedRandoms().lazily_initialized
WithSoretedRandoms().lazily_initialized

In [None]:

class lazy_class_attribute(object):
    def __init__(self, function) -> None:
        self.fget = function

    def __get__(self, obj, cls):
        value = self.fget(cls)      #get is on class thus same object will be returned when class is called.
        setattr(cls, self.fget.__name__, value)
        return value


class MyComplexClass:
    @lazy_class_attribute
    def evaluated_only_once(self):
        print("Evaluation of a method!")
        return sum(x ** 2 for x in range(200))


MyComplexClass().evaluated_only_once
MyComplexClass().evaluated_only_once

In [None]:
class WithSoretedRandoms_:
    @lazy_class_attribute
    def lazily_initialized(self):
        return sorted([[random.random() for _ in range(5)]])

WithSoretedRandoms_().lazily_initialized
WithSoretedRandoms_().lazily_initialized

### Properties

In a fully encapsulated class, as few methods as possible should be exposed to public. Any write and read access to an object's state should be exposed through setter and getter methods that are able to guard proper usage.
Example below is an encapsulated UserAccount class that does not use the feature of properties.


In [None]:
class UserAccount:
    """Similar code that a C++ or java programmer could write.
    """
    def __init__(self, username: str, password: str):
        self._username = username
        self._password = password

    def get_username(self):
        return self._username

    def set_username(self, username: str):
        self._username = username

    def get_password(self):
        return self._password

    def set_password(self, password: str):
        self._password = password

In [None]:
class UserAccount:
    """In a pythonic way, we can hide a specific field behind a property.
    """
    def __init__(self, username: str, password: str):
        self._username = username 
        self._password =  password

    # def get_username(self):
    #     return self.username
    
    # def set_username(self, value: str):
    #     self.username = value
    @property
    def username(self):
        return self._username
    
    @username.setter
    def username(self, value: str):
        self._username = value

    @property
    def password(self):
        return self._password

    @password.setter
    def password(self, value: str):
        self._password = value

In [None]:
x = UserAccount(username="Emile", password="kkk")

In [None]:
x.password, x.username = "may", "Kindg"
x.password, x.username

In [None]:
from typing import Union

class Rectangle:
    def __init__(self, x1: Union[int, float], y1: Union[int, float], x2: Union[int, float], y2: Union[int, float]) -> None:
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2

    def _width_get(self) -> Union[int, float]:
        return self.x2 - self.x1

    def _width_set(self, value: Union[int, float]) -> None:
        self.x2 = self.x1 + value

    def _height_get(self) -> Union[int, float]:
        return self.y2 - self.y1

    def _height_set(self, value: Union[int, float]) -> None:
        self.y2 = self.y1 + value

    width = property(_width_get, _width_set, doc="rectangle width measured from left")
    height = property(_height_get, _height_set, doc="rectangle height measured from top")

    def __repr__(self):
        return "{}({}, {}, {}, {})".format(self.__class__.__name__, self.x1, self.y1, self.x2, self.y2)


rectangle = Rectangle(10, 10, 25, 34)
rectangle.width, rectangle.height

rectangle.width = 100
rectangle

rectangle.height = 100
rectangle

help(Rectangle)

In [None]:
class MetricRectangle(Rectangle):
    def _width_get(self) -> str:
        """Overriding the _width_get class. But this will not work"""
        return f"{self.x2 - self.x1} meters"


MetricRectangle(0, 0, 100, 100).width

In [None]:
class MetricRectangle(Rectangle):
    def _width_get(self) -> str:
        return f"{self.x2 - self.x1} meters"
    
    """Overriding the derived class."""
    width = property(_width_get, Rectangle.width.fset)


MetricRectangle(0, 0, 100, 100).width

In [None]:
# Making the code more readable and maintainable
class Rectangle_:
    def __init__(self, x1: Union[int, float], y1: Union[int, float], x2: Union[int, float], y2: Union[int, float]) -> None:
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2

    @property
    def width(self) -> Union[int, float]:
        """Rectangle width measured from left."""
        return self.x2 - self.x1

    @width.setter
    def width(self, value: Union[int, float]):
        self.x2 = self.x1 + value

    @property
    def height(self):
        """Rectangle height measured from top."""
        return self.y2 - self.y1

    @height.setter
    def height(self, value: Union[int, float]):
        self.y2 = self.y1 + value


rectangle = Rectangle(10, 10, 25, 34)
rectangle.width, rectangle.height

rectangle.width = 100
rectangle

rectangle.height = 100
rectangle

help(Rectangle)

In [None]:
class MetricRectangle(Rectangle_):
    def _width_get(self) -> str:
        return f"{self.x2 - self.x1} meters"
    
    """Overriding the derived class."""
    width = property(_width_get, Rectangle_.width.fset)


MetricRectangle(0, 0, 100, 100).width

### Dynamic polymorphism

Polymorphism abstracts the interface of an object from its type. For statically types language polymorphism is achived through:

* Subtyping: Subtypes of type A can be used in every interface that expects type A. Interfaces are defined explicity and subtypes/subclass inherit interfaces of their parents.
* Implicit interfaces: Every type can be used in the interface that expects an interface of type A as long as it implements the same methods (has the interface) as type A. The declarations of interfaces are still defined explicitly but subclass/subtypes don't have to explicitly inherit from the base classes/types that define an interface.

Python is a dynamically typed lang., so uses duck typing (i.e. if it walks like a duck and it quacks like a duck, then it must be a duck.)


In [None]:
def printfile(file):
    try:
        contents = file.read()
        print(file)
    finally:
        file.close()

The code above notes:

From the signature of the printfile() function, we can already guess that it expects a file or a file-like object (like StringIO from the io module). But the truth is this function will consume any object without raising an unexpected exception if we are able to ensure for the input argument that:

* The file argument has a read() method
* The result of file.read() is a valid argument to the print() function
* The file argument has the close() method


### Operator overloading

Operator overloading is a specific type of polymorphism that allows the language to have different implementations of specifi operators depending on the types of opernads.
Operator in prog. lang. are already polymorphic. Consider the following expersion tha would be valid in python operation.
```
7 * 6
3.14 * 2
["a", "b"] * 2
```

### Dunder methods (language protocols)

The Python data model specifies a lot of specially named methods tha can be overridden in your custom classes to provide them with additional syntax capabilities. You can recognize these methods by their specific naming conventions that wrap the method name with double underscores. They are referred as dunder methods. The most common dunder methods is `__init__()`, which is used for classs initailization.

Protocol language:

Callable protocol => `__call__()` => allows objects to be called with parenthese: `instance()`

Desciptor protocol => `__set__()`,`__get__()`, `__del__()` => allows us to manipulate the attributes access of pattern of classes.

Container protocol => `__contains__()` => allows us to test wether or not an object contains some values using the in keyword. `value in instance()`

Iterable protocol => `__iter__()` => allows objects to be iterated using the for keyword. `for value in instance`

Sequence protocol => `__getitem__()`, `__len_()` => allow objects to be indexed with square bracket syntax and queried for length using built-in function: `item = instance[index], lenght = len(instance)`


In [None]:
class Matrix:
    def __init__(self, rows) -> None:
        """Accepts lis tof matrix rows as inputs args. through args. unpacking. As every row
        needs  to have the same number of columns, we iterate over them and verify each have the 
        same length."""
        if len(set(len(row) for row in rows)) > 1:
            raise ValueError("All matrix rows must be the same length")

        self.rows = rows

    def __add__(self, other):
        if (len(self.rows)  != len(other.rows) or len(self.rows[0]) != len(other.rows[0])):
            raise ValueError("Matrix dimensions don't match")
        return Matrix([[a + b for a, b in zip(a_row, b_row)] for a_row, b_row in zip(self.rows, other.rows)])

    def __sub__(self, other):
        if (len(self.rows)  != len(other.rows) or len(self.rows[0]) != len(other.rows[0])):
            raise ValueError("Matrix dimensions don't match")
        return Matrix([[a - b for a, b in zip(a_row, b_row)] for a_row, b_row in zip(self.rows, other.rows)])

    def __mul__(self, other):
        if isinstance(other, Matrix):
            if len(self.rows[0]) != len(other.rows):
                raise ValueError("Matrix dimensions don't match")

            rows = [[0 for _ in other.rows[0]] for _ in self.rows]
            
            for i in range(len(self.rows)):
                for j in range(len(other.rows[0])):
                    for k in range(len(other.rows)):
                        rows[i][j] += self.rows[i][k] * other.rows[k][j]
            return Matrix(rows)
        elif isinstance(other, int):
            rows = [[item * other for item in row] for row in self.rows]
            return Matrix(rows)
    
        else:
            raise TypeError(f"Don't know how to multiply {type(other)} with Matrix")

    def __rmul__(self, other):
        if isinstance(other, Matrix):
            if len(self.rows[0]) != len(other.rows):
                raise ValueError("Matrix dimensions don't match")

            rows = [[0 for _ in other.rows[0]] for _ in self.rows]
            
            for i in range(len(self.rows)):
                for j in range(len(other.rows[0])):
                    for k in range(len(other.rows)):
                        rows[i][j] += self.rows[i][k] * other.rows[k][j]
            return Matrix(rows)
        elif isinstance(other, int):
            rows = [[item * other for item in row] for row in self.rows]
            return Matrix(rows)
    
        else:
            raise TypeError(f"Don't know how to multiply {type(other)} with Matrix")

In [None]:
x = Matrix([[1,2,4],[5,6,7]])
y = Matrix([[1,2,4],[5,6,7], [2,5,6]])

In [None]:
(x*y).rows

In [None]:
(Matrix([[1,2,4],[5,6,7]])*5).rows


In [None]:
(5*Matrix([[1,2,4],[5,6,7]])).rows

### Function and method overloading

Overloading allows you to have multiple implementations of a single function by using different call signatures. Either the language's complier or interpretor is able to select mathcing implementation based on the set of function calls args provided. Function overloading is usually resolved based on:

* Function arity (number of parameters): two func. definitions can share a function name if thier signatures expect a different number of params.
* Types of parameers: Two function can share a function name if their signatures expect different types of params.

Python lacks an overloading mechanism for func. and methods other than operator overlaoding. If you define multiple functions in a single module that share the same name, the latter definition will always shadow all previous definitions.

Python provides several alternatives:

* Using methods and/or subclassing: instead of relying on a function to distinguish the parameter type, you can bind it to a specifi type by defining it as a method of that type. -- For this kind of solution, pushing functions implementation directly to class definitions as methods will not make any sense if said method doesnt constitute unique object behavior.
* Using arg and keyword arg unpacking: Python allow for some flexibility regarding func signature to support variable number of args via `*args` and `**kwargs` patterns. -- This kind of solution is hard to maintain and args unpacking can make function signatures vague.
* Using type checking: The `isinstance()` func allows us to test input args against specific types of base classes to decide how to handle them. -- THis method is the most reliable one.


### Single-dispatch functions

In situation when an alternative to function overling is requrired and the number of alternative function implementations is really large, using multiple `if isintance(...)` clause get out of hands.
good design pratice dictates writing a small, sinlge-purpose function. One large function that branches over serveral types to handle inputs args differently is rarely a good design.
Python's `functools.singledispatch()` decorator allows to register multiple implementations of a function.

Those implementations can take any number of arguments but implementations will be dispatched depending on the type of the first argument. Single dispatch starts with a definition of a function that will be used by default for any non-registered type. Let's assume that we need a function that can output various variables in human-readable format for the purpose of a larger report being displayed in the console output.


In [None]:
from functools import singledispatch

@singledispatch
def report(value):
    return f"raw: {value}"

We can registering different implementations for various types using report.register() decorator. That decorator is able to read a function args type annotations to register specific type handles. Eg. we want datetime object to be reported in ISO format.

In [None]:
from datetime import datetime
from numbers import Real

@report.register
def _(value: datetime):
    return f"dt: {value.isoformat()}"


@report.register
def _(value: complex):
    return f"complex: {value.real}{value.imag:+}j"

@report.register
def _(value: Real):
    return f"real: {value:f}"


Notes of function above:

Note that we used the `_` token as the actual function name.
That serves two purposes.

* First, it is a convention for names of objects that are not supposed to be used explicitly. 
* Second, if we used the report name instead, we would shadow the original function, thus losing the ability to access it and register new types.


In [None]:
report(datetime.now())
report(100-30j)
report(100)
report('January')


In [None]:
for key, value in report.registry.items():
    print(f"{key} -> {value}")


### Data classes

In [None]:
from typing import Union

class Vector:
    def __init__(self, x:Union[int, float], y:Union[int, float]) -> None:
        self.x = x 
        self.y = y

    def __add__(self, other: Vector) -> Vector:
        """Add two vectors using + operator"""
        return Vector(self.x + other.x, self.y + other.y)

    def __radd__(self, other: Vector) -> Vector:
        """Add two vectors using + operator"""
        return Vector(self.x + other.x, self.y + other.y)

    def __sub__(self, other: Vector) -> Vector:
        """sub two vector using - operator"""
        return Vector(self.x - other.x, self.y - other.y)
    
    def __rsub__(self, other: Vector) -> Vector:
        """sub two vector using - operator"""
        return Vector(self.x - other.x, self.y - other.y)

    def __repr__(self):
        """Return textual representation of vector"""
        return f"<Vector: x={self.x}, y={self.y}"
    
    def __eq__(self, other):
        """Compare two vectors for equality."""
        return self.x == other.x and self.y == other.y

    def __req__(self, other):
        """Compare two vectors for equality."""
        return self.x == other.x and self.y == other.y
        

In [None]:
Vector(1,2) == Vector(5,6)
Vector(4, 4) == Vector(4,4)
Vector(1,3) + Vector(8, 10)
Vector(1,3) - Vector(8, 10)

Using dataclasses. Dataclass take care of __init__, __repr__ and __eq__ methods.
`dataclass` class decorator reads attribute annotations of the vector class and auto. creates  `__init__()`, `__repr__()` and `__eq__()` methods. The default equalty comparism assumes that the two instances are equal if all their respective attributes are equal to each other.


In [None]:
from dataclasses import dataclass

@dataclass
class Vector:
    x : Union[int, float]
    y : Union[int, float]

    def __add__(self, other):
        """Add two vectors using + operator"""
        return Vector(self.x + other.x, self.y + other.y)

    def __radd__(self, other):
        """Add two vectors using + operator"""
        return Vector(self.x + other.x, self.y + other.y)

    def __sub__(self, other):
        """sub two vector using - operator"""
        return Vector(self.x - other.x, self.y - other.y)
    
    def __rsub__(self, other):
        """sub two vector using - operator"""

In [None]:
Vector(1,2) == Vector(5,6)
Vector(4, 4) == Vector(4,4)
Vector(1,3) + Vector(8, 10)
Vector(1,3) - Vector(8, 10)

In [None]:
# making an class instance immutable.
@dataclass(frozen=True)
class FrozenVector:
    """this class becomes immutable, so you won't be able to modify any of it attributes.
    Operation performed will create a new class."""
    x: Union[int, float]
    y: Union[int, float]

`field() constructor` allws us to specify both mutable and immutable default values for data class attrbutes in a sane and secure way without risking leaking the state between class instances.
Static and immutable defuaults values are provided using the `field(default=value)` call. The mutable value should always be passed by providing the type constructor using the `field(default_factory=constructor)` call.

In [None]:
from dataclasses import field

@dataclass
class DataClasswithDefaults:
    immutable: str  = field(default="this is static default value")
    mutable: list = field(default_factory=list)

In [None]:
DataClasswithDefaults()

### Functional Programming

Functional programming is a paradigm where the program flow is achieved mainly through the evaluation of (mathematical) functions rather than through a series of steps that change the state of the program. Purely functional programs avoid the changing of state (side effects) and the use of mutable data structures.

Genereal Concept of functional programming:

* Side effects: A function is said to have side effect if it modifies the state outside of its local environment. IOW, side effect is any observable change outside of the function scope that happens as a result of a function call. Eg. modification of global variable.

* Referential transparency: When a function or expression is referentially transparent, it can be replaced with the value tha corresponds to its output without changing the behavior of the program. Python's `pow(x,y)` is a referentially transparent. `datetime.now()` constructor method of `datetime` type doesnt seem to have any observable side ffect but will return a different value every time is called, thus is referentially opaque.

* Pure functions: A pure function is a function that does not have any side effects and that always return the same value for the same set of input args. In other words, it is function that is referentially transparent.
Every mathematical function is, by definition, a pure function. Analogously, a function that leaves a trace of its execution for the outside world (for instance, by modifying received objects) is not a pure function.

* First-class functions: Language is said to contain first-class functions if functions in this language can be treated as any other value or entity. First-class functions can be passed as rgs to other functions, returned as function return values and assigned to variables. In other words, a language
that has first-class functions is a language that treats functions as first-class citizens.

A purely function programmng language is a language that:

* Has first-class functions
* Is concered only with pure functions
* Avoids any state modification and side effects.

Python provides some functional features such as:

* Lambda functions and first-class functions.
* `map()`, `filter()`, and `reduce()` functions.
* Partial objects and functions.
* Generators and generator expressions.


### Lambda functions
Lambda functions are anonymous functions that don't have to be bound to any identifier (variable).
Lambda functions in python can be defined using  expressions. i.e. `lambda <args> : <expression>`

In [None]:
import math 

def cirlce_area(radius: Union[int, float]):
    return math.pi * radius ** 2

cirlce_area_lambda = lambda radius: math.pi * radius ** 2

cirlce_area(42), cirlce_area_lambda(42)

In [None]:
cirlce_area_lambda.__name__, cirlce_area.__name__
cirlce_area_lambda.__class__, cirlce_area.__class__

In [None]:
from dataclasses import dataclass

@dataclass
class Person:
    age: int
    weight: int
    name: str

#sorted(people, key=lambda person: person.age)

### The map(), filter(), and reduce() functions
`map(func, iterable, ...)` ==> applies the `func` function arg to every item of iterable.

`filter(func, iterable)` ==> works like `map()` by evaluating input elements one by one. `filter()` does not tansform input elements but allows us to filter out those input value that do not meet the predicate defined by the func arg.

`reduce(func, iterable)` ==> works opposite of `map()`. This function is used reduce an iterable to a single value. i.e. `reduce(func, [a, b, c, d])` returns equivaluent to `func(func(func(a, b), c), d)`.

In [None]:
list(map(lambda x: x**2, range(10)))

In [None]:
# map() function being used over multiple iterables of different sizes.
list(map(print, range(5), range(4), range(5)))

In [None]:
evens = filter(lambda number: number % 2 == 0, range(10))
odds = filter(lambda number: number % 2 == 1, range(10))

list(evens), list(odds)

In [None]:
animals = ["giraffe", "snake", "lion", "squirrel"]
animals_s = filter(lambda animal: animal.startswith("s"), animals)
list(animals_s)

In [None]:
from functools import reduce

reduce(lambda a, b: a + b, [2, 2])
reduce(lambda a, b: a + b, [2, 2, 2])

reduce(lambda a, b: a + b, range(1000))

### Partial objects and partial fuctions

Partial objects are used to slice the possible input range of a give function by setting some of it args to a fixed value.
The signature of a partial object constructor is `partial(func, *args, **kwargs)`. The partial object will behave exactly like func, but its input args will be pre-populated with `*args` and `**keywords`.


In [None]:
from functools import partial

powers_of_2 = partial(pow, 2)
powers_of_2(2)
powers_of_2(5)
powers_of_2(10)

### Generators

Generators provide an elegant way to write simple and efficient code for functions that return a seq. of elements. Based on the `yield` statement, they allow you to pause a function and return an intermediate results. The fuction save its execution context and can be resumed later, if necessary.


In [None]:
def fibonacci():
    a, b = 0, 1
    while True:
        yield b
        a, b = b, a + b

fib = fibonacci()
next(fib)


In [None]:
next(fib)
next(fib)
next(fib)

In [None]:
for item in fibonacci():
    print(item)
    if item > 1_0:
        break

### Generator expression

Generator expression allows you to write code in a more functional way. `(item for item in iterable_expression)`.
Generator expressions can be used as input args in any function that accepts iterables. They also allow if clauses to filter specific elements the same way a list, dict and set comprehnsions. You can often replace complex `map()` and `filter()` constructions with more readable and compact generator expressions.

### Decorators

Is a callable expression that accepts a single arg when called and returns another callable object. Any object that implements the `__call__` method is callable and cabe used as a decorator and often, objects reutrned by them are not simple functions but are instances of more complex classes that are implemented thier own `__call__` method.

E.g. of a decorator.
```
@some_decorator
def decorated_function():
    pass
```

code above is equivalent to 
```
def decorated_function():
    pass
decorated_function = some_decorator(decorated_function)
```


### Enumerations

They are useful for encoding a closed set of values for variable or function args.


In [None]:


from enum import Enum, auto

class Weekday(Enum):
    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY= 3
    FRIDAY = 4
    SATURDAY = 5
    SUNDAY = 6


class Weekday_(Enum):
    MONDAY = auto()
    TUESDAY = auto()
    WEDNESDAY = auto()
    THURSDAY= auto()
    FRIDAY = auto()
    SATURDAY = auto()
    SUNDAY = auto()


class OrderStatus(Enum):
    PENDING = auto()
    PROCESSING = auto()
    PROCESSED = auto()

In [None]:
Weekday_.__members__
Weekday.__members__
OrderStatus.__members__

In [None]:
class Order:
    def __init__(self) -> None:
        self.status = OrderStatus.PENDING

    def process(self):
        if self.status == OrderStatus.PROCESSED:
            raise ValueError("Can't process order that has been already processed.")
        self.status = OrderStatus.PROCESSING
        ...
        self.status = OrderStatus.PROCESSED

In [None]:
from enum import Flag

class Side(Flag):
    GUACAMOLE = auto()
    TORTILLA = auto()
    FRIES = auto()
    BEER = auto()
    POTATO_SALAD = auto()

In [None]:
Side.__members__, Side.__flags__

In [None]:
mexican_side = Side.GUACAMOLE | Side.BEER | Side.TORTILLA
bavarian_side = Side.BEER | Side.POTATO_SALAD

common_side = mexican_side & bavarian_side

mexican_side, bavarian_side

Side.GUACAMOLE in mexican_side, Side.TORTILLA in bavarian_side

### Interfaces, Pattern and Modularity


Idioms are small and well-recognised programming patterns for solving small problems. The key x'tics of a programming idiom is that is specific to a single programming language.

Design patterns deals with larger code structure -functions and classes. Design pattern are reusable solutions to many common design problems appearing in software engineering. Design patterns can be expressed using many programming languages.


### Interfaces

An interface is an intermediary that takes part in the interaction between two entities. Eg. interface of a car consists mainly of steering wheel, pedals, dashboard and etc.

In programming interface means;

* The overall shape of the interaction plane that code can have.;
The interface of a function, for instance, will be the name of the function and its input args and the output it returns. The interface of an object will be all of its methods that can be invoked and all the attributes that can be accessed.
OS hae interfaces in the form of filesystems and system calls. Web and remote services have interfaces in teh form of communcation protocols.

* The abstract definition of possible interactions with the code that is intentionally separated from its implementation.;
Interface is understood as a contract that a specific element of a code declares to fulfill. Such a formal interface can be extracted from the implementation and can live as a standalone entity. This gives the possibility to build applications that depends on a specific interface but don't care about the actual implemenation, as long as it exists and fulfills the contract.


### zope.interface
The interface concept of zope.interface works best for areas where a single abstraction can have multiple implementations or can be applied to different objects that probably shouldn't be tangled with inheritance structure.
We build a simple collider system that can detect collisions between multiple overlapping objects. The algo to be used is the Axis-Aligned Bouding Box (AABB). It is the simplest way to detect a collision between two axis-aligned rectangles. It assumes that all elements that will be rested can be constrained with a rectangular bounding box.


In [None]:
import itertools
from dataclasses import dataclass
from typing import Iterable

def rects_collide(rect1, rect2) -> bool:
    """Check collision between two rectangle.
    Rectangle coordinates:
    ┌─────(x2, y2)
    │
    │
    (x1, y1) ────┘
    """
    return (rect1.x1 < rect2.x2 and rect1.x2 > rect2.x1 and 
            rect1.y1 < rect2.y2 and rect1.y2 > rect2.y1)


def find_collisions(objects: Iterable):
    """Detects all collisions within a batch of objects."""
    return [(item1, item2) for item1, item2 in itertools.combinations(objects, 2)
            if rects_collide(item1.bounding_box, item2.bounding_box)]


@dataclass
class Box:
    x1: float
    y1: float
    x2: float
    y2: float

@dataclass
class Square:
    x: float
    y: float
    size: float
    
    @property
    def bounding_box(self) -> Box:
        return Box(self.x, self.y, self.x +self.size, self.y + self.size)


@dataclass
class Rect:
    x: float
    y: float
    width: float
    height: float

    @property
    def bounding_box(self) -> Box:
        return Box(self.x, self.y, self.x + self.width, self.y + self.height)


@dataclass
class Circle:
    x: float
    y: float
    radius: float

    @property
    def bounding_box(self) -> Box:
        return Box(self.x - self.radius, self.y - self.radius, self.x + self.radius, self.y + self.radius)

In [None]:
for collision in find_collisions([Square(0, 0, 10), Rect(5, 5, 20, 20),Square(15, 20, 5), Circle(1, 1, 2)]):
    print(collision)

In [None]:
itertools.combinations([Square(0, 0, 10), Rect(5, 5, 20, 20),Square(15, 20, 5), Circle(1, 1, 2)], 2)

In [None]:

from zope.interface import Interface, Attribute, implementer
from zope.interface.verify import verifyObject

class ICollidable(Interface):
    bounding_box = Attribute("Object's bounding box")


@implementer(ICollidable)
@dataclass
class Square:
    x: float
    y: float
    size: float
    
    @property
    def bounding_box(self) -> Box:
        return Box(self.x, self.y, self.x +self.size, self.y + self.size)


@implementer(ICollidable)
@dataclass
class Rect:
    x: float
    y: float
    width: float
    height: float

    @property
    def bounding_box(self) -> Box:
        return Box(self.x, self.y, self.x + self.width, self.y + self.height)


@implementer(ICollidable)
@dataclass
class Circle:
    x: float
    y: float
    radius: float

    @property
    def bounding_box(self) -> Box:
        return Box(self.x - self.radius, self.y - self.radius, self.x + self.radius, self.y + self.radius)


def find_collisons(objects: Iterable):
    """Detects all collisions within a batch of objects."""
    for item in objects:
        verifyObject(ICollidable, item)
    
    return [(item1, item2) for item1, item2 in itertools.combinations(objects, 2)
            if rects_collide(item1.bounding_box, item2.bounding_box)]
    

In [None]:
# raise error
@dataclass
class Point:
    x: float
    y: float


for collision in find_collisons([Point(1,2), Square(0, 0, 10), Rect(5, 5, 20, 20),Square(15, 20, 5), Circle(1, 1, 2)]):
    print(collision)

In [None]:
# raise error
@implementer(ICollidable)
@dataclass
class Point:
    x: float
    y: float


for collision in find_collisons([Point(1,2), Square(0, 0, 10), Rect(5, 5, 20, 20),Square(15, 20, 5), Circle(1, 1, 2)]):
    print(collision)

In [None]:

from zope.interface import Interface, Attribute, invariant, implementer
from zope.interface.verify import verifyObject
from typing import Iterable


class IBBOX(Interface):
    x1 = Attribute("lower-left x coordinate")
    y1 = Attribute("lower-left y coordinate")
    x2 = Attribute("upper-right x coordinate")
    y2 = Attribute("upper-right y coordinate")


class ICollidate(Interface):
    bounding_box = Attribute("Object's bounding box")
    invariant(lambda self: verifyObject(IBBOX, self.bounding_box))


def find_collisons(objects: Iterable):
    """Detects all collisions within a batch of objects."""
    for item in objects:
        verifyObject(ICollidable, item)
        ICollidable.validateInvariants(item)  # indepth verification of the object.
    
    return [(item1, item2) for item1, item2 in itertools.combinations(objects, 2)
            if rects_collide(item1.bounding_box, item2.bounding_box)]

In [None]:
@implementer(ICollidable)
@dataclass
class Square:
    x: float
    y: float
    size: float
    
    @property
    def bounding_box(self) -> Box:
        return Box(self.x, self.y, self.x +self.size, self.y + self.size)


@implementer(ICollidable)
@dataclass
class Rect:
    x: float
    y: float
    width: float
    height: float

    @property
    def bounding_box(self) -> Box:
        return Box(self.x, self.y, self.x + self.width, self.y + self.height)


@implementer(ICollidable)
@dataclass
class Circle:
    x: float
    y: float
    radius: float

    @property
    def bounding_box(self) -> Box:
        return Box(self.x - self.radius, self.y - self.radius, self.x + self.radius, self.y + self.radius)


for collision in find_collisons([Square(0, 0, 10), Rect(5, 5, 20, 20),Square(15, 20, 5), Circle(1, 1, 2)]):
    print(collision)

In [None]:
# raise error
@implementer(ICollidable)
@dataclass
class Point:
    x: float
    y: float


for collision in find_collisons([Point(1,2), Square(0, 0, 10), Rect(5, 5, 20, 20),Square(15, 20, 5), Circle(1, 1, 2)]):
    print(collision)

### Using function annotations and abstract base classes.

In performing type comparism, it is less harmful to use `assert isinstance(instance, list)` instead of `assert type(instance) == list)`. Also you can check a large range of types  via `assert isintance(instance, (list, tuple, range))`.
`ABCs` is a class that does not need to provide a concrete implementation, but instead defines a bluerpint of a classs that may be used to check against type compatibility.
Abstract base classes are used for two purposes:

* checking for implementation completeness.
* checking for implicity interface compatibility.

The `@abstractmethod` decorator denotes a part of the interface that must be implemnted (by overriding) in classes that will subclass our `ABC`. If a class will have a nonoveridden method or property, you won't be able to instantiate it. Any attempt to do so will result in a `TypeError` exception.


In [None]:
from abc import ABC, abstractmethod

class DummyInterface(ABC):

    @abstractmethod
    def dummy_method(self): 
        ...

    @property
    @abstractmethod
    def dummy_property(self):
        ...

In [None]:
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Iterable


class ColliderABC(ABC):
    @property
    @abstractmethod
    def bounding_box(self):
        ...

@dataclass
class Square(ColliderABC):
    ...


@dataclass
class Rect(ColliderABC):
    ...

@dataclass
class Circle(ColliderABC):
    ...


def find_collisions(objects: Iterable):
    for item in objects:
        if not isinstance(item, ColliderABC):
            raise TypeError(f"{item} is not a collider")
    return [(item1, item2) for item1, item2 in itertools.combinations(objects, 2)
            if rects_collide(item1.bounding_box, item2.bounding_box)]

In [None]:
# __subclasshook__(cls) method allows you to inject your own logic into the procedure that 
# determines whether the object is an instance of a given class.

# extending our ColliderABC interface.ABC

class ColliderABC(ABC):
    @property
    @abstractmethod
    def bounding_box(self):
        ...

    @classmethod
    def __subclasshook__(cls, C):
        if cls is ColliderABC:
            if any("bounding_box" in B.__dict__ for B in C.__mro__):
                return True
        return NotImplemented

with the `__subclasshook__()` method defined that way, `ColliderABC` becomes an implicit interface. i.e. any object will be considered an instance of `ColliderABC` as long as it structure that passes the subclass hook check. With this we can add a new components compatible with the `ColliderABC` interface without explicitly inheriting from it. E.g. below.

In [None]:
@dataclass
class Line:
    p1: Point
    p2: Point

    @property
    def bounding_box(self):
        return Box(self.p1.x, self.p1.y, self.p2.x, self.p2.y)

line = Line(Point(0,0), Point(100,100))
line.bounding_box
isinstance(line, ColliderABC)

### Using collections.abc

ABCs are like small building blocks for creating a higher level of abstraction. They allow you to implement really usable interfaces, but are very generic and designed to handle a lot more than this single design pattern.

The `collections.abc` module provides lots of predefined ABCs that allows checking for the compactibility of types with common Python interfaces. With the base classes provided in this module you can check for eg., whether a give object is callable, mapping or whether it supports iter.

The most common abstract base classes from collections.abc are:

* `Container`: This interface means that the obj supports the in operator and implements the `__contains__()` method. 
* `Iterable`: This interface means that the obj supports iteration and implements the  `__iter__()` method.
* `Callable`: This interface means that it can be called like a function and implements the `__call__()` method.
* `Hashable`: This interface means that the obj is hashable (i.e. it can be included in set and as key in dict.) and implements the `__hash__` method.
* `Sized`: This interface means that the object has size (i.e. `len()` function) and implements the `__len__()` method.


### Interface through type annotations


In [None]:
from typing import Protocol, runtime_checkable

@runtime_checkable
class IBox(Protocol):
    x1: float
    y1: float
    x2: float
    y2: float


@runtime_checkable
class ICollider(Protocol):
    @property
    def bounding_box(self) -> IBox:
        ...

Notes: In the code above;  we have used two interfaces. mypy will perform deep type verification thus adding interfaces to increase the type safety. The `@runtime_checkable` decorator extends the protocol class with `isintance()` checks. This is something performed manually for `ABCs` using subclass hooks.

In [None]:
# Taking full advantage of static type analysis.

import itertools
from dataclasses import dataclass
from typing import Iterable, Protocol, runtime_checkable


@runtime_checkable
class IBox(Protocol):
    x1: float
    y1: float
    x2: float
    y2: float


@runtime_checkable
class ICollider(Protocol):
    @property
    def bounding_box(self) -> IBox:
        ...

def rects_collide(rect1: IBox, rect2: IBox):
    """Check collision between rectangles"""
    return (rect1.x1 < rect2.x2 and rect1.x2 > rect2.x1 and 
            rect1.y1 < rect1.y2 and rect1.y2 > rect2.y1)

def find_collisions(objects: Iterable[ICollider]):
    for item in objects:
        if not isinstance(item, ICollider):
            raise TypeError(f"{item} is not a collider")
    return [(item1, item2) for item1, item2 in itertools.combinations(objects, 2) 
            if rects_collide(item1.bounding_box, item2.bounding_box)]
    

Notes: The beauty of implicit interfaces is there is no explicit interface declaration in a concrete class beyond the inherent interface that comes from the actual implementation.

### Inversion of control and dependency injection

IoC is a simple property of some software designs. Traditional arch. of a program is a layered structure of procedures where control always goes from top to bottom. Higher-level layeers invoke procedures from lower layers. THose invoked procedures gain control and can invoke even deeper-layered procedures before returning control upward. In traditional arch. control is passed from application to library functions. Library functions may pass it deeper to even lower-level libraries but, eventually, return it back to the application.

IoC happens when a library passes control up to the application so that the application can take part in the library behavior.

`sorted([1,2,3,4,5,6])`

The built-in sorted() function takes an iterable of items and returns a list of sorted
items. Control goes from the caller (your application) directly to the sorted()
function. When the sorted() function is done with sorting, it simply returns the
sorted result and gives control back to the caller. Nothing special.


In [None]:
# sorting by absolute distance from number 3. Integers closet to 3 should be at the beginning of the result
# list and the farthest should be at the end.

def distance_from_3(item: int):
    return abs(item - 3)

sorted([1,2,3,4,5,6], key=distance_from_3)

Notes: In the code above, the `sorted()`  function will invoke the key function on every element of the iterable args. Instead of comparing item values, it now compares the return values of the key function. => IoC happens here. => the `sorted()` function `"upcalls"` back to the `distance_from_3()` function provided by the application as an argument. Now it is a library that calls the functions from the application, and thus the flow of control is reversed.

IoC is a property of a design and not a design pattern by itself.
Examples of callback-based IoC include:

* Polymorphism: When a custom class inherits from a base class and base methods are suppposed to call custom methods.
* Ags passing: When the receiving function is supposed to call methods of the supplied object.
* Decorators: When a decorator function calls a docorated function.
* Closures: When a nested function calls a function outside of its scope.


### IoC in applications

Here we build an application that tracks web page view using `tracking pixels` and serve page view stats over an HTTPP endpoint. This technique is commonly used in tracking advertisement views or email openings. Our application will have to track counts of page views in some persistent storage.

backend will have two endpoints:

* /track: Endpoint will return an HTTP response with a `1x1` pixel GIF image. Upon request, it will store the `Refere` header and increase the number of requests associated with that value.
* /stats: Endpoint will read the top most 10 most common `Referer` values recieved on `track` endpoint and return an HTTP response containing a summary of the results in JSON format.


In [None]:
%%writefile track_backend.py

from collections import Counter
from fastapi import status, FastAPI, Response, Request
import uvicorn

app = FastAPI(title="IoC Web Application")

storage = Counter()

PIXEL = (
    b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00'
    b'\x00\x00\xff\xff\xff!\xf9\x04\x01\x00'
    b'\x00\x00\x00,\x00\x00\x00\x00\x01\x00'
    b'\x01\x00\x00\x02\x01D\x00;'
    )

@app.get("/track")
def track(request: Request):
    try:
        referer =  request.headers["Referer"]
    except KeyError:
        return Response(status_code=status.HTTP_400_BAD_REQUEST)

    storage[referer] += 1
    
    return Response(content=PIXEL,
                    headers={
                        "Content-Type": "image/gif",
                        "Expires": "Mon, 01 Jan 1990 00:00:00 GMT",
                        "Cache-Control": "no-cache, no-store, must-revalidate",
                        "Pragma": "no-cache",
                    }
    )

@app.get("/stats")
def stats():
    return dict(storage.most_common(10))

@app.get("/test")
def test(request: Request):
    try:
        referer =  request.headers["Referer"]
    except KeyError:
        return Response(status_code=status.HTTP_400_BAD_REQUEST)
    
    storage[referer] += 1

    return """
    <html>
    <head></head>
    <body><img src="/track"></body>
    </html>
    """

if __name__ == "__main__":
    uvicorn.run(app, port=8005, host='127.0.0.1')


In [3]:
%%writefile track_backend.py

# modifying the previous function with good design in mind.

from collections import Counter
from fastapi import status, FastAPI, Response, Request
from abc import ABC, abstractmethod
from redis import Redis
from typing import Dict
import uvicorn


class ViewStorageBackend(ABC):
    @abstractmethod
    def increment(self, key: str) -> None:
        ...

    @abstractmethod
    def most_common(self, n: int) -> Dict[str, int]: 
        ...


class CounterBackend(ViewStorageBackend):
    """This implementation adapts Counter class into the ViewsStorageBackend
    Infterface.
    """
    def __init__(self):
        self._counter = Counter()

    def increment(self, key: str) -> None:
        self._counter[key] += 1

    def most_common(self, n: int) -> Dict[str, int]:
        return dict(self._counter.most_common(n))


class RedisBackend(ViewStorageBackend):
    """Storage using redis"""
    def __init__(self, redis_client: Redis, set_name: str) -> None:
        self._client = redis_client
        self._set_name = set_name

    def increment(self, key: str) -> None:
        self._client.zincrby(self._set_name, 1, key)

    def most_common(self, n: int) -> Dict[str, int]:
        return {key.decode(): int(value) for key, value 
                in self._client.zrange(self._set_name, 0, n-1,
                                       desc=True, withscores=True,)}

app = FastAPI(title="IoC Web Application")

storage = RedisBackend(Redis(host="redis"), "my-stats")

PIXEL = (
    b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00'
    b'\x00\x00\xff\xff\xff!\xf9\x04\x01\x00'
    b'\x00\x00\x00,\x00\x00\x00\x00\x01\x00'
    b'\x01\x00\x00\x02\x01D\x00;'
    )

@app.get("/track")
def track(request: Request, storage: ViewStorageBackend = storage):
    try:
        referer =  request.headers["Referer"]
    except KeyError:
        return Response(status_code=status.HTTP_400_BAD_REQUEST)
    storage.increment(referer)
    
    return Response(content=PIXEL,
                    headers={
                        "Content-Type": "image/gif",
                        "Expires": "Mon, 01 Jan 1990 00:00:00 GMT",
                        "Cache-Control": "no-cache, no-store, must-revalidate",
                        "Pragma": "no-cache",
                    }
    )

@app.get("/stats")
def stats(storage: ViewStorageBackend = storage):
    return storage.most_common(10)

@app.get("/test")
def test(request: Request, storage: ViewStorageBackend = storage):
    return """
    <html>
    <head></head>
    <body><img src="/track"></body>
    </html>
    """

if __name__ == "__main__":
    uvicorn.run(app, port=8005, host='127.0.0.1')

Overwriting track_backend.py


### Dependency Injection


### Concurrency

Concurrency is a property of a program, algorithm, or problem, whereas parallelism is just one of the possible approaches to problems that are concurrent.

By extrapolating events to programs, algorithms, or problems, we can say that something is concurrent if it can be fully or partially decomposed into components (units) that are order-independent. Such units may be processed independently from each other, and the order of processing does not affect the final result.
This means that they can also be processed simultaneously or in parallel. If we process
information this way (that is, in parallel), then we are indeed dealing with parallel processing. But this is still not obligatory.


#### Multithreading

This is characterized by running multiple threads of execution that share the memory context of the parent process. works in applications that do alot of I/O operations or need to maintain UI responsiveness. Its fairly lightweight but comes ceveats and memory safety risks.
Work can be split into threads and run simultaneously. Threads are still bound to the parent process and can easy communicate becuase they share the same memory context. The execution of threads is coordinated by the OS kernel.
Multithreading will benefit from a multiprocessor or multicore machines, where each thread can be executed on a separate CPU core, thus making the program run faster.


In [5]:
%%writefile test_thread.py
from threading import Thread

def my_func():
    print("printing from thread")

if __name__ == "___main__":
    thread = Thread(target=my_func)
    thread.start()
    thread.join()

Writing test_thread.py


In [7]:
!python test_thread.py

In [16]:
%%writefile test_thread.py
from threading import Thread

def my_func():
    print("printing from thread")

if __name__ == "__main__":
    threads = [Thread(target=my_func) for _ in range(10)]
    for thread in threads:
        thread.start()

    for thread in threads:
        thread.join()


Overwriting test_thread.py


In [18]:
!python test_thread.py

printing from thread
printing from threadprinting from thread

printing from thread
printing from threadprinting from thread
printing from thread

printing from thread
printing from thread
printing from thread


In [15]:
%%writefile test_thread2.py
from threading import Thread

thread_visits = 0

def visit_counter():
    global thread_visits
    for i in range(100_000):
        value = thread_visits
        thread_visits = value + 1

if __name__ == "__main__":
    thread_count = 100

    threads = [Thread(target=visit_counter) for _ in range(thread_count)]
    for thread in threads:
        thread.start()

    for thread in threads:
        thread.join()

    print(f"{thread_count=}, {thread_visits=}")

Overwriting test_thread2.py


In [17]:
!python test_thread2.py

thread_count=100, thread_visits=9876748


The codes above introduces us to a race condition. This happends in multithreaded applications. The way to avoid this issue is to use thread locking primitives. Python has a few lock classes in the thread module.
E.g. below.

In [3]:

%%writefile test_thread3.py

from threading import Lock
from threading import Thread

thread_visits = 0
thread_visits_lock = Lock()

def visit_counter():
    global thread_visits
    for i in range(100_000):
        with thread_visits_lock:
            thread_visits += 1

if __name__ == "__main__":
    thread_count = 100

    threads = [Thread(target=visit_counter) for _ in range(thread_count)]
    for thread in threads:
        thread.start()

    for thread in threads:
        thread.join()

    print(f"{thread_count=}, {thread_visits=}")

Overwriting test_thread3.py


In [4]:
!python test_thread3.py

thread_count=100, thread_visits=10000000


Though we fixed the race condition issue, we encounted performance issue as `Lock()` made sure only one thread at a time can process a single block of code. This resulted to the protected block not running in parallel. Also, acquiring and releasing lock are operation and takes some efforts.


#### When should we use multithreading?

* Application responsiveness: Apps that can accepts new inputs and respond within a given time frame even if they did not finish processing previous input.
* Multiuser applications and network communication: Apps that are supposed to accept inputs of multiple users simultaneously often communicate with users over the network.
* Work delegation and background processing: Apps where much of heavy lifting is done by external applications or services and your code act as a gateway to thos resources.


In [45]:
# example of multithreading

import  requests

response = requests.get("https://api.vatcomply.com/rates?base=USD")


response.json().get('rates')

{'EUR': 0.8447372867038352,
 'USD': 1.0,
 'JPY': 109.90032100016894,
 'BGN': 1.6521371853353608,
 'CZK': 21.454637607704004,
 'DKK': 6.281635411387059,
 'GBP': 0.7227149856394661,
 'HUF': 296.40141915864166,
 'PLN': 3.8296164892718365,
 'RON': 4.178577462409191,
 'SEK': 8.604747423551276,
 'CHF': 0.9174691670890354,
 'ISK': 127.38638283493835,
 'NOK': 8.671059300557527,
 'HRK': 6.32361885453624,
 'RUB': 72.88055414766008,
 'TRY': 8.470349721236696,
 'AUD': 1.3545362392295996,
 'BRL': 5.293968575772935,
 'CAD': 1.2672748775130935,
 'CNY': 6.453961817874641,
 'HKD': 7.777665146139551,
 'IDR': 14240.969758405136,
 'ILS': 3.203750633552965,
 'INR': 73.55169792194627,
 'KRW': 1168.043588443994,
 'MXN': 19.881990201047476,
 'MYR': 4.1496029734752495,
 'NZD': 1.4036999493157627,
 'PHP': 49.9113025848961,
 'SGD': 1.3432167595877682,
 'THB': 32.68035141071127,
 'ZAR': 14.145801655685082}

In [63]:
%%writefile fetch_rate_sync.py
import time
import requests
from typing import Tuple

SYMBOLS: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')
BASES: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')

def fetch_rate(base: str):
    """Fetches value from vatcomply api
    Args:
        base (str): currency
    """
    try:
        response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
        response.raise_for_status()
        rates = response.json().get("rates")
        
        rates[base] = 1.
        
        rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
        print(f"1 {base} = {rates_line}")
    except requests.exceptions.RequestException as err:
        raise SystemExit(err)
    

def main():
    for base in BASES:
        fetch_rate(base)

if __name__ == "__main__":
    started = time.time()
    main()
    elapsed = time.time() - started
    
    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Overwriting fetch_rate_sync.py


In [64]:
!python fetch_rate_sync.py

1 USD =     1.0 USD,   0.845 EUR,    3.83 PLN,    8.67 NOK,    21.5 CZK
1 EUR =    1.18 USD,     1.0 EUR,    4.53 PLN,    10.3 NOK,    25.4 CZK
1 PLN =   0.261 USD,   0.221 EUR,     1.0 PLN,    2.26 NOK,     5.6 CZK
1 NOK =   0.115 USD,  0.0974 EUR,   0.442 PLN,     1.0 NOK,    2.47 CZK
1 CZK =  0.0466 USD,  0.0394 EUR,   0.178 PLN,   0.404 NOK,     1.0 CZK

time elapsed: 3.12s


In [65]:
%%writefile fetch_rate_thread.py
import time
import requests
from typing import Tuple
from threading import Thread

SYMBOLS: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')
BASES: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')

def fetch_rate(base: str):
    """Fetches value from vatcomply api
    Args:
        base (str): currency
    """
    try:
        response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
        response.raise_for_status()
        rates = response.json().get("rates")
        
        rates[base] = 1.
        
        rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
        print(f"1 {base} = {rates_line}")
    except requests.exceptions.RequestException as err:
        raise SystemExit(err)
    

def main():
    threads: list = []
    for base in BASES:
        thread = Thread(target=fetch_rate, args=[base])
        thread.start()
        threads.append(thread)

    while threads:
        threads.pop().join()

if __name__ == "__main__":
    started = time.time()
    main()
    elapsed = time.time() - started
    
    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Overwriting fetch_rate_thread.py


In [66]:
!python fetch_rate_thread.py

1 PLN =   0.261 USD,   0.221 EUR,     1.0 PLN,    2.26 NOK,     5.6 CZK
1 USD =     1.0 USD,   0.845 EUR,    3.83 PLN,    8.67 NOK,    21.5 CZK
1 CZK =  0.0466 USD,  0.0394 EUR,   0.178 PLN,   0.404 NOK,     1.0 CZK1 NOK =   0.115 USD,  0.0974 EUR,   0.442 PLN,     1.0 NOK,    2.47 CZK

1 EUR =    1.18 USD,     1.0 EUR,    4.53 PLN,    10.3 NOK,    25.4 CZK

time elapsed: 0.74s


#### Using a thread pool

To avoid unobounded number of threads, we define size of thread workers to handle all parallel work and communicate with main thread through some safe data structure.

In [67]:
%%writefile fetch_rate_thread_pool.py
import time
import requests
from typing import Tuple
from threading import Thread
from queue import Queue, Empty

THREAD_POOL_SIZE: int = 6

SYMBOLS: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')
BASES: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')

def fetch_rate(base: str):
    """Fetches value from vatcomply api
    Args:
        base (str): currency
    """
    try:
        response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
        response.raise_for_status()
        rates = response.json().get("rates")
        
        rates[base] = 1.
        
        rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
        print(f"1 {base} = {rates_line}")
    except requests.exceptions.RequestException as err:
        raise SystemExit(err)
    
def worker(work_queue: Queue):
    while not work_queue.empty():
        try:
            item = work_queue.get_nowait()
        except Empty:
            break
        else:
            fetch_rate(item)
            work_queue.task_done()


def main():
    work_queue: Queue = Queue()

    for base in BASES:
        work_queue.put(base)

    threads = [Thread(target=worker, args=(work_queue,)) for  _ in range(THREAD_POOL_SIZE)]
    for thread in threads:
        thread.start()
    
    work_queue.join()

    while threads:
        threads.pop().join()

if __name__ == "__main__":
    started = time.time()
    main()
    elapsed = time.time() - started
    
    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Overwriting fetch_rate_thread_pool.py


In [68]:
!python fetch_rate_thread_pool.py

1 EUR =    1.18 USD,     1.0 EUR,    4.53 PLN,    10.3 NOK,    25.4 CZK1 PLN =   0.261 USD,   0.221 EUR,     1.0 PLN,    2.26 NOK,     5.6 CZK1 USD =     1.0 USD,   0.845 EUR,    3.83 PLN,    8.67 NOK,    21.5 CZK


1 NOK =   0.115 USD,  0.0974 EUR,   0.442 PLN,     1.0 NOK,    2.47 CZK
1 CZK =  0.0466 USD,  0.0394 EUR,   0.178 PLN,   0.404 NOK,     1.0 CZK

time elapsed: 1.62s


In [69]:
%%writefile fetch_rate_thread_pool_two_way.py
# two way queues.
import time
import requests
from typing import Tuple, Dict
from threading import Thread
from queue import Queue, Empty

THREAD_POOL_SIZE: int = 6

SYMBOLS: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')
BASES: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')

def fetch_rate(base: str) -> Tuple[str, Dict[str, float]]:
    """Fetches value from vatcomply api
    Args:
        base (str): currency
    """
    try:
        response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
        response.raise_for_status()
        rates = response.json().get("rates")
        
        rates[base] = 1.
        return (base, rates)

    except requests.exceptions.RequestException as err:
        raise SystemExit(err)


def present_results(base: str, rates: Dict[str, float]):
    rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
    print(f"1 {base} = {rates_line}")


def worker(work_queue: Queue, results_queue: Queue):
    while not work_queue.empty():
        try:
            item = work_queue.get_nowait()
        except Empty:
            break
        else:
            results_queue.put(fetch_rate(item))
            work_queue.task_done()


def main():
    work_queue: Queue = Queue()
    result_queue: Queue = Queue()

    for base in BASES:
        work_queue.put(base)

    threads = [Thread(target=worker, args=(work_queue, result_queue)) for  _ in range(THREAD_POOL_SIZE)]
    for thread in threads:
        thread.start()
    
    work_queue.join()

    while threads:
        threads.pop().join()

    while not result_queue.empty():
        present_results(*result_queue.get())

if __name__ == "__main__":
    started = time.time()
    main()
    elapsed = time.time() - started
    
    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Overwriting fetch_rate_thread_pool_two_way.py


In [70]:
!python fetch_rate_thread_pool_two_way.py

1 EUR =    1.18 USD,     1.0 EUR,    4.53 PLN,    10.3 NOK,    25.4 CZK
1 CZK =  0.0466 USD,  0.0394 EUR,   0.178 PLN,   0.404 NOK,     1.0 CZK
1 PLN =   0.261 USD,   0.221 EUR,     1.0 PLN,    2.26 NOK,     5.6 CZK
1 USD =     1.0 USD,   0.845 EUR,    3.83 PLN,    8.67 NOK,    21.5 CZK
1 NOK =   0.115 USD,  0.0974 EUR,   0.442 PLN,     1.0 NOK,    2.47 CZK

time elapsed: 0.63s


### Dealing with errors in threads


In [71]:
%%writefile fetch_rate_thread_pool_two_way_err.py
# two way queues.
import time
import requests
from typing import Tuple, Dict
from threading import Thread
from queue import Queue, Empty
import random

THREAD_POOL_SIZE: int = 6

SYMBOLS: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')
BASES: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')

def fetch_rate(base: str) -> Tuple[str, Dict[str, float]]:
    """Fetches value from vatcomply api
    Args:
        base (str): currency
    """

    response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
    rand_val = random.randint(0, 5)
    if rand_val < 1:
        # simulate error by overiding status code
        response.status_code = 500
    response.raise_for_status()
    
    rates = response.json().get("rates")
    
    rates[base] = 1.
    return (base, rates)


def present_results(base: str, rates: Dict[str, float]):
    rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
    print(f"1 {base} = {rates_line}")


def worker(work_queue: Queue, results_queue: Queue):
    while not work_queue.empty():
        try:
            item = work_queue.get_nowait()
        except Empty:
            break

        try:
            result = fetch_rate(item)
        except Exception as err:
            results_queue.put(err)
        else:
            results_queue.put(result)
        finally:
            work_queue.task_done()


def main():
    work_queue: Queue = Queue()
    results_queue: Queue = Queue()

    for base in BASES:
        work_queue.put(base)

    threads = [Thread(target=worker, args=(work_queue, results_queue)) for  _ in range(THREAD_POOL_SIZE)]
    for thread in threads:
        thread.start()
    
    work_queue.join()

    while threads:
        threads.pop().join()

    while not results_queue.empty():
        result = results_queue.get()
        if isinstance(result, Exception):
            raise result
        present_results(*result)

if __name__ == "__main__":
    started = time.time()
    main()
    elapsed = time.time() - started
    
    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Overwriting fetch_rate_thread_pool_two_way_err.py


In [72]:
!python fetch_rate_thread_pool_two_way_err.py

1 PLN =   0.261 USD,   0.221 EUR,     1.0 PLN,    2.26 NOK,     5.6 CZK
1 CZK =  0.0466 USD,  0.0394 EUR,   0.178 PLN,   0.404 NOK,     1.0 CZK
1 EUR =    1.18 USD,     1.0 EUR,    4.53 PLN,    10.3 NOK,    25.4 CZK
Traceback (most recent call last):
  File "/home/emilextrig/workspace/code4all/test_python/fetch_rate_thread_pool_two_way_err.py", line 79, in <module>
    main()
  File "/home/emilextrig/workspace/code4all/test_python/fetch_rate_thread_pool_two_way_err.py", line 74, in main
    raise result
  File "/home/emilextrig/workspace/code4all/test_python/fetch_rate_thread_pool_two_way_err.py", line 46, in worker
    result = fetch_rate(item)
  File "/home/emilextrig/workspace/code4all/test_python/fetch_rate_thread_pool_two_way_err.py", line 25, in fetch_rate
    response.raise_for_status()
  File "/home/emilextrig/miniconda3/envs/ml_3.9/lib/python3.9/site-packages/requests/models.py", line 943, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptio

### Throttling

The last of the issues mentioned in the Using one thread per item section that we
haven't tackled yet is potential rate limits that may be imposed by external service
providers.

The algorithm we will use is sometimes called a token bucket and is very simple. It
includes the following functionality:

* There is a bucket with a predefined number of tokens
* Each token corresponds to a single permission to process one item of work
* Each time the worker asks for one or more tokens (permissions), we do the
following:

        * We check how much time has passed since the last time we refilled the bucket
        * If the time difference allows for it, we refill the bucket with the number of tokens that correspond to the time difference
        * If the number of stored tokens is bigger than or equal to the amount requested, we decrease the number of stored tokens and return that value
        * If the number of stored tokens is less than requested, we return zero


In [78]:
%%writefile fetch_rate_thread_pool_two_way_throttle.py

from threading import Lock
import time
import time
import requests
from typing import Tuple, Dict
from threading import Thread
from queue import Queue, Empty
import random

class Throttle:
    def __init__(self, rate: int) -> None:
        self._consume_lock = Lock()
        self.rate = rate
        self.tokens = 0
        self.last = None

    def consume(self, amount: int = 1):
        with self._consume_lock:
            now = time.time()

            # time measurement is initialized on first and token request to avoid initial bursts
            if self.last is None:
                self.last = now

            elapsed = now - self.last

            # make sure that quant of passed time is big enough to add new tokens.
            if elapsed * self.rate > 1:
                self.tokens += elapsed * self.rate
                self.last = now

            # never over-fill the bucket
            self.tokens = min(self.rate, self.tokens)

            # finally dispatch tokens if available
            if self.tokens >= amount:
                self.tokens -= amount
                return amount 
            return 0


THREAD_POOL_SIZE: int = 6

SYMBOLS: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')
BASES: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')

def fetch_rate(base: str) -> Tuple[str, Dict[str, float]]:
    """Fetches value from vatcomply api
    Args:
        base (str): currency
    """

    response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
    rand_val = random.randint(0, 5)
    # if rand_val < 1:
    #     # simulate error by overiding status code
    #     response.status_code = 500
    response.raise_for_status()
    
    rates = response.json().get("rates")
    
    rates[base] = 1.
    return (base, rates)


def present_results(base: str, rates: Dict[str, float]):
    rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
    print(f"1 {base} = {rates_line}")


def worker(work_queue: Queue, results_queue: Queue, throttle: Throttle):
    while True:
        try:
            item = work_queue.get_nowait()
        except Empty:
            break

        while not throttle.consume():
            time.sleep(0.1)

        try:
            result = fetch_rate(item)
        except Exception as err:
            results_queue.put(err)
        else:
            results_queue.put(result)
        finally:
            work_queue.task_done()


def main():
    work_queue: Queue = Queue()
    results_queue: Queue = Queue()
    throttle: Throttle = Throttle(10)

    for base in BASES:
        work_queue.put(base)

    threads = [Thread(target=worker, args=(work_queue, results_queue, throttle)) for  _ in range(THREAD_POOL_SIZE)]
    for thread in threads:
        thread.start()
    
    work_queue.join()

    while threads:
        threads.pop().join()

    while not results_queue.empty():
        result = results_queue.get()
        if isinstance(result, Exception):
            raise result
        present_results(*result)

if __name__ == "__main__":
    started = time.time()
    main()
    elapsed = time.time() - started
    
    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Overwriting fetch_rate_thread_pool_two_way_throttle.py


In [79]:
!python fetch_rate_thread_pool_two_way_throttle.py

1 USD =     1.0 USD,   0.845 EUR,    3.84 PLN,    8.62 NOK,    21.4 CZK
1 EUR =    1.18 USD,     1.0 EUR,    4.54 PLN,    10.2 NOK,    25.3 CZK
1 PLN =   0.261 USD,    0.22 EUR,     1.0 PLN,    2.25 NOK,    5.57 CZK
1 CZK =  0.0468 USD,  0.0395 EUR,    0.18 PLN,   0.403 NOK,     1.0 CZK
1 NOK =   0.116 USD,   0.098 EUR,   0.445 PLN,     1.0 NOK,    2.48 CZK

time elapsed: 1.13s


### Multiprocessing

Multithreading is challenging and another approach that allows us to achieve parallelism is multiprocessing. i.e seperate python processes that do not contstrain each other with GIL allow for better resource utilizaiton. This is especially important for CPU-intensive task in applications running multicore processes.
Other advantage of using multiple processes over threads is the fact that they do not share a memory context; this introduction of data corruption and deadlocks/race conditions harder in aplications. This process of not sharing memory context results to difficulty in passing data between seperate processes, however there are many good ways to implment reliable interprocess communication.


In [83]:
%%writefile fork_prog.py
import os 

pid_list = []

def main():
    pid_list.append(os.getpid())
    child_pid = os.fork()
    
    if child_pid == 0:
        pid_list.append(os.getpid())
        print()
        print("PRNT: hey I am the child process")
        print(f"PRNT: all the pid I know {pid_list}")
    else:
        pid_list.append(os.getpid())
        print()
        print("PRNT: hey I am the parent process")
        print(f"PRNT: the child pid is {child_pid}")
        print(f"PRNT: all the pid I know {pid_list}")


if __name__=="__main__":
    main()

Overwriting fork_prog.py


In [85]:
!python fork_prog.py


PRNT: hey I am the parent process
PRNT: the child pid is 1485708
PRNT: all the pid I know [1485706, 1485706]

PRNT: hey I am the child process
PRNT: all the pid I know [1485706, 1485708]


In [93]:
%%writefile os_mutliprocessing.py
from multiprocessing import Process
import os

def work(identifier: int):
    print(f"Hey, I am the process, {identifier}, pid {os.getpid()} \n")

def main():
    processes = [Process(target=work, args=(number,)) for number in range(10)]
    for process in processes:
        process.start()

    while processes:
        processes.pop().join()

if __name__ == "__main__":
    main()

Overwriting os_mutliprocessing.py


In [94]:
!python os_mutliprocessing.py

Hey, I am the process, 0, pid 1489083 

Hey, I am the process, 1, pid 1489084 

Hey, I am the process, 2, pid 1489085 

Hey, I am the process, 4, pid 1489087 

Hey, I am the process, 9, pid 1489092 

Hey, I am the process, 8, pid 1489091 

Hey, I am the process, 3, pid 1489086 

Hey, I am the process, 7, pid 1489090 

Hey, I am the process, 5, pid 1489088 

Hey, I am the process, 6, pid 1489089 



When processes are created, the memoory is forked on POSIX and POSIX-like systems. Beside the memory state that is copied, the `Process` class also provides an extra args in it constructor so that data can be passed along.

Communication between processes requires some additional work because thier local memory is not shared by default. To make this work, the multiprocessing module provides the following few ways of communcating between processes:

* Using the `multiprocessing.Queue` class, which is function equivalent of queue.Queue as used in threads. 
* Using `multiprocessing.Pipe`, which is a socket-like two-way communcation channel.
* Using `multiprocessing.sharedctypes` module, which allows you to create arbitrary C types in a dedicated pool of memory that is shared between processes.


In [108]:
%%writefile os_mutliprocessing_queue.py
from multiprocessing import Process
import os
import time
import requests
from typing import Tuple, Dict

from queue import Queue, Empty
import random

PROCESS_POOL_SIZE: int = 10

SYMBOLS: Tuple[str] = ('USD', 'EUR')
BASES: Tuple[str] = ('USD', 'EUR')

def fetch_rate(base: str) -> Tuple[str, Dict[str, float]]:
    """Fetches value from vatcomply api
    Args:
        base (str): currency
    """

    response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
    
    response.raise_for_status()
    
    rates = response.json().get("rates")
    
    rates[base] = 1.
    return (base, rates)


def present_results(base: str, rates: Dict[str, float]):
    rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
    print(f"1 {base} = {rates_line}")


def worker(work_queue: Queue, results_queue: Queue):
    while not work_queue.empty():
        try:
            item = work_queue.get_nowait()
        except Empty:
            break

        try:
            result = fetch_rate(item)
        except Exception as err:
            results_queue.put(err)
        else:
            results_queue.put(result)
        finally:
            work_queue.task_done()


def main():
    work_queue: Queue = Queue()
    results_queue: Queue = Queue()

    for base in BASES:
        work_queue.put(base)

    processes = [Process(target=worker, args=(work_queue, results_queue)) for  _ in range(10)]
    for process in processes:
        process.start()
    
    work_queue.join()

    while processes:
        processes.pop().join()

    while not results_queue.empty():
        result = results_queue.get()
        if isinstance(result, Exception):
            raise result
        present_results(*result)


if __name__ == "__main__":
    started = time.time()
    main()
    elapsed = time.time() - started
    
    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Overwriting os_mutliprocessing_queue.py


In [2]:
# !python os_mutliprocessing_queue.py # code not working currently. In a kind of loop 


In [3]:
%%writefile multiprocess_pipe.py
def worker(connection):
    while True:
        instance = connection.recv()
        if instance:
            print(f"CHILD: recv: {instance}")
        if instance is None:
            break

from multiprocessing import Process, Pipe

class CustomClass:
    pass

def main():
    parent_conn, child_conn = Pipe()
    child = Process(target=worker, args=(child_conn,))

    for item in (42, "some string",{"one":1}, CustomClass, None):
        parent_conn.send(item)

    child.start()
    child.join()
    

if __name__ == "__main__":
    main()

Writing multiprocess_pipe.py


In [4]:
!python multiprocess_pipe.py

CHILD: recv: 42
CHILD: recv: some string
CHILD: recv: {'one': 1}
CHILD: recv: <class '__main__.CustomClass'>


Other way to share a state between processes is to use raw types in a shared memory pool with classes provided in `multiprocess.sharedctypes`. The most basic ones are Values and Array.

In [5]:
from multiprocessing import Process, Value, Array

def f(n, a):
    n.values = 3.1515927
    for i in range(len(a)):
        a[i] = -a[i]


num = Value('d', 0.0)
arr = Array('i', range(10))

p = Process(target=f, args=(num, arr))
p.start()
p.join()

print(num.value)
print(arr[:])

0.0
[0, -1, -2, -3, -4, -5, -6, -7, -8, -9]


### Using process pools

Using multiple processes instead of threads adds some overhead. Mostly, it increases the memory footprint because each process has its own and independent memory context. This means allowing unbound numbers of child processes may be more of an issue than allowing an unbounded number of threads in multithreaded applications.
The best pattern to control resource usage in applications that rely on multiprocessing is to build a process pool [like a thread pool].


In [19]:
%%writefile fetch_rate_processpools.py

from time import time
from multiprocessing import Pool

import requests
from typing import List, Tuple, Dict

SYMBOLS : Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')
BASES: Tuple[str] = ('USD', 'EUR', 'PLN', 'NOK', 'CZK')

POOL_SIZE : int = 4

def fetch_rate(base: str) -> Tuple[str, Dict[str, float]]:
    response = requests.get(f"https://api.vatcomply.com/rates?base={base}")
    rates = response.json()["rates"]
    
    rates[base] = 1
    return base, rates

def present_results(base: str, rates: Dict[str, float]) -> None:
    rates_line = ", ".join([f"{rates[symbol]:7.03} {symbol}" for symbol in SYMBOLS])
    print(f"1 {base} = {rates_line}")

def main():
    with Pool(POOL_SIZE) as pool:
        results = pool.map(fetch_rate, BASES)
    
    for result in results:
        print(*result)

if __name__ == "__main__":
    started: time = time()
    main()
    elapsed: time = time() - started

    print()
    print("time elapsed: {:.2f}s".format(elapsed))

Writing fetch_rate_processpools.py


In [21]:
!python fetch_rate_processpools.py

USD {'EUR': 0.8525876033762468, 'USD': 1, 'JPY': 109.55750703384773, 'BGN': 1.6674908346832635, 'CZK': 21.638673373689144, 'DKK': 6.339926677466109, 'GBP': 0.7332253389035723, 'HUF': 302.90732372751296, 'PLN': 3.949612072640464, 'RON': 4.22073493051411, 'SEK': 8.675760934436013, 'CHF': 0.9224997868530992, 'ISK': 129.67857447352714, 'NOK': 8.633984141870576, 'HRK': 6.388865205899906, 'RUB': 72.80049450080995, 'TRY': 8.655725125756671, 'AUD': 1.379316224742092, 'BRL': 5.255008952169835, 'CAD': 1.2786256287833575, 'CNY': 6.466876971608833, 'HKD': 7.786938357916275, 'IDR': 14233.753943217665, 'ILS': 3.205047318611987, 'INR': 73.85284337965726, 'KRW': 1183.2807570977916, 'MXN': 20.081251598601757, 'MYR': 4.1925142808423566, 'NZD': 1.4234802625969818, 'PHP': 50.29840566118168, 'SGD': 1.3517776451530394, 'THB': 33.440190979623154, 'ZAR': 14.781140762213317}
EUR {'EUR': 1, 'USD': 1.1729, 'JPY': 128.5, 'BGN': 1.9558, 'CZK': 25.38, 'DKK': 7.4361, 'GBP': 0.86, 'HUF': 355.28, 'PLN': 4.6325, 'RON':