## decorators

In [4]:
class C:
    """Demo class that builds a static method without the ``@`` decorator syntax."""

    def meth(self): 
        return 42

    # Defined without `self`: as a plain function in the class body it could
    # not be called on an instance until it is wrapped below.
    def func(): 
        return 42
    
    # Rebind the name to a staticmethod object; `@staticmethod` placed above
    # the `def` is shorthand for exactly this assignment.
    func = staticmethod(func)

In [6]:
C.func()

42

In [7]:
class C2:
    """Demo class whose static method is declared with ``@staticmethod``."""

    def meth(self):
        """Regular instance method; needs an instance to be called."""
        answer = 42
        return answer

    @staticmethod
    def func():
        """Static method; callable on the class as well as on an instance."""
        answer = 42
        return answer

In [8]:
c2 = C2()
c2.func()

42

## closures

In [9]:
def outer(arg1):
    """Return a function that adds `arg1` to its own argument.

    `inner` refers to `arg1` from the enclosing call, so the returned
    function carries that value with it in its closure.
    """
    def inner(arg2):
        # `arg1` is a free variable taken from the enclosing `outer` call.
        return arg1 + arg2
    return inner

In [10]:
i10 = outer(10)
i10(4)

14

In [11]:
i10

<function __main__.outer.<locals>.inner(arg2)>

In [15]:
i10.__closure__[0].cell_contents

10

## simple decorator

In [16]:
# Broken decorator (on purpose): `hello` prints but has no return statement, so
# it returns None and the decorated name ends up bound to None instead of a function.
def hello(func): print('hello') 

# A decorator runs at definition time, i.e. as soon as the decorated `def` statement is executed -- that is why 'hello' is printed right away.
@hello
def add(a, b): return a + b

hello


In [18]:
add(1, 2)

TypeError: 'NoneType' object is not callable

In [12]:
def hello(func):
    """Decorator that prints 'hello' every time the decorated function is called."""
    # `wrapper` collects positional arguments in the tuple `args` and keyword
    # arguments in the dict `kwargs`, then unpacks both again when it forwards
    # the call to `func`.
    def wrapper(*args, **kwargs):
        print('hello')
        result = func(*args, **kwargs)
        return result
    return wrapper


# Decorating only swaps `add` for `wrapper`; nothing is printed until `add` is called.
@hello
def add(a, b):
    return a + b

In [11]:
add(1, 2)

hello


3

---

### note: *args and **kwargs

In [24]:
# *args stores positional arguments in a tuple (to keep ordering)
def func(*args):
    """Return the positional arguments exactly as Python packed them (a tuple)."""
    packed = args
    return packed

print(func())
print(func(1))
print(func(1, "string"))

()
(1,)
(1, 'string')


In [28]:
# **kwargs catches all keyword arguments
def func(**kwargs):
    """Return the keyword arguments exactly as Python packed them (a dict)."""
    collected = kwargs
    return collected
print(func(a=1))
print(func(a=1, b="string"))

{'a': 1}
{'a': 1, 'b': 'string'}


In [29]:
L = [1, 2, 3]
print(L)
print(L[0], L[1], L[2])
print(*L)   # unpacking

[1, 2, 3]
1 2 3
1 2 3


---

In [13]:
def add(a, b):
    """adds two objects"""
    return a + b

print(add.__name__)
print(add.__doc__)

add
adds two objects


In [19]:
def hello(func):
    """Decorator (deliberately without `functools.wraps`) that prints 'hello' before each call."""
    # Positional arguments are packed into the tuple `args`, keyword arguments
    # into the dict `kwargs`.
    def wrapper(*args, **kwargs):
        print('hello')
        result = func(*args, **kwargs)    # both are unpacked again for the real call
        return result
    return wrapper


# Pitfall: the decorated function now carries the wrapper's metadata. Use this
# decorator on 10 functions and all 10 report the name "wrapper" and lose their
# own docstrings. The solution is to use @wraps from functools!
@hello
def add(a, b):
    """adds two objects"""
    return a + b

print(add.__name__)
print(add.__doc__)

wrapper
None


In [23]:
from functools import wraps

def hello(func):
    """Decorator that prints 'hello' before each call and keeps `func`'s metadata."""
    @wraps(func)    # copies __name__, __doc__, ... from `func` onto `wrapper`
    def wrapper(*args, **kwargs):
        # `args` (tuple) and `kwargs` (dict) hold whatever the caller passed in.
        print('hello')
        result = func(*args, **kwargs)    # unpack them again for the real call
        return result
    return wrapper


@hello
def add(a, b):
    """adds two objects"""
    total = a + b
    return total

print(add.__name__)
print(add.__doc__)

add
adds two objects


In [17]:
def myfunction(a):
    """my docstring"""
    return 42

myfunction.__name__
myfunction.__doc__

'my docstring'

In [40]:
from functools import wraps 

def hello(func):
    """Decorator that prints 'hello' and then runs the decorated function."""

    # @wraps makes the function decorated below keep its own name and docstring.
    # Comment the @wraps line out to see the difference in what's printed.
    @wraps(func)
    def wrapper(*args, **kwargs):
        # --- added functionality ---------------------------------------
        print('hello')

        # --- the original function, called with the caller's arguments ---
        result = func(*args, **kwargs)
        return result
    return wrapper


@hello
def add(a, b):
    """adds two objects"""
    return a + b

print(add.__name__)
print(add.__doc__)

add
adds two objects


## caching

In [44]:
"""Caching results with a decorator.
"""

import functools
import pickle


def cached(func):
    """Decorator that caches the results of `func` per argument combination.

    Every decorated function gets its own `cache` dict, which lives in the
    closure of the returned wrapper. The first call with a given set of
    arguments runs `func` and stores the result; later calls with the same
    arguments return the stored result without running `func` again.

    All arguments must be picklable, otherwise `pickle.dumps` raises.
    """
    cache = {}

    @functools.wraps(func)
    def _cached(*args, **kwargs):
        """Return the cached result for these arguments, computing it on a miss."""
        # dicts cannot be used as dict keys, but the bytes produced by
        # pickle.dumps are hashable and can.
        # Keyword arguments are put into sorted order first, so that
        # f(a=1, b=2) and f(b=2, a=1) share a single cache entry instead of
        # being computed (and stored) twice.
        key = pickle.dumps((args, dict(sorted(kwargs.items()))))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]
    return _cached


In [46]:
@cached
def add(a, b):
    print('adding...')
    return a+b 

In [47]:
add(3, 4) # calculates 3+4

adding...


7

In [48]:
add(3, 4) # uses stored result

7

In [49]:
add.__closure__

(<cell at 0x7f5ba1e3bd60: dict object at 0x7f5ba1e29100>,
 <cell at 0x7f5ba1e583a0: function object at 0x7f5ba2161120>)

In [52]:
# results are stored in the closure.
# here are the results stored
# each function that gets decorated with @cached will have its own closure
add.__closure__[0].cell_contents 

{b'\x80\x04\x95\x0b\x00\x00\x00\x00\x00\x00\x00K\x03K\x04\x86\x94}\x94\x86\x94.': 7}

In [53]:
@cached
def substract(a, b):
    print("substracting...")
    return a-b

In [54]:
substract(2, 1)

substracting...


1

In [55]:
substract(2, 1)

1

In [56]:
substract.__closure__[0].cell_contents

{b'\x80\x04\x95\x0b\x00\x00\x00\x00\x00\x00\x00K\x02K\x01\x86\x94}\x94\x86\x94.': 1}

## logging

In [42]:
"""Helper to switch on and off logging of decorated functions."""
# This avoids scattering lots of "if logging..." statements throughout your code.

import functools

LOGGING = False


def logged(func):
    """Decorator that logs calls of `func` while the module-level `LOGGING` flag is true."""

    @functools.wraps(func)
    def _logged(*args, **kwargs):   # the name `_logged` is only a *convention*; @wraps replaces the visible function name anyway
        """Log the call if logging is switched on, then run the decorated function."""
        # `LOGGING` is looked up on every call, so it can be toggled at runtime.
        if not LOGGING:
            return func(*args, **kwargs)
        print('logged') # do proper logging here
        return func(*args, **kwargs)
    return _logged


In [43]:
@logged
def add(a, b): return a + b

add(1, 2)

3

In [61]:
LOGGING = True 
add(1, 2)

logged


3

In [63]:
add is add.__wrapped__

False

In [64]:
add == add.__wrapped__

False

How to decorate all functions?

* Put all your functions into a class.
* Then give your class a decorator that decorates all the methods.

# advanced

To add functionality, like passing in an argument to a decorator, you just add another layer on top.

In other words, you wrap your wrapper with another ... wrapper function `say`.

In [68]:
from functools import wraps

def say(text):
    """Decorator factory: `say(text)` returns a decorator that prints `text` before each call."""
    def decorate(func):
        # Middle layer: receives the function that is being decorated.
        @wraps(func)
        def announce_then_call(*args, **kwargs):
            # Innermost layer: runs on every call of the decorated function.
            print(text)
            result = func(*args, **kwargs)
            return result
        return announce_then_call
    return decorate

In [69]:
@say('hello')
def add(a, b): return a+b

add(1, 2)

hello


3

In [70]:
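# `@say('hello')` is equivalent to the assignment below. Note that `add` is already decorated from the previous cell, so this wraps it a second time and 'hello' is printed twice.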
add = say('hello')(add)
add(1, 2)

hello
hello


3

In [73]:
# as many decorators as you like. but make sure you write your decorators
# such that ordering of possibly many decorators does not matter.
@say('hello')
@say('goodbye')
def add(a, b): return a+b

add(1, 2)

hello
goodbye


3

## callable

* 

In [78]:
sum([1, 2])

3

In [79]:
int(1.0)

1

In [75]:
type(sum)

builtin_function_or_method

In [76]:
type(int)

type

In [85]:
callable(sum)

True

In [86]:
callable(int)

True

In [91]:
# classes such as `int` are callable; by defining `__call__` you make the *instances* of your own class callable too!

class CallCounter:
    """Callable object that counts how many times it has been called."""

    def __init__(self):
        self.count = 0

    def __call__(self):
        # Calling the instance bumps the counter; nothing is returned.
        self.count = self.count + 1

In [96]:
c = CallCounter()
c.count

0

In [97]:
c() # the *instance* is callable because its class defines `__call__` -- much like the class `int` can be called, e.g. int(5.0)
c.count

1

A class that is a decorator: a different way to write a decorator. It is less "deep", because it removes the outermost layer of our previously written decorator function `say(text): ...`; the text is stored on the instance by `__init__` instead.

The advantage is that you could gather multiple decorators in a single class. Or have the decorator in the `__call__` call other decorators defined in the same class. Can make the code more organized.

In [35]:
int(4.0)

4

In [36]:
print(type(int))

<class 'type'>


In [37]:
from functools import wraps

class Say:   # this will be a decorator. you could write its name in lowercase, despite it being a class.
    """Class-based decorator that prints `text` before each call of the decorated function."""

    def __init__(self, text):
        self.text = text

    def __call__(self, func):
        """Wrap `func`; this is what makes `Say(text)` usable after `@`."""
        @wraps(func)
        def _say(*args, **kwargs):
            print(self.text)    # looked up on the instance at call time
            outcome = func(*args, **kwargs)
            return outcome
        return _say

In [38]:
@Say('hello')
def add(a, b): return a + b
add(4, 5)

hello


9

In [104]:
def add(a, b): return a + b 
add = Say('text')(add)
add(4, 5)

text


9

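Note: A decorator lets you play the role of a compiler, e.g. by running checks before the function is ever called. This works because decorators are executed at **import time**, i.e. when the decorated `def` or `class` statement itself is executed.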

## Use Cases

### Use Case: Argument Tracking

In [112]:
"""Check function arguments for given type."""

import functools


def check(*argtypes):
    """Build a decorator that type-checks the positional arguments of a function.

    `argtypes` are the types the arguments *should* have, in positional order.
    The decorated function accepts positional arguments only and raises
    `TypeError` when their number or their types do not match.
    """

    def _check(func):
        """Wrap `func` with the argument check."""

        @functools.wraps(func)
        def __check(*args):
            """Validate `args` against `argtypes`, then call `func`."""
            # argtypes: the types I *want* to have; args: the values I *have*
            expected_count, actual_count = len(argtypes), len(args)
            if actual_count != expected_count:
                raise TypeError(f'Expected {expected_count} but got {actual_count} arguments')

            all_match = all(
                isinstance(value, wanted) for value, wanted in zip(args, argtypes)
            )
            if not all_match:
                actual_types = tuple(type(value) for value in args)
                raise TypeError(f'Expected {argtypes} but got {actual_types}')
            return func(*args)
        return __check
    return _check

In [115]:
@check(float, float)
def add(a, b): return a + b

In [117]:
add(1, 2)

TypeError: Expected (<class 'float'>, <class 'float'>) but got (<class 'int'>, <class 'int'>)

In [116]:
add(1.0, 2.0)

3.0

### Use Case: registering

In [25]:
"""A function registry.
"""

import functools

registry = {}


def register_at_call(name):
    """Return a decorator that records the function under `name` on every call."""

    def _register(func):
        """Wrap `func` so that each call registers it first."""

        @functools.wraps(func)
        def __register(*args, **kwargs):
            """Append `func` to `registry[name]`, then run it."""
            # Spelled-out form of the one-liner
            # `registry.setdefault(name, []).append(func)`.
            if name not in registry:
                registry[name] = []
            registry[name].append(func)
            return func(*args, **kwargs)
        return __register
    return _register


def register_at_def(name):
    """Return a decorator that records the function under `name` once, when it is defined."""

    def _register(func):
        """Register `func` and hand it back unwrapped."""
        # Runs when the decorator is applied, not when `func` is called.
        bucket = registry.setdefault(name, [])
        bucket.append(func)
        return func
    return _register


In [27]:
registry

{}

In [28]:
@register_at_call('at_call')
def add(a, b): return a + b

In [29]:
registry

{}

In [30]:
add(1, 2)
registry

{'at_call': [<function __main__.add(a, b)>]}

In [31]:
add(1, 2)
registry

{'at_call': [<function __main__.add(a, b)>, <function __main__.add(a, b)>]}

In [32]:
add(5, 6)
registry

{'at_call': [<function __main__.add(a, b)>,
  <function __main__.add(a, b)>,
  <function __main__.add(a, b)>]}

function is added to registry every time the function is *called*

In [34]:
@register_at_def('at_def')
def add(a, b): return a + b

In [158]:
registry

{'at_call': [<function __main__.add(a, b)>, <function __main__.add(a, b)>],
 'at_def': [<function __main__.add(a, b)>]}

## class decorators

In [165]:
# the boilerplate you want. now you can do anything with the class
def mark(cls):
    """Minimal class decorator: receives the class and returns it unchanged."""
    return cls

In [166]:
def mark(cls):
    """Class decorator that sets the class attribute `new_attr` to 100."""
    setattr(cls, 'new_attr', 100)
    return cls

In [167]:
@mark
class A:
    pass

In [168]:
A.new_attr

100

In [169]:
# equivalent to this syntax without the "@"
class A:
    pass
A = mark(A)

A.new_attr

100

### use case: inspecting class methods

use case idea: inspecting the class and doing something with the class

In [40]:
"""Class decorator to check method name length."""


def check_name_length(max_len=30):
    """Build a class decorator that checks the length of method names.

    The decorator raises a `NameError` if the name of a callable attribute
    defined directly on the decorated class is longer than `max_len`;
    otherwise it returns the class unchanged.
    """
    def _check_name_length(cls):
        # Indent for the second message line, so that it lines up under the
        # text following "NameError: " in the traceback.
        indent = '  ' + ' ' * len('NameError')
        for name, obj in vars(cls).items():
            if not callable(obj):
                continue
            if len(name) <= max_len:
                continue
            raise NameError(
                f'name `{name}` too long,\n{indent}'
                f'found {len(name)} characters, only {max_len} are allowed'
            )
        return cls
    return _check_name_length

In [41]:
@check_name_length(max_len=10)
class B:
    def meth(self): pass
    def meth_with_too_long_name_blablabla_whatthefuck(self): pass
B

NameError: name `meth_with_too_long_name_blablabla_whatthefuck` too long,
           found 45 characters, only 10 are allowed

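To decorate all methods with a decorator, one option is a class decorator that loops over `cls.__dict__.items()` and, for every `callable(obj)`, replaces it with the decorated version via the built-in `setattr(cls, name, decorator(obj))`.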

## decorator to check output schema

In [132]:
import logging

logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] [%(name)s] [%(levelname)s] [%(lineno)04d] %(message)s",
    datefmt="%d.%m.%Y %H:%M:%S",
)

logger = logging.getLogger(name='hello')
type(logger)

logging.Logger

In [133]:
logger.info("hello world")

[05.07.2023 17:40:21] [hello] [INFO] [0001] hello world


In [140]:
import polars as pl
from typing import List, Dict, Any, Union, Optional, Set
from functools import wraps

ColName = str
DataType = Any


def check_output(schema: Dict[ColName, DataType], coerce_dtypes: bool, coerce_column_order: bool, logger: Optional[logging.Logger] = None):
    """Decorator factory that checks the DataFrame returned by the decorated function.

    The returned DataFrame is compared against `schema`. Mismatches are only
    reported through `logger`; they never raise.

    Args:
        schema: Expected column names mapped to their expected dtypes, listed
            in the desired column order.
        coerce_dtypes: If true, try to cast every column whose dtype differs
            from the schema to the expected dtype.
        coerce_column_order: If true and the column sets match, reorder the
            columns to the order given by `schema`.
        logger: Optional logger that receives the findings at INFO level.
            Nothing is logged when it is None.

    Returns:
        A decorator; the decorated function returns the (possibly reordered
        and/or cast) DataFrame.
    """
    def _check_output(wrapped):
        @wraps(wrapped)
        def __check_output(*args, **kwargs):
            func_name = wrapped.__name__ # name of decorated function

            df = wrapped(*args, **kwargs)
            df_schema = dict(zip(df.columns, df.dtypes))

            # check if the columns match
            columns_in_df = set(df.columns)
            columns_in_schema = set(schema)

            if columns_in_schema == columns_in_df:
                if logger is not None:
                    logger.info(f"Checking output of function {func_name}() | Schema and DataFrame have matching columns.")
                if coerce_column_order:
                    # pass a real list rather than a dict_keys view
                    df = df.select(list(schema))
            else:
                columns_in_df_but_not_in_schema = list(columns_in_df - columns_in_schema)
                columns_in_schema_but_not_in_df = list(columns_in_schema - columns_in_df)

                if logger is not None:
                    logger.info(f"Checking output of function {func_name}() | Found mismatching columns | Columns in df but not in schema: {columns_in_df_but_not_in_schema}")
                    logger.info(f"Checking output of function {func_name}() | Found mismatching columns | Columns in schema but not in df: {columns_in_schema_but_not_in_df}")

            # check if the datatypes match
            for column_name, expected_dtype in schema.items():

                # columns missing from the DataFrame were already reported above
                if column_name not in df_schema:
                    continue

                actual_dtype = df_schema[column_name]

                if expected_dtype != actual_dtype:

                    if logger is not None:
                        logger.info(f"Checking output of function {func_name}() | Column {column_name} | is of type {actual_dtype} but should be {expected_dtype}")

                    if coerce_dtypes:
                        try:
                            # try to coerce column into the expected dtype;
                            # `with_columns` replaces the deprecated (and later
                            # removed) `DataFrame.with_column`
                            df = df.with_columns(pl.col(column_name).cast(expected_dtype))
                        except Exception:
                            # Best effort: a failed cast is reported and the column is
                            # left as it is. `Exception` (instead of a bare `except`)
                            # lets KeyboardInterrupt / SystemExit propagate.
                            if logger is not None:
                                logger.info(f"Checking output of function {func_name}() | Column {column_name} | failed to cast from {actual_dtype} to {expected_dtype}")
                        else:
                            if logger is not None:
                                logger.info(f"Checking output of function {func_name}() | Column {column_name} | successfully casted from {actual_dtype} to {expected_dtype}")

            return df

        return __check_output
    return _check_output

In [141]:
df = pl.DataFrame({'A': [1, 2]})
df

A
i64
1
2


In [143]:
@check_output(schema={'B': pl.Int8, 'A': pl.Int8}, coerce_dtypes=True, coerce_column_order=True, logger=logger)
def transform(df: pl.DataFrame) -> pl.DataFrame:
    """Return `df` with an extra column 'B' holding twice the values of column 'A'."""
    # `with_columns` replaces `DataFrame.with_column`, which was deprecated
    # and later removed from polars.
    return df.with_columns((pl.col('A')*2).alias('B'))

transform(df)

[05.07.2023 17:43:16] [hello] [INFO] [0024] Checking output of function transform() | Schema and DataFrame have matching columns.
[05.07.2023 17:43:16] [hello] [INFO] [0046] Checking output of function transform() | Column B | is of type <class 'polars.datatypes.Int64'> but should be <class 'polars.datatypes.Int8'>
[05.07.2023 17:43:16] [hello] [INFO] [0053] Checking output of function transform() | Column B | successfully casted from <class 'polars.datatypes.Int64'> to <class 'polars.datatypes.Int8'>
[05.07.2023 17:43:16] [hello] [INFO] [0046] Checking output of function transform() | Column A | is of type <class 'polars.datatypes.Int64'> but should be <class 'polars.datatypes.Int8'>
[05.07.2023 17:43:16] [hello] [INFO] [0053] Checking output of function transform() | Column A | successfully casted from <class 'polars.datatypes.Int64'> to <class 'polars.datatypes.Int8'>


B,A
i8,i8
2,1
4,2
