In [None]:
#| hide
#| default_exp utils

# Utils

## Disk Cache

In [None]:
#| export
# dill is an improved version of pickle, using it to support namedtuples
import dill
from pathlib import Path
import inspect
import hashlib
from pyprojroot import here

In [None]:
#| hide
# Throwaway function: the next expression inspects its source with the first line dropped
def a():
    return 'a'
"\n".join(inspect.getsource(a).split("\n")[1:]).encode()

b"    return 'a'\n"

This doesn't work completely, because the decorator line is also included in the source.

In [None]:
a.__code__.co_code

b'd\x01S\x00'

In [None]:
#| hide
# Throwaway function: used below to look at the raw bytecode in `__code__.co_code`
def a():
    return 'a'
a.__code__.co_code

b'd\x01S\x00'

In [None]:
#| hide
# Same throwaway function with an extra `1 + 1` statement, to compare its `co_code` with the previous one
def a():
    1 + 1
    return 'a'
a.__code__.co_code

b'\t\x00d\x01S\x00'

In [None]:
hashlib.md5(a.__code__.co_code).hexdigest()

'9cf8bd1d73fd245ea1325fbc4055d3d4'

In [None]:
#| export
# Default cache location: `.cache` resolved through pyprojroot's `here`
# (presumably relative to the project root — TODO confirm)
cache_dir = here(".cache")

In [None]:
#| export
# inspired from https://gist.github.com/shantanuo/c6a376309d6bac6bd55bf77e3961b5fb
def cache_disk(base_file, rm_cache=False, verbose=False):
    """Decorator to cache function output to disk

    The results are kept in a dict keyed by the call arguments and stored with `dill`
    in `<base_file stem><md5 of the function source>.pickle`, next to `base_file`.
    The file is read once at decoration time and rewritten after every cache miss.

    - `base_file`: path whose parent directory and stem are used to build the cache file name
    - `rm_cache`: delete an existing cache file before loading (no error if there is none)
    - `verbose`: print the cache file path

    Arguments of the decorated function must be hashable, as they are used as dict keys.
    """
    from functools import wraps  # local import so the block does not rely on other cells

    base_file = Path(base_file)

    def decorator(original_func):

        # remove the first line when computing the hash so that parameters to `cache_disk` are not included
        body_source = "\n".join(inspect.getsource(original_func).split("\n")[1:])
        f_hash = hashlib.md5(body_source.encode()).hexdigest()
        filename = base_file.parent / (base_file.stem + f_hash + ".pickle")

        if verbose: print(filename)

        # `missing_ok` so that asking for a fresh cache is not an error when no cache exists yet
        if rm_cache: filename.unlink(missing_ok=True)

        try:
            with open(filename, 'rb') as cache_file:
                cache = dill.load(cache_file)
        except (IOError, ValueError, FileNotFoundError, EOFError):
            # missing, unreadable or truncated cache file: start from an empty cache
            cache = {}

        def save_data():
            with open(filename, "wb") as cache_file:
                dill.dump(cache, cache_file)

        @wraps(original_func)
        def new_func(*args, **kwargs):
            # positional-only calls keep the plain `args` key, so existing cache files stay valid;
            # keyword arguments are appended in sorted order after a marker
            if kwargs:
                key = args + ("__kwargs__", tuple(sorted(kwargs.items())))
            else:
                key = args
            if key not in cache:
                cache[key] = original_func(*args, **kwargs)
                save_data()
            return cache[key]

        return new_func

    return decorator

In [None]:
import time
from tempfile import tempdir

In [None]:
cp = Path(tempdir) / "test_cache"

In [None]:
# Deliberately slow function (sleeps one second) used to show the effect of the disk cache
@cache_disk(cp)
def slow_add(a,b):
    time.sleep(1)
    return a + b 

This is the first call, so the result is not read from the cache.

In [None]:
%time slow_add(1,2)

CPU times: user 3.34 ms, sys: 1.9 ms, total: 5.23 ms
Wall time: 1 s


3

Now it is much faster because of the cache.

In [None]:
%time slow_add(1,2)

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 4.05 µs


3

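Adding a comment changes the hash of the source, so the previous cache is not reused: the first call runs the function again and only the following calls are cached.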

In [None]:
# Same function as before with one extra comment line in the body, so its source text differs
@cache_disk(cp)
def slow_add(a,b):
    time.sleep(1)
    # this is a comment
    return a + b 

In [None]:
%time slow_add(1,2)

CPU times: user 1.2 ms, sys: 1.03 ms, total: 2.22 ms
Wall time: 1 s


3

In [None]:
%time slow_add(1,2)

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 5.25 µs


3

## Random

In [None]:
#| export
import torch
import numpy as np
import random

In [None]:
#| export
def reset_seed(seed=27):
    "Seed the `torch`, `numpy` and `random` global random number generators with `seed`"
    seeders = (torch.manual_seed, np.random.seed, random.seed)
    for set_seed in seeders:
        set_seed(seed)

## Testing

In [None]:
#| export
from typing import Generator, Iterable
from functools import partial
from fastcore.test import test
from fastcore.basics import patch

In [None]:
#| exporti
def is_close(a,b,eps=1e-5):
    """Is `a` within `eps` of `b`

    - array-likes (anything with `__array__`) are converted to tensors and compared element-wise
    - other iterables are compared item by item, recursively; iterables of different
      length are never close
    - anything else is compared as a scalar with `abs(a-b)<eps`
    """
    from itertools import zip_longest  # local import so the block does not rely on other cells

    if hasattr(a, '__array__') or hasattr(b,'__array__'):
        a = torch.as_tensor(a)
        b = torch.as_tensor(b)
        return (abs(a-b)<eps).all()
    if isinstance(a, Iterable) or isinstance(b, Iterable):
        # a plain `zip` stops at the shortest input, which would report `[1, 2]` and `[1]` as close
        missing = object()
        for a_, b_ in zip_longest(a, b, fillvalue=missing):
            if a_ is missing or b_ is missing: return False
            if not is_close(a_, b_, eps): return False
        return True
    return abs(a-b)<eps

In [None]:
#| export
def test_close(a,b,eps=1e-5):
    "`test` that `a` is within `eps` of `b`"
    # bind the tolerance so that the comparison only takes the two values
    close_cmp = partial(is_close, eps=eps)
    test(a, b, close_cmp, 'close')

## Standard Scaler

Make a standard scaler that can also inverse transform standard deviations. See `Standardizer` for details of the implementation.

In [None]:
#| export
from collections import namedtuple
from fastcore.basics import patch
from sklearn.preprocessing import StandardScaler

In [None]:
reset_seed()
xx = np.random.random((4, 10))

In [None]:
s = StandardScaler().fit(xx)

In [None]:
s.transform(xx)

array([[ 0.07263978,  0.63279488, -0.9975139 ,  0.50899177,  0.15537652,
         1.45555506,  1.56629646, -1.60237369,  1.51674974,  1.29584745],
       [ 1.58579521,  0.83086419, -0.68281902,  0.51578245, -0.62395756,
        -1.19720248, -0.43000476,  1.1539719 , -0.74724819, -0.85525414],
       [-1.05809926, -1.69049694,  0.0895118 , -1.72684476, -1.08418417,
         0.32617669, -1.16657374,  0.2345773 ,  0.26525847,  0.64349108],
       [-0.60033573,  0.22683787,  1.59082112,  0.70207053,  1.55276521,
        -0.58452927,  0.03028204,  0.21382449, -1.03476002, -1.08408439]])

In [None]:
s.mean_

array([0.40358703, 0.6758362 , 0.77934606, 0.70748673, 0.34417949,
       0.62067044, 0.48500116, 0.54921643, 0.34604713, 0.3660338 ])

In [None]:
s.scale_

array([0.30471427, 0.21926148, 0.04405831, 0.31536161, 0.25229864,
       0.24649441, 0.26061043, 0.21187396, 0.26093989, 0.22927816])

In [None]:
#| export
@patch
def inverse_transform_std(self: StandardScaler, 
                         x_std # standard deviations
                        ):
    "Scale back standard deviations `x_std` by multiplying them with the fitted `scale_`"
    # `scale_` is None when the scaler does not scale (`with_std=False`): nothing to undo
    if self.scale_ is None: return x_std
    return x_std * self.scale_

## Info visualization

In [None]:
#| export
from torch import Tensor
from typing import Collection
import pandas as pd

from IPython.display import HTML
from IPython.display import display
from typing import Iterable

from fastcore.basics import *

In [None]:
#| export
def array2df(x: Tensor, # 2d tensor
             row_names: Collection[str]|None=None, # names for the row
             col_names: Collection[str]|None=None, # names for the columns
             row_var: str = '' # name of the first column (the one with row names). This should describe the values of `row_name`
            ):
    df = pd.DataFrame(x.detach().cpu().numpy(), columns=col_names)
    if row_names is not None: df.insert(0, row_var, row_names)
    return df

In [None]:
import numpy as np

In [None]:
a = np.random.rand(2,3,3)

In [None]:
display(HTML(f"<pre> {repr(a)} </pre>"))

In [None]:
a

array([[[0.96646567, 0.58332229, 0.09242191],
        [0.0136295 , 0.83693011, 0.9147879 ],
        [0.70458626, 0.3870066 , 0.7056939 ]],

       [[0.92331116, 0.28815289, 0.68401985],
        [0.5202925 , 0.87736578, 0.92388931],
        [0.48923016, 0.59621396, 0.26427542]]])

In [None]:
#| export
import inspect

In [None]:
#| export
# inspired from https://stackoverflow.com/questions/18425225/ 
def maybe_retrieve_callers_name(args):
    """Tries to retrieve the variable name of each element of `args` in the frame two levels up.

    This is meant to be called by a function that forwards its own positional arguments:
    the names are looked up in the local variables of the caller of that function.
    Variables whose name starts with `_` are ignored. If an argument matches no
    variable, or more than one, its name is `#<position>`.
    """
    # two frames up: skip this function and the function that forwarded `args`
    this_frame = inspect.currentframe()
    forwarder = this_frame.f_back if this_frame is not None else None
    caller = forwarder.f_back if forwarder is not None else None
    # no such frame (e.g. called directly from the top level): fall back to positions
    if caller is None: return [f'#{i}' for i, _ in enumerate(args)]

    caller_locals = caller.f_locals
    names = []
    for i, arg in enumerate(args):
        # match by identity, not equality
        var_names = [var_name for var_name, var_val in caller_locals.items()
                     if var_val is arg and not var_name.startswith("_")]
        names.append(var_names[0] if len(var_names)==1 else f'#{i}')
    return names

def retrieve_names(*args):
    """For each argument, list the names of all the variables in the caller's frame that are that same object"""
    # locals of the direct caller of this function
    caller_locals = inspect.currentframe().f_back.f_locals
    return [
        [var_name for var_name, var_val in caller_locals.items() if var_val is arg]
        for arg in args
    ]

In [None]:
x, y, z = 1, 2, 3

def func(*args):
    # forward the positional arguments: names are looked up in the frame that called `func`
    return maybe_retrieve_callers_name(args)

print(func(x,y))

['x', 'y']


In [None]:
retrieve_names(a,a)

[['_', 'a', '_32'], ['_', 'a', '_32']]

In [None]:
#| export
from contextlib import redirect_stdout
import io
from pprint import pp

In [None]:
trap = io.StringIO()
with redirect_stdout(trap):
    print("hello")

trap.getvalue()

'hello\n'

In [None]:
#| export
def pretty_repr(o):
    "Pretty-printed representation of `o` (as produced by `pprint.pp`) returned as a string"
    trap = io.StringIO()
    # write straight into the buffer instead of temporarily replacing the global `sys.stdout`
    pp(o, stream=trap)
    return trap.getvalue()
def row_items(**kwargs):
    "HTML for a flex row with one column per keyword argument: the name as title and the pretty repr of the value below"
    from html import escape  # local import so the block does not rely on other cells

    # titles and reprs are text, not markup: escape them so that `<`, `>` and `&` are shown literally
    columns = [
        f"<div><p style='font-size: 1.2rem;'>{escape(title, quote=False)}</p> <pre>{escape(pretty_repr(o), quote=False)}</pre> </div>"
        for title, o in kwargs.items()
    ]
    out = f"<div style=\"display: flex; column-gap: 20px; flex-wrap: wrap;\" class='table table-striped table-sm'> {''.join(columns)}</div>"
    return out
def show_as_row(*os, names: Iterable[str]=None, **kwargs):
    """Shows an iterable of objects side by side on a row.

    Positional objects are titled with `names` or, when `names` is not given, with the
    variable names found in the caller's frame. Keyword arguments are titled with their keyword.
    """
    # keep this lookup directly in the function body: it relies on the call depth
    labels = maybe_retrieve_callers_name(os) if names is None else names
    for label, obj in zip(labels, os):
        kwargs[label] = obj
    display(HTML(row_items(**kwargs)))

NameError: name 'Iterable' is not defined

In [None]:
type(a)

numpy.ndarray

In [None]:
show_as_row(a,b=a)

In [None]:
from itertools import zip_longest

In [None]:
zip_longest

itertools.zip_longest

In [None]:
func(a,a)

['a', 'a']

In [None]:
b = a.copy()

In [None]:
show_as_row([1], [2])

In [None]:
show_as_row(a,b)

In [None]:
show_as_row(a, names='b')

In [None]:
show_as_row(c=a)

In [None]:
#| export
def _style_df(df_style):
    """style dataframe for better printing """
    return df_style.format(precision = 4)

def row_dfs(dfs: dict[str, pd.DataFrame], title="", hide_idx = True, styler=_style_df):
    out = []
    for df_title, df in dfs.items():
        df_styled =  df.style.hide(axis="index") if hide_idx else df.style 
        df_html = styler(df_styled).to_html()
        out.append(f"<div> <p style='font-size: 1.3rem;'>{df_title}</p> {df_html} </div>")
    out = f"<div style=\"display: flex; column-gap: 20px; flex-wrap: wrap;\" class='table table-striped table-sm'> {''.join(out)}</div>"
    return f"<p style='font-size: 1.5rem; font-decoration: bold'>{title}<p>" + "".join(out)
def display_as_row(dfs: dict[str, pd.DataFrame], title="", hide_idx=True, styler=_style_df):
    """display multiple dataframes in the same row"""
    display(HTML(row_dfs(dfs, title, hide_idx, styler)))

In [None]:
a = HTML(pd.DataFrame([1,2]).to_html(notebook=True))

In [None]:
display_as_row({"test": pd.DataFrame([1,2])}, "hello")

0
1
2


In [None]:
display_as_row({"test": pd.DataFrame([1,2])}, "hello", hide_idx=False)

Unnamed: 0,0
0,1
1,2


In [None]:
display_as_row({f"test{i}": pd.DataFrame([1,2], columns=[i]) for i in range(10)})

0
1
2

1
1
2

2
1
2

3
1
2

4
1
2

5
1
2

6
1
2

7
1
2

8
1
2

9
1
2


## Torch Helpers

Inspired by: https://github.com/pytorch/pytorch/pull/9281

In [None]:
from fastcore.test import *

In [None]:
#| export
def eye_like(x: torch.Tensor) -> torch.Tensor:
    """
    Return a tensor with the same shape as x, that has a n x m eye matrix in the last two dimensions.

    Args:
        x: tensor of shape (B, n, m) or (n,m); any number of leading batch dimensions is supported

    Returns:
        tensor with the same shape, dtype and device as x. When x has batch dimensions
        the result is an expanded view of a single (n, m) eye matrix.
    """
    identity = torch.eye(x.shape[-2], x.shape[-1], dtype=x.dtype, device=x.device)
    batch_shape = x.shape[:-2]
    if len(batch_shape) == 0:
        return identity
    # `expand` adds the missing leading dimensions; -1 keeps the size of the last two
    return identity.expand(*batch_shape, -1, -1)

In [None]:
a = torch.ones(3,2,2)

In [None]:
test_close(eye_like(a),
           torch.tensor([[[1., 0.],
                          [0., 1.]],
 
                         [[1., 0.],
                          [0., 1.]],
 
                         [[1., 0.],
                          [0., 1.]]]))

In [None]:
eye_like(torch.ones(3,3))

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [None]:
eye_like(torch.ones(2,2,3,3))

tensor([[[[1., 0., 0.],
          [0., 1., 0.],
          [0., 0., 1.]],

         [[1., 0., 0.],
          [0., 1., 0.],
          [0., 0., 1.]]],


        [[[1., 0., 0.],
          [0., 1., 0.],
          [0., 0., 1.]],

         [[1., 0., 0.],
          [0., 1., 0.],
          [0., 0., 1.]]]])

In [None]:
eye_like(torch.ones(2,3,4))

tensor([[[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.]],

        [[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.]]])

### Is diagonal

In [None]:
e = torch.eye(3,3)
d = torch.diagonal(e, dim1=-2, dim2=-1)
d
torch.diag_embed(d, dim1=-2, dim2=-1)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [None]:
#| export
def is_diagonal(x: torch.Tensor):
    """ Check that tensor is diagonal respect to the last 2 dimensions"""
    # rebuild a tensor from the diagonal only: `x` is diagonal if nothing else was lost
    diag = torch.diagonal(x, dim1=-2, dim2=-1)
    diag_only = torch.diag_embed(diag, dim1=-2, dim2=-1)
    return torch.eq(x, diag_only).all()

In [None]:
is_diagonal(torch.eye(3,3))

tensor(True)

In [None]:
is_diagonal(torch.ones(3,3))

tensor(False)

In [None]:
is_diagonal(eye_like(torch.ones(2,3,3)))

tensor(True)

## Itertools

In [None]:
#| export
import itertools

In [None]:
#| export
# from https://stackoverflow.com/a/5228294
def product_dict(**kwargs):
    "Yield one dict for each combination in the cartesian product of the keyword values, with the same keys as `kwargs`"
    names = tuple(kwargs)
    combos = itertools.product(*kwargs.values())
    yield from (dict(zip(names, combo)) for combo in combos)

## Export

In [None]:
#| hide
from nbdev import nbdev_export
nbdev_export()