## imports

In [1]:
import ast
import inspect
import types
import numpy as np
import numba as nb
import pandas as pd
import time
import logging
import functools
import copy
import sys
from typing import Callable, Type, Dict, Tuple, Any
logger = logging.getLogger()
logger.setLevel(0)

## dev imports

In [2]:
import astpretty

## Current state

In [4]:


def create_callmap_function_ast(mapping: Dict[str, int]) -> ast.FunctionDef:
    # Create the body of the callmap function
    body = []
    for key, value in mapping.items():
        compare = ast.Compare(
            left=ast.Name(id='x', ctx=ast.Load()),
            ops=[ast.Eq()],
            comparators=[ast.Str(s=key)]
        )
        body.append(
            ast.If(
                test=compare,
                body=[ast.Return(value=ast.Num(n=value))],
                orelse=[]
            )
        )
    
    # Add a default return statement
    body.append(ast.Return(value=ast.Name(id='x', ctx=ast.Load())))

    # Create the function definition
    func_def = ast.FunctionDef(
        name='callmap',
        args=ast.arguments(
            posonlyargs=[],
            args=[ast.arg(arg='x')],
            vararg=None,
            kwonlyargs=[],
            kw_defaults=[],
            kwarg=None,
            defaults=[]
        ),
        body=body,
        decorator_list=[],
        returns=None
    )
    return func_def

class SubscriptReplacer(ast.NodeTransformer):
    def __init__(self, arg_name):
        self.arg_name = arg_name

    def visit_Subscript(self, node):
        if isinstance(node.value, ast.Name) and node.value.id == self.arg_name:
            # Check for Python version compatibility
            if sys.version_info >= (3, 9):
                # Python 3.9 and later
                old_slice = node.slice
            else:
                # Python 3.8 and earlier
                old_slice = node.slice.value if isinstance(node.slice, ast.Index) else node.slice

            # Wrap the subscript in a call to callmap
            node.slice = ast.Call(
                func=ast.Name(id='callmap', ctx=ast.Load()),
                args=[old_slice],
                keywords=[]
            )
        return self.generic_visit(node) 

def create_transformed_function_ast(original_func: Callable, mapping: Dict[str, int]) -> Tuple[ast.AST, ast.AST, ast.AST]:
    # Parse the original function
    original_tree = ast.parse(inspect.getsource(original_func))
    arg_name = original_tree.body[0].args.args[0].arg
    
    # Rename the original function
    original_tree.body[0].name = 'temporary'
    
    # Apply the AST transformation
    replacer = SubscriptReplacer(arg_name)
    original_tree = replacer.visit(original_tree)
    ast.fix_missing_locations(original_tree)

    # Replace dictionary accesses with callmap in the original function
    # This would be similar to the code in SubscriptReplacer

    # Create a new function that applies 'temporary' over an array
    loop_base_func_str = f"""
def {original_func.__qualname__}_loop(Z):
    n = Z.shape[0]
    res = np.zeros((n, 1))
    for i in nb.prange(n):
        res[i, 0] = temporary(Z[i, :])
    return res
    """
    vectorized_base_func_str = f"""
def {original_func.__qualname__}_vectorized(Z):
    return temporary(Z.T)
    """
    loop_func_tree = ast.parse(loop_base_func_str)
    vectorize_func_tree = ast.parse(vectorized_base_func_str)

    return original_tree, loop_func_tree, vectorize_func_tree

def numba_decorate(func_tree: ast.AST, nopython: bool = True, nogil: bool = True, parallel: bool = True) -> ast.AST:
    # # Add Numba JIT decorator
    nb_compyled_func_tree = copy.deepcopy(ast.fix_missing_locations(func_tree))
    numba_decorator = ast.Call(
        func=ast.Attribute(value=ast.Name(id='nb', ctx=ast.Load()), attr='jit', ctx=ast.Load()),
        args=[],
        keywords=[
            ast.keyword(arg='nopython', value=ast.NameConstant(value=nopython)),
            ast.keyword(arg='nogil', value=ast.NameConstant(value=nogil)),
            ast.keyword(arg='parallel', value=ast.NameConstant(value=parallel))
        ]
    )
    nb_compyled_func_tree.body[0].decorator_list.append(numba_decorator)
    nb_compyled_func_tree.body[0].name += '_nb_compyled'
    return ast.fix_missing_locations(nb_compyled_func_tree)

def encapulate(wrap_tree: ast.AST, callmap_tree: ast.AST, original_tree: ast.AST) -> ast.AST:
    wrap_tree.body[0].body.insert(0, callmap_tree.body[0])
    wrap_tree.body[0].body.insert(1, original_tree.body[0])
    return ast.fix_missing_locations(wrap_tree)

def compile_tree(built_func_tree: ast.AST, exec_globals: Dict[str, Any], qualname: str, build_qualifier: str) -> Dict:
    try:
        exec(compile(built_func_tree, filename="<ast>", mode="exec"), exec_globals)
        return {build_qualifier: exec_globals[qualname + build_qualifier]}
    except Exception as e:
        logger.warning(e)
    return {}

def _prepare_funcs(original_func: ast.AST, mapping: Dict[str, int]) -> Dict[str, Callable]:
    exec_globals = globals().copy()
    exec_globals.update({'np': np, 'nb': nb})
    callmap_func_ast = create_callmap_function_ast(mapping)
    callmap_func_tree = ast.fix_missing_locations(ast.Module(body=[callmap_func_ast], type_ignores=[]))
    original_tree, loop_func_tree, vectorize_func_tree = create_transformed_function_ast(original_func, mapping)

    loop_func_tree = encapulate(loop_func_tree, callmap_func_tree, original_tree)
    vectorize_func_tree = encapulate(vectorize_func_tree, callmap_func_tree, original_tree)
    
    available_funcs = {}
    available_funcs.update(compile_tree(vectorize_func_tree, exec_globals, original_func.__qualname__, '_vectorized'))
    available_funcs.update(compile_tree(loop_func_tree, exec_globals, original_func.__qualname__, '_loop'))
    
    nb_compyled_loop_func_tree = numba_decorate(loop_func_tree)
    nb_compyled_vectorize_func_tree = numba_decorate(vectorize_func_tree)

    available_funcs.update(compile_tree(nb_compyled_vectorize_func_tree, exec_globals, original_func.__qualname__, '_vectorized_nb_compyled'))
    available_funcs.update(compile_tree(nb_compyled_loop_func_tree, exec_globals, original_func.__qualname__, '_loop_nb_compyled'))

    return available_funcs

def make_class_decorator(function_decorator: Callable) -> Callable:
    """
    Creates a class decorator from a given function decorator.

    Args:
        function_decorator (Callable): A function decorator to be applied to class methods.

    Returns:
        Callable: A class decorator.
    """
    @functools.wraps(function_decorator)
    def class_decorator(cls: Type) -> Type:
        """
        The class decorator generated from the function decorator.

        Args:
            cls (Type): The class to which the decorator is applied.

        Returns:
            Type: The decorated class.
        """
        for attr_name, attr_value in cls.__bases__[0].__dict__.items():
            if callable(attr_value) and not attr_name.startswith('_') and attr_name not in cls.__dict__:
                setattr(cls, attr_name, function_decorator(attr_value))
        for attr_name, attr_value in cls.__dict__.items():
             if callable(attr_value) and not attr_name.startswith('_'):
                setattr(cls, attr_name, function_decorator(attr_value))
        return cls
    return class_decorator

def autowrap_pandas_return(fn: Callable) -> Callable:
    """
    Decorator to add validation and error handling to class methods.

    Args:
        fn (Callable): The original method of the class.

    Returns:
        Callable: The decorated method with added validation and error handling.
    """
    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        if self._outside_call:
            self._outside_call = False
            res = fn(self, *args, **kwargs)
            if isinstance(res, pd.DataFrame):
                res = pandopt(res)
            self._outside_call = True
            return res
        return fn(self, *args, **kwargs)
    return wrapper

@make_class_decorator(autowrap_pandas_return)
class pandopt(pd.DataFrame):
    _compiled_func = None
    _outside_call = True

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._compiled_func = {}

    @property
    def __name__(self):
        return functools.reduce(lambda x, y: x + y, self.name_to_index.keys())

    @property
    def colname_to_colnum(self):
        return {k: i for i, k in enumerate(self.columns)}

    @property
    def rowname_to_rownum(self):
        return {k: i for i, k in enumerate(self.index)}
    
    def _compiled_qualifier(self, func_qualifier, mapper):
        return hash(functools.reduce(lambda x, y: f'{x}&{y}', mapper) + func_qualifier)

    def apply(self, func, axis = 0, *args, pandas_fallback = False, **kwargs):
        if pandas_fallback: 
            logger.warning(f'{__class__} finish in pandas fallback for func {func}')
            return super().apply(func, axis = 0, *args, **kwargs)
        if args or kwargs:
            logger.warning(f'{__class__} apply only supports func and axis arguments, using default pandas apply')
            return super().apply(func, axis = 0, *args, **kwargs)
        return pandopt((self._compiled_func.get((name:=self._compiled_qualifier(func_qualifier = func.__qualname__, mapper=(mapper:=self.colname_to_colnum if axis else self.rowname_to_rownum)))) or self._build_apply_versions(func, mapper, name))(self.to_numpy() if axis else self.to_numpy().T), index = self.index if axis else self.columns)

    def _with_fallback_wrap(self, apply_func_dict):
        def _with_protects(*args, **kwargs):
            for key in ('_vectorized_nb_compyled', '_loop_compyled', '_vectorized', '_loop'):
                if key not in apply_func_dict:
                    continue
                try:
                    return apply_func_dict[key](*args, **kwargs)
                except:
                    apply_func_dict.pop(key)
            return self.apply(*args, pandas_fallback = True, **kwargs)
        return _with_protects
    
    def _build_apply_versions(self, func, map, name):
        self._compiled_func[name] = self._with_fallback_wrap(_prepare_funcs(func, map))
        return self._compiled_func[name]


In [20]:
@nb.jit(target='host', nopython=True, nogil=True, parallel = True)
def mean(x):
    return np.mean(x)

df=pd.DataFrame(data=np.random.randn(10000,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
dfx = pandopt(df)
%timeit dfx.mean()
%timeit df.mean()
%timeit np.mean(dfx.to_numpy())
%timeit mean(dfx.to_numpy())

571 µs ± 146 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
566 µs ± 593 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
31.6 µs ± 38.1 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


KeyError: "Unrecognized options: {'target'}. Known options are dict_keys(['_dbg_extend_lifetimes', '_dbg_optnone', '_nrt', 'boundscheck', 'debug', 'error_model', 'fastmath', 'forceinline', 'forceobj', 'inline', 'looplift', 'no_cfunc_wrapper', 'no_cpython_wrapper', 'no_rewrites', 'nogil', 'nopython', 'parallel', 'target_backend'])"

In [73]:
@nb.jit(fastmath=True, forceinline=True, looplift=True, inline='always', target_backend='host', no_cfunc_wrapper=True,nopython=True, nogil=True, parallel = True)
def var1(x):
    K=x.shape
    print(x)
    res = np.zeros((K,1))
    for k in nb.prange():
        res[k,0] = np.var(x[:,k])
    return res

@nb.jit(fastmath=True, forceinline=True, looplift=True, inline='always', target_backend='host', no_cfunc_wrapper=True, no_rewrites=True,nopython=True, nogil=True, parallel = True)
def var2(x):
    m = np.sum(x, axis=0)/x.shape[0]
    return (m**2 - m)
    
var2(df.to_numpy())

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.

File "../../../../tmp/ipykernel_1489282/391797843.py", line 10:
<source missing, REPL/exec in use?>



array([ 0.00473638, -0.00229121, -0.00144589,  0.00074497], dtype=float32)

In [78]:
a=0
def test(x):
    global a
    a+=1
    return np.mean(x)

test(df.rolling(4))

TypeError: Rolling.mean() got an unexpected keyword argument 'axis'

In [86]:
def f(x):
    return np.mean(x)

def f2(x):
    return functools.reduce(lambda k, z: k + z, x)/len(x)

%timeit df.mean(axis=1)
%timeit df.apply(np.mean, axis=1)
%timeit dfx.apply(f, axis=1)


1.76 ms ± 3.63 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
374 ms ± 1.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/usr/local/pyenv/versions/3.10-opt-G1/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_1489282/306640829.py", line 9, in <module>
    get_ipython().run_line_magic('timeit', 'dfx.apply(f, axis=1)')
  File "/usr/local/pyenv/versions/3.10-opt-G1/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2456, in run_line_magic
    result = fn(*args, **kwargs)
  File "/usr/local/pyenv/versions/3.10-opt-G1/lib/python3.10/site-packages/IPython/core/magics/execution.py", line 1185, in timeit
    time_number = timer.timeit(number)
  File "/usr/local/pyenv/versions/3.10-opt-G1/lib/python3.10/site-packages/IPython/core/magics/execution.py", line 173, in timeit
    timing = self.inner(it, self.timer)
  File "<magic-timeit>", line 1, in inner
  File "/tmp/ipykernel_1489282/2333732339.py", line 195, in wrapper
    return fn(self, *ar

In [108]:
def mymean(x):
    return functools.reduce(lambda k, z: k + z, x)/len(x)

def somemean(x):
    return sum(x)/len(x)

df=pd.DataFrame(data=np.random.randn(1000000,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
dfx = pandopt(df)


%time df.mean(axis=1)
%time df.apply(np.mean, axis=1)
%time dfx.apply(mymean, axis=1)
%time dfx.apply(somemean, axis=1)

print(df.mean(axis=1))
print(df.apply(np.mean, axis=1))
print(dfx.apply(mymean, axis=1))
print(dfx.apply(somemean, axis=1))


CPU times: user 139 ms, sys: 3.99 ms, total: 143 ms
Wall time: 143 ms
CPU times: user 37.7 s, sys: 168 ms, total: 37.8 s
Wall time: 37.8 s
CPU times: user 30.2 ms, sys: 8.03 ms, total: 38.2 ms
Wall time: 38.6 ms
CPU times: user 99.8 ms, sys: 0 ns, total: 99.8 ms
Wall time: 101 ms
0        -0.130979
1         0.525160
2        -0.207952
3        -0.605372
4         0.045172
            ...   
999995   -0.342278
999996   -0.338848
999997    0.175517
999998   -0.083926
999999    0.506741
Length: 1000000, dtype: float32
0        -0.130979
1         0.525160
2        -0.207952
3        -0.605372
4         0.045172
            ...   
999995   -0.342278
999996   -0.338848
999997    0.175517
999998   -0.083926
999999    0.506741
Length: 1000000, dtype: float32
               0
0      -0.130979
1       0.525160
2      -0.207952
3      -0.605372
4       0.045172
...          ...
999995 -0.342278
999996 -0.338848
999997  0.175517
999998 -0.083926
999999  0.506741

[1000000 rows x 1 columns]
     

In [118]:

@nb.jit(nopython=True, nogil=True, parallel=True)
def cdmtest_func(Z):
    def callmap(x):
        if x == 'A':
            return 0
        elif x == 'B':
            return 1
        elif x == 'C':
            return 2
        elif x == 'D':
            return 3
        return x
    def tmporary(x):
        return np.kurt(x)
    n = Z.shape[0]
    res = np.zeros((n, 1))
    for i in nb.prange(5, n):
        res[i,0] = np.var(Z[i-5:i,:])
    return res


def mymean(x):
    return functools.reduce(lambda k, z: k + z, x)/len(x)


df=pd.DataFrame(data=np.random.randn(10000,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
dfx = pandopt(df)

%time cdmtest_func(df.to_numpy())

%time df.rolling(5).var()
%time df.rolling(5).apply(np.var)
%time dfx.rolling(5).apply(mymean)

CPU times: user 1.56 s, sys: 66 µs, total: 1.56 s
Wall time: 1.56 s
CPU times: user 2.65 ms, sys: 7 µs, total: 2.65 ms
Wall time: 2.64 ms


AttributeError: module 'numpy' has no attribute 'kurt'

CPU times: user 1.05 s, sys: 3.97 ms, total: 1.05 s
Wall time: 1.05 s


Unnamed: 0,A,B,C,D
0,,,,
1,,,,
2,,,,
3,,,,
4,0.562386,0.376509,-0.459252,0.033682
...,...,...,...,...
9995,-0.024557,-0.145580,0.397332,-0.328758
9996,-0.212622,-0.015524,0.219020,0.085507
9997,-0.192893,-0.184232,0.523436,0.270414
9998,-0.253410,-0.458986,0.409766,0.772995


In [77]:

def test_func(z):
    x = (z['A']+z['B'])
    x = z['B']*z['D']
    return x / z['B']

@nb.jit(nopython=True, nogil=True, parallel=True, cache=True)
def test_funcv(z):
    x = (z[:,0]+z[:,1])
    x = z[:,0]*z[:,3]
    return x / z[:,1]

@nb.jit(nopython=True, nogil=True, cache=True)
def test_funcd(z):
    x = (z[0]+z[1])
    x = z[0]*z[3]
    return x / z[1]

@nb.jit(nopython=True, nogil=True, parallel=True, cache=True)
def test_funcdd(z):
    for i in nb.prange(len(z)):
        z[i,0]=test_funcd(z[i])
    return z[:,0]
    
def rowtest_func(z):
    x = (z['A']+z['B'])
    if x > 0:
        return z['B']*z['D']
    return x / z['B']

@nb.jit(nopython=True, nogil=True, cache=True)
def rowtest_funcdi(z):
    x = (z[0]+z[1])
    if x > 0:
        return z[0]*z[3]
    return x / z[1]

@nb.jit(nopython=True, nogil=True, parallel=True)
def rowtest_funcddo(z):
    n = len(z)
    result = np.zeros((n,1), dtype=np.float32)
    for i in nb.prange(n):
        result[i,0] = rowtest_funcdi(z[i,:])
    return result

@nb.jit(nopython=True, nogil=True, parallel=True)
def rowtest_funcdd(z):
    n = len(z)
    result = np.zeros((n,1), dtype=np.float32)
    for i in nb.prange(n):
        x = (z[i, 0]+z[i, 1])
        if x > 0:
            result[i,0] = z[i, 0]*z[i, 3]
            continue
        result[i,0] = z[i,0]
        continue
    return result



@nb.jit(nopython=True, nogil=True, parallel=True)
def rowtest_funcdv(z):
    x = (z[:,0]+z[:,1])
    return ((x > 0) * z[:,0]*z[:,3]) + ((x < 0) * x / z[:,1])


df=pd.DataFrame(data=np.random.randn(10**3,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
arr=df.astype(np.float32).to_numpy()
print(df.apply(rowtest_func, axis=1))
print(rowtest_funcdd(arr.copy()), rowtest_funcdd(arr.copy()).shape, np.sum(rowtest_funcdd(arr.copy())))
print(rowtest_funcdv(arr.copy()), rowtest_funcdv(arr.copy()).shape, np.sum(rowtest_funcdv(arr.copy())))

print(df.apply(test_func, axis=1))
print(test_funcdd(arr.copy()), test_funcdd(arr.copy()).shape, np.sum(test_funcdd(arr.copy())))
print(test_funcv(arr.copy()), test_funcv(arr.copy()).shape, np.sum(test_funcv(arr.copy())))

df.rolling(4)

39988

In [46]:
%timeit var1(df.to_numpy())
%timeit var2(df.to_numpy())
%timeit np.var(df.to_numpy(), axis=0)
%timeit df.var()

24.1 µs ± 1.88 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
21.7 µs ± 2 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
78.7 µs ± 151 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
727 µs ± 391 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Func compilation / live modification performance test

In [5]:
# Example usage
def simple_start(z):
    x = (z['A'] + z['B']) / z['C']
    x += z['B'] * z['D']
    return x / z['B']

def harder_func(z):
    x = (z['A'] + z['B']) / z['C']
    if x > 0:
        return x / z['B']
    x += z['B'] * z['D']
    return x * z['B']

def harder2_func(z):
    x = (z['A'] + z['B']) / z['C']
    if (k:=z['A']-z['C']) > (j:=z['B']/z['D']):
        return x / k
    x *= j
    return x - k if k > z['C'] else x + k

def harder3_func(z):
    g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
    x = (z['A'] + z['B']) / z['C']
    if (k:=z['A']-z['C']) > (j:=g(z['B'],z['D'])):
        return j / k
    x *= j
    return x - k if k > z['C'] else x + k

df=pd.DataFrame(data=np.random.randn(100000,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
mapping = {k: i for i, k in enumerate(df.columns)}

for func_test in [simple_start, harder_func, harder2_func, harder3_func]:
    print('Testing ', func_test.__name__)
    prepared_funcs = _prepare_funcs(func_test, mapping)
    try:
        print('df.apply(f, axis=1):', end=' ')
        %timeit df.apply(func_test, axis=1)
        print('checksum: ', np.sum(df.apply(func_test, axis=1)))
    except Exception as e:
        print('df.apply(f, axis=1) FAILED : ', e)
    try:
        print('test_func(df):', end=' ')
        %timeit func_test(df)
        print('checksum: ', np.sum(func_test(df)))
    except Exception as e:
        print('test_func(df) FAILED : ', e)
    try:
        print('_loop(df):', end=' ')
        %timeit prepared_funcs['_loop'](df.to_numpy())
        print('checksum: ', np.sum(prepared_funcs['_loop'](df.to_numpy())))
    except Exception as e:
        print('safe_func(df) FAILED : ', e)
    try:
        print('_vectorize(df):', end=' ')
        %timeit prepared_funcs['_vectorize'](df.to_numpy())
        print('checksum: ', np.sum(prepared_funcs['_vectorize'](df.to_numpy().T)))
    except Exception as e:
        print('vectorized_func(df) FAILED : ', e)
    try:
        print('_loop_nb_compyled(df):', end=' ')
        %timeit prepared_funcs['_loop_nb_compyled'](df.to_numpy())
        print('checksum: ', np.sum(prepared_funcs['_loop_nb_compyled'](df.to_numpy())))
    except Exception as e:
        print('opt_func(df) FAILED : ', e)
    try:
        print('_vectorized_nb_compyled(df):', end=' ')
        %timeit prepared_funcs['_vectorized_nb_compyled'](df.to_numpy())
        print('checksum: ', np.sum(prepared_funcs['_vectorized_nb_compyled'](df.to_numpy().T)))
    except Exception as e:
        print('opt_func(df) FAILED : ', e)
    print('\n\n\n')

Testing  simple_start
df.apply(f, axis=1): 2.26 s ± 26.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
checksum:  -3688407.5
test_func(df): 943 µs ± 4.01 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
checksum:  -3688407.5
_loop(df): 279 ms ± 2.85 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
checksum:  -3688407.0195359862
_vectorize(df): vectorized_func(df) FAILED :  '_vectorize'
_loop_nb_compyled(df): 187 µs ± 59.2 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
checksum:  -3688407.0195359862
_vectorized_nb_compyled(df): The slowest run took 5.79 times longer than the fastest. This could mean that an intermediate result is being cached.
33.8 µs ± 31.7 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
checksum:  -3688407.5




Testing  harder_func
df.apply(f, axis=1): 2.31 s ± 2.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
checksum:  -3345036.5
test_func(df): test_func(df) FAILED :  The truth value of a Series is ambiguous. Use a.empty

  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))


3.05 s ± 13.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
checksum:  -2001.9037082990963
test_func(df): test_func(df) FAILED :  The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
_loop(df): 



602 ms ± 41.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
checksum:  nan
_vectorize(df): vectorized_func(df) FAILED :  '_vectorize'
_loop_nb_compyled(df): opt_func(df) FAILED :  Failed in nopython mode pipeline (step: nopython frontend)
Type of variable 'closure__locals__temporary_v182__v100compare_op_45' cannot be determined, operation: closure__locals__temporary_v182__v66binary_subtract_30 > closure__locals__temporary_v182__v94call_function_43, location: <ast> (4)

File "<ast>", line 4:
<source missing, REPL/exec in use?>

_vectorized_nb_compyled(df): opt_func(df) FAILED :  Failed in nopython mode pipeline (step: nopython frontend)
No implementation of function Function(<built-in function abs>) found for signature:
 
 >>> abs(array(float32, 1d, C))
 
There are 6 candidate implementations:
      - Of which 2 did not match due to:
      Type Restricted Function in function 'abs': File: unknown: Line unknown.
        With argument(s): '(array(float32, 1d, C))':
       No match

In [30]:
df=pd.DataFrame(data=np.random.randn(1000000,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
dfx = pandopt(df)
dfx.apply(simple_start, axis=1)

for func_test in [simple_start, harder_func, harder2_func, harder3_func]:
    print('Testing ', func_test.__name__)

    %timeit df.apply(func_test, axis=1)

    %timeit dfx.apply(func_test, axis=1)
    

Testing  simple_start
2.26 s ± 16.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
393 µs ± 238 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
Testing  harder_func
2.32 s ± 8.26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
516 ms ± 3.02 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Testing  harder2_func
2.72 s ± 20.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
317 ms ± 5.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Testing  harder3_func


  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))


3.04 s ± 11.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)




575 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
def simple_start(z):
    x = (z['A'] + z['B']) / z['C']
    x += z['B'] * z['D']
    return x / z['B']


df=pd.DataFrame(data=np.random.randn(1000000,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
dfx = pandopt(df)

%timeit df.apply(simple_start, axis=1)

%timeit dfx.apply(simple_start, axis=1)

In [None]:
def simple_start(z):
    x = (z['A'] + z['B']) / z['C']
    x += z['B'] * z['D']
    return x / z['B']

df=pd.DataFrame(data=np.random.randn(10000000,4), columns = ['A', 'B', 'C', 'D']).astype(np.float32)
dfx = pandopt(df)

%timeit df.apply(simple_start, axis=1)

%timeit dfx.apply(simple_start, axis=1)