In [1]:
from PandOpt import pandopt
import pandas as pd
from PandOpt import *
import numpy as np
import tqdm 
import pandas as pd
import numpy as np
import timeit
import functools


In [2]:
# Example usage
def simple_start(z):
    x = (z['A'] + z['B']) / z['C']
    x += z['B'] * z['D']
    return x / z['B']

def harder_func(z):
    x = (z['A'] + z['B']) / z['C']
    if x > 0:
        return x / z['B']
    x += z['B'] * z['D']
    return x * z['B']

def harder2_func(z):
    x = (z['A'] + z['B']) / z['C']
    if (k:=z['A']-z['C']) > (j:=z['B']/z['D']):
        return x / k
    x *= j
    return x - k if k > z['C'] else x + k

def harder3_func(z):
    g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
    x = (z['A'] + z['B']) / z['C']
    if (k:=z['A']-z['C']) > (j:=g(z['B'],z['D'])):
        return j / k
    x *= j
    return x - k if k > z['C'] else x + k
        

In [3]:

# Assume pandopt, simple_start, harder_func, harder2_func, harder3_func are defined
def agg_sum(z):
    return z.sum()

def agg_mean(z):
    return z.mean()

def agg_max(z):
    return z.max()

def agg_min(z):
    return z.min()

def agg_std(z):
    return z.std()



def measure_performance(df, func, test_type, window_size=3):
    try:
        if test_type == "apply":
            operation = functools.partial(df.apply, func, axis=1)
        elif test_type == "rolling":
            operation = functools.partial(df.rolling(window=window_size).apply, func, raw=True)
        elif test_type == "groupby":
            operation = functools.partial(df.groupby('A').apply, func)
        else:
            raise ValueError("Invalid test type")

        start_time = timeit.default_timer()
        result = operation()
        elapsed_time = timeit.default_timer() - start_time
        return np.sum(result), elapsed_time, None
    except Exception as e:
        return None, None, str(e)

def run_tests(data_size, test_funcs, agg_funcs, test_types):
    results = {}
    total_tests = (data_size - 1) * len(test_funcs + agg_funcs) * len(test_types)
    progress_bar = tqdm.tqdm(total=total_tests, desc="Running Tests", ncols=100)

    for test_num in range(1, data_size):
        df_size = int(10**test_num)
        pandas_df = pd.DataFrame(np.random.randn(df_size, 4), columns=['A', 'B', 'C', 'D']).astype(np.float32)
        pandopt_df = pandopt(pandas_df)

        for func in test_funcs + agg_funcs:
            for test_type in test_types:
                pandas_checksum, pandas_time, pandas_error = measure_performance(pandas_df, func, test_type)
                pandopt_checksum, pandopt_time, pandopt_error = measure_performance(pandopt_df, func, test_type)

                key = f"Size: 10^{test_num}, Func: {func.__name__}, Test: {test_type}"
                results[key] = {
                    "Pandas Time (s)": pandas_time,
                    "Pandopt Time (s)": pandopt_time,
                    "Checksum Pandas": pandas_checksum,
                    "Checksum Pandopt": pandopt_checksum,
                    "Pandas Error": pandas_error,
                    "Pandopt Error": pandopt_error
                }

                progress_bar.update(1)

    progress_bar.close()
    return pd.DataFrame.from_dict(results, orient='index')

# Run the tests
test_functions = [simple_start, harder_func, harder2_func, harder3_func]
agg_functions = [agg_sum, agg_mean, agg_max, agg_min, agg_std]
test_types = ["apply", "rolling", "groupby"]

results_df = run_tests(data_size=5, test_funcs=test_functions, agg_funcs=agg_functions, test_types=test_types)
print(results_df)

  return fn(self, *args, **kwargs)
Running Tests:   1%|▍                                               | 1/108 [00:01<02:34,  1.44s/it]<class 'PandOpt.pandopt'> finish in pandas fallback for func [[[ 0.7510271   0.8819945   0.47146   ]
  [ 2.2847526   1.3871288  -0.00258136]
  [ 1.3421756   1.2163529   0.54852206]
  [-1.105924    0.7163399   0.69091845]]

 [[ 0.8819945   0.47146     1.2668036 ]
  [ 1.3871288  -0.00258136  0.03414368]
  [ 1.2163529   0.54852206  0.07470609]
  [ 0.7163399   0.69091845  0.11548724]]

 [[ 0.47146     1.2668036  -0.48238975]
  [-0.00258136  0.03414368 -0.47365156]
  [ 0.54852206  0.07470609 -0.2637943 ]
  [ 0.69091845  0.11548724  0.03047474]]

 [[ 1.2668036  -0.48238975  0.05272019]
  [ 0.03414368 -0.47365156 -0.14680655]
  [ 0.07470609 -0.2637943   0.40040585]
  [ 0.11548724  0.03047474  1.6624091 ]]

 [[-0.48238975  0.05272019  1.3545709 ]
  [-0.47365156 -0.14680655  1.2609037 ]
  [-0.2637943   0.40040585 -1.6764638 ]
  [ 0.03047474  1.6624091   0.491347

In [17]:
df_size = int(10**5)
pandas_df = pd.DataFrame(np.random.randn(df_size, 4), columns=['A', 'B', 'C', 'D']).astype(np.float32)
pandopt_df = pandopt(pandas_df)
test_types = ["apply", "rolling", "groupby"]
measure_performance(pandas_df, harder_func, "apply",)

(8255.686, 0.9976772890076973, None)

In [19]:
measure_performance(pandopt_df, harder_func, "apply",)

<class 'PandOpt.pandopt'> finish in pandas fallback for func [[-1.4614375  -0.72791356  0.4858306  -0.17091323]
 [-0.7444548   0.340198   -1.9502494  -0.3394356 ]
 [-0.52052134 -0.22974943  0.56264186  0.34056336]
 ...
 [-0.38195738  1.2708303  -0.75209063 -0.40508595]
 [ 1.0395015  -0.8953692  -0.76272565 -0.3593748 ]
 [ 0.6658821   1.0201312  -0.4170845  -0.22054024]]


(None, None, "'numpy.float32' object is not callable")

In [16]:
pandopt_df._compiled_func

{85219505574198954: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 8115278541247942364: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 4411026621755478361: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 2575778278030769627: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 7327869446532249945: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 -6042259897073996500: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 4081650103367688955: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 131944967447514148: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>,
 2778706282200863172: <function PandOpt.pandopt._with_fallback_wrap.<locals>._with_protects(*args, **kwargs)>}