In [1]:
from PandOpt import pandopt
import pandas as pd
import numpy as np
import tqdm 
import pandas as pd
import numpy as np
import timeit
import functools


In [2]:
# Row-wise benchmark functions: each takes a row-like mapping `z` with keys 'A', 'B', 'C' and 'D'.
def simple_start(z):
    """Branch-free arithmetic benchmark over one row.

    Args:
        z: Row-like object indexable by the keys 'A', 'B', 'C' and 'D'.

    Returns:
        ``((A + B) / C + B * D) / B`` evaluated on the values of ``z``.
    """
    acc = (z['A'] + z['B']) / z['C']
    # Kept as an augmented assignment so the accumulator is updated the same
    # way as before for any operand type.
    acc += z['B'] * z['D']
    return acc / z['B']

def harder_func(z):
    """Arithmetic benchmark with one data-dependent early return.

    Args:
        z: Row-like object indexable by the keys 'A', 'B', 'C' and 'D'.

    Returns:
        ``ratio / B`` when ``ratio = (A + B) / C`` is strictly positive,
        otherwise ``(ratio + B * D) * B``.
    """
    ratio = (z['A'] + z['B']) / z['C']
    # Strict "> 0" test: zero (and NaN) fall through to the second formula.
    if ratio > 0:
        return ratio / z['B']
    ratio += z['B'] * z['D']
    return ratio * z['B']

def harder2_func(z):
    """Benchmark using assignment expressions and a conditional expression.

    Args:
        z: Row-like object indexable by the keys 'A', 'B', 'C' and 'D'.

    Returns:
        With ``ratio = (A + B) / C``, ``diff = A - C`` and ``quot = B / D``:
        ``ratio / diff`` when ``diff > quot``; otherwise ``ratio * quot``
        minus ``diff`` if ``diff > C``, or plus ``diff`` if not.
    """
    ratio = (z['A'] + z['B']) / z['C']
    # Both operands are bound inside the comparison (left first, then right)
    # so they can be reused below.
    if (diff := z['A'] - z['C']) > (quot := z['B'] / z['D']):
        return ratio / diff
    ratio *= quot
    return ratio - diff if diff > z['C'] else ratio + diff

def harder3_func(z):
    """Benchmark combining a local lambda, assignment expressions and branches.

    Args:
        z: Row-like object indexable by the keys 'A', 'B', 'C' and 'D'.

    Returns:
        With ``ratio = (A + B) / C``, ``diff = A - C`` and
        ``picked = pick(B, D)``: ``picked / diff`` when ``diff > picked``;
        otherwise ``ratio * picked`` minus ``diff`` if ``diff > C``, or plus
        ``diff`` if not.
    """
    # pick(a, b) yields a when |a| > |b|, else -2 * b**(-a).
    pick = lambda a, b: a if abs(a) > abs(b) else -2 * (b ** (-a))
    ratio = (z['A'] + z['B']) / z['C']
    if (diff := z['A'] - z['C']) > (picked := pick(z['B'], z['D'])):
        return picked / diff
    ratio *= picked
    return ratio - diff if diff > z['C'] else ratio + diff
        

In [3]:

def measure_performance(df, func, test_type, window_size=3):
    """Time one application of ``func`` to ``df`` and checksum the result.

    Args:
        df: Frame-like object exposing ``apply``, ``rolling`` and ``groupby``.
        func: Callable handed to the selected operation.
        test_type: One of "apply" (row-wise, ``axis=1``), "rolling"
            (``raw=True`` over ``window_size`` rows) or "groupby"
            (grouped on column 'A').
        window_size: Window length used only by the "rolling" test.

    Returns:
        ``(checksum, elapsed_seconds, None)`` on success, where the checksum
        is ``np.sum(result, axis=0)``; ``(None, None, message)`` if anything
        raised, including an unknown ``test_type``.
    """
    # Ordered (name, builder) pairs. Builders are lazy so only the selected
    # operation is constructed, and construction stays outside the timed span.
    builders = (
        ("apply", lambda: functools.partial(df.apply, func, axis=1)),
        ("rolling", lambda: functools.partial(
            df.rolling(window=window_size).apply, func, raw=True)),
        ("groupby", lambda: functools.partial(df.groupby('A').apply, func)),
    )
    try:
        build = next(
            (make for name, make in builders if test_type == name),
            None,
        )
        if build is None:
            raise ValueError("Invalid test type")
        timed_call = build()

        t0 = timeit.default_timer()
        outcome = timed_call()
        duration = timeit.default_timer() - t0
        return np.sum(outcome, axis=0), duration, None
    except Exception as exc:
        # Best-effort benchmark: report the failure as text instead of raising.
        return None, None, str(exc)

def run_tests(data_size, test_funcs, agg_funcs, test_types, n_repeats=15, seed=None):
    """Benchmark pandas against pandopt over growing random frames.

    For every exponent ``e`` in ``range(1, data_size)``, every function in
    ``test_funcs + agg_funcs`` and every entry of ``test_types``, the
    benchmark is repeated ``n_repeats`` times. Each repetition builds a fresh
    ``10**e x 4`` float32 frame with columns 'A'-'D', wraps it with
    ``pandopt`` and times both frames with ``measure_performance``.

    Args:
        data_size: Exclusive upper bound on the size exponent; frames have
            10^1 .. 10^(data_size - 1) rows.
        test_funcs: List of row-wise functions to benchmark.
        agg_funcs: List of additional functions, benchmarked the same way.
        test_types: Iterable of test-type names understood by
            ``measure_performance``.
        n_repeats: Repetitions per (size, function, test type) combination.
            Defaults to 15, the previously hard-coded value.
        seed: Optional seed for the random input frames. ``None`` (default)
            keeps drawing from NumPy's global random state as before.

    Returns:
        DataFrame indexed by a descriptive key per repetition, with the
        timings, checksums and error messages of both implementations.
    """
    # Built once instead of on every outer-loop iteration.
    all_funcs = test_funcs + agg_funcs
    # np.random and RandomState both expose randn, so one call site serves
    # the seeded and the unseeded case.
    rng = np.random if seed is None else np.random.RandomState(seed)

    results = {}
    total_tests = max(data_size - 1, 0) * len(all_funcs) * len(test_types)

    # The context manager closes the bar even if a benchmark step raises.
    with tqdm.tqdm(total=total_tests, desc="Running Tests", ncols=100) as progress_bar:
        for test_num in range(1, data_size):
            df_size = 10 ** test_num
            for func in all_funcs:
                for test_type in test_types:
                    for test_iter in range(n_repeats):
                        pandas_df = pd.DataFrame(
                            rng.randn(df_size, 4), columns=['A', 'B', 'C', 'D']
                        ).astype(np.float32)
                        pandopt_df = pandopt(pandas_df)
                        pandas_checksum, pandas_time, pandas_error = measure_performance(
                            pandas_df, func, test_type)
                        pandopt_checksum, pandopt_time, pandopt_error = measure_performance(
                            pandopt_df, func, test_type)

                        key = f"Size: 10^{test_num}, Func: {func.__name__}, Test: {test_type} - {test_iter}"
                        results[key] = {
                            "Pandas Time (s)": pandas_time,
                            "Pandopt Time (s)": pandopt_time,
                            "Checksum Pandas": pandas_checksum,
                            "Checksum Pandopt": pandopt_checksum,
                            "Pandas Error": pandas_error,
                            "Pandopt Error": pandopt_error
                        }

                    # One tick per (size, function, test type) combination,
                    # matching how total_tests is computed.
                    progress_bar.update(1)

    return pd.DataFrame.from_dict(results, orient='index')

# Benchmark configuration: the row-wise functions to time, extra aggregation
# functions (none here), and the pandas code paths to exercise.
test_functions = [
    simple_start,
    harder_func,
    harder2_func,
    harder3_func,
]
agg_functions = []
test_types = ["apply"]

# run_tests iterates range(1, data_size), so data_size=7 covers frames of
# 10^1 up to 10^6 rows.
results_df = run_tests(
    data_size=7,
    test_funcs=test_functions,
    agg_funcs=agg_functions,
    test_types=test_types,
)
results_df

  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a

Unnamed: 0,Pandas Time (s),Pandopt Time (s),Checksum Pandas,Checksum Pandopt,Pandas Error,Pandopt Error
"Size: 10^1, Func: simple_start, Test: apply - 0",0.000448,1.383500,2.514960e+01,0 25.149603 dtype: float32,,
"Size: 10^1, Func: simple_start, Test: apply - 1",0.000336,0.000115,-1.630520e+03,0 -1630.520264 dtype: float32,,
"Size: 10^1, Func: simple_start, Test: apply - 2",0.000260,0.000102,-1.182125e+01,0 -11.821253 dtype: float32,,
"Size: 10^1, Func: simple_start, Test: apply - 3",0.000247,0.000099,1.078366e+01,0 10.783657 dtype: float32,,
"Size: 10^1, Func: simple_start, Test: apply - 4",0.000252,0.000099,-4.743784e+01,0 -47.437836 dtype: float32,,
...,...,...,...,...,...,...
"Size: 10^6, Func: harder3_func, Test: apply - 10",12.935969,2.711630,-1.265750e+06,0 -1.265750e+06 dtype: float64,,
"Size: 10^6, Func: harder3_func, Test: apply - 11",13.035154,2.707187,2.017005e+06,0 2.017005e+06 dtype: float64,,
"Size: 10^6, Func: harder3_func, Test: apply - 12",12.991005,2.726901,3.861232e+05,0 386123.150063 dtype: float64,,
"Size: 10^6, Func: harder3_func, Test: apply - 13",12.946690,2.699433,-1.148069e+06,0 -1.148069e+06 dtype: float64,,


In [4]:
# Derived comparison columns, added to results_df in place.
pandas_seconds = results_df["Pandas Time (s)"]
pandopt_seconds = results_df["Pandopt Time (s)"]

# pandopt/pandas - 1: negative values mean pandopt took less time.
results_df['time reduction'] = pandopt_seconds / pandas_seconds - 1
# pandas/pandopt - 1: positive values mean pandopt was faster.
results_df['performance multiplicator'] = pandas_seconds / pandopt_seconds - 1
results_df

Unnamed: 0,Pandas Time (s),Pandopt Time (s),Checksum Pandas,Checksum Pandopt,Pandas Error,Pandopt Error,time reduction,performance multiplicator
"Size: 10^1, Func: simple_start, Test: apply - 0",0.000448,1.383500,2.514960e+01,0 25.149603 dtype: float32,,,3086.845172,-0.999676
"Size: 10^1, Func: simple_start, Test: apply - 1",0.000336,0.000115,-1.630520e+03,0 -1630.520264 dtype: float32,,,-0.659264,1.934820
"Size: 10^1, Func: simple_start, Test: apply - 2",0.000260,0.000102,-1.182125e+01,0 -11.821253 dtype: float32,,,-0.605256,1.533289
"Size: 10^1, Func: simple_start, Test: apply - 3",0.000247,0.000099,1.078366e+01,0 10.783657 dtype: float32,,,-0.600548,1.503430
"Size: 10^1, Func: simple_start, Test: apply - 4",0.000252,0.000099,-4.743784e+01,0 -47.437836 dtype: float32,,,-0.608798,1.556223
...,...,...,...,...,...,...,...,...
"Size: 10^6, Func: harder3_func, Test: apply - 10",12.935969,2.711630,-1.265750e+06,0 -1.265750e+06 dtype: float64,,,-0.790381,3.770551
"Size: 10^6, Func: harder3_func, Test: apply - 11",13.035154,2.707187,2.017005e+06,0 2.017005e+06 dtype: float64,,,-0.792316,3.815018
"Size: 10^6, Func: harder3_func, Test: apply - 12",12.991005,2.726901,3.861232e+05,0 386123.150063 dtype: float64,,,-0.790093,3.764018
"Size: 10^6, Func: harder3_func, Test: apply - 13",12.946690,2.699433,-1.148069e+06,0 -1.148069e+06 dtype: float64,,,-0.791496,3.796078


In [5]:
# Persist the benchmark table to disk. to_csv writes the index by default, so
# the row labels end up as the first column of the file.
results_df.to_csv('benchmark.csv')