In [7]:
from PandOpt import pandopt
import pandas as pd
import numpy as np
import tqdm 
import pandas as pd
import numpy as np
import timeit
import functools


In [2]:
# Example usage
def simple_start(z):
    x = (z['A'] + z['B']) / z['C']
    x += z['B'] * z['D']
    return x / z['B']

def harder_func(z):
    x = (z['A'] + z['B']) / z['C']
    if x > 0:
        return x / z['B']
    x += z['B'] * z['D']
    return x * z['B']

def harder2_func(z):
    x = (z['A'] + z['B']) / z['C']
    if (k:=z['A']-z['C']) > (j:=z['B']/z['D']):
        return x / k
    x *= j
    return x - k if k > z['C'] else x + k

def harder3_func(z):
    g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
    x = (z['A'] + z['B']) / z['C']
    if (k:=z['A']-z['C']) > (j:=g(z['B'],z['D'])):
        return j / k
    x *= j
    return x - k if k > z['C'] else x + k
        

In [9]:

def measure_performance(df, func, test_type, window_size=3):
    try:
        if test_type == "apply":
            operation = functools.partial(df.apply, func, axis=1)
        elif test_type == "rolling":
            operation = functools.partial(df.rolling(window=window_size).apply, func, raw=True)
        elif test_type == "groupby":
            operation = functools.partial(df.groupby('A').apply, func)
        else:
            raise ValueError("Invalid test type")

        start_time = timeit.default_timer()
        result = operation()
        elapsed_time = timeit.default_timer() - start_time
        return np.sum(result), elapsed_time, None
    except Exception as e:
        return None, None, str(e)

def run_tests(data_size, test_funcs, agg_funcs, test_types):
    results = {}
    total_tests = (data_size - 1) * len(test_funcs + agg_funcs) * len(test_types)
    progress_bar = tqdm.tqdm(total=total_tests, desc="Running Tests", ncols=100)

    for test_num in range(1, data_size):
        df_size = int(10**test_num)
        pandas_df = pd.DataFrame(np.random.randn(df_size, 4), columns=['A', 'B', 'C', 'D']).astype(np.float32)
        pandopt_df = pandopt(pandas_df)

        for func in test_funcs + agg_funcs:
            for test_type in test_types:
                for test_iter in range(15):
                    pandas_checksum, pandas_time, pandas_error = measure_performance(pandas_df, func, test_type)
                    pandopt_checksum, pandopt_time, pandopt_error = measure_performance(pandopt_df, func, test_type)
    
                    key = f"Size: 10^{test_num}, Func: {func.__name__}, Test: {test_type} - {test_iter}"
                    results[key] = {
                        "Pandas Time (s)": pandas_time,
                        "Pandopt Time (s)": pandopt_time,
                        "Checksum Pandas": pandas_checksum,
                        "Checksum Pandopt": pandopt_checksum,&²
                        "Pandas Error": pandas_error,
                        "Pandopt Error": pandopt_error
                    }
    
                progress_bar.update(1)

    progress_bar.close()
    return pd.DataFrame.from_dict(results, orient='index')

# Run the tests
test_functions = [simple_start, harder_func, harder2_func, harder3_func]
agg_functions = []
test_types = ["apply"]

results_df = run_tests(data_size=7, test_funcs=test_functions, agg_funcs=agg_functions, test_types=test_types)
results_df

  return fn(self, *args, **kwargs)
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  return fn(self, *args, **kwargs)
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  return fn(self, *args, **kwargs)
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  return fn(self, *args, **kwargs)
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  return fn(self, *args, **kwargs)
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
  return fn(self, *args, **kwargs)
  g=lambda a, b: a if abs(a) > abs(b) else - 2 * (b**(-a))
Running Tests: 100%|████████████████████████████████████████████████| 24/24 [13:46<00:00, 34.44s/it]

                                                  Pandas Time (s)  \
Size: 10^1, Func: simple_start, Test: apply - 0          0.000532   
Size: 10^1, Func: simple_start, Test: apply - 1          0.000413   
Size: 10^1, Func: simple_start, Test: apply - 2          0.000395   
Size: 10^1, Func: simple_start, Test: apply - 3          0.000387   
Size: 10^1, Func: simple_start, Test: apply - 4          0.000295   
...                                                           ...   
Size: 10^6, Func: harder3_func, Test: apply - 10        12.944417   
Size: 10^6, Func: harder3_func, Test: apply - 11        12.924447   
Size: 10^6, Func: harder3_func, Test: apply - 12        12.736367   
Size: 10^6, Func: harder3_func, Test: apply - 13        12.644797   
Size: 10^6, Func: harder3_func, Test: apply - 14        12.991788   

                                                  Pandopt Time (s)  \
Size: 10^1, Func: simple_start, Test: apply - 0           0.000427   
Size: 10^1, Func: simple_start,




In [15]:
results_df['time reduction'] = results_df["Pandopt Time (s)"] / results_df["Pandas Time (s)"] - 1
results_df['performance multiplicator'] = results_df["Pandas Time (s)"] / results_df["Pandopt Time (s)"] - 1
results_df

Unnamed: 0,Pandas Time (s),Pandopt Time (s),Checksum Pandas,Checksum Pandopt,Pandas Error,Pandopt Error,time reduction,performance multiplicator
"Size: 10^1, Func: simple_start, Test: apply - 0",0.000532,0.000427,11.939754,0 11.939754 dtype: float32,,,-0.197864,0.246671
"Size: 10^1, Func: simple_start, Test: apply - 1",0.000413,0.000278,11.939754,0 11.939754 dtype: float32,,,-0.326652,0.485117
"Size: 10^1, Func: simple_start, Test: apply - 2",0.000395,0.000168,11.939754,0 11.939754 dtype: float32,,,-0.575636,1.356469
"Size: 10^1, Func: simple_start, Test: apply - 3",0.000387,0.000124,11.939754,0 11.939754 dtype: float32,,,-0.679024,2.115493
"Size: 10^1, Func: simple_start, Test: apply - 4",0.000295,0.000131,11.939754,0 11.939754 dtype: float32,,,-0.558056,1.262730
...,...,...,...,...,...,...,...,...
"Size: 10^6, Func: harder3_func, Test: apply - 10",12.944417,2.641331,864990.941182,0 864990.941182 dtype: float64,,,-0.795948,3.900718
"Size: 10^6, Func: harder3_func, Test: apply - 11",12.924447,2.655027,864990.941182,0 864990.941182 dtype: float64,,,-0.794573,3.867915
"Size: 10^6, Func: harder3_func, Test: apply - 12",12.736367,2.539643,864990.941182,0 864990.941182 dtype: float64,,,-0.800599,4.015023
"Size: 10^6, Func: harder3_func, Test: apply - 13",12.644797,2.592890,864990.941182,0 864990.941182 dtype: float64,,,-0.794944,3.876720


In [17]:
results_df.to_csv('benchmark.csv')