In [1]:
from PandOpt import pandopt
import pandas as pd
import numpy as np
import tqdm 
import pandas as pd
import numpy as np
import timeit
import functools



In [2]:
def agg_sum(z):
    return np.sum(z, axis=1)

def agg_mean(z):
    return np.mean(z)

def agg_max(z):
    return np.max(z)

def agg_min(z):
    return np.min(z)

def agg_std(z):
    return np.std(z)



In [3]:

def measure_performance(df, func, window_size=3):
    try:
        operation = functools.partial(df.rolling(window=window_size).apply, func, raw=True)

        start_time = timeit.default_timer()
        result = operation()
        elapsed_time = timeit.default_timer() - start_time
        return np.sum(result, axis=0), elapsed_time, None
    except Exception as e:
        return None, None, str(e)

def run_tests(data_size, agg_funcs):
    results = {}
    total_tests = (data_size - 1) * len(agg_funcs) 
    progress_bar = tqdm.tqdm(total=total_tests, desc="Running Tests", ncols=100)

    for test_num in range(1, data_size):
        df_size = int(10**test_num)
        for func in agg_funcs:
            for test_iter in range(15):
                pandas_df = pd.DataFrame(np.random.randn(df_size, 4), columns=['A', 'B', 'C', 'D']).astype(np.float32)
                pandopt_df = pandopt(pandas_df)
                pandas_checksum, pandas_time, pandas_error = measure_performance(pandas_df, func)
                pandopt_checksum, pandopt_time, pandopt_error = measure_performance(pandopt_df, func)

                key = f"Size: 10^{test_num}, Func: {func.__name__}, Test: {test_iter}"
                results[key] = {
                    "Pandas Time (s)": pandas_time,
                    "Pandopt Time (s)": pandopt_time,
                    "Checksum Pandas": pandas_checksum,
                    "Checksum Pandopt": pandopt_checksum,
                    "Pandas Error": pandas_error,
                    "Pandopt Error": pandopt_error
                }

            progress_bar.update(1)

    progress_bar.close()
    return pd.DataFrame.from_dict(results, orient='index')

agg_functions = [agg_sum, agg_mean, agg_max, agg_min, agg_std]

results_df = run_tests(data_size=5, agg_funcs=agg_functions)
results_df

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.

File "<ast>", line 2:
<source missing, REPL/exec in use?>

Running Tests:   5%|██▍                                              | 1/20 [00:01<00:23,  1.25s/it]Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll apply: DataFrame constructor not properly called!
Error in pandoptRoll ap

Unnamed: 0,Pandas Time (s),Pandopt Time (s),Checksum Pandas,Checksum Pandopt,Pandas Error,Pandopt Error
"Size: 10^1, Func: agg_sum, Test: 0",,1.243996,,0 7.771499 1 3.881912 2 4.502372 dtyp...,axis 1 is out of bounds for array of dimension 1,
"Size: 10^1, Func: agg_sum, Test: 1",,0.000143,,0 -1.691465 1 -2.924282 2 -2.050309 dtyp...,axis 1 is out of bounds for array of dimension 1,
"Size: 10^1, Func: agg_sum, Test: 2",,0.000107,,0 4.246821 1 3.796359 2 2.536170 dtyp...,axis 1 is out of bounds for array of dimension 1,
"Size: 10^1, Func: agg_sum, Test: 3",,0.000105,,0 3.510989 1 1.758188 2 1.908867 dtyp...,axis 1 is out of bounds for array of dimension 1,
"Size: 10^1, Func: agg_sum, Test: 4",,0.000110,,0 1.888192 1 9.075993 2 15.703785 d...,axis 1 is out of bounds for array of dimension 1,
...,...,...,...,...,...,...
"Size: 10^4, Func: agg_std, Test: 10",0.561268,,A 7114.660871 B 7292.077677 C 7213.07...,,,DataFrame constructor not properly called!
"Size: 10^4, Func: agg_std, Test: 11",0.562555,,A 7239.623877 B 7327.761485 C 7185.65...,,,DataFrame constructor not properly called!
"Size: 10^4, Func: agg_std, Test: 12",0.559817,,A 7298.910956 B 7208.337953 C 7268.39...,,,DataFrame constructor not properly called!
"Size: 10^4, Func: agg_std, Test: 13",0.566073,,A 7207.527032 B 7271.465887 C 7258.80...,,,DataFrame constructor not properly called!


In [4]:
results_df['time reduction'] = results_df["Pandopt Time (s)"] / results_df["Pandas Time (s)"] - 1
results_df['performance multiplicator'] = results_df["Pandas Time (s)"] / results_df["Pandopt Time (s)"] - 1
results_df

Unnamed: 0,Pandas Time (s),Pandopt Time (s),Checksum Pandas,Checksum Pandopt,Pandas Error,Pandopt Error,time reduction,performance multiplicator
"Size: 10^1, Func: agg_sum, Test: 0",,1.243996,,0 7.771499 1 3.881912 2 4.502372 dtyp...,axis 1 is out of bounds for array of dimension 1,,,
"Size: 10^1, Func: agg_sum, Test: 1",,0.000143,,0 -1.691465 1 -2.924282 2 -2.050309 dtyp...,axis 1 is out of bounds for array of dimension 1,,,
"Size: 10^1, Func: agg_sum, Test: 2",,0.000107,,0 4.246821 1 3.796359 2 2.536170 dtyp...,axis 1 is out of bounds for array of dimension 1,,,
"Size: 10^1, Func: agg_sum, Test: 3",,0.000105,,0 3.510989 1 1.758188 2 1.908867 dtyp...,axis 1 is out of bounds for array of dimension 1,,,
"Size: 10^1, Func: agg_sum, Test: 4",,0.000110,,0 1.888192 1 9.075993 2 15.703785 d...,axis 1 is out of bounds for array of dimension 1,,,
...,...,...,...,...,...,...,...,...
"Size: 10^4, Func: agg_std, Test: 10",0.561268,,A 7114.660871 B 7292.077677 C 7213.07...,,,DataFrame constructor not properly called!,,
"Size: 10^4, Func: agg_std, Test: 11",0.562555,,A 7239.623877 B 7327.761485 C 7185.65...,,,DataFrame constructor not properly called!,,
"Size: 10^4, Func: agg_std, Test: 12",0.559817,,A 7298.910956 B 7208.337953 C 7268.39...,,,DataFrame constructor not properly called!,,
"Size: 10^4, Func: agg_std, Test: 13",0.566073,,A 7207.527032 B 7271.465887 C 7258.80...,,,DataFrame constructor not properly called!,,


In [5]:
results_df.to_csv('benchmark.csv')

In [7]:
results_df['performance multiplicator'].dropna(
    
)

Series([], Name: performance multiplicator, dtype: float64)