In [5]:
import dask.dataframe as dd 
import pandas as pd
import time

In [None]:
def perform_test(df):
    """
    Time a single ``groupby('id1')['v1'].sum()`` on a dataframe.

    For a Dask dataframe the lazy result is materialised with ``.compute()``
    inside the timed region, so both backends are measured doing the full
    aggregation rather than just building a task graph.

    Args:
        df (pandas.DataFrame or dask.DataFrame): The input dataframe. It must
            contain the columns ``'id1'`` (grouping key) and ``'v1'`` (values
            to sum).

    Returns:
        float: The elapsed time in seconds for the groupby operation.

    Raises:
        ValueError: If ``df`` is neither a Pandas nor a Dask dataframe.
    """
    # Resolve the backend *before* starting the clock so that type dispatch
    # (and the rejection of unsupported inputs) never counts toward the
    # measurement.
    if isinstance(df, pd.DataFrame):
        needs_compute = False
    elif isinstance(df, dd.DataFrame):
        needs_compute = True
    else:
        raise ValueError("Unsupported dataframe type. Only Pandas and Dask dataframes are supported.")

    # perf_counter() is monotonic and uses the highest-resolution clock
    # available; time.time() follows the wall clock, which can be adjusted
    # mid-run and has coarse resolution on some platforms.
    started = time.perf_counter()

    grouped_sum = df.groupby('id1')['v1'].sum()
    if needs_compute:
        # Dask is lazy: nothing has been aggregated until compute() runs.
        grouped_sum.compute()

    return time.perf_counter() - started

# Load the benchmark CSV into an in-memory pandas DataFrame.
# NOTE(review): the file name suggests N=1,000,000 rows and K=100 groups;
# that is inferred from the name only — confirm against the data generator.
df = pd.read_csv('test/groupby-N_1000000_K_100_file_0.csv')
# Build the Dask DataFrame from the already-loaded pandas frame, split into
# 4 partitions, so CSV parsing is excluded from both timings below.
dask_df = dd.from_pandas(df, npartitions=4)

# Run the same groupby-sum on each backend; each call returns elapsed seconds.
pandas_time = perform_test(df)
dask_time = perform_test(dask_df)

# Report the two timings (in seconds) for comparison.
print("Pandas time:", pandas_time)
print("Dask time:", dask_time)