# PANDAS ALTERNATIVES IN PYTHON

- System: 

    Mackbook Pro 15 Inch, 2019 
        
        Processor: 2.3 GHz 8-Core Intel Core i9
        
        Memory: 16 GB 2400 MHz DDR4
        
        macOS:  Sonoma 14.5 Beta (23F5049f)

- Python 3.10.9

______


In [1]:
# Importing libraries 
import polars as pl
import datatable  as dt
import os
os.environ['MODIN_ENGINE'] = 'dask'
import modin.pandas as md
import modin
print("NUM Partitions available: ", modin.config.NPartitions.get())
os.environ["MODIN_CPUS"] = "4"
import pandas as pd
import time
import numpy as np
from statistics import mean, stdev
import vaex as vx

NUM Partitions available:  16
NUM Partitions available:  16


In [2]:
def exec_time(code_str):
    start = time.time()
    exec(code_str)
    end = time.time()
    return np.round((end-start)/60*1000, 6)

def functiontiming(cmd_d, metr_nm, dictionary, loop = 10, add_cmd = None):
    print(metr_nm)
    for key, cmd in cmd_d.items():
        counter = 0
        times = []
        try: 
            while counter <= loop:
                if add_cmd:
                    new_cmd = add_cmd[key]
                    exec_time(new_cmd)
                time = exec_time(cmd)
                times.append(time)
                counter +=1
            meant = np.round(mean(times), 6)
            stdevt = np.round(stdev(times), 6)
            maxt = np.round(max(times), 6)
            mint = np.round(min(times), 6)
            add_dic = {'METRIC': metr_nm, 'LIBRARY': key, 'TIME (avg)': meant, "TIME (stdv)": stdevt, "TIME (max)": maxt, "TIME (min)": mint,   'N': loop}

        except Exception as E:
            print(key, " ERROR:\n", str(E))
            add_dic = {}

        if metr_nm in dictionary:
            dictionary[metr_nm].append(add_dic)
        else: 
            dictionary[metr_nm] = [add_dic]
        try: 
            print(key, '\t', meant, 'usec')     
        except: print('No data')
        
def dict_to_df(dictionary, file_size = None):
    df = pd.DataFrame()
    for k, v in dictionary.items(): 
        df1 = pd.DataFrame(v)
        df = pd.concat([df, df1], axis = 0).reset_index(drop = True)
    if file_size: 
        df.loc[:,'FILE_SZ'] = file_size
    return df  

______ 
### ~SMALL FILE 

In [3]:
# Number of data points to calculate statictics 
loops = 5
file = 'data/data_small.csv'
print("File size: ", np.round(os.stat(file).st_size / (1024 * 1024), 2), "MB")
RESULT_SM = {}

File size:  13.84 MB


In [4]:
cmdsrd = {
        'datatable': 'global dtdf ; dtdf = dt.fread(file)',
        'pandas'   : 'global pdf  ; pdf  = pd.read_csv(file, low_memory = False )',
        'polars'   : 'global pldf ; pldf = pl.read_csv(file, infer_schema_length=100000, ignore_errors = True )',
        'modin'    : 'global mdf  ; mdf  = md.read_csv(file, low_memory = False)',
        'vaex'     : 'global vxdf ; vxdf = vx.open(file)'
        }
metric = 'READ_CSV'
functiontiming(cmdsrd, metric, RESULT_SM, loop = loops)

READ_CSV
datatable 	 2.048697 usec
pandas 	 9.562804 usec
polars 	 7.397899 usec



    from distributed import Client

    client = Client()

Dask needs bokeh >= 2.4.2, < 3 for the dashboard.
You have bokeh==3.0.3.
Continuing without the dashboard.
2024-04-28 18:56:12,759 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/dg/fckc2gz96c599j8pqfzz6jzr0000gn/T/dask-worker-space/worker-90rrv919', purging
2024-04-28 18:56:12,760 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/dg/fckc2gz96c599j8pqfzz6jzr0000gn/T/dask-worker-space/worker-vsv5njnl', purging
2024-04-28 18:56:12,762 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/dg/fckc2gz96c599j8pqfzz6jzr0000gn/T/dask-worker-space/worker-5i5nj2fx', purging
2024-04-28 18:56:12,763 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/dg/fckc2gz96c599j8pqfzz6jzr0000gn/T/dask-worker-space/worker-24bzes0n', purging
2024-04-28 18:56:12,764 - distributed.diskutils - INFO - Found stale lock file and di

modin 	 26.922177 usec
vaex 	 6.83318 usec


In [5]:
cmdshp = {
        'datatable': 'dtdf.shape',
        'pandas'   : 'pdf.shape',
        'polars'   : 'pldf.shape',
        'modin'    : 'mdf.shape',
        'vaex'     : 'vxdf.shape'
        }
metric = 'PRINT DF SHAPE'

functiontiming(cmdshp, metric, RESULT_SM, loop = loops)

PRINT DF SHAPE
datatable 	 0.000486 usec
pandas 	 0.000427 usec
polars 	 0.00048 usec
modin 	 0.000622 usec
vaex 	 0.001058 usec


In [6]:
cmds_copy = {
         'datatable': 'global dtdf1 ; dtdf1 = dtdf.copy()',
         'pandas'  : 'global pdf1  ; pdf1 = pdf.copy()',
         'polars'  : 'global pldf1 ; pldf1 = pldf.clone()',
         'modin'   : 'global mdf1  ; mdf1 = mdf.copy()',
         'vaex'    : 'global vxdf1 ; vxdf1 = vxdf.copy()'
        }
metric = 'CREATE COPY'
functiontiming(cmds_copy, metric, RESULT_SM, loop = loops)

CREATE COPY
datatable 	 0.00081 usec
pandas 	 0.290458 usec
polars 	 0.000926 usec
modin 	 0.008698 usec
vaex 	 0.058691 usec


In [7]:
cmds_col1 = {
         'datatable': 'global dtdf1 ; dtdf1.names = {"CRASH_CRN":"CRASH_CRNnew"}',
         'pandas'   : 'global pdf1  ; pdf1 = pdf1.rename(columns = {"CRASH_CRN":"CRASH_CRNnew"})',
         'polars'   : 'global pldf1 ; pldf1 = pldf1.rename({"CRASH_CRN":"CRASH_CRNnew"})',
         'modin'    : 'global mdf1  ; mdf1 = mdf1.rename(columns = {"CRASH_CRN":"CRASH_CRNnew"})',
         'vaex'     :  'vxdf1.rename("CRASH_CRN","CRASH_CRNnew")'
        }
metric = 'RENAME SINGLE COLUMN'
functiontiming(cmds_col1, metric, RESULT_SM, add_cmd = cmds_copy, loop = loops)

RENAME SINGLE COLUMN
datatable 	 0.001229 usec
pandas 	 0.20983 usec
polars 	 0.00437 usec
modin 	 0.064741 usec
vaex 	 0.065772 usec


In [8]:
new_columns = [col+'NEW' for col in pdf.columns]
new_colums_dict = {}
for col in pdf.columns:
    new_colums_dict[col] = col+'NEW'

cmds_col_all = {
         'datatable': 'global dtdf  ; dtdf.names = new_columns',
         'pandas'   : 'global pdf   ; pdf.columns = new_columns',
         # For polars to work with no errors I had to create a new dataframe. 
         # Tests without new copy in other platforms worked with no issues
         'polars'   : 'global pldf2 ; pldf2 =  pldf.rename(new_colums_dict)',
         'modin'    : 'global mdf   ; mdf = mdf.rename(columns = new_colums_dict)',
         'vaex'     : 'for cur_nm, new_nm in new_colums_dict.items(): vxdf1.rename(cur_nm, new_nm)'
        }
metric = 'RENAME ALL COLUMNS'
functiontiming(cmds_col_all, metric, RESULT_SM, add_cmd = cmds_copy, loop = loops)

RENAME ALL COLUMNS
datatable 	 0.001191 usec
pandas 	 0.005534 usec
polars 	 0.027041 usec
modin 	 0.058977 usec
vaex 	 5.374467 usec


In [9]:
cmds_sort1 = {
         'datatable': 'dtdf[:,:, dt.sort("MUNICIPALITYNEW", reverse=True)]',
         'pandas'   : 'pdf.sort_values(by = ["MUNICIPALITYNEW"], ascending = [False])',
         'polars'   : 'pldf2.sort("MUNICIPALITYNEW", descending=True)',
         'modin'    : 'mdf.sort_values(by = ["MUNICIPALITYNEW"], ascending = [False])',
         'vaex'     : 'vxdf.sort(["MUNICIPALITY"])'
        }
metric = 'SORT ONE COLUMN'
functiontiming(cmds_sort1, metric, RESULT_SM, loop = loops)

SORT ONE COLUMN
datatable 	 0.017212 usec
pandas 	 0.3518 usec
polars 	 0.372836 usec
modin 	 132.792391 usec
vaex 	 1.920883 usec


In [10]:
cmds_sort2 = {
         'datatable': 'dtdf[:,:, dt.sort(["MUNICIPALITYNEW", "CRASH_YEARNEW"], reverse=[True, False])]',
         'pandas'   : 'pdf.sort_values(by = ["MUNICIPALITYNEW", "CRASH_YEARNEW"], ascending = [False, True])',
         'polars'   : 'pldf2.sort("MUNICIPALITYNEW", "CRASH_YEARNEW", descending=[True, False])',
         'modin'    : 'mdf.sort_values(by = ["MUNICIPALITYNEW", "CRASH_YEARNEW"], ascending = [False, True])',
         'vaex'     : 'vxdf.sort(["MUNICIPALITY", "CRASH_YEAR"], ascending = [False, True])'
        }
metric = 'SORT TWO COLUMN'
functiontiming(cmds_sort2, metric, RESULT_SM, loop = loops)

SORT TWO COLUMN
datatable 	 0.075463 usec
pandas 	 0.556926 usec
polars 	 0.284105 usec




modin 	 134.910463 usec
vaex 	 3.833902 usec


In [19]:
from datatable import dt, f, by
grp_by_sum = {
         'datatable': 'dtdf[:, dt.sum(f.CRASH_YEARNEW), by("MUNICIPALITYNEW")]',
         'pandas'   : 'pdf.groupby("MUNICIPALITYNEW")["CRASH_YEARNEW"].sum()',
         'polars'   : 'pldf2.group_by("MUNICIPALITYNEW").agg(pl.sum("CRASH_YEARNEW"))',
         'modin'    : 'mdf.groupby("MUNICIPALITYNEW")["CRASH_YEARNEW"].sum()',
         'vaex'     : "vxdf.groupby(by='MUNICIPALITY').agg({'CRASH_YEAR': 'sum'})"

        }
metric = 'GROUP BY SUM'
functiontiming(grp_by_sum, metric, RESULT_SM, loop = loops)

GROUP BY SUM
datatable 	 0.023239 usec
pandas 	 0.031094 usec
polars 	 0.19369 usec
modin 	 13.064914 usec
vaex 	 11.953482 usec


In [20]:
dict_to_df(RESULT_SM, 'Small')

Unnamed: 0,METRIC,LIBRARY,TIME (avg),TIME (stdv),TIME (max),TIME (min),N,FILE_SZ
0,READ_CSV,datatable,2.048697,0.082008,2.198601,1.957333,5,Small
1,READ_CSV,pandas,9.562804,0.797654,10.446338,8.278171,5,Small
2,READ_CSV,polars,7.397899,0.52033,8.097732,6.930431,5,Small
3,READ_CSV,modin,26.922177,38.016839,104.510701,10.309048,5,Small
4,READ_CSV,vaex,6.83318,5.626612,18.301062,4.035985,5,Small
5,PRINT DF SHAPE,datatable,0.000486,0.000204,0.000866,0.000318,5,Small
6,PRINT DF SHAPE,pandas,0.000427,0.000151,0.000731,0.00035,5,Small
7,PRINT DF SHAPE,polars,0.00048,0.000316,0.001121,0.000318,5,Small
8,PRINT DF SHAPE,modin,0.000622,0.000278,0.00118,0.000465,5,Small
9,PRINT DF SHAPE,vaex,0.001058,0.000218,0.001498,0.000934,5,Small


_____

### ~Medium file

In [21]:
### Deleting dataframes used with the Small file data
del dtdf, dtdf1, pdf, pdf1, pldf, pldf1, pldf2, mdf, mdf1, vxdf, vxdf1

In [22]:
file = 'data/data_medium.csv'
print("File size: ", np.round(os.stat(file).st_size / (1024 * 1024), 2), "MB")
RESULT_MD = {}

File size:  140.68 MB


In [23]:
metric = 'READ_CSV'
functiontiming(cmdsrd, metric, RESULT_MD, loop = loops)
metric = 'PRINT DF SHAPE'
functiontiming(cmdshp, metric, RESULT_MD, loop = loops)
metric = 'CREATE COPY'
functiontiming(cmds_copy, metric, RESULT_MD, loop = loops)
metric = 'RENAME SINGLE COLUMN'
functiontiming(cmds_col1, metric, RESULT_MD, add_cmd = cmds_copy, loop = loops)
metric = 'RENAME ALL COLUMNS'
functiontiming(cmds_col_all, metric, RESULT_MD, add_cmd = cmds_copy, loop = loops)
metric = 'SORT ONE COLUMN'
functiontiming(cmds_sort1, metric, RESULT_MD, loop = loops)
metric = 'SORT TWO COLUMN'
functiontiming(cmds_sort2, metric, RESULT_MD, loop = loops)
metric = 'GROUP BY SUM'
functiontiming(grp_by_sum, metric, RESULT_MD, loop = loops)

READ_CSV
datatable 	 7.996454 usec
pandas 	 130.581264 usec
polars 	 35.696691 usec
modin 	 34.626186 usec
vaex 	 4.419714 usec
PRINT DF SHAPE
datatable 	 0.000421 usec
pandas 	 0.000416 usec
polars 	 0.000405 usec
modin 	 0.000572 usec
vaex 	 0.001114 usec
CREATE COPY
datatable 	 0.000722 usec
pandas 	 5.911 usec
polars 	 0.0008 usec
modin 	 0.00789 usec
vaex 	 0.079807 usec
RENAME SINGLE COLUMN
datatable 	 0.001511 usec
pandas 	 4.395672 usec
polars 	 0.002392 usec
modin 	 0.062919 usec
vaex 	 0.087836 usec
RENAME ALL COLUMNS
datatable 	 0.001142 usec
pandas 	 0.005552 usec
polars 	 0.015736 usec
modin 	 0.061135 usec
vaex 	 4.82273 usec
SORT ONE COLUMN
datatable 	 0.024921 usec
pandas 	 5.948919 usec
polars 	 4.067038 usec




modin 	 140.495656 usec
vaex 	 15.106791 usec
SORT TWO COLUMN
datatable 	 0.190091 usec
pandas 	 6.568081 usec
polars 	 3.077682 usec




modin 	 159.478923 usec
vaex 	 42.926262 usec
GROUP BY SUM
datatable 	 0.106646 usec
pandas 	 0.121413 usec
polars 	 0.989457 usec
modin 	 16.079056 usec
vaex 	 35.849466 usec


In [24]:
dict_to_df(RESULT_MD, 'Medium')

Unnamed: 0,METRIC,LIBRARY,TIME (avg),TIME (stdv),TIME (max),TIME (min),N,FILE_SZ
0,READ_CSV,datatable,7.996454,1.164161,9.432566,6.934714,5,Medium
1,READ_CSV,pandas,130.581264,3.909886,136.345418,124.531937,5,Medium
2,READ_CSV,polars,35.696691,1.768031,38.459468,34.373788,5,Medium
3,READ_CSV,modin,34.626186,3.137712,39.51722,31.705717,5,Medium
4,READ_CSV,vaex,4.419714,0.506521,5.371082,4.061266,5,Medium
5,PRINT DF SHAPE,datatable,0.000421,0.000144,0.000699,0.000314,5,Medium
6,PRINT DF SHAPE,pandas,0.000416,0.000117,0.000652,0.00035,5,Medium
7,PRINT DF SHAPE,polars,0.000405,0.000145,0.000699,0.000334,5,Medium
8,PRINT DF SHAPE,modin,0.000572,0.000165,0.000902,0.000481,5,Medium
9,PRINT DF SHAPE,vaex,0.001114,0.000213,0.001534,0.000981,5,Medium


_____

### ~Large file

In [25]:
### Deleting dataframes used with the Medium file data
del dtdf, dtdf1, pdf, pdf1, pldf, pldf1, pldf2, mdf, mdf1, vxdf, vxdf1

In [26]:
file = 'data/data_large.csv'
print("File size: ", np.round(os.stat(file).st_size / (1024 * 1024), 2), "MB")
RESULT_LG = {}

File size:  281.36 MB


In [27]:
metric = 'READ_CSV'
functiontiming(cmdsrd, metric, RESULT_LG, loop = loops)
metric = 'PRINT DF SHAPE'
functiontiming(cmdshp, metric, RESULT_LG, loop = loops)
metric = 'CREATE COPY'
functiontiming(cmds_copy, metric, RESULT_LG, loop = loops)
metric = 'RENAME SINGLE COLUMN'
functiontiming(cmds_col1, metric, RESULT_LG, add_cmd = cmds_copy, loop = loops)
metric = 'RENAME ALL COLUMNS'
functiontiming(cmds_col_all, metric, RESULT_LG, add_cmd = cmds_copy, loop = loops)
metric = 'SORT ONE COLUMN'
functiontiming(cmds_sort1, metric, RESULT_LG, loop = loops)
metric = 'SORT TWO COLUMN'
functiontiming(cmds_sort2, metric, RESULT_LG, loop = loops)
metric = 'GROUP BY SUM'
functiontiming(grp_by_sum, metric, RESULT_LG, loop = loops)

READ_CSV
datatable 	 12.275466 usec
pandas 	 265.443018 usec
polars 	 42.216853 usec
modin 	 77.288881 usec
vaex 	 7.128306 usec
PRINT DF SHAPE
datatable 	 0.000461 usec
pandas 	 0.000485 usec
polars 	 0.000786 usec
modin 	 0.000911 usec
vaex 	 0.001284 usec
CREATE COPY
datatable 	 0.001208 usec
pandas 	 11.766 usec
polars 	 0.000912 usec
modin 	 0.0081 usec
vaex 	 0.061523 usec
RENAME SINGLE COLUMN
datatable 	 0.001364 usec
pandas 	 10.337778 usec
polars 	 0.011423 usec
modin 	 0.067888 usec
vaex 	 0.069774 usec
RENAME ALL COLUMNS
datatable 	 0.001341 usec
pandas 	 0.005558 usec
polars 	 0.026735 usec
modin 	 0.059544 usec
vaex 	 4.633701 usec
SORT ONE COLUMN
datatable 	 0.0471 usec
pandas 	 15.07819 usec
polars 	 10.493359 usec


2024-04-28 19:12:06,547 - distributed.worker - ERROR - Worker stream died during communication: tcp://127.0.0.1:51573
Traceback (most recent call last):
  File "/Users/jorgepinzon/opt/anaconda3/envs/py310/lib/python3.10/site-packages/tornado/iostream.py", line 869, in _read_to_buffer
    bytes_read = self.read_from_fd(buf)
  File "/Users/jorgepinzon/opt/anaconda3/envs/py310/lib/python3.10/site-packages/tornado/iostream.py", line 1138, in read_from_fd
    return self.socket.recv_into(buf, len(buf))
ConnectionResetError: [Errno 54] Connection reset by peer

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/jorgepinzon/opt/anaconda3/envs/py310/lib/python3.10/site-packages/distributed/worker.py", line 2058, in gather_dep
    response = await get_data_from_worker(
  File "/Users/jorgepinzon/opt/anaconda3/envs/py310/lib/python3.10/site-packages/distributed/worker.py", line 2872, in get_data_from_worker
    return await ret

modin  ERROR:
 Attempted to run task deploy_dask_func-7557be68-bb54-4520-8eff-82b135908ba9 on 3 different workers, but all those workers died while running it. The last worker that attempt to run the task was tcp://127.0.0.1:51587. Inspecting worker logs is often a good next step to diagnose what went wrong. For more information see https://distributed.dask.org/en/stable/killed.html.
modin 	 10.493359 usec
vaex 	 32.547749 usec
SORT TWO COLUMN
datatable 	 0.308714 usec
pandas 	 17.690414 usec
polars 	 11.158356 usec




modin  ERROR:
 Attempted to run task deploy_dask_func-8d4b7154-ac34-44b1-9132-fa7fcd505b2d on 3 different workers, but all those workers died while running it. The last worker that attempt to run the task was tcp://127.0.0.1:51584. Inspecting worker logs is often a good next step to diagnose what went wrong. For more information see https://distributed.dask.org/en/stable/killed.html.
modin 	 11.158356 usec
vaex 	 75.452062 usec
GROUP BY SUM
datatable 	 0.155615 usec
pandas 	 0.209529 usec
polars 	 2.223554 usec
modin 	 13.317662 usec
vaex 	 68.644051 usec


In [28]:
dict_to_df(RESULT_LG, 'Large')

Unnamed: 0,METRIC,LIBRARY,TIME (avg),TIME (stdv),TIME (max),TIME (min),N,FILE_SZ
0,READ_CSV,datatable,12.275466,1.608227,15.054599,10.906065,5.0,Large
1,READ_CSV,pandas,265.443018,8.637416,281.663982,257.771667,5.0,Large
2,READ_CSV,polars,42.216853,3.420519,47.92763,39.443453,5.0,Large
3,READ_CSV,modin,77.288881,16.487715,105.849985,64.173464,5.0,Large
4,READ_CSV,vaex,7.128306,3.36655,13.51345,4.673302,5.0,Large
5,PRINT DF SHAPE,datatable,0.000461,0.000252,0.000966,0.000318,5.0,Large
6,PRINT DF SHAPE,pandas,0.000485,0.00018,0.000787,0.000334,5.0,Large
7,PRINT DF SHAPE,polars,0.000786,0.001021,0.002869,0.00033,5.0,Large
8,PRINT DF SHAPE,modin,0.000911,0.001001,0.002952,0.000469,5.0,Large
9,PRINT DF SHAPE,vaex,0.001284,0.000522,0.002333,0.000981,5.0,Large


____

# Combining the results

In [29]:
results = dict_to_df(RESULT_LG).drop(['N'], axis = 1).merge(dict_to_df(RESULT_MD), on = ['METRIC', 'LIBRARY'], suffixes=['_LG', '_MD']).drop(['N'], axis = 1).merge(dict_to_df(RESULT_SM), on = ['METRIC', 'LIBRARY'])
ordered_columns = ['METRIC', 'LIBRARY', 'TIME (avg)_LG', 'TIME (avg)_MD', 'TIME (avg)',
                    'TIME (stdv)_LG', 'TIME (stdv)_MD', 'TIME (stdv)', 
                    'TIME (max)_LG', 'TIME (max)_MD', 'TIME (max)',
                    'TIME (min)_LG', 'TIME (min)_MD', 'TIME (min)', 'N']
results[ordered_columns]

Unnamed: 0,METRIC,LIBRARY,TIME (avg)_LG,TIME (avg)_MD,TIME (avg),TIME (stdv)_LG,TIME (stdv)_MD,TIME (stdv),TIME (max)_LG,TIME (max)_MD,TIME (max),TIME (min)_LG,TIME (min)_MD,TIME (min),N
0,READ_CSV,datatable,12.275466,7.996454,2.048697,1.608227,1.164161,0.082008,15.054599,9.432566,2.198601,10.906065,6.934714,1.957333,5
1,READ_CSV,pandas,265.443018,130.581264,9.562804,8.637416,3.909886,0.797654,281.663982,136.345418,10.446338,257.771667,124.531937,8.278171,5
2,READ_CSV,polars,42.216853,35.696691,7.397899,3.420519,1.768031,0.52033,47.92763,38.459468,8.097732,39.443453,34.373788,6.930431,5
3,READ_CSV,modin,77.288881,34.626186,26.922177,16.487715,3.137712,38.016839,105.849985,39.51722,104.510701,64.173464,31.705717,10.309048,5
4,READ_CSV,vaex,7.128306,4.419714,6.83318,3.36655,0.506521,5.626612,13.51345,5.371082,18.301062,4.673302,4.061266,4.035985,5
5,PRINT DF SHAPE,datatable,0.000461,0.000421,0.000486,0.000252,0.000144,0.000204,0.000966,0.000699,0.000866,0.000318,0.000314,0.000318,5
6,PRINT DF SHAPE,pandas,0.000485,0.000416,0.000427,0.00018,0.000117,0.000151,0.000787,0.000652,0.000731,0.000334,0.00035,0.00035,5
7,PRINT DF SHAPE,polars,0.000786,0.000405,0.00048,0.001021,0.000145,0.000316,0.002869,0.000699,0.001121,0.00033,0.000334,0.000318,5
8,PRINT DF SHAPE,modin,0.000911,0.000572,0.000622,0.001001,0.000165,0.000278,0.002952,0.000902,0.00118,0.000469,0.000481,0.000465,5
9,PRINT DF SHAPE,vaex,0.001284,0.001114,0.001058,0.000522,0.000213,0.000218,0.002333,0.001534,0.001498,0.000981,0.000981,0.000934,5
