# Comparing speed performance of pandas vs. polars

In [1]:
import pandas as pd
import polars as pl
from timeit import timeit, repeat

filter_test_number = 150  # repetition count intended for the filter benchmarks
read_test_number = 150  # repetition count for the CSV read benchmarks

## Data-reading performance difference (via CSV, but assumed to be comparable)

### With Pandas

In [2]:

pd_read = timeit(
    # Positional order is (stmt, setup): the import runs once in setup,
    # and only the read_csv call is repeated and timed.
    'pd.read_csv("Phones_accelerometer.csv")',
    'import pandas as pd',
    number=read_test_number,
)

'''Pandas ran the process {read_test_number} times in {total_time} seconds, for an average of {avg_seconds} seconds'''.format_map(
    # Cell result: total and per-run read time, both rounded to 2 decimals.
    {
        'read_test_number': read_test_number,
        'total_time': round(pd_read, 2),
        'avg_seconds': round(pd_read / read_test_number, 2),
    }
)


'Pandas ran the process 150 times in 1497.62 seconds, for an average of 9.98 seconds'

### With Polars

In [3]:
pl_read = timeit(
    # Positional order is (stmt, setup): the import runs once in setup,
    # and only the read_csv call is repeated and timed.
    'pl.read_csv("Phones_accelerometer.csv")',
    'import polars as pl',
    number=read_test_number,
)

'''Polars ran the process {read_test_number} times in {total_time} seconds, for an average of {avg_seconds} seconds'''.format_map(
    # Cell result: total and per-run read time, both rounded to 2 decimals.
    {
        'read_test_number': read_test_number,
        'total_time': round(pl_read, 2),
        'avg_seconds': round(pl_read / read_test_number, 2),
    }
)

'Polars ran the process 150 times in 321.35 seconds, for an average of 2.14 seconds'

### How much faster is Polars than Pandas? (Pandas time / Polars time)

In [4]:
pd_read / pl_read  # total pandas read time over total polars read time; > 1 means polars was faster

4.660415146896333

## Filtering performance difference 

### With Pandas

In [5]:
pd_filter_setup = '''
import pandas as pd
pd_acc_data = pd.read_csv("Phones_accelerometer.csv")
'''
pd_filter = timeit(
    # The CSV is loaded once in setup, so only the boolean-mask filter
    # on the 'Device' column is repeated and timed.
    setup=pd_filter_setup,
    stmt="pd_acc_data[pd_acc_data['Device'] == 'samsungold_2']",
    # Use the filter repetition count so the number of timed runs matches
    # the count that is reported and used for the average below.
    number=filter_test_number,
)

'''Pandas ran the process {filter_test_number} times in {total_time} seconds, for an average of {avg_seconds} seconds'''.format(
    # Cell result: total and per-run filter time, both rounded to 2 decimals.
    filter_test_number=filter_test_number,
    total_time=round(pd_filter, 2),
    avg_seconds=round(pd_filter / filter_test_number, 2),
)

'Pandas ran the process 150 times in 98.18 seconds, for an average of 0.65 seconds'

### With Polars

In [6]:
pl_filter_setup = '''
import polars as pl
pl_acc_data = pl.read_csv("Phones_accelerometer.csv")
'''

pl_filter = timeit(
    # Positional order is (stmt, setup): the CSV is loaded once in setup,
    # and only the filter expression is repeated and timed.
    "pl_acc_data.filter(pl.col('Device') == 'samsungold_2')",
    pl_filter_setup,
    number=filter_test_number,
)

'''Polars ran the process {filter_test_number} times in {total_time} seconds, for an average of {avg_seconds} seconds'''.format_map(
    # Cell result: total and per-run filter time, both rounded to 2 decimals.
    {
        'filter_test_number': filter_test_number,
        'total_time': round(pl_filter, 2),
        'avg_seconds': round(pl_filter / filter_test_number, 2),
    }
)

'Polars ran the process 150 times in 7.22 seconds, for an average of 0.05 seconds'

### How much faster is Polars than Pandas? (Pandas time / Polars time)

In [7]:
pd_filter / pl_filter  # total pandas filter time over total polars filter time; meaningful only when both used the same repetition count

13.598406680079346