# Comparing pandas with cudf

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in names)
    for full_path in full_paths:
        print(full_path)

In [None]:
from time import time
import cudf

In [None]:
# Benchmark: load the same CSV file once with pandas and once with cudf,
# print each wall-clock duration, then report which library was faster.
# NOTE(review): each figure is a single un-repeated measurement; the first
# cudf call may include one-off start-up cost -- confirm with repeated runs.
csv_path = '../input/tabular-playground-series-jun-2022/data.csv'

# READ CSV USING PANDAS
t_start = time()
pandas_df = pd.read_csv(csv_path)
pd_duration = time() - t_start
print(f'reading csv dataframe using pandas done in {pd_duration:.2f} seconds')

# READ CSV USING CUDF
t_start = time()
cudg_df = cudf.read_csv(csv_path)
cudf_duration = time() - t_start
print(f'reading csv dataframe using cudf done in {cudf_duration:.2f} seconds')

# COMPARE RESULT
# The speed-up factor is always (slower time / faster time), so it is >= 1.
# The faster time is guarded against 0.0 (coarse clock resolution) to avoid
# a ZeroDivisionError. Nothing is printed when both durations are equal.
if cudf_duration > pd_duration:
    speedup = cudf_duration / pd_duration if pd_duration > 0 else float('inf')
    print(f'PANDAS IS {speedup:.1f} TIMES FASTER THAN CUDF IN READING CSV FILE FOR DATAFRAME!')
elif cudf_duration < pd_duration:
    speedup = pd_duration / cudf_duration if cudf_duration > 0 else float('inf')
    print(f'CUDF IS {speedup:.1f} TIMES FASTER THAN PANDAS IN READING CSV FILE FOR DATAFRAME!')

In [None]:
# Benchmark: copy the previously loaded pandas and cudf dataframes with
# their respective .copy() methods, print each wall-clock duration, then
# report which library was faster.
# NOTE(review): each figure is a single un-repeated measurement.

# COPY DATAFRAME USING PANDAS
t_start = time()
pandas_df_2 = pandas_df.copy()
pd_duration = time() - t_start
print(f'copy dataframe using pandas done in {pd_duration:.2f} seconds')

# COPY DATAFRAME USING CUDF
t_start = time()
cudg_df_2 = cudg_df.copy()
cudf_duration = time() - t_start
print(f'copy dataframe using cudf done in {cudf_duration:.2f} seconds')

# COMPARE RESULT
# The speed-up factor is always (slower time / faster time), so it is >= 1.
# The faster time is guarded against 0.0 (coarse clock resolution) to avoid
# a ZeroDivisionError. Nothing is printed when both durations are equal.
if cudf_duration > pd_duration:
    speedup = cudf_duration / pd_duration if pd_duration > 0 else float('inf')
    print(f'PANDAS IS {speedup:.1f} TIMES FASTER THAN CUDF IN COPYING DATAFRAME!')
elif cudf_duration < pd_duration:
    speedup = pd_duration / cudf_duration if cudf_duration > 0 else float('inf')
    print(f'CUDF IS {speedup:.1f} TIMES FASTER THAN PANDAS IN COPYING DATAFRAME!')

# [Info about cudf and more.](https://docs.rapids.ai/api)