In [1]:
# setup
from IPython.display import display, HTML
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl

# Relative path to the "data" directory (resolved against the kernel's working directory).
# NOTE(review): not referenced by the cells visible here — confirm whether it is still needed.
data_path = Path("data")

In [9]:
from log_timer_results import log_to_csv

# Two on-disk copies of the same dataset under data/: the parquet path is the
# CSV path with its extension swapped, so the two names cannot drift apart.
data_csv = Path("data") / "python_dev_universe.csv"
data_parquet = data_csv.with_suffix(".parquet")

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_default(datafile=data_csv):
    """Read a CSV with pandas using ``read_csv``'s default settings.

    The first row of the loaded frame is printed as a quick sanity check.
    The call is wrapped by ``log_to_csv`` (from ``log_timer_results``);
    presumably that decorator records timing to ``timer_log_results.csv`` —
    confirm against that module.

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.

    Returns:
        ``True`` (from the undecorated body).
    """
    frame = pd.read_csv(datafile)
    first_row = frame.head(1)
    print(first_row)
    return True

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_engine_c(datafile=data_csv):
    """Read a CSV with pandas, explicitly selecting the ``"c"`` parser engine.

    Prints the first row of the result. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.

    Returns:
        ``True`` (from the undecorated body).
    """
    read_options = {"engine": "c"}
    frame = pd.read_csv(datafile, **read_options)
    print(frame.head(1))
    return True

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_engine_python(datafile=data_csv):
    """Read a CSV with pandas, explicitly selecting the ``"python"`` parser engine.

    Prints the first row of the result. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.

    Returns:
        ``True`` (from the undecorated body).
    """
    read_options = {"engine": "python"}
    frame = pd.read_csv(datafile, **read_options)
    print(frame.head(1))
    return True

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_default_dtype_numpy(datafile=data_csv, dtype_backend="numpy_nullable"):
    """Read a CSV with pandas' default engine and an explicit dtype backend.

    Prints the first row of the loaded frame. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.
        dtype_backend: Value forwarded to ``pd.read_csv(dtype_backend=...)``.
            Defaults to ``"numpy_nullable"``.

    Returns:
        ``True`` (from the undecorated body).
    """
    # dtype_backend must be forwarded explicitly; previously it was accepted but
    # ignored, so this case silently measured the plain default read.
    df = pd.read_csv(datafile, dtype_backend=dtype_backend)
    print(df.head(1))
    return True

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_engine__c_dtype_numpy(datafile=data_csv):
    """Read a CSV with pandas using the ``"c"`` engine and ``"numpy_nullable"`` dtypes.

    Prints the first row of the result. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.

    Returns:
        ``True`` (from the undecorated body).
    """
    read_options = {"engine": "c", "dtype_backend": "numpy_nullable"}
    frame = pd.read_csv(datafile, **read_options)
    print(frame.head(1))
    return True

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_engine_python__dtype_numpy(datafile=data_csv):
    """Read a CSV with pandas using the ``"python"`` engine and ``"numpy_nullable"`` dtypes.

    Prints the first row of the result. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.

    Returns:
        ``True`` (from the undecorated body).
    """
    read_options = {"engine": "python", "dtype_backend": "numpy_nullable"}
    frame = pd.read_csv(datafile, **read_options)
    print(frame.head(1))
    return True

    
@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_engine_pyarrow(datafile=data_csv, engine="pyarrow"):
    """Read a CSV with pandas using the requested parser engine (pyarrow by default).

    Prints the first row of the loaded frame. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.
        engine: Value forwarded to ``pd.read_csv(engine=...)``. Defaults to
            ``"pyarrow"``.

    Returns:
        ``True`` (from the undecorated body).
    """
    # engine must be forwarded explicitly; previously it was accepted but ignored,
    # so the "pyarrow" case actually ran pandas' default parser.
    df = pd.read_csv(datafile, engine=engine)
    print(df.head(1))
    return True

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_engine_pyarrow_dtype_numpy(datafile=data_csv, engine="pyarrow", dtype_backend="numpy_nullable"):
    """Read a CSV with pandas using the pyarrow engine and numpy-nullable dtypes.

    Prints the first row of the loaded frame. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.
        engine: Value forwarded to ``pd.read_csv(engine=...)``. Defaults to
            ``"pyarrow"``.
        dtype_backend: Value forwarded to ``pd.read_csv(dtype_backend=...)``.
            Defaults to ``"numpy_nullable"``.

    Returns:
        ``True`` (from the undecorated body).
    """
    # Both options must be forwarded explicitly; previously they were accepted
    # but ignored, so this case measured a plain default read_csv call.
    df = pd.read_csv(datafile, engine=engine, dtype_backend=dtype_backend)
    print(df.head(1))
    return True
    
@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_csv_engine_pyarrow_dtype_pyarrow(datafile=data_csv, engine="pyarrow", dtype_backend="pyarrow"):
    """Read a CSV with pandas using the pyarrow engine and pyarrow-backed dtypes.

    Prints the first row of the loaded frame. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.
        engine: Value forwarded to ``pd.read_csv(engine=...)``. Defaults to
            ``"pyarrow"``.
        dtype_backend: Value forwarded to ``pd.read_csv(dtype_backend=...)``.
            Defaults to ``"pyarrow"``.

    Returns:
        ``True`` (from the undecorated body).
    """
    # Both options must be forwarded explicitly; previously they were accepted
    # but ignored, so this case measured a plain default read_csv call.
    df = pd.read_csv(datafile, engine=engine, dtype_backend=dtype_backend)
    print(df.head(1))
    return True
    
@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def pandas_read_parquet(datafile=data_parquet):
    """Read a parquet file with pandas using ``read_parquet``'s default settings.

    Prints the first row of the result. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the parquet file to load. Defaults to ``data_parquet``.

    Returns:
        ``True`` (from the undecorated body).
    """
    frame = pd.read_parquet(datafile)
    first_row = frame.head(1)
    print(first_row)
    return True


In [10]:
# Invoke every pandas loader once with its default arguments. Each call prints
# the first row of the frame it loaded; the bare final call's return value is
# what the notebook shows as this cell's output.
pandas_read_csv_default()
pandas_read_csv_engine_c()
pandas_read_csv_engine_python()
pandas_read_csv_default_dtype_numpy()
pandas_read_csv_engine__c_dtype_numpy()
pandas_read_csv_engine_python__dtype_numpy()
pandas_read_csv_engine_pyarrow()
pandas_read_csv_engine_pyarrow_dtype_numpy()
pandas_read_csv_engine_pyarrow_dtype_pyarrow()
pandas_read_parquet()

  customer_id  age  their_lucky_number     occupation psf_membership_status  \
0   A43321819   47                   0  Data Engineer          Contributing   

     education date_started_python  
0  High School          1924-07-20  
  customer_id  age  their_lucky_number     occupation psf_membership_status  \
0   A43321819   47                   0  Data Engineer          Contributing   

     education date_started_python  
0  High School          1924-07-20  
  customer_id  age  their_lucky_number     occupation psf_membership_status  \
0   A43321819   47                   0  Data Engineer          Contributing   

     education date_started_python  
0  High School          1924-07-20  
  customer_id  age  their_lucky_number     occupation psf_membership_status  \
0   A43321819   47                   0  Data Engineer          Contributing   

     education date_started_python  
0  High School          1924-07-20  
  customer_id  age  their_lucky_number     occupation psf_membership

True

In [11]:
@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def polars_read_csv(datafile=data_csv):
    """Read a CSV with polars using ``read_csv``'s default settings.

    Prints the first row of the result. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the CSV file to load. Defaults to ``data_csv``.

    Returns:
        ``True`` (from the undecorated body).
    """
    frame = pl.read_csv(source=datafile)
    first_row = frame.head(1)
    print(first_row)
    return True

@log_to_csv(filename="timer_log_results.csv", buffer_size=10, flush_on_return=True)
def polars_read_parquet(datafile=data_parquet):
    """Read a parquet file with polars using ``read_parquet``'s default settings.

    Prints the first row of the result. Wrapped by ``log_to_csv``
    (presumably a timing logger — confirm in ``log_timer_results``).

    Args:
        datafile: Path of the parquet file to load. Defaults to ``data_parquet``.

    Returns:
        ``True`` (from the undecorated body).
    """
    frame = pl.read_parquet(source=datafile)
    first_row = frame.head(1)
    print(first_row)
    return True

In [13]:
# Invoke both polars loaders once with their default arguments. Each call prints
# the first row it loaded; the bare final call's return value is the cell output.
polars_read_csv()
polars_read_parquet()

shape: (1, 7)
┌─────────────┬─────┬────────────────┬───────────────┬───────────────┬─────────────┬───────────────┐
│ customer_id ┆ age ┆ their_lucky_nu ┆ occupation    ┆ psf_membershi ┆ education   ┆ date_started_ │
│ ---         ┆ --- ┆ mber           ┆ ---           ┆ p_status      ┆ ---         ┆ python        │
│ str         ┆ i64 ┆ ---            ┆ str           ┆ ---           ┆ str         ┆ ---           │
│             ┆     ┆ i64            ┆               ┆ str           ┆             ┆ str           │
╞═════════════╪═════╪════════════════╪═══════════════╪═══════════════╪═════════════╪═══════════════╡
│ A43321819   ┆ 47  ┆ 0              ┆ Data Engineer ┆ Contributing  ┆ High School ┆ 1924-07-20    │
└─────────────┴─────┴────────────────┴───────────────┴───────────────┴─────────────┴───────────────┘
shape: (1, 7)
┌─────────────┬─────┬────────────────┬───────────────┬───────────────┬─────────────┬───────────────┐
│ customer_id ┆ age ┆ their_lucky_nu ┆ occupation    ┆ psf_memb

True