In [1]:
from datetime import datetime

import numpy as np
import polars as pl
from pathlib import Path
from polars import col, lit
import pandas as pd
from pandas.io.common import get_handle

In [4]:
# Show the library versions this notebook was executed with, one per line.
print(f'polars:{pl.__version__}', f'pandas: {pd.__version__}', sep='\n')

polars:0.17.14
pandas: 2.0.1


In [11]:
# Eager execution: the CSV is parsed immediately and kept in memory as `edf`.
# has_header=False tells polars not to treat the first line as column names,
# so the columns are auto-named column_1, column_2, ... (see the outputs below).
edf = pl.read_csv(
    "data/star2002-full.csv",
    has_header=False,
)

In [12]:
# Keep the rows whose column_1 equals 1, project column_9 only,
# and display the first few rows of the result.
(
    edf
    .filter(col("column_1") == 1)
    .select("column_9")
    .head()
)

column_9
i64
654
61
7
27
1


In [15]:
# Lazy counterpart of the eager read: `ldf` describes a query over the CSV;
# the cells that use it call .collect() to actually run the query.
ldf = pl.scan_csv(
    "data/star2002-full.csv",
    has_header=False,
)

In [16]:
# Build the query lazily (column_1 == 1, keep column_9, first few rows);
# .collect() executes it and returns the resulting DataFrame.
(
    ldf
    .filter(col("column_1") == 1)
    .select("column_9")
    .head()
    .collect()
)

column_9
i64
654
61
7
27
1


In [18]:
# Index by a single integer row position (zero-based).
# As the output shows, the result is a one-row DataFrame, not a scalar or tuple.
edf[1]

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16
i64,i64,i64,f64,i64,i64,i64,i64,i64,i64,f64,f64,i64,f64,f64,f64
1,1613423,808,20011000.0,1613424,886,0,0,61,371,20011000.0,23.326479,2288071,-0.24733,0.455916,57.810596


In [19]:
# A list of row positions also selects rows; with the single position 1 the
# displayed result is the same one-row DataFrame that edf[1] produces.
edf[[1]]

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16
i64,i64,i64,f64,i64,i64,i64,i64,i64,i64,f64,f64,i64,f64,f64,f64
1,1613423,808,20011000.0,1613424,886,0,0,61,371,20011000.0,23.326479,2288071,-0.24733,0.455916,57.810596


In [20]:
# (row position, column name) indexing returns the single cell value as a bare
# scalar. Note the scalar is printed at full precision, whereas the table
# rendering of the same cell elsewhere in this notebook shows it abbreviated.
edf[1, "column_4"]

20011015.222604

In [21]:
edf[1, [2,3]]  # (row position, list of zero-based column positions): a list selector returns a DataFrame, not a scalar

column_3,column_4
i64,f64
808,20011000.0


In [22]:
# Slice rows by position and columns by name: rows 1-3 (the stop index 4 is
# exclusive) and every column from "column_4" through the last column.
edf[1:4, "column_4":]

column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16
f64,i64,i64,i64,i64,i64,i64,f64,f64,i64,f64,f64,f64
20011000.0,1613424,886,0,0,61,371,20011000.0,23.326479,2288071,-0.24733,0.455916,57.810596
20011000.0,1613424,638,0,0,7,121,20011000.0,2.4442992,2288071,-0.390961,0.589534,167.75714
20011000.0,1613424,4259,0,0,1024,1302,20011000.0,9.5218678,2288071,-0.290154,0.446027,8.6443624


In [27]:
# Lazily select the rows where column_6 is 886 AND column_1 is 1, take the
# first few of them, and run the query with .collect().
# A single combined predicate keeps exactly the rows that satisfy both
# conditions; each comparison is parenthesised because `&` binds tighter
# than `==` in Python. The `col` shorthand imported at the top of the
# notebook is used here, matching the other cells.
(
    ldf
    .filter((col("column_6") == 886) & (col("column_1") == 1))
    .head()
    .collect()
)

column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16
i64,i64,i64,f64,i64,i64,i64,i64,i64,i64,f64,f64,i64,f64,f64,f64
1,1613423,808,20011000.0,1613424,886,0,0,61,371,20011000.0,23.326479,2288071,-0.24733,0.455916,57.810596
1,1647391,44306,20011000.0,1647392,886,0,0,105,354,20011000.0,17.984346,2318011,-0.141247,0.313153,56.570755
1,1645260,57473,20011000.0,1645261,886,0,0,20,172,20011000.0,22.391867,2318011,-0.318725,0.457583,171.50362
1,1721465,11764,20011000.0,1721466,886,0,0,63,326,20020000.0,3.2333431,2288073,-0.266839,0.268181,-100.37051
1,1721101,4166,20011000.0,1721102,886,0,0,54,328,20020000.0,8.2055445,2288073,-0.157688,0.434548,-82.486473
