In [1]:
import polars as pl

df = pl.DataFrame(
    {
        "a": [1, 2, 3],
        "b": [
            datetime(2025, 1, 1),
            datetime(2025, 1, 2),
            datetime(2025, 1, 3),
        ],
        "c": [4.0, 5.0, 6.0],
        "d": ["a", "b", "c"],
    }
)

print(df)

shape: (3, 4)
┌─────┬─────────────────────┬─────┬─────┐
│ a   ┆ b                   ┆ c   ┆ d   │
│ --- ┆ ---                 ┆ --- ┆ --- │
│ i64 ┆ datetime[μs]        ┆ f64 ┆ str │
╞═════╪═════════════════════╪═════╪═════╡
│ 1   ┆ 2025-01-01 00:00:00 ┆ 4.0 ┆ a   │
│ 2   ┆ 2025-01-02 00:00:00 ┆ 5.0 ┆ b   │
│ 3   ┆ 2025-01-03 00:00:00 ┆ 6.0 ┆ c   │
└─────┴─────────────────────┴─────┴─────┘


In [17]:
# help(df)
for t in df.dtypes:
    print(t)

Int64
Datetime(time_unit='us', time_zone=None)
Float64
String


In [20]:
df.select(pl.col("a", "b"))

a,b
i64,datetime[μs]
1,2025-01-01 00:00:00
2,2025-01-02 00:00:00
3,2025-01-03 00:00:00


In [25]:
df.filter(
    pl.col("b").is_between(datetime(2025, 1, 1), datetime(2025, 1, 2)),
)

a,b,c,d
i64,datetime[μs],f64,str
1,2025-01-01 00:00:00,4.0,"""a"""
2,2025-01-02 00:00:00,5.0,"""b"""


In [27]:
df.with_columns(pl.col("c").sum().alias("e"), (pl.col("c") + 42).alias("b+42"))

a,b,c,d,e,b+42
i64,datetime[μs],f64,str,f64,f64
1,2025-01-01 00:00:00,4.0,"""a""",15.0,46.0
2,2025-01-02 00:00:00,5.0,"""b""",15.0,47.0
3,2025-01-03 00:00:00,6.0,"""c""",15.0,48.0


In [28]:
df2 = pl.DataFrame({ "x": range(8), "y": ["A", "A", "A", "B", "B", "C", "X", "X"]})

In [31]:
df2.group_by("y", maintain_order=True).len()

y,len
str,u32
"""A""",3
"""B""",2
"""C""",1
"""X""",2


In [32]:
df2.group_by("y", maintain_order=True).agg(
    pl.col("*").count().alias("count col"),
    pl.col("*").sum().alias("sum col"),
)

y,count col,sum col
str,u32,i64
"""A""",3,3
"""B""",2,7
"""C""",1,5
"""X""",2,13


In [33]:
df2

x,y
i64,str
0,"""A"""
1,"""A"""
2,"""A"""
3,"""B"""
4,"""B"""
5,"""C"""
6,"""X"""
7,"""X"""


In [39]:
df_x = df.with_columns(
    (pl.col("a") * pl.col("c")).alias("a * c")
)
df_x

a,b,c,d,a * c
i64,datetime[μs],f64,str,f64
1,2025-01-01 00:00:00,4.0,"""a""",4.0
2,2025-01-02 00:00:00,5.0,"""b""",10.0
3,2025-01-03 00:00:00,6.0,"""c""",18.0


In [40]:
import numpy as np

In [41]:
df = pl.DataFrame(
    {
        "a": range(8),
        "b": np.random.rand(8),
        "d": [1.0, 2.0, float("nan"), float("nan"), 0.0, -5.0, -42.0, None],
    }
)

df2 = pl.DataFrame(
    {
        "x": range(8),
        "y": ["A", "A", "A", "B", "B", "C", "X", "X"],
    }
)

In [42]:
joined = df.join(df2, left_on="a", right_on="x")
joined

a,b,d,y
i64,f64,f64,str
0,0.709137,1.0,"""A"""
1,0.973489,2.0,"""A"""
2,0.524058,,"""A"""
3,0.328025,,"""B"""
4,0.807074,0.0,"""B"""
5,0.210212,-5.0,"""C"""
6,0.389202,-42.0,"""X"""
7,0.906147,,"""X"""
