In [1]:
import polars as pl

In [2]:
from datetime import date

# Sample table used throughout the notebook, written one person per row.
# Columns: name, birthdate, weight (kg), height (m).
df = pl.DataFrame(
    [
        ("Alice Archer", date(1997, 1, 10), 57.9, 1.56),
        ("Ben Brown", date(1985, 2, 15), 72.5, 1.77),
        ("Chloe Cooper", date(1983, 3, 22), 53.6, 1.65),
        ("Daniel Donovan", date(1981, 4, 30), 83.1, 1.75),
    ],
    schema=["name", "birthdate", "weight", "height"],
    orient="row",
)

## expressions

In [4]:
# Body-mass index as a reusable expression: weight divided by height squared.
# Building the expression computes nothing yet; printing it shows the expression tree.
bmi_expr = pl.col("weight") / pl.col("height").pow(2)
print(bmi_expr)

[(col("weight")) / (col("height").pow([dyn int: 2]))]


Expressions are lazily evaluated: on their own they compute nothing, and they need a context in order to run.

Polars has four major contexts:
- `select`
- `with_columns`
- `group_by`
- `filter`

## context

### select

In [11]:
# select: the result contains only the listed expressions, in the order given.
# The aggregated mean and the literal 25 are repeated on every row of the output.
result = df.select(
    bmi_expr.alias("bmi"),
    bmi_expr.mean().alias("avg_bmi"),
    pl.lit(25).alias("ideal_max_bmi"),
    pl.col("weight"),
)
print(result)

shape: (4, 4)
┌───────────┬───────────┬───────────────┬────────┐
│ bmi       ┆ avg_bmi   ┆ ideal_max_bmi ┆ weight │
│ ---       ┆ ---       ┆ ---           ┆ ---    │
│ f64       ┆ f64       ┆ i32           ┆ f64    │
╞═══════════╪═══════════╪═══════════════╪════════╡
│ 23.791913 ┆ 23.438973 ┆ 25            ┆ 57.9   │
│ 23.141498 ┆ 23.438973 ┆ 25            ┆ 72.5   │
│ 19.687787 ┆ 23.438973 ┆ 25            ┆ 53.6   │
│ 27.134694 ┆ 23.438973 ┆ 25            ┆ 83.1   │
└───────────┴───────────┴───────────────┴────────┘


### with_columns

In [12]:
# with_columns: the original columns are kept and the new ones are appended after them.
result = df.with_columns(
    bmi_expr.alias("bmi"),
    bmi_expr.mean().alias("avg_bmi"),
    pl.lit(25).alias("ideal_max_bmi"),
)
result

name,birthdate,weight,height,bmi,avg_bmi,ideal_max_bmi
str,date,f64,f64,f64,f64,i32
"""Alice Archer""",1997-01-10,57.9,1.56,23.791913,23.438973,25
"""Ben Brown""",1985-02-15,72.5,1.77,23.141498,23.438973,25
"""Chloe Cooper""",1983-03-22,53.6,1.65,19.687787,23.438973,25
"""Daniel Donovan""",1981-04-30,83.1,1.75,27.134694,23.438973,25


### filter

In [13]:
# filter: keep only the rows that satisfy both conditions
# (birthdate within the given date range AND height above 1.7 m).
result = df.filter(
    pl.col("birthdate").is_between(date(1982, 12, 31), date(1996, 1, 1))
    & (pl.col("height") > 1.7)
)
result

name,birthdate,weight,height
str,date,f64,f64
"""Ben Brown""",1985-02-15,72.5,1.77


### group_by

In [14]:
# group_by: bucket people by birth decade (year floored to a multiple of 10)
# and collect the names that fall into each bucket as a list.
# group_by gives no guarantee about the order in which groups come back, so the
# result is sorted by the key to keep the printed table identical on every run.
result = (
    df.group_by(
        (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    )
    .agg(pl.col("name"))
    .sort("decade")
)
print(result)

shape: (2, 2)
┌────────┬─────────────────────────────────┐
│ decade ┆ name                            │
│ ---    ┆ ---                             │
│ i32    ┆ list[str]                       │
╞════════╪═════════════════════════════════╡
│ 1980   ┆ ["Ben Brown", "Chloe Cooper", … │
│ 1990   ┆ ["Alice Archer"]                │
└────────┴─────────────────────────────────┘
