## Getting Started

In [15]:
import polars as pl
import datetime as dt

In [16]:
# Small demo table: one row per person, columns given as a dict of lists.
df = pl.DataFrame(
    {
        "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        "birthdate": [
            dt.date(year=1997, month=1, day=10),
            dt.date(year=1985, month=2, day=15),
            dt.date(year=1983, month=3, day=22),
            dt.date(year=1981, month=4, day=30),
        ],
        "weight": [57.9, 72.5, 53.6, 83.1],  # kilograms
        "height": [1.56, 1.77, 1.65, 1.75],  # metres
    }
)

# A bare trailing expression is enough for the notebook to render the frame.
df

name,birthdate,weight,height
str,date,f64,f64
"""Alice Archer""",1997-01-10,57.9,1.56
"""Ben Brown""",1985-02-15,72.5,1.77
"""Chloe Cooper""",1983-03-22,53.6,1.65
"""Daniel Donovan""",1981-04-30,83.1,1.75


In [17]:
# Persist the frame as CSV in the current working directory.
df.write_csv(file="output.csv")

In [3]:
# Reload the CSV written above; try_parse_dates asks polars to parse
# date-like text columns instead of leaving them as strings.
df = pl.read_csv(source="output.csv", try_parse_dates=True)

In [4]:
# Keep the name and derive two columns; keyword arguments name the outputs.
df.select(
    pl.col("name"),
    birth_year=pl.col("birthdate").dt.year(),
    # BMI = weight / height squared
    bmi=pl.col("weight") / pl.col("height").pow(2),
)

name,birth_year,bmi
str,i32,f64
"""Alice Archer""",1997,23.791913
"""Ben Brown""",1985,23.141498
"""Chloe Cooper""",1983,19.687787
"""Daniel Donovan""",1981,27.134694


In [5]:
# Apply one expression to several columns at once: scale weight and height
# to 95%, round to two decimals, and tag each output name with "-5%".
df.select(
    pl.col("name"),
    pl.col(["weight", "height"]).mul(0.95).round(2).name.suffix("-5%"),
)

name,weight-5%,height-5%
str,f64,f64
"""Alice Archer""",55.01,1.48
"""Ben Brown""",68.88,1.68
"""Chloe Cooper""",50.92,1.57
"""Daniel Donovan""",78.94,1.66


In [6]:
# with_columns keeps every existing column and appends the derived ones.
df.with_columns(
    pl.col("birthdate").dt.year().alias("birth_year"),
    # `**` binds tighter than `/`, so this is weight / (height squared).
    (pl.col("weight") / pl.col("height") ** 2).alias("bmi"),
)

name,birthdate,weight,height,birth_year,bmi
str,date,f64,f64,i32,f64
"""Alice Archer""",1997-01-10,57.9,1.56,1997,23.791913
"""Ben Brown""",1985-02-15,72.5,1.77,1985,23.141498
"""Chloe Cooper""",1983-03-22,53.6,1.65,1983,19.687787
"""Daniel Donovan""",1981-04-30,83.1,1.75,1981,27.134694


In [7]:
# Keep only the rows whose birth year is strictly before 1990.
df.filter(pl.col("birthdate").dt.year().lt(1990))

name,birthdate,weight,height
str,date,f64,f64
"""Ben Brown""",1985-02-15,72.5,1.77
"""Chloe Cooper""",1983-03-22,53.6,1.65
"""Daniel Donovan""",1981-04-30,83.1,1.75


In [9]:
# Both conditions must hold: birthdate inside the (inclusive) window
# and height above 1.7.
df.filter(
    pl.col("birthdate").is_between(
        pl.date(1982, 12, 31),
        pl.date(1996, 1, 1),
        closed="both",
    )
    & (pl.col("height") > 1.7)
)

name,birthdate,weight,height
str,date,f64,f64
"""Ben Brown""",1985-02-15,72.5,1.77


In [18]:
# Count rows per birth decade (year floored to a multiple of 10).
# maintain_order=True keeps the groups in the order they first appear.
(
    df
    .group_by(
        (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
        maintain_order=True,
    )
    .agg(pl.len())
)

decade,len
i32,u32
1990,1
1980,3


In [19]:
# Per-decade summary: group size, mean weight (2 decimals), maximum height.
# Keyword arguments to agg() name the output columns.
(
    df
    .group_by(
        (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
        maintain_order=True,
    )
    .agg(
        sample_size=pl.len(),
        avg_weight=pl.col("weight").mean().round(2),
        tallest=pl.col("height").max(),
    )
)

decade,sample_size,avg_weight,tallest
i32,u32,f64,f64
1990,1,57.9,1.56
1980,3,69.73,1.77


## More Complex Queries