In [2]:
import polars as pl

In [3]:
import datetime as dt

In [32]:
df= pl.DataFrame(
    {
        "name": ["alice","christopher","chole"],
        "birthdate": [
            dt.date(1997,1,15),
            dt.date(1996,10,12),
            dt.date(1997,12,10),
        ],
        "weight": [57.6,56.5,49.3],
        "height": [1.56,1.68,1.57],  # converted to meters

    }
)

print(df)

shape: (3, 4)
┌─────────────┬────────────┬────────┬────────┐
│ name        ┆ birthdate  ┆ weight ┆ height │
│ ---         ┆ ---        ┆ ---    ┆ ---    │
│ str         ┆ date       ┆ f64    ┆ f64    │
╞═════════════╪════════════╪════════╪════════╡
│ alice       ┆ 1997-01-15 ┆ 57.6   ┆ 1.56   │
│ christopher ┆ 1996-10-12 ┆ 56.5   ┆ 1.68   │
│ chole       ┆ 1997-12-10 ┆ 49.3   ┆ 1.57   │
└─────────────┴────────────┴────────┴────────┘


In [5]:
df.write_csv("./output.csv")

In [6]:
df_csv=pl.read_csv("./output.csv",try_parse_dates=True)
print(df_csv)

shape: (3, 4)
┌─────────────┬────────────┬────────┬────────┐
│ name        ┆ birthdate  ┆ weight ┆ height │
│ ---         ┆ ---        ┆ ---    ┆ ---    │
│ str         ┆ date       ┆ f64    ┆ i64    │
╞═════════════╪════════════╪════════╪════════╡
│ alice       ┆ 1997-01-15 ┆ 57.6   ┆ 156    │
│ christopher ┆ 1996-10-12 ┆ 56.5   ┆ 168    │
│ chole       ┆ 1997-12-10 ┆ 49.3   ┆ 157    │
└─────────────┴────────────┴────────┴────────┘


In [7]:
pl.col("weight")/(pl.col("height")**2)

In [8]:
result = df.select(
    pl.col("name"),
    pl.col("birthdate").dt.year().alias("birth_year"),
    (pl.col("weight")/(pl.col("height")**2)).alias("bmi"),
)
print(result)

shape: (3, 3)
┌─────────────┬────────────┬──────────┐
│ name        ┆ birth_year ┆ bmi      │
│ ---         ┆ ---        ┆ ---      │
│ str         ┆ i32        ┆ f64      │
╞═════════════╪════════════╪══════════╡
│ alice       ┆ 1997       ┆ 0.002367 │
│ christopher ┆ 1996       ┆ 0.002002 │
│ chole       ┆ 1997       ┆ 0.002    │
└─────────────┴────────────┴──────────┘


In [9]:
result=df.select(
    pl.col("name"),
    (pl.col("weight","height")*0.95).round(2).name.suffix("-5%"),
)

print(result)

shape: (3, 3)
┌─────────────┬───────────┬───────────┐
│ name        ┆ weight-5% ┆ height-5% │
│ ---         ┆ ---       ┆ ---       │
│ str         ┆ f64       ┆ f64       │
╞═════════════╪═══════════╪═══════════╡
│ alice       ┆ 54.72     ┆ 148.2     │
│ christopher ┆ 53.68     ┆ 159.6     │
│ chole       ┆ 46.83     ┆ 149.15    │
└─────────────┴───────────┴───────────┘


In [10]:
result = df.with_columns(
    birth_year=pl.col("birthdate").dt.year(),
    bmi= pl.col("weight")/(pl.col("height")**2),
)
print(result)

shape: (3, 6)
┌─────────────┬────────────┬────────┬────────┬────────────┬──────────┐
│ name        ┆ birthdate  ┆ weight ┆ height ┆ birth_year ┆ bmi      │
│ ---         ┆ ---        ┆ ---    ┆ ---    ┆ ---        ┆ ---      │
│ str         ┆ date       ┆ f64    ┆ i64    ┆ i32        ┆ f64      │
╞═════════════╪════════════╪════════╪════════╪════════════╪══════════╡
│ alice       ┆ 1997-01-15 ┆ 57.6   ┆ 156    ┆ 1997       ┆ 0.002367 │
│ christopher ┆ 1996-10-12 ┆ 56.5   ┆ 168    ┆ 1996       ┆ 0.002002 │
│ chole       ┆ 1997-12-10 ┆ 49.3   ┆ 157    ┆ 1997       ┆ 0.002    │
└─────────────┴────────────┴────────┴────────┴────────────┴──────────┘


In [11]:
result = df.filter(pl.col("birthdate").dt.year()>1996)
print(result)

shape: (2, 4)
┌───────┬────────────┬────────┬────────┐
│ name  ┆ birthdate  ┆ weight ┆ height │
│ ---   ┆ ---        ┆ ---    ┆ ---    │
│ str   ┆ date       ┆ f64    ┆ i64    │
╞═══════╪════════════╪════════╪════════╡
│ alice ┆ 1997-01-15 ┆ 57.6   ┆ 156    │
│ chole ┆ 1997-12-10 ┆ 49.3   ┆ 157    │
└───────┴────────────┴────────┴────────┘


In [17]:
result = df.filter(
    pl.col("birthdate").is_between(dt.date(1982, 12, 31), dt.date(1997, 1, 1)),
    pl.col("height") > 1.5
)
print(result)

shape: (1, 4)
┌─────────────┬────────────┬────────┬────────┐
│ name        ┆ birthdate  ┆ weight ┆ height │
│ ---         ┆ ---        ┆ ---    ┆ ---    │
│ str         ┆ date       ┆ f64    ┆ i64    │
╞═════════════╪════════════╪════════╪════════╡
│ christopher ┆ 1996-10-12 ┆ 56.5   ┆ 168    │
└─────────────┴────────────┴────────┴────────┘


In [13]:
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    maintain_order=True,
).len()
print(result)

shape: (1, 2)
┌────────┬─────┐
│ decade ┆ len │
│ ---    ┆ --- │
│ i32    ┆ u32 │
╞════════╪═════╡
│ 1990   ┆ 3   │
└────────┴─────┘


In [19]:
result = df.group_by(
    (pl.col("birthdate").dt.year()//10*10).alias("decade"),
    maintain_order=True,
).agg(
    pl.len().alias("sample_size"),
    pl.col("weight").mean().round(2).alias("avg_weight"),
    pl.col("height").max().alias("tallest"),
)
print(result)

shape: (1, 4)
┌────────┬─────────────┬────────────┬─────────┐
│ decade ┆ sample_size ┆ avg_weight ┆ tallest │
│ ---    ┆ ---         ┆ ---        ┆ ---     │
│ i32    ┆ u32         ┆ f64        ┆ i64     │
╞════════╪═════════════╪════════════╪═════════╡
│ 1990   ┆ 3           ┆ 54.47      ┆ 168     │
└────────┴─────────────┴────────────┴─────────┘


In [20]:
result=(
    df.with_columns(
        (pl.col("birthdate").dt.year()//100).alias("decade"),
        pl.col("name").str.split(by=" ").list.first(),
    )
    .select(
        pl.all().exclude("birthdate"),
    )
    .group_by(
        pl.col("decade"),
        maintain_order = True,
    )
    .agg(
        pl.col("name"),
        pl.col("weight","height").mean().round(2).name.prefix("avg_")
    )

)

print(result)

shape: (1, 4)
┌────────┬─────────────────────────────────┬────────────┬────────────┐
│ decade ┆ name                            ┆ avg_weight ┆ avg_height │
│ ---    ┆ ---                             ┆ ---        ┆ ---        │
│ i32    ┆ list[str]                       ┆ f64        ┆ f64        │
╞════════╪═════════════════════════════════╪════════════╪════════════╡
│ 19     ┆ ["alice", "christopher", "chol… ┆ 54.47      ┆ 160.33     │
└────────┴─────────────────────────────────┴────────────┴────────────┘


In [25]:
df2 = pl.DataFrame(
    {
        "name": ["Ben Brown", "Daniel Donovan", "alice", "chole"],
        "parent": [True, False, False, False],
        "siblings": [1, 2, 3, 4],
    }
)

In [26]:
print(df.join(df2, on = "name", how="left"))

shape: (3, 6)
┌─────────────┬────────────┬────────┬────────┬────────┬──────────┐
│ name        ┆ birthdate  ┆ weight ┆ height ┆ parent ┆ siblings │
│ ---         ┆ ---        ┆ ---    ┆ ---    ┆ ---    ┆ ---      │
│ str         ┆ date       ┆ f64    ┆ i64    ┆ bool   ┆ i64      │
╞═════════════╪════════════╪════════╪════════╪════════╪══════════╡
│ alice       ┆ 1997-01-15 ┆ 57.6   ┆ 156    ┆ false  ┆ 3        │
│ christopher ┆ 1996-10-12 ┆ 56.5   ┆ 168    ┆ null   ┆ null     │
│ chole       ┆ 1997-12-10 ┆ 49.3   ┆ 157    ┆ false  ┆ 4        │
└─────────────┴────────────┴────────┴────────┴────────┴──────────┘


In [27]:
df3 = pl.DataFrame(
    {
        "name": ["Ethan Edwards", "Fiona Foster", "Grace Gibson", "Henry Harris"],
        "birthdate": [
            dt.date(1977, 5, 10),
            dt.date(1975, 6, 23),
            dt.date(1973, 7, 22),
            dt.date(1971, 8, 3),
        ],
        "weight": [67.9, 72.5, 57.6, 93.1],  # (kg)
        "height": [1.76, 1.6, 1.66, 1.8],  # (m)
    }
)



In [35]:
print(pl.concat([df, df3], how="vertical"))

shape: (7, 4)
┌───────────────┬────────────┬────────┬────────┐
│ name          ┆ birthdate  ┆ weight ┆ height │
│ ---           ┆ ---        ┆ ---    ┆ ---    │
│ str           ┆ date       ┆ f64    ┆ f64    │
╞═══════════════╪════════════╪════════╪════════╡
│ alice         ┆ 1997-01-15 ┆ 57.6   ┆ 1.56   │
│ christopher   ┆ 1996-10-12 ┆ 56.5   ┆ 1.68   │
│ chole         ┆ 1997-12-10 ┆ 49.3   ┆ 1.57   │
│ Ethan Edwards ┆ 1977-05-10 ┆ 67.9   ┆ 1.76   │
│ Fiona Foster  ┆ 1975-06-23 ┆ 72.5   ┆ 1.6    │
│ Grace Gibson  ┆ 1973-07-22 ┆ 57.6   ┆ 1.66   │
│ Henry Harris  ┆ 1971-08-03 ┆ 93.1   ┆ 1.8    │
└───────────────┴────────────┴────────┴────────┘


In [31]:
print("df schema:", df.schema)
print("df3 schema:", df3.schema)

df schema: Schema([('name', String), ('birthdate', Date), ('weight', Float64), ('height', Int64)])
df3 schema: Schema([('name', String), ('birthdate', Date), ('weight', Float64), ('height', Float64)])


In [37]:
s =pl.Series("ints",[1,2,3,4,5])
print(s)

shape: (5,)
Series: 'ints' [i64]
[
	1
	2
	3
	4
	5
]


In [40]:
from datetime import date

df = pl.DataFrame(
    {
        "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        "birthdate": [
            date(1997, 1, 10),
            date(1985, 2, 15),
            date(1983, 3, 22),
            date(1981, 4, 30),
        ],
        "weight": [57.9, 72.5, 53.6, 83.1],  # (kg)
        "height": [1.56, 1.77, 1.65, 1.75],  # (m)
    }
)

print(df)

shape: (4, 4)
┌────────────────┬────────────┬────────┬────────┐
│ name           ┆ birthdate  ┆ weight ┆ height │
│ ---            ┆ ---        ┆ ---    ┆ ---    │
│ str            ┆ date       ┆ f64    ┆ f64    │
╞════════════════╪════════════╪════════╪════════╡
│ Alice Archer   ┆ 1997-01-10 ┆ 57.9   ┆ 1.56   │
│ Ben Brown      ┆ 1985-02-15 ┆ 72.5   ┆ 1.77   │
│ Chloe Cooper   ┆ 1983-03-22 ┆ 53.6   ┆ 1.65   │
│ Daniel Donovan ┆ 1981-04-30 ┆ 83.1   ┆ 1.75   │
└────────────────┴────────────┴────────┴────────┘
