In [1]:
from datetime import date

import polars as pl

In [2]:
# Left-hand table for the join examples: one row per person plus a
# "Y"/"N" `has_pet` flag.
df1 = pl.DataFrame(
    {
        "name": ["Tom", "Lisa", "John"],
        "has_pet": ["Y", "N", "Y"],
    }
)
print(df1)

shape: (3, 2)
┌──────┬─────────┐
│ name ┆ has_pet │
│ ---  ┆ ---     │
│ str  ┆ str     │
╞══════╪═════════╡
│ Tom  ┆ Y       │
│ Lisa ┆ N       │
│ John ┆ Y       │
└──────┴─────────┘


In [3]:
# Right-hand table for the join examples; only "Lisa" and "John" also
# appear in df1.
df2_names = ["Lisa", "John", "Vincent", "Mary"]
df2_lucky_numbers = [25, 36, 7, 2]
df2 = pl.DataFrame({"name": df2_names, "lucky_number": df2_lucky_numbers})
print(df2)

shape: (4, 2)
┌─────────┬──────────────┐
│ name    ┆ lucky_number │
│ ---     ┆ ---          │
│ str     ┆ i64          │
╞═════════╪══════════════╡
│ Lisa    ┆ 25           │
│ John    ┆ 36           │
│ Vincent ┆ 7            │
│ Mary    ┆ 2            │
└─────────┴──────────────┘


## equi joins

In [4]:
# With no `how` argument this is an inner join, i.e. it is equivalent to
# df1.join(df2, on="name", how="inner").
inner_default = df1.join(df2, on="name")
print(inner_default)

shape: (2, 3)
┌──────┬─────────┬──────────────┐
│ name ┆ has_pet ┆ lucky_number │
│ ---  ┆ ---     ┆ ---          │
│ str  ┆ str     ┆ i64          │
╞══════╪═════════╪══════════════╡
│ Lisa ┆ N       ┆ 25           │
│ John ┆ Y       ┆ 36           │
└──────┴─────────┴──────────────┘


In [5]:
# Inner join with the join type spelled out: only names present in both
# frames survive.
inner_explicit = df1.join(df2, on="name", how="inner")
print(inner_explicit)

shape: (2, 3)
┌──────┬─────────┬──────────────┐
│ name ┆ has_pet ┆ lucky_number │
│ ---  ┆ ---     ┆ ---          │
│ str  ┆ str     ┆ i64          │
╞══════╪═════════╪══════════════╡
│ Lisa ┆ N       ┆ 25           │
│ John ┆ Y       ┆ 36           │
└──────┴─────────┴──────────────┘


In [6]:
# Left join: every df1 row is kept; "Tom" has no match in df2, so his
# lucky_number comes back as null.
left_joined = df1.join(df2, on="name", how="left")
print(left_joined)

shape: (3, 3)
┌──────┬─────────┬──────────────┐
│ name ┆ has_pet ┆ lucky_number │
│ ---  ┆ ---     ┆ ---          │
│ str  ┆ str     ┆ i64          │
╞══════╪═════════╪══════════════╡
│ Tom  ┆ Y       ┆ null         │
│ Lisa ┆ N       ┆ 25           │
│ John ┆ Y       ┆ 36           │
└──────┴─────────┴──────────────┘


In [7]:
# Right join (disabled; uncomment to run): keeps every row of df2 and
# leaves has_pet null for names that are missing from df1.
# print(df1.join(df2, on="name", how="right"))

In [8]:
# Full (outer) join (disabled; uncomment to run): keeps the rows of both
# frames and fills the side without a match with null.
# print(df1.join(df2, on="name", how="full"))

In [9]:
# Semi join: keep the df1 rows whose name also occurs in df2, without
# bringing in any df2 columns.
semi_joined = df1.join(df2, on="name", how="semi")
print(semi_joined)

shape: (2, 2)
┌──────┬─────────┐
│ name ┆ has_pet │
│ ---  ┆ ---     │
│ str  ┆ str     │
╞══════╪═════════╡
│ Lisa ┆ N       │
│ John ┆ Y       │
└──────┴─────────┘


In [10]:
# Anti join (disabled; uncomment to run): the complement of the semi join,
# keeping only the df1 rows whose name does not occur in df2.
# print(df1.join(df2, on="name", how="anti"))

In [11]:
# Copy of df2 whose key column is lowercased and renamed to `uname`, so it
# no longer lines up with df1's title-cased `name` column.
uname_expr = pl.col("name").str.to_lowercase().alias("uname")
df3 = df2.select(uname_expr, "lucky_number")
print(df3)

shape: (4, 2)
┌─────────┬──────────────┐
│ uname   ┆ lucky_number │
│ ---     ┆ ---          │
│ str     ┆ i64          │
╞═════════╪══════════════╡
│ lisa    ┆ 25           │
│ john    ┆ 36           │
│ vincent ┆ 7            │
│ mary    ┆ 2            │
└─────────┴──────────────┘


In [12]:
# Joining on differently named key columns (disabled; uncomment to run).
# As written this matches no rows: df1.name is title-cased ("Lisa") while
# df3.uname is lowercase ("lisa"), and string keys compare case-sensitively.
# print(df1.join(df3, left_on="name", right_on="uname"))

In [13]:
# Join keys may be expressions: title-case df3.uname on the fly so that it
# lines up with df1.name.
titlecased_uname = pl.col("uname").str.to_titlecase()
print(df1.join(df3, left_on="name", right_on=titlecased_uname))

shape: (2, 4)
┌──────┬─────────┬───────┬──────────────┐
│ name ┆ has_pet ┆ uname ┆ lucky_number │
│ ---  ┆ ---     ┆ ---   ┆ ---          │
│ str  ┆ str     ┆ str   ┆ i64          │
╞══════╪═════════╪═══════╪══════════════╡
│ Lisa ┆ N       ┆ lisa  ┆ 25           │
│ John ┆ Y       ┆ john  ┆ 36           │
└──────┴─────────┴───────┴──────────────┘


In [14]:
# Same idea from the other side: lowercase df1.name so that it lines up
# with df3.uname.
lowercased_name = pl.col("name").str.to_lowercase()
print(df1.join(df3, left_on=lowercased_name, right_on="uname"))

shape: (2, 4)
┌──────┬─────────┬───────┬──────────────┐
│ name ┆ has_pet ┆ uname ┆ lucky_number │
│ ---  ┆ ---     ┆ ---   ┆ ---          │
│ str  ┆ str     ┆ str   ┆ i64          │
╞══════╪═════════╪═══════╪══════════════╡
│ Lisa ┆ N       ┆ lisa  ┆ 25           │
│ John ┆ Y       ┆ john  ┆ 36           │
└──────┴─────────┴───────┴──────────────┘


### cartesian product

In [15]:
# Cross join (disabled; uncomment to run): no key is given, so every df1
# row is paired with every df2 row (3 x 4 = 12 rows).
# print(df1.join(df2, how="cross"))

## non-equi joins

In [16]:
# Second table of lucky numbers, used as the other side of the non-equi
# joins.
df4 = pl.DataFrame(
    {
        "name": ["Caroline", "Bob"],
        "lucky_number2": [3, 40],
    }
)
print(df4)

shape: (2, 2)
┌──────────┬───────────────┐
│ name     ┆ lucky_number2 │
│ ---      ┆ ---           │
│ str      ┆ i64           │
╞══════════╪═══════════════╡
│ Caroline ┆ 3             │
│ Bob      ┆ 40            │
└──────────┴───────────────┘


In [17]:
# Show df2 again for reference: it is joined against df4 by inequality in
# the next cells.
print(df2)

shape: (4, 2)
┌─────────┬──────────────┐
│ name    ┆ lucky_number │
│ ---     ┆ ---          │
│ str     ┆ i64          │
╞═════════╪══════════════╡
│ Lisa    ┆ 25           │
│ John    ┆ 36           │
│ Vincent ┆ 7            │
│ Mary    ┆ 2            │
└─────────┴──────────────┘


In [18]:
# Non-equi join: pair each df2 row with every df4 row whose lucky_number2
# is strictly greater than the df2 row's lucky_number.
number_is_smaller = pl.col("lucky_number") < pl.col("lucky_number2")
print(df2.join_where(df4, number_is_smaller))

shape: (5, 4)
┌─────────┬──────────────┬────────────┬───────────────┐
│ name    ┆ lucky_number ┆ name_right ┆ lucky_number2 │
│ ---     ┆ ---          ┆ ---        ┆ ---           │
│ str     ┆ i64          ┆ str        ┆ i64           │
╞═════════╪══════════════╪════════════╪═══════════════╡
│ Mary    ┆ 2            ┆ Caroline   ┆ 3             │
│ Mary    ┆ 2            ┆ Bob        ┆ 40            │
│ Vincent ┆ 7            ┆ Bob        ┆ 40            │
│ Lisa    ┆ 25           ┆ Bob        ┆ 40            │
│ John    ┆ 36           ┆ Bob        ┆ 40            │
└─────────┴──────────────┴────────────┴───────────────┘


In [19]:
# Same predicate with the frames swapped: df4 is now the left table, so its
# columns come first and df2's clashing `name` column becomes `name_right`.
smaller_than_number2 = pl.col("lucky_number") < pl.col("lucky_number2")
print(df4.join_where(df2, smaller_than_number2))

shape: (5, 4)
┌──────────┬───────────────┬────────────┬──────────────┐
│ name     ┆ lucky_number2 ┆ name_right ┆ lucky_number │
│ ---      ┆ ---           ┆ ---        ┆ ---          │
│ str      ┆ i64           ┆ str        ┆ i64          │
╞══════════╪═══════════════╪════════════╪══════════════╡
│ Bob      ┆ 40            ┆ John       ┆ 36           │
│ Bob      ┆ 40            ┆ Lisa       ┆ 25           │
│ Bob      ┆ 40            ┆ Vincent    ┆ 7            │
│ Bob      ┆ 40            ┆ Mary       ┆ 2            │
│ Caroline ┆ 3             ┆ Mary       ┆ 2            │
└──────────┴───────────────┴────────────┴──────────────┘


## asof join

In [20]:
# Release history of package A: release dates (ascending) and the version
# published on each date.
package_a_dates = [
    date(2025, 5, 14),
    date(2025, 6, 16),
    date(2025, 7, 29),
    date(2025, 8, 16),
    date(2025, 9, 2),
]
package_a_versions = ["v1.1.2", "v1.2.1", "v1.2.2", "v1.3.1", "v1.4.5"]
package_a = pl.DataFrame(
    {"date": package_a_dates, "version": package_a_versions}
)
print(package_a)

shape: (5, 2)
┌────────────┬─────────┐
│ date       ┆ version │
│ ---        ┆ ---     │
│ date       ┆ str     │
╞════════════╪═════════╡
│ 2025-05-14 ┆ v1.1.2  │
│ 2025-06-16 ┆ v1.2.1  │
│ 2025-07-29 ┆ v1.2.2  │
│ 2025-08-16 ┆ v1.3.1  │
│ 2025-09-02 ┆ v1.4.5  │
└────────────┴─────────┘


In [21]:
# Release history of package B: release dates (ascending) and the version
# published on each date.
package_b_dates = [date(2025, 6, 1), date(2025, 7, 5), date(2025, 8, 19)]
package_b_versions = ["v2.0.1", "v2.0.2", "v2.0.3"]
package_b = pl.DataFrame(
    {"date": package_b_dates, "version": package_b_versions}
)
print(package_b)

shape: (3, 2)
┌────────────┬─────────┐
│ date       ┆ version │
│ ---        ┆ ---     │
│ date       ┆ str     │
╞════════════╪═════════╡
│ 2025-06-01 ┆ v2.0.1  │
│ 2025-07-05 ┆ v2.0.2  │
│ 2025-08-19 ┆ v2.0.3  │
└────────────┴─────────┘


In [22]:
# As-of join, backward strategy: for each package_b release, attach the
# latest package_a release dated on or before it (as `version_right`).
asof_backward = package_b.join_asof(
    package_a,
    on="date",
    strategy="backward",
)
print(asof_backward)

shape: (3, 3)
┌────────────┬─────────┬───────────────┐
│ date       ┆ version ┆ version_right │
│ ---        ┆ ---     ┆ ---           │
│ date       ┆ str     ┆ str           │
╞════════════╪═════════╪═══════════════╡
│ 2025-06-01 ┆ v2.0.1  ┆ v1.1.2        │
│ 2025-07-05 ┆ v2.0.2  ┆ v1.2.1        │
│ 2025-08-19 ┆ v2.0.3  ┆ v1.3.1        │
└────────────┴─────────┴───────────────┘


In [23]:
# coalesce=False keeps the matched package_a date as a separate
# `date_right` column, which shows which release each row was paired with.
asof_with_right_date = package_b.join_asof(
    package_a,
    on="date",
    strategy="backward",
    coalesce=False,
)
print(asof_with_right_date)

shape: (3, 4)
┌────────────┬─────────┬────────────┬───────────────┐
│ date       ┆ version ┆ date_right ┆ version_right │
│ ---        ┆ ---     ┆ ---        ┆ ---           │
│ date       ┆ str     ┆ date       ┆ str           │
╞════════════╪═════════╪════════════╪═══════════════╡
│ 2025-06-01 ┆ v2.0.1  ┆ 2025-05-14 ┆ v1.1.2        │
│ 2025-07-05 ┆ v2.0.2  ┆ 2025-06-16 ┆ v1.2.1        │
│ 2025-08-19 ┆ v2.0.3  ┆ 2025-08-16 ┆ v1.3.1        │
└────────────┴─────────┴────────────┴───────────────┘
