# Combining Expressions

In [1]:
import polars as pl
pl.__version__  # The book is built with Polars version 1.20.0

'1.20.0'

In [2]:
# Load the fruit data set; later cells in this notebook reuse `fruit`.
fruit = pl.read_csv("data/fruit.csv")

# Two conditions joined with `&`: a row is kept only when both hold.
fruit.filter(
    (pl.col("weight") > 1000) & pl.col("is_round")
)

name,weight,color,is_round,origin
str,i64,str,bool,str
"""Cantaloupe""",2500,"""orange""",True,"""Africa"""
"""Watermelon""",5000,"""green""",True,"""Africa"""


## Inline Operators Versus Methods

In [3]:
# Multiply two columns twice: once with the inline `*` operator and once with
# the `Expr.mul()` method, so both results can be compared side by side.
pl.DataFrame(
    {
        "i": [6.0, 0.0, 2.0, 2.5],
        "j": [7.0, 1.0, 2.0, 3.0],
    }
).with_columns(
    (pl.col("i") * pl.col("j")).alias("*"),
    pl.col("i").mul(pl.col("j")).alias("Expr.mul()"),
)

i,j,*,Expr.mul()
f64,f64,f64,f64
6.0,7.0,42.0,42.0
0.0,1.0,0.0,0.0
2.0,2.0,4.0,4.0
2.5,3.0,7.5,7.5


## Arithmetic Operations

In [4]:
# `/` is true division, so the integer `weight` column comes back as floats.
fruit.select("name", pl.col("weight") / 1000)

name,weight
str,f64
"""Avocado""",0.2
"""Banana""",0.12
"""Blueberry""",0.001
"""Cantaloupe""",2.5
"""Cranberry""",0.002
"""Elderberry""",0.001
"""Orange""",0.13
"""Papaya""",1.0
"""Peach""",0.15
"""Watermelon""",5.0


In [5]:
# Show floats with two decimals and right-align numeric cells. These settings
# are applied globally and stay in effect until they are reset.
pl.Config.set_float_precision(2)
pl.Config.set_tbl_cell_numeric_alignment("RIGHT")

pl.DataFrame(
    {
        "i": [0.0, 2.0, 2.0, -2.0, -2.0],
        "j": [1, 2, 3, 4, -5],
    }
).with_columns(
    (pl.col("i") + pl.col("j")).alias("i + j"),  # addition
    (pl.col("i") - pl.col("j")).alias("i - j"),  # subtraction
    (pl.col("i") * pl.col("j")).alias("i * j"),  # multiplication
    (pl.col("i") / pl.col("j")).alias("i / j"),  # true division
    (pl.col("i") // pl.col("j")).alias("i // j"),  # floor division
    (pl.col("i") ** pl.col("j")).alias("i ** j"),  # exponentiation
    (pl.col("j") % 2).alias("j % 2"),  # modulo
    # The dot product is one scalar, repeated on every row of the result.
    pl.col("i").dot(pl.col("j")).alias("i ⋅ j"),
)

i,j,i + j,i - j,i * j,i / j,i // j,i ** j,j % 2,i ⋅ j
f64,i64,f64,f64,f64,f64,f64,f64,i64,f64
0.0,1,1.0,-1.0,0.0,0.0,0.0,0.0,1,12.0
2.0,2,4.0,0.0,4.0,1.0,1.0,4.0,0,12.0
2.0,3,5.0,-1.0,6.0,0.67,0.0,8.0,1,12.0
-2.0,4,2.0,-6.0,-8.0,-0.5,-1.0,16.0,0,12.0
-2.0,-5,-7.0,3.0,10.0,0.4,0.0,-0.03,1,12.0


In [6]:
# Undo the display tweaks made for the arithmetic table: passing None puts
# each setting back to its default.
pl.Config.set_float_precision(None)
pl.Config.set_tbl_cell_numeric_alignment(None)

polars.config.Config

## Comparison Operations

In [7]:
# Method spelling of `pl.lit("a") > pl.lit("b")`.
pl.select(pl.lit("a").gt(pl.lit("b")))

literal
bool
False


In [8]:
# Keep the fruits weighing at least 1000, then show only name and weight.
fruit.filter(pl.col("weight") >= 1000).select("name", "weight")

name,weight
str,i64
"""Cantaloupe""",2500
"""Papaya""",1000
"""Watermelon""",5000


In [9]:
x = 4
# Plain Python lets comparisons be chained; this reads as `3 < x and x < 5`.
3 < x < 5

True

In [10]:
# Unlike plain Python values, Polars expressions cannot be chained as
# `a < b < c`. Python evaluates the chain as `(a < b) and (b < c)`, which asks
# the first expression for a truth value, and an expression refuses to give
# one. The error is caught and shown so the notebook still runs top to bottom.
try:
    pl.select(pl.lit(3) < pl.lit(x) < pl.lit(5))
except TypeError as err:
    print(f"TypeError: {err}")

In [11]:
# Spell out both halves of the range check and join them with `&`.
pl.select(
    (pl.lit(3) < pl.lit(x))
    & (pl.lit(x) < pl.lit(5))
).item()

True

In [12]:
# `is_between()` includes both bounds by default; `closed="none"` makes it a
# strict check, i.e. the same condition as `3 < x < 5`.
pl.select(pl.lit(x).is_between(3, 5, closed="none")).item()

True

In [13]:
# Compare columns with each other and with constants.
pl.DataFrame(
    {
        "a": [-273.15, 0.0, 42.0, 100.0],
        "b": [1.4142, 2.7183, 42.0, 3.1415],
    }
).with_columns(
    (pl.col("a") == pl.col("b")).alias("a == b"),
    (pl.col("a") <= pl.col("b")).alias("a <= b"),
    # Applied to every column; the suffix gives each result its own name.
    (pl.all() > 0).name.suffix(" > 0"),
    # Approximate equality: the distance to √2 must stay below a tolerance.
    ((pl.col("b") - pl.lit(2).sqrt()).abs() < 1e-3).alias("b ≈ √2"),
    # A range check is written as two comparisons joined with `&`.
    ((1 < pl.col("b")) & (pl.col("b") < 3)).alias("1 < b < 3"),
)

a,b,a == b,a <= b,a > 0,b > 0,b ≈ √2,1 < b < 3
f64,f64,bool,bool,bool,bool,bool,bool
-273.15,1.4142,False,True,False,True,True,True
0.0,2.7183,False,True,False,True,False,True
42.0,42.0,True,True,True,True,False,False
100.0,3.1415,False,False,True,True,False,False


In [14]:
# One comparison per column, each between a different pair of data types,
# then transposed so that every comparison becomes a row.
pl.select(
    (pl.lit(True) > 0).alias("bool_num"),
    (pl.time(23, 58) > pl.time(0, 0)).alias("time_time"),
    (pl.datetime(1969, 7, 21, 2, 56) < pl.date(1976, 7, 20)).alias(
        "datetime_date"
    ),
    (pl.lit("5") < pl.lit(3).cast(pl.String)).alias("str_num"),
    (pl.datetime(1999, 1, 1).dt.time() != pl.time(0, 0)).alias(
        "datetime_time"
    ),
).transpose(
    include_header=True,
    header_name="comparison",
    column_names=["allowed"],
)

comparison,allowed
str,bool
"""bool_num""",True
"""time_time""",True
"""datetime_date""",True
"""str_num""",False
"""datetime_time""",False


## Boolean Algebra Operations

In [15]:
x = 7
p = pl.lit(3) < pl.lit(x)  # True, because 3 < 7
q = pl.lit(x) < pl.lit(5)  # False, because 7 is not less than 5
# A conjunction holds only when both operands do, so this yields False.
pl.select(p & q).item()

False

In [16]:
# Truth table: one row per combination of p and q.
pl.DataFrame(
    [(True, True), (True, False), (False, True), (False, False)],
    schema=["p", "q"],
    orient="row",
).with_columns(
    (pl.col("p") & pl.col("q")).alias("p & q"),  # AND
    (pl.col("p") | pl.col("q")).alias("p | q"),  # OR
    (~pl.col("p")).alias("~p"),  # NOT
    (pl.col("p") ^ pl.col("q")).alias("p ^ q"),  # XOR
    (~(pl.col("p") & pl.col("q"))).alias("p ↑ q"),  # NAND
    pl.col("p").or_(pl.col("q")).not_().alias("p ↓ q"),  # NOR, method form
)

p,q,p & q,p | q,~p,p ^ q,p ↑ q,p ↓ q
bool,bool,bool,bool,bool,bool,bool,bool
True,True,True,True,False,False,False,False
True,False,False,True,False,True,True,False
False,True,False,True,True,True,True,False
False,False,False,False,True,False,True,True


## Bitwise Operations

In [17]:
# 10 is 0b001010 and 34 is 0b100010; OR-ing them bit by bit gives 0b101010.
pl.select(pl.lit(0b001010) | pl.lit(0b100010)).item()

42

In [18]:
# Both inputs are unsigned 8-bit integers, so `~` flips exactly eight bits.
bits = pl.DataFrame(
    [
        pl.Series("x", [1, 1, 0, 0, 7, 10], dtype=pl.UInt8),
        pl.Series("y", [1, 0, 1, 0, 2, 34], dtype=pl.UInt8),
    ]
).with_columns(
    (pl.col("x") & pl.col("y")).alias("x & y"),  # bitwise AND
    (pl.col("x") | pl.col("y")).alias("x | y"),  # bitwise OR
    (~pl.col("x")).alias("~x"),  # bitwise NOT
    (pl.col("x") ^ pl.col("y")).alias("x ^ y"),  # bitwise XOR
)
bits

x,y,x & y,x | y,~x,x ^ y
u8,u8,u8,u8,u8,u8
1,1,1,1,254,0
1,0,0,1,254,1
0,1,0,1,255,1
0,0,0,0,255,0
7,2,2,7,248,5
10,34,2,42,245,40


In [19]:
# Render every value as an 8-character, zero-padded binary string.
bits.select(
    pl.all().map_elements(
        lambda value: format(value, "08b"),
        return_dtype=pl.String,
    )
)

x,y,x & y,x | y,~x,x ^ y
str,str,str,str,str,str
"""00000001""","""00000001""","""00000001""","""00000001""","""11111110""","""00000000"""
"""00000001""","""00000000""","""00000000""","""00000001""","""11111110""","""00000001"""
"""00000000""","""00000001""","""00000000""","""00000001""","""11111111""","""00000001"""
"""00000000""","""00000000""","""00000000""","""00000000""","""11111111""","""00000000"""
"""00000111""","""00000010""","""00000010""","""00000111""","""11111000""","""00000101"""
"""00001010""","""00100010""","""00000010""","""00101010""","""11110101""","""00101000"""


## Using Functions

In [20]:
# One tuple per scientist: (first name, last name, country).
scientists = pl.DataFrame(
    [
        ("George", "Boole", "England"),
        ("Grace", "Hopper", "United States"),
        ("John", "Tukey", "United States"),
        ("Kurt", "Gödel", "Austria-Hungary"),
        ("Ada", "Lovelace", "England"),
    ],
    schema=["first_name", "last_name", "country"],
    orient="row",
)
scientists

first_name,last_name,country
str,str,str
"""George""","""Boole""","""England"""
"""Grace""","""Hopper""","""United States"""
"""John""","""Tukey""","""United States"""
"""Kurt""","""Gödel""","""Austria-Hungary"""
"""Ada""","""Lovelace""","""England"""


In [21]:
scientists.select(
    # `^.*_name$` selects every column whose name ends in "_name". The `.`
    # before `*` matters: without it, `*` would quantify the `^` anchor
    # instead of "any character".
    concat_list=pl.concat_list(pl.col("^.*_name$")),
    # Pack all columns of a row into a single struct value.
    struct=pl.struct(pl.all()),
)

concat_list,struct
list[str],struct[3]
"[""George"", ""Boole""]","{""George"",""Boole"",""England""}"
"[""Grace"", ""Hopper""]","{""Grace"",""Hopper"",""United States""}"
"[""John"", ""Tukey""]","{""John"",""Tukey"",""United States""}"
"[""Kurt"", ""Gödel""]","{""Kurt"",""Gödel"",""Austria-Hungary""}"
"[""Ada"", ""Lovelace""]","{""Ada"",""Lovelace"",""England""}"


In [22]:
scientists.select(
    # Join all columns of a row into one string, separated by spaces.
    pl.concat_str(pl.all(), separator=" ").alias("concat_str"),
    # Fill the `{}` placeholders, in order, with the given columns.
    pl.format(
        "{}, {} from {}",
        pl.col("last_name"),
        pl.col("first_name"),
        pl.col("country"),
    ).alias("format"),
)

concat_str,format
str,str
"""George Boole England""","""Boole, George from England"""
"""Grace Hopper United States""","""Hopper, Grace from United Stat…"
"""John Tukey United States""","""Tukey, John from United States"""
"""Kurt Gödel Austria-Hungary""","""Gödel, Kurt from Austria-Hunga…"
"""Ada Lovelace England""","""Lovelace, Ada from England"""


In [23]:
# Survey-style preferences: an id plus four yes/no answers per row, extended
# with two row-wise summaries over every column except `id`.
prefs = pl.DataFrame(
    {
        "id": [1, 7, 42, 101, 999],
        "has_pet": [True, False, True, False, True],
        "likes_travel": [False, False, False, False, True],
        "likes_movies": [True, False, True, False, True],
        "likes_books": [False, False, True, True, True],
    }
).with_columns(
    # True only when every answer in the row is True.
    pl.all_horizontal(pl.exclude("id")).alias("all"),
    # True when at least one answer in the row is True.
    pl.any_horizontal(pl.exclude("id")).alias("any"),
)

prefs

id,has_pet,likes_travel,likes_movies,likes_books,all,any
i64,bool,bool,bool,bool,bool,bool
1,True,False,True,False,False,True
7,False,False,False,False,False,False
42,True,False,True,True,False,True
101,False,False,False,True,False,True
999,True,True,True,True,True,True


In [24]:
# Row-wise aggregates over every column, the boolean ones included.
prefs.select(
    pl.sum_horizontal(pl.all()).alias("sum"),
    pl.max_horizontal(pl.all()).alias("max"),
    pl.min_horizontal(pl.all()).alias("min"),
)

sum,max,min
i64,i64,i64
4,1,0
7,7,0
46,42,0
103,101,0
1005,999,1


### When, Then, Otherwise

In [25]:
# Every column whose name starts with "likes_".
likes_columns = pl.col("^likes_.*$")

prefs.select(
    "id",
    # Branches are tried in order; the first condition that holds wins.
    pl.when(pl.all_horizontal(likes_columns))
    .then(pl.lit("Likes everything"))
    .when(pl.any_horizontal(likes_columns))
    .then(pl.lit("Likes something"))
    .otherwise(pl.lit("Likes nothing"))
    .alias("likes_what"),
)

id,likes_what
i64,str
1,"""Likes something"""
7,"""Likes nothing"""
42,"""Likes something"""
101,"""Likes something"""
999,"""Likes everything"""


In [26]:
orders = pl.DataFrame(
    {
        "order_amount": [500, 750, 1200, 800, 1100],
        "status": [
            "Approved",
            "Processing",
            "Processing",
            "Declined",
            "Processing",
        ],
    }
)

# There is no `.otherwise()` branch, so rows that fail the condition get a
# null status instead of keeping their old one.
orders.with_columns(
    pl.when(pl.col("order_amount") > 1000)
    .then(pl.lit("Flagged"))
    .alias("status")
)

order_amount,status
i64,str
500,
750,
1200,"""Flagged"""
800,
1100,"""Flagged"""


In [27]:
# Flag large orders; `.otherwise()` keeps the existing status for the rest.
orders.with_columns(
    pl.when(pl.col("order_amount") > 1000)
    .then(pl.lit("Flagged"))
    .otherwise(pl.col("status"))
    .alias("status")
)

order_amount,status
i64,str
500,"""Approved"""
750,"""Processing"""
1200,"""Flagged"""
800,"""Declined"""
1100,"""Flagged"""
