# Case function
By the end of this lecture you will be able to:
- write a `case` function (`when` / `then` / `otherwise`) on a single column
- write a `case` function on two columns, combining conditions with `AND` (`&`) and `OR` (`|`)

In [26]:
import polars as pl

In [27]:
# Relative path to the Superstore sample CSV that the rest of the notebook reads.
csv_file = "../Files/Sample_Superstore.csv"

In [28]:
# Load the CSV into a Polars DataFrame; every later cell starts from `df`.
df = pl.read_csv(csv_file)

In [29]:
# Preview the first three rows to check the column names and inferred dtypes.
df.head(3)

Row_ID,Order_ID,Order_Date,Ship_Date,Ship_Mode,Customer_ID,Customer_Name,Segment,Country,City,State,Postal_Code,Region,Product_ID,Category,Sub_Category,Product_Name,Sales,Quantity,Discount,Profit
i64,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,str,str,f64,i64,f64,f64
1,,,"""11-11-2016""","""Second Class""","""CG-12520""","""Claire Gute""","""Consumer""","""United States""","""Henderson""","""Kentucky""",42420,"""South""","""FUR-BO-10001798""","""Furniture""","""Bookcases""","""Bush Somerset Collection Bookc…",261.96,2,0.0,41.9136
2,"""CA-2016-152156""","""08-11-2016""","""11-11-2016""","""Second Class""","""CG-12520""","""Claire Gute""","""Consumer""","""United States""","""Henderson""","""Kentucky""",42420,"""South""","""FUR-CH-10000454""","""Furniture""","""Chairs""","""Hon Deluxe Fabric Upholstered …",731.94,3,0.0,219.582
3,"""CA-2016-138688""","""12-06-2016""",,,"""DV-13045""","""Darrin Van Huff""","""Corporate""",,"""Los Angeles""","""California""",90036,"""West""","""OFF-LA-10000240""","""Office Supplies""","""Labels""","""Self-Adhesive Address Labels f…",14.62,2,0.0,6.8714


### Single column case function
This version calculates total profit per region and assigns a simple category.

In [30]:
# Sum the profit of every region, then label each region from that single total.
# The when/then branches are checked top-down, so the first threshold that
# matches decides the label; anything at or below 45,000 falls to "Not_Good".
(
    df.group_by("Region")
    .agg(total_profit=pl.col("Profit").sum())
    .with_columns(
        Profit_category=(
            pl.when(pl.col("total_profit") > 100_000).then(pl.lit("Best"))
            .when(pl.col("total_profit") > 75_000).then(pl.lit("Better"))
            .when(pl.col("total_profit") > 45_000).then(pl.lit("Good"))
            # pl.lit(...) is needed: a bare string would be read as a column name.
            .otherwise(pl.lit("Not_Good"))
        )
    )
)

Region,total_profit,Profit_category
str,f64,str
"""South""",46749.4303,"""Good"""
"""Central""",39706.3625,"""Not_Good"""
"""East""",91522.78,"""Better"""
"""West""",108418.4489,"""Best"""


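### Two-column `case` function with `AND`
Aggregates total profit and total discount per region, then assigns a category only when **both** the profit threshold and the discount threshold of a tier are exceeded (`AND`, written `&` in Polars).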

In [32]:
# Sum profit and discount per region, then label each region from both totals.
# A tier applies only when BOTH of its thresholds are exceeded (`&`).
# Each comparison is parenthesised because `&` binds tighter than `>` in Python.
(
    df.group_by("Region")
    .agg(
        total_profit=pl.col("Profit").sum(),
        total_discount=pl.col("Discount").sum(),
    )
    .with_columns(
        performance_category=(
            pl.when(
                (pl.col("total_profit") > 100_000) & (pl.col("total_discount") > 500)
            ).then(pl.lit("Best"))
            .when(
                (pl.col("total_profit") > 75_000) & (pl.col("total_discount") > 250)
            ).then(pl.lit("Better"))
            .when(
                (pl.col("total_profit") > 45_000) & (pl.col("total_discount") > 100)
            ).then(pl.lit("Good"))
            # Reached when no tier has both of its thresholds exceeded.
            .otherwise(pl.lit("Not_Good"))
        )
    )
)

Region,total_profit,total_discount,performance_category
str,f64,f64,str
"""Central""",39706.3625,558.34,"""Not_Good"""
"""South""",46749.4303,238.55,"""Good"""
"""East""",91522.78,414.0,"""Better"""
"""West""",108418.4489,350.2,"""Better"""


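### Two-column `case` function with `OR`
Aggregates total profit and total discount per region, then assigns a category when **either** the profit threshold or the discount threshold of a tier is exceeded (`OR`, written `|` in Polars).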

In [33]:
# Sum profit and discount per region, then label each region from both totals.
# A tier applies as soon as EITHER of its thresholds is exceeded (`|`).
# Each comparison is parenthesised because `|` binds tighter than `>` in Python.
(
    df.group_by("Region")
    .agg(
        total_profit=pl.col("Profit").sum(),
        total_discount=pl.col("Discount").sum(),
    )
    .with_columns(
        Performance_category=(
            pl.when(
                (pl.col("total_profit") > 100_000) | (pl.col("total_discount") > 500)
            ).then(pl.lit("Best"))
            .when(
                (pl.col("total_profit") > 75_000) | (pl.col("total_discount") > 250)
            ).then(pl.lit("Better"))
            .when(
                (pl.col("total_profit") > 45_000) | (pl.col("total_discount") > 100)
            ).then(pl.lit("Good"))
            # Reached only when neither total exceeds the lowest tier's thresholds.
            .otherwise(pl.lit("Not_Good"))
        )
    )
)

Region,total_profit,total_discount,Performance_category
str,f64,f64,str
"""East""",91522.78,414.0,"""Better"""
"""South""",46749.4303,238.55,"""Good"""
"""West""",108418.4489,350.2,"""Best"""
"""Central""",39706.3625,558.34,"""Best"""
