# Filtering rows: Boolean columns

By the end of this lecture you will be able to:
- apply conditions with the `filter` method
- filter rows on a `Boolean` column and negate the condition with `~` or `not_`


In [14]:
import polars as pl

In [15]:
# Relative path to the Superstore sample CSV that is read in the next cell
csv_file = '../../../Files/Sample_Superstore-1.csv'

In [16]:
# Read the CSV into a Polars DataFrame with the default schema inference
df = pl.read_csv(csv_file)

In [18]:
# Display the DataFrame; the rendered table is truncated to the first and last rows
df

Row_ID,Order_ID,Order_Date,Ship_Date,Ship_Mode,Customer_ID,Customer_Name,Segment,Country,City,State,Postal_Code,Region,Product_ID,Category,Sub_Category,Product_Name,Sales,Quantity,Discount,Profit
i64,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,str,str,f64,i64,f64,f64
1,,,"""11-11-2016""","""Second Class""","""CG-12520""","""Claire Gute""","""Consumer""","""United States""","""Henderson""","""Kentucky""",42420,"""South""","""FUR-BO-10001798""","""Furniture""","""Bookcases""","""Bush Somerset Collection Bookc…",261.96,2,0.0,41.9136
2,"""CA-2016-152156""","""08-11-2016""","""11-11-2016""","""Second Class""","""CG-12520""","""Claire Gute""","""Consumer""","""United States""","""Henderson""","""Kentucky""",42420,"""South""","""FUR-CH-10000454""","""Furniture""","""Chairs""","""Hon Deluxe Fabric Upholstered …",731.94,3,0.0,219.582
3,"""CA-2016-138688""","""12-06-2016""",,,"""DV-13045""","""Darrin Van Huff""","""Corporate""",,"""Los Angeles""","""California""",90036,"""West""","""OFF-LA-10000240""","""Office Supplies""","""Labels""","""Self-Adhesive Address Labels f…",14.62,2,0.0,6.8714
4,,"""11-10-2015""",,"""Standard Class""","""SO-20335""","""Sean O'Donnell""","""Consumer""","""United States""","""Fort Lauderdale""","""Florida""",33311,"""South""","""FUR-TA-10000577""","""Furniture""","""Tables""","""Bretford CR4500 Series Slim Re…",957.5775,5,0.45,-383.031
5,"""US-2015-108966""","""11-10-2015""","""18-10-2015""","""Standard Class""","""SO-20335""","""Sean O'Donnell""","""Consumer""","""United States""",,"""Florida""",33311,"""South""","""OFF-ST-10000760""","""Office Supplies""","""Storage""","""Eldon Fold 'N Roll Cart System""",22.368,2,0.2,2.5164
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
9990,"""CA-2014-110422""","""21-01-2014""","""23-01-2014""","""Second Class""","""TB-21400""","""Tom Boeckenhauer""","""Consumer""","""United States""","""Miami""","""Florida""",33180,"""South""","""FUR-FU-10001889""","""Furniture""","""Furnishings""","""Ultra Door Pull Handle""",25.248,3,0.2,4.1028
9991,"""CA-2017-121258""","""26-02-2017""","""03-03-2017""","""Standard Class""","""DB-13060""","""Dave Brooks""","""Consumer""","""United States""","""Costa Mesa""","""California""",92627,"""West""","""FUR-FU-10000747""","""Furniture""","""Furnishings""","""Tenex B1-RE Series Chair Mats …",91.96,2,0.0,15.6332
9992,"""CA-2017-121258""","""26-02-2017""","""03-03-2017""","""Standard Class""","""DB-13060""","""Dave Brooks""","""Consumer""","""United States""","""Costa Mesa""","""California""",92627,"""West""","""TEC-PH-10003645""","""Technology""","""Phones""","""Aastra 57i VoIP phone""",258.576,2,0.2,19.3932
9993,"""CA-2017-121258""","""26-02-2017""","""03-03-2017""","""Standard Class""","""DB-13060""","""Dave Brooks""","""Consumer""","""United States""","""Costa Mesa""","""California""",92627,"""West""","""OFF-PA-10004041""","""Office Supplies""","""Paper""","""It's Hot Message Books with St…",29.6,4,0.0,13.32


### Filtering on a Boolean column
We can filter for `True` values on a Boolean column by passing the column itself as the expression to `filter`; no explicit comparison is needed because the column already holds Boolean values.

In [19]:
(
    df
    # NOTE(review): `Is_Return` is not among the columns read from the CSV (see the
    # ColumnNotFoundError in this cell's output); it must be added to `df` before this
    # cell can run. TODO: confirm where that column is created.
    .filter(
        # A Boolean column is already a valid predicate: rows where it is True are kept,
        # so no `== True` comparison is needed
        pl.col("Is_Return")
    )
    # Keep only the columns needed to check the result
    .select("Customer_Name", "Profit", "Is_Return")
    .head()
)

ColumnNotFoundError: unable to find column "Is_Return"; valid columns: ["Row_ID", "Order_ID", "Order_Date", "Ship_Date", "Ship_Mode", "Customer_ID", "Customer_Name", "Segment", "Country", "City", "State", "Postal_Code", "Region", "Product_ID", "Category", "Sub_Category", "Product_Name", "Sales", "Quantity", "Discount", "Profit"]

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'filter' <---
DF ["Row_ID", "Order_ID", "Order_Date", "Ship_Date"]; PROJECT */21 COLUMNS

We can negate a filter condition with the `~` operator, which here keeps the rows where `Is_Return` is `False`.

In [31]:
(
    df
    # `~` inverts the Boolean expression, so the filter keeps rows where `Is_Return` is False
    .filter(
        ~pl.col("Is_Return")
    )
    # Keep only the columns needed to check the result
    .select("Customer_Name","Quantity", "Profit", "Is_Return")
    # Show just the first rows of the filtered result
    .head()
)

Customer_Name,Quantity,Profit,Is_Return
str,i64,f64,bool
"""Sean O'Donnell""",2,2.5164,False
"""Andrew Allen""",3,5.4432,False
"""Odella Nelson""",2,19.7714,False
"""Odella Nelson""",2,8.2062,False
"""Patrick O'Donnell""",4,8.4784,False


Alternatively, we can negate the condition with the `not_` expression method, which gives the same result as `~`.

In [33]:
(
    df
    # `.not_()` is the method form of `~`: it inverts the Boolean expression,
    # so the filter keeps rows where `Is_Return` is False
    .filter(
        pl.col("Is_Return").not_()
    )
    # Keep only the columns needed to check the result
    .select("Customer_Name","Quantity", "Profit", "Is_Return")
    # Show just the first rows of the filtered result
    .head()
)

Customer_Name,Quantity,Profit,Is_Return
str,i64,f64,bool
"""Sean O'Donnell""",2,2.5164,False
"""Andrew Allen""",3,5.4432,False
"""Odella Nelson""",2,19.7714,False
"""Odella Nelson""",2,8.2062,False
"""Patrick O'Donnell""",4,8.4784,False


### Using `AND` (`&`) and `OR` (`|`)

In [36]:
(
    df
    .filter(
        # Each comparison is wrapped in parentheses because `|` and `&` bind more
        # tightly than comparison operators such as `>=` in Python
        ((pl.col('Quantity') >= 2) | (pl.col('Profit') <= 1000))
        # `~` negates the Boolean column directly instead of comparing with `== False`
        & ~pl.col('Is_Return')
    )
    # Keep only the columns used in the condition, plus the customer name
    .select("Customer_Name","Quantity", "Profit", "Is_Return")
    # Show just the first two matching rows
    .head(2)
)

Customer_Name,Quantity,Profit,Is_Return
str,i64,f64,bool
"""Sean O'Donnell""",2,2.5164,False
"""Andrew Allen""",3,5.4432,False
