# Chapter 11: Filtering and Sorting Rows

In [None]:
import polars as pl
pl.__version__  # The book is built with Polars version 1.20.0

In [None]:
# Read data/tools.csv with Polars and bind the result to `tools`, the frame
# used by the rest of this chapter. The bare name on the last line is the
# cell's final expression, which the notebook displays.
tools = pl.read_csv("data/tools.csv")
tools

## Filtering Rows

### Filtering Based on Expressions

In [None]:
# A single predicate built from two expressions joined with `&`: the
# "cordless" column itself and a comparison of "brand" with "Makita".
# The parentheses around the comparison are required because Python's `&`
# binds more tightly than `==`.
tools.filter(pl.col("cordless") & (pl.col("brand") == "Makita"))

In [None]:
# The same two conditions as the `&` version, but passed to filter() as
# separate positional predicates (per the Polars docs, multiple predicates
# are AND-ed together), so no extra parentheses are needed.
tools.filter(pl.col("cordless"), pl.col("brand") == "Makita")

### Filtering Based on Column Names

In [None]:
# Pass the column name as a plain string instead of wrapping it in pl.col();
# the "cordless" column is used directly as the predicate.
tools.filter("cordless")

### Filtering Based on Constraints

In [None]:
# Keyword-argument form: each `name=value` pair is a constraint on the column
# called `name` (here "cordless" must be True and "brand" must be "Makita").
tools.filter(cordless=True, brand="Makita")

## Sorting Rows

### Sorting Based on a Single Column

In [None]:
# Sort the rows on the "price" column, using sort()'s default order.
tools.sort("price")

### Sorting in Reverse

In [None]:
# Same sort on "price", reversed by passing descending=True.
tools.sort("price", descending=True)

In [None]:
# This raises a TypeError because sort() has no `ascending` parameter
# (use `descending` instead):
# tools.sort("price", ascending=False)

### Sorting Based on Multiple Columns

In [None]:
# Sort on two columns given in priority order: "brand" first, then "price".
tools.sort("brand", "price")

In [None]:
# One `descending` flag per sort column, in the same order as the columns:
# "brand" keeps the default order (False) while "price" is reversed (True).
tools.sort("brand", "price", descending=[False, True])

### Sorting Based on Expressions

In [None]:
# Sort on a computed expression ("rpm" divided by "price") rather than on a
# column that exists in the frame.
tools.sort(pl.col("rpm") / pl.col("price"))

### Sorting Nested Data Types

In [None]:
# A frame with one list-typed column whose rows have different lengths;
# sort() is then applied to that list column itself.
nested_values = [[2, 2], [2, 1, 3], [1]]
lists = pl.DataFrame({"lists": nested_values})
lists.sort("lists")

In [None]:
# Three struct rows that all share a=1 and differ only in "b" and "c";
# the frame is then sorted on the struct column itself.
struct_rows = [
    {"a": 1, "b": 2, "c": 3},
    {"a": 1, "b": 3, "c": 1},
    {"a": 1, "b": 1, "c": 2},
]
structs = pl.DataFrame({"structs": struct_rows})
structs.sort("structs")

In [None]:
# Group the tools by "brand" and, within each group, pack all columns of
# every row into a struct; the structs of one brand are aggregated into a
# single list in the new "collection" column. The bare name on the last line
# displays the result.
tools_collection = tools.group_by("brand").agg(collection=pl.struct(pl.all()))
tools_collection

In [None]:
# Order the brands by the number of elements in their "collection" list,
# largest collection first.
tools_collection.sort(pl.col("collection").list.len(), descending=True)

In [None]:
# Per brand: pull the "price" field out of every struct in the "collection"
# list and average those prices. The expression is used only as the sort key,
# so no column is added to the output.
collection_mean_price = (
    pl.col("collection")
    .list.eval(pl.element().struct.field("price"))
    .list.mean()
)
tools_collection.sort(collection_mean_price)

In [None]:
# Same per-brand mean of the "price" field, but this time stored as a
# "mean_price" column first so the sort key is visible in the result.
collection_mean_price = (
    pl.col("collection")
    .list.eval(pl.element().struct.field("price"))
    .list.mean()
)
with_mean_price = tools_collection.with_columns(mean_price=collection_mean_price)
with_mean_price.sort("mean_price")

## Related Row Operations

### Filtering Missing Values

In [None]:
# Row count (height) that remains after dropping the rows with a null "rpm".
tools.drop_nulls("rpm").height

In [None]:
# Row count of the rows without any null: is_not_null() is evaluated for
# every column (pl.all()) and all_horizontal() combines those results across
# the columns of each row.
tools.filter(pl.all_horizontal(pl.all().is_not_null())).height

### Slicing

In [None]:
# Add a row-index column so the effect is visible, keep every 2nd row, and
# show the first 3 of the rows that remain.
tools.with_row_index().gather_every(2).head(3)

### Top and Bottom

In [None]:
# The 3 rows with the largest "price" values.
tools.top_k(3, by="price")

### Sampling

In [None]:
# Randomly sample 20% of the rows.
# NOTE(review): no `seed` is passed, so the selected rows presumably differ
# from run to run; confirm whether reproducible output is wanted here.
tools.sample(fraction=0.2)

### Semi-Joins

In [None]:
# Lookup frame with a single "tool" column listing the saw names to keep.
saw_names = [
    "Table Saw",
    "Plunge Cut Saw",
    "Miter Saw",
    "Jigsaw",
    "Bandsaw",
    "Chainsaw",
    "Seesaw",
]
saws = pl.DataFrame({"tool": saw_names})
# Semi-join on "tool": `saws` is used only to decide which rows of `tools`
# are kept.
tools.join(saws, on="tool", how="semi")

## Takeaways