- Author: Benjamin Du
- Date: 2021-10-14 21:13:48
- Modified: 2021-11-20 14:47:09
- Title: Tips on Polars
- Slug: tips-on-polars
- Category: Computer Science
- Tags: Computer Science, programming, Rust, polars, DataFrame, big data


**Things on this page are fragmentary and immature notes/thoughts of the author. Please read with your own judgement!**

In [1]:
!pip3 install polars

Defaulting to user installation because normal site-packages is not writeable
Collecting polars
  Downloading polars-0.10.26-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (9.9 MB)
     |████████████████████████████████| 9.9 MB 2.6 MB/s            
Installing collected packages: polars
Successfully installed polars-0.10.26


In [1]:
import polars as pl

In [2]:
# Read the iris CSV directly from its URL into a Polars DataFrame (requires network access).
df = pl.read_csv("https://j.mp/iriscsv")
# A bare expression on the last line of the cell displays the frame.
df

sepal_length,sepal_width,petal_length,petal_width,species
f64,f64,f64,f64,str
5.1,3.5,1.4,0.2,"""setosa"""
4.9,3,1.4,0.2,"""setosa"""
4.7,3.2,1.3,0.2,"""setosa"""
4.6,3.1,1.5,0.2,"""setosa"""
5,3.6,1.4,0.2,"""setosa"""
5.4,3.9,1.7,0.4,"""setosa"""
4.6,3.4,1.4,0.3,"""setosa"""
5,3.4,1.5,0.2,"""setosa"""
4.4,2.9,1.4,0.2,"""setosa"""
4.9,3.1,1.5,0.1,"""setosa"""


In [6]:
# Keep only the rows with sepal_length > 5, then sum the remaining columns per species.
(
    df
    .filter(pl.col("sepal_length") > 5)
    .groupby("species")
    .sum()
)

species,sepal_length_sum,sepal_width_sum,petal_length_sum,petal_width_sum
str,f64,f64,f64,f64
"""setosa""",116.9,81.7,33.2,6.1000000000000005
"""virginica""",324.49999999999994,146.2,273.1,99.59999999999998
"""versicolor""",281.9,131.8,202.9,63.3


In [7]:
# Small demo frame: two integer columns (A ascending, B descending)
# and two string columns ("fruits", "cars").
df = pl.DataFrame({
    "A": [1, 2, 3, 4, 5],
    "fruits": ["banana", "banana", "apple", "apple", "banana"],
    "B": [5, 4, 3, 2, 1],
    "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
})
df

A,fruits,B,cars
i64,str,i64,str
1,"""banana""",5,"""beetle"""
2,"""banana""",4,"""audi"""
3,"""apple""",3,"""beetle"""
4,"""apple""",2,"""beetle"""
5,"""banana""",1,"""beetle"""


In [8]:
# Sort by "fruits", then build one output column per expression in the list.
# An expression followed by `.over(<column>)` is evaluated within the groups of <column>.
(
    df
    .sort("fruits")
    .select([
        "fruits",
        "cars",
        # constant string column
        pl.lit("fruits").alias("literal_string_fruits"),
        # sum of B restricted to the rows whose car is "beetle"
        pl.col("B").filter(pl.col("cars") == "beetle").sum(),
        # groups by "cars"
        pl.col("A").filter(pl.col("B") > 2).sum().over("cars").alias("sum_A_by_cars"),
        # groups by "fruits"
        pl.col("A").sum().over("fruits").alias("sum_A_by_fruits"),
        # groups by "fruits"
        pl.col("A").reverse().over("fruits").flatten().alias("rev_A_by_fruits"),
        # groups by "fruits"
        pl.col("A").sort_by("B").over("fruits").flatten().alias("sort_A_by_B_by_fruits"),
    ])
)

fruits,cars,literal_string_fruits,B,sum_A_by_cars,sum_A_by_fruits,rev_A_by_fruits,sort_A_by_B_by_fruits
str,str,str,i64,i64,i64,i64,i64
"""apple""","""beetle""","""fruits""",11,4,7,4,4
"""apple""","""beetle""","""fruits""",11,4,7,3,3
"""banana""","""beetle""","""fruits""",11,4,8,5,5
"""banana""","""audi""","""fruits""",11,2,8,2,2
"""banana""","""beetle""","""fruits""",11,4,8,1,1


## References 

[polars @ GitHub](https://github.com/pola-rs/polars)


[Polars - Official Documentation](https://www.pola.rs/)