In [1]:
from polars import DataFrame, Series
import pandas as pd

# Small intro to Polars

In [2]:
# Build the demo frame from a column-name -> values mapping:
# "name" holds strings and "value" holds integers (six rows each).
df = DataFrame(
    dict(
        name=["ham", "foo", "bar", "ham", "ham", "foo"],
        value=[1, 2, 3, 4, 5, 6],
    )
)
# A bare trailing expression makes the notebook render the frame.
df

+-------+-------+
| name  | value |
| ---   | ---   |
| str   | i64   |
| "ham" | 1     |
+-------+-------+
| "foo" | 2     |
+-------+-------+
| "bar" | 3     |
+-------+-------+
| "ham" | 4     |
+-------+-------+
| "ham" | 5     |
+-------+-------+
| "foo" | 6     |
+-------+-------+

We can check the head and tail of the DataFrame

In [3]:
# Show only the first three rows of the frame.
df.head(3)

+-------+-------+
| name  | value |
| ---   | ---   |
| str   | i64   |
| "ham" | 1     |
+-------+-------+
| "foo" | 2     |
+-------+-------+
| "bar" | 3     |
+-------+-------+

In [4]:
# Show only the last three rows of the frame.
df.tail(3)

+-------+-------+
| name  | value |
| ---   | ---   |
| str   | i64   |
| "ham" | 4     |
+-------+-------+
| "ham" | 5     |
+-------+-------+
| "foo" | 6     |
+-------+-------+

We can select a column and slice the results

In [5]:
# Indexing by column name yields a Series; slicing it keeps the first three entries.
df["name"][:3]

Series: str 
[
	"ham"
	"foo"
	"bar"
]

We can do standard arithmetic with the series.

In [6]:
# Arithmetic on a Series is element-wise: first scale every entry by a
# scalar, then multiply the column by itself to square each entry.
value_col = df["value"]
print(value_col * 12)
print(value_col * value_col)

Series: i64 
[
	12
	24
	36
	48
	60
	72
]
Series: i64 
[
	1
	4
	9
	16
	25
	36
]


Or do aggregations

In [7]:
# Reduce the "value" column to single numbers and print them one per
# line in the order mean, min, max.
values = df["value"]
for stat in (values.mean(), values.min(), values.max()):
    print(stat)

3.5
1
6


We can create Series that are nullable. 

In [11]:
# A float Series named "nullable" in which None marks a missing entry;
# nullable=True is passed so the constructor accepts those None values.
s = Series(
    "nullable",
    [None, 12.0, 3.0, None, 4.0, 5.0],
    nullable=True,
)
s

Series: f64 
[
	null
	12
	3
	null
	4
	5
]

The null values are ignored in aggregations.

In [13]:
# Sum over the Series; the two null entries do not contribute
# (12 + 3 + 4 + 5 gives the 24.0 shown below).
s.sum()

24.0

We can horizontally stack that Series onto our DataFrame as a new column (note that `hstack` modifies `df` in place here)

In [14]:
# Append the Series as a new column. The call's result is not assigned, yet
# the frame displayed below already contains "nullable", so hstack has
# modified df in place here.
# NOTE(review): re-running this cell on the same kernel would presumably try
# to add "nullable" a second time — confirm how that behaves.
df.hstack([s])
df

+-------+-------+----------+
| name  | value | nullable |
| ---   | ---   | ---      |
| str   | i64   | f64      |
| "ham" | 1     | null     |
+-------+-------+----------+
| "foo" | 2     | 12       |
+-------+-------+----------+
| "bar" | 3     | 3        |
+-------+-------+----------+
| "ham" | 4     | null     |
+-------+-------+----------+
| "ham" | 5     | 4        |
+-------+-------+----------+
| "foo" | 6     | 5        |
+-------+-------+----------+

Or remove columns

In [16]:
# drop gives back a frame without the named column, bound to a new name here.
# Later cells still read df["nullable"], so df itself keeps the column.
df_new = df.drop("nullable")
df_new

+-------+-------+
| name  | value |
| ---   | ---   |
| str   | i64   |
| "ham" | 1     |
+-------+-------+
| "foo" | 2     |
+-------+-------+
| "bar" | 3     |
+-------+-------+
| "ham" | 4     |
+-------+-------+
| "ham" | 5     |
+-------+-------+
| "foo" | 6     |
+-------+-------+

We can do groupby (split-apply-combine) operations

In [18]:
# Group the rows by "name" and take the mean of "value" within each group.
# The aggregated column comes back named "value_mean" (see the output below).
# NOTE(review): the groups are listed as foo, ham, bar rather than in
# first-appearance order — presumably the row order is not guaranteed; confirm.
gb = df.groupby(by="name", select="value", agg="mean")
gb

+-------+------------+
| name  | value_mean |
| ---   | ---        |
| str   | f64        |
| "foo" | 4          |
+-------+------------+
| "ham" | 3.333      |
+-------+------------+
| "bar" | 3          |
+-------+------------+

And join the result with the original DataFrame

In [20]:
# Left join on "name": every row of df is kept and gets the matching
# per-name mean from gb attached as "value_mean". The joined frame is only
# displayed, not assigned to a variable.
df.join(gb, left_on="name", right_on="name", how="left")

+-------+-------+----------+------------+
| name  | value | nullable | value_mean |
| ---   | ---   | ---      | ---        |
| str   | i64   | f64      | f64        |
| "ham" | 1     | null     | 3.333      |
+-------+-------+----------+------------+
| "foo" | 2     | 12       | 4          |
+-------+-------+----------+------------+
| "bar" | 3     | 3        | 3          |
+-------+-------+----------+------------+
| "ham" | 4     | null     | 3.333      |
+-------+-------+----------+------------+
| "ham" | 5     | 4        | 3.333      |
+-------+-------+----------+------------+
| "foo" | 6     | 5        | 4          |
+-------+-------+----------+------------+

In [22]:
# Convert the column to a plain Python list; null entries come back as None.
df["nullable"].to_list()

[None, 12.0, 3.0, None, 4.0, 5.0]

In [21]:
# Write the frame to "out.csv". The path is relative, so the file is
# presumably created in the kernel's current working directory.
df.to_csv("out.csv")