In [1]:
import polars as pl
from mimesis import Person, Address, Numeric
import duckdb
import pyarrow

In [2]:
# Fixed seed so that Restart & Run All regenerates the same synthetic records
# instead of a fresh random dataset on every run.
SEED = 42

# One mimesis provider per kind of fake field used when building the records.
peep = Person(seed=SEED)    # full names
adrs = Address(seed=SEED)   # street addresses, zip codes, states
num = Numeric(seed=SEED)    # integer net-worth values

In [3]:
# Build 100,000 synthetic person records, one dict per row. For every row the
# provider calls run in the same order: name, address, zip code, state, net worth.
data = [
    {
        'name': peep.full_name(),
        'address': adrs.address(),
        'zip_cd': adrs.zip_code(),
        'state': adrs.state(),
        'net_worth': num.integer_number(5000, 50000),
    }
    for _ in range(100_000)
]

In [4]:
# Load the generated list of row dicts into a polars DataFrame.
df = pl.DataFrame(data=data)

In [17]:
# Per-zip-code summary written as DuckDB SQL: number of distinct names, number
# of distinct addresses and total net worth. The query's `from df` reads the
# polars DataFrame `df` built in an earlier cell; only the first five zip codes
# (ordered by the first select column) are kept.
sql = """
    select zip_cd, count(distinct name) as name_cnt
        ,count(distinct address) as address_cnt
        ,sum(net_worth) as net_worth_tot
    from df
    group by all
    order by 1
    limit 5
"""
# Build the relation first, then print it as a text table.
zip_summary = duckdb.sql(sql)
zip_summary.show()

┌─────────┬──────────┬─────────────┬───────────────┐
│ zip_cd  │ name_cnt │ address_cnt │ net_worth_tot │
│ varchar │  int64   │    int64    │    int128     │
├─────────┼──────────┼─────────────┼───────────────┤
│ 00001   │        2 │           2 │         59113 │
│ 00004   │        1 │           1 │         10175 │
│ 00005   │        2 │           2 │         33547 │
│ 00006   │        2 │           2 │         46552 │
│ 00008   │        1 │           1 │         26245 │
└─────────┴──────────┴─────────────┴───────────────┘



In [16]:
# Achieve the same thing with polars syntax.
# Distinct-value counts for `name` and `address`; each output column keeps its
# source name plus the "_cnt" suffix.
distinct_counts = pl.col('name', 'address').n_unique().name.suffix("_cnt")
# Total net worth, output as `net_worth_tot`.
net_worth_total = pl.col('net_worth').sum().name.suffix("_tot")

res = (
    df.group_by('zip_cd')
    .agg(distinct_counts, net_worth_total)
    .sort("zip_cd")
    .head(5)
)
print(res)


shape: (5, 4)
┌────────┬──────────┬─────────────┬───────────────┐
│ zip_cd ┆ name_cnt ┆ address_cnt ┆ net_worth_tot │
│ ---    ┆ ---      ┆ ---         ┆ ---           │
│ str    ┆ u32      ┆ u32         ┆ i64           │
╞════════╪══════════╪═════════════╪═══════════════╡
│ 00001  ┆ 2        ┆ 2           ┆ 59113         │
│ 00004  ┆ 1        ┆ 1           ┆ 10175         │
│ 00005  ┆ 2        ┆ 2           ┆ 33547         │
│ 00006  ┆ 2        ┆ 2           ┆ 46552         │
│ 00008  ┆ 1        ┆ 1           ┆ 26245         │
└────────┴──────────┴─────────────┴───────────────┘
