In [1]:
import collections
import itertools
from itertools import groupby

import pandas as pd
import polars as pl
import polars.selectors as cs

## 1. Repeat operations

In [2]:
# Sample integers reused by the examples below.
lucky_numbers = [
    5,
    93,
    42,
    55,
    74,
]

In [3]:
# Pure-Python variant 1: list comprehension adding one to every value.
[value + 1 for value in lucky_numbers]

[6, 94, 43, 56, 75]

In [4]:
# Pure-Python variant 2: map() is lazy, so wrap it in list() to display the values.
list(map(lambda number: number + 1, lucky_numbers))

[6, 94, 43, 56, 75]

In [5]:
# Polars variant: wrap the list in a one-column frame and add 1 to that
# column inside a select, which keeps the column name "lucky_number".
df_lucky_numbers = pl.DataFrame({"lucky_number": lucky_numbers})
print(df_lucky_numbers.select(pl.col("lucky_number") + 1))

shape: (5, 1)
┌──────────────┐
│ lucky_number │
│ ---          │
│ i64          │
╞══════════════╡
│ 6            │
│ 94           │
│ 43           │
│ 56           │
│ 75           │
└──────────────┘


## 2. Filter

In [6]:
# Pure-Python variant 1: keep only the values strictly greater than 50.
[value for value in lucky_numbers if value > 50]

[93, 55, 74]

In [7]:
# Pure-Python variant 2: filter() is lazy, so wrap it in list() to display the values.
list(filter(lambda number: number > 50, lucky_numbers))

[93, 55, 74]

In [8]:
# Polars variant: build the boolean predicate as an expression, then
# keep the rows for which it evaluates to true.
is_above_fifty = pl.col("lucky_number") > 50
print(df_lucky_numbers.filter(is_above_fifty))

shape: (3, 1)
┌──────────────┐
│ lucky_number │
│ ---          │
│ i64          │
╞══════════════╡
│ 93           │
│ 55           │
│ 74           │
└──────────────┘


## 3. Group by

In [9]:
names = ["May", "Jeff", "Cathy", "Jack", "David"]

# itertools.groupby only merges *adjacent* items that share a key, so the
# names are sorted by length before being grouped by length.
names_by_length = sorted(names, key=len)
length_buckets = [
    (length, list(members))
    for length, members in groupby(names_by_length, key=len)
]

# Split the (key, members) pairs into two parallel lists.
uniquekeys = [length for length, _ in length_buckets]
groups = [members for _, members in length_buckets]

print(uniquekeys)
print(groups)

[3, 4, 5]
[['May'], ['Jeff', 'Jack'], ['Cathy', 'David']]


In [10]:
df_names = pl.DataFrame({"name": names})

# Group names by their length in *characters*. len_chars() matches Python's
# len(), which the itertools version of this example uses as its key;
# len_bytes() counts UTF-8 bytes and would put non-ASCII names such as
# "Zoë" into a different group than the pure-Python code does.
print(
    df_names.group_by(pl.col("name").str.len_chars().alias("len"))
    .agg(pl.col("name"))
    # group_by does not guarantee group order, so sort for a stable display.
    .sort(pl.col("len"))
)

shape: (3, 2)
┌─────┬────────────────────┐
│ len ┆ name               │
│ --- ┆ ---                │
│ u32 ┆ list[str]          │
╞═════╪════════════════════╡
│ 3   ┆ ["May"]            │
│ 4   ┆ ["Jeff", "Jack"]   │
│ 5   ┆ ["Cathy", "David"] │
└─────┴────────────────────┘


## 4. Select

In [11]:
# Column-name -> values mapping shared by the DataFrame constructors below.
data = dict(name=names, lucky_number=lucky_numbers)

In [12]:
# Selectors compose with set operators: "|" is the union of the column
# called "name" and every numeric column.
df = pl.DataFrame(data)
name_or_numeric = cs.by_name("name") | cs.numeric()
print(df.select(name_or_numeric))

shape: (5, 2)
┌───────┬──────────────┐
│ name  ┆ lucky_number │
│ ---   ┆ ---          │
│ str   ┆ i64          │
╞═══════╪══════════════╡
│ May   ┆ 5            │
│ Jeff  ┆ 93           │
│ Cathy ┆ 42           │
│ Jack  ┆ 55           │
│ David ┆ 74           │
└───────┴──────────────┘


## 5. LinkedIn post

In [13]:
# https://www.linkedin.com/posts/bbelderbos_happy-mondaypython-can-be-so-concise-and-activity-7309836082458583041-mTYW?utm_source=share&utm_medium=member_desktop&rcm=ACoAABBLRAgBsmngU5ejiiht8wZNrBdjxbUCAZ4
# `itertools` and `collections` are imported in the notebook's first cell
# so that every dependency is declared in one place.

df_text = pl.DataFrame(
    {
        "text": [
            "Tags: #Coding #ProblemSolving",
            "Tags: #OpenSource #Collaboration #Efficiency",
            "Tags: #ProblemSolving #Efficiency",
        ]
    }
)

# One list of "#tag" matches per row of the "text" column.
tags = df_text["text"].str.extract_all(r"#\w+").to_list()

# Flatten the per-row lists and lower-case each tag so that "#Coding" and
# "#coding" are counted together. A list (rather than a generator) keeps
# `tags_flattened` usable after the Counter below has iterated over it.
tags_flattened = [
    tag.lower() for tag in itertools.chain.from_iterable(tags)
]

# Tag -> number of occurrences; the bare last expression displays it.
most_common_tags = collections.Counter(tags_flattened)
most_common_tags

Counter({'#problemsolving': 2,
         '#efficiency': 2,
         '#coding': 1,
         '#opensource': 1,
         '#collaboration': 1})

In [14]:
# The same tag count expressed as a single Polars expression:
# extract hashtags -> lower-case each list element -> one row per tag ->
# count occurrences (most frequent first) -> split the resulting struct
# into separate value / count columns.
tag_counts = (
    pl.col("text")
    .str.extract_all(r"#\w+")
    .list.eval(pl.element().str.to_lowercase())
    .explode()
    .value_counts(sort=True)
    .struct.unnest()
)

print(df_text.select(tag_counts))

shape: (5, 2)
┌─────────────────┬───────┐
│ text            ┆ count │
│ ---             ┆ ---   │
│ str             ┆ u32   │
╞═════════════════╪═══════╡
│ #problemsolving ┆ 2     │
│ #efficiency     ┆ 2     │
│ #coding         ┆ 1     │
│ #opensource     ┆ 1     │
│ #collaboration  ┆ 1     │
└─────────────────┴───────┘


## Remarks

### Remark 1

In [15]:
# Same grouping as in section 3, with both accumulators initialised in one
# statement and the f-string "=" specifier labelling each printed value.
uniquekeys, groups = [], []

for length, same_length_names in groupby(sorted(names, key=len), key=len):
    uniquekeys.append(length)
    groups.append(list(same_length_names))

print(f"{uniquekeys=}")
print(f"{groups=}")

uniquekeys=[3, 4, 5]
groups=[['May'], ['Jeff', 'Jack'], ['Cathy', 'David']]


## codepanda

## 1. Repeat operations

In [16]:
# pandas variant. Note that this rebinds `df_lucky_numbers` to a pandas frame.
df_lucky_numbers = pd.DataFrame({"lucky_number": lucky_numbers})

# assign() returns a new frame with the column replaced; the original frame
# is left untouched.
incremented = df_lucky_numbers.assign(
    lucky_number=lambda frame: frame["lucky_number"] + 1
)
print(incremented)

   lucky_number
0             6
1            94
2            43
3            56
4            75


## 2. Filter

In [17]:
# query() evaluates the string against the frame's columns and keeps the
# matching rows together with their original index labels.
above_fifty = df_lucky_numbers.query("lucky_number > 50")
print(above_fifty)

   lucky_number
1            93
3            55
4            74


## 3. Group by

In [18]:
# pandas variant. Note that this rebinds `df_names` to a pandas frame.
df_names = pd.DataFrame({"name": names})

# Add a helper column holding each name's length ...
names_with_len = df_names.assign(len=lambda frame: frame["name"].str.len())

# ... then collect the names of every length into a list and turn the
# group key back into an ordinary column.
names_by_len = names_with_len.groupby("len").agg(list).reset_index()
print(names_by_len)

   len            name
0    3           [May]
1    4    [Jeff, Jack]
2    5  [Cathy, David]


## 4. Select

In [19]:
# pandas is already imported as `pd` in the notebook's first cell, so it is
# not re-imported here.
# Build the frame from the shared `data` mapping and select columns by
# label: all rows (":"), the two listed columns.
df = pd.DataFrame(data)
print(df.loc[:, ["name", "lucky_number"]])

    name  lucky_number
0    May             5
1   Jeff            93
2  Cathy            42
3   Jack            55
4  David            74
