In [1]:
#| code-fold: true
from IPython.core.interactiveshell import InteractiveShell

# `ast_node_interactivity` controls which top-level statements in a cell have their values displayed.
# With `last_expr_or_assign`, a cell shows the value of its final statement when that statement is
# either a bare expression or an assignment, so `x = compute()` on the last line still renders `x`.
InteractiveShell.ast_node_interactivity = "last_expr_or_assign"

There's an excellent blog post on why Pandas feels clunky for those coming from R:

<https://www.sumsar.net/blog/pandas-feels-clunky-when-coming-from-r/>

In Python, I find `ibis` to be a much more natural fit for those coming from R. It provides a similar syntax and functionality, making it easier to transition between the two languages.

In [2]:
import ibis

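`_` in ibis is a *deferred expression*: a placeholder for the table that the current method in a chain is being called on. It is resolved only when the surrounding method (`mutate`, `filter`, `aggregate`, ...) runs, so `_.amount` means "the `amount` column of whatever table reaches this step".
This is useful for chaining operations, because it lets you refer to columns of intermediate results (for example, one just created by `mutate`) without assigning them to a variable first. Note that it is not IPython's `_`, which holds the output of the last executed cell.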


In [3]:
# `_` is ibis's deferred-expression placeholder (not IPython's "last output" variable).
from ibis import _

# Interactive mode: expressions are executed and their results rendered when a cell
# displays them, instead of showing the unevaluated expression.
ibis.options.interactive = True

Here's the equivalent code in `ibis` for the example provided in the blog post:



In [4]:
# Load the purchases data as an ibis table; the following cells query it as `df`.
df = ibis.read_csv("purchases.csv")

In [5]:
# Overall total of the `amount` column.
df["amount"].sum()

┌───────┐
│ 17210 │
└───────┘

In [6]:
# Total amount per country.
(
    df
    .group_by("country")
    .aggregate(total=_["amount"].sum())
)

In [7]:
# Per-country total after subtracting each purchase's discount.
df.group_by("country").aggregate(
    total=(_["amount"] - _["discount"]).sum(),
)

In [8]:
# Drop outliers (amount above 10x the overall median amount), then total the
# discounted amounts per country.
(
    df.mutate(median=_["amount"].median())
    .filter(_["amount"] <= _["median"] * 10)
    .group_by("country")
    .aggregate(total=(_["amount"] - _["discount"]).sum())
)

In [9]:
# Same outlier rule, but measured against each country's own median:
# join the per-country median onto every purchase, filter, aggregate, then sort.
(
    df.join(
        df.group_by("country").aggregate(median=_["amount"].median()),
        predicates=["country"],
    )
    .filter(_["amount"] <= _["median"] * 10)
    .group_by("country")
    .aggregate(total=(_["amount"] - _["discount"]).sum())
    .order_by("country")
)