In [None]:
import ibis

# Turn on ibis's interactive option for the rest of the notebook.
ibis.options.interactive = True

# Connect using the URL "duckdb://penguins.ddb"; `con` is reused by later cells.
con = ibis.connect("duckdb://penguins.ddb")



# Create table

In [None]:

# Fetch the bundled penguins example and convert it to a PyArrow object.
penguins_arrow = ibis.examples.penguins.fetch().to_pyarrow()

# (Re)create the "penguins" table on the connection; overwrite=True lets this
# cell be re-run without failing on an existing table.
con.create_table("penguins", penguins_arrow, overwrite=True)

# Load table

In [None]:
# Bind the "penguins" table from the connection; every ibis example below
# builds on this `penguins` object.
penguins = con.table("penguins")

In [None]:
# Execute the ibis table and keep the result as `penguins_pdf`; the
# pandas-style cells below operate on this object.
penguins_pdf = penguins.execute()

# Filter

In [None]:
# ibis: keep only the rows whose species is "Adelie".
penguins.filter(penguins["species"] == "Adelie")

In [None]:
# pandas: boolean mask for the "Adelie" rows, then preview the matches.
adelie_mask = penguins_pdf["species"] == "Adelie"
penguins_pdf.loc[adelie_mask].head()

In [None]:
# ibis: both conditions must hold, so the predicates are combined with `&`.
on_torgersen = penguins["island"] == "Torgersen"
is_adelie = penguins["species"] == "Adelie"
penguins.filter(on_torgersen & is_adelie)

In [None]:
# pandas: build one mask per condition and AND them together.
torgersen_mask = penguins_pdf["island"] == "Torgersen"
adelie_rows_mask = penguins_pdf["species"] == "Adelie"
penguins_pdf.loc[torgersen_mask & adelie_rows_mask].head()

# Select

In [None]:
# ibis: project the table down to three named columns.
selected_columns = ("species", "island", "year")
penguins.select(*selected_columns)


In [None]:
# pandas: index with a list of column names, then preview the result.
wanted_columns = ["species", "island", "year"]
penguins_pdf[wanted_columns].head()

# Mutate

In [None]:
# ibis: add two columns -- the bill length divided by 10 and a constant
# "Antarctica" literal for every row.
penguins.mutate(
    bill_length_cm=penguins["bill_length_mm"] / 10,
    continent=ibis.literal("Antarctica"),
)

In [None]:
# pandas: `assign` adds the same two columns; the plain string is used as the
# value of `continent`.
penguins_pdf.assign(
    bill_length_cm=penguins_pdf["bill_length_mm"] / 10,
    continent="Antarctica",
).head()

# Selectors

In [None]:
import ibis.selectors as s

# Add the converted column first, then keep every column except
# `bill_length_mm` by negating the `matches` selector with `~`.
penguins_with_cm = penguins.mutate(bill_length_cm=penguins["bill_length_mm"] / 10)
penguins_with_cm.select(~s.matches("bill_length_mm"))

In [None]:
# pandas: add `bill_length_cm`, then drop only `bill_length_mm`.
# The column filter must run on the *assigned* frame: building the list from
# the original `penguins_pdf.columns` would also lose the new
# `bill_length_cm` column and no longer mirror the ibis selector example.
penguins_pdf.assign(
    bill_length_cm=penguins_pdf.bill_length_mm / 10,
).drop(columns=["bill_length_mm"]).head()

In [None]:
# ibis: select the "island" column together with whatever the `s.numeric()`
# selector picks (`s` is the `ibis.selectors` alias imported in an earlier cell).
penguins.select("island", s.numeric())


In [None]:
# pandas: mirror the ibis example by keeping "island" plus every numeric
# column. `select_dtypes("number")` alone would leave "island" out, so its
# column names are combined with "island" explicitly (island first).
numeric_columns = penguins_pdf.select_dtypes("number").columns
penguins_pdf[["island", *numeric_columns]].head()

# Order by

In [None]:
# ibis: order by flipper length, then keep three columns for display.
penguins_by_flipper = penguins.order_by(penguins["flipper_length_mm"])
penguins_by_flipper.select("species", "island", "flipper_length_mm")

In [None]:
# pandas: sort on flipper length, then show the same three columns.
penguins_pdf_sorted = penguins_pdf.sort_values(["flipper_length_mm"])
penguins_pdf_sorted[["species", "island", "flipper_length_mm"]].head()

# Aggregates

In [None]:
# ibis: mean of a single column.
penguins["flipper_length_mm"].mean()


In [None]:
# pandas: mean of the same column.
penguins_pdf["flipper_length_mm"].mean()

In [None]:
# ibis: compute two reductions in a single aggregate call.
mean_flipper_length = penguins["flipper_length_mm"].mean()
max_bill_depth = penguins["bill_depth_mm"].max()
penguins.aggregate([mean_flipper_length, max_bill_depth])

In [None]:
# pandas: map each column name to the reduction applied to it.
column_reductions = {"flipper_length_mm": "mean", "bill_depth_mm": "max"}
penguins_pdf.aggregate(column_reductions)

# Group by

In [None]:
# ibis: mean bill length per (species, island) pair, with an explicit name
# for the resulting column.
mean_bill_length = penguins["bill_length_mm"].mean().name("mean_bill_length_mm")
penguins.group_by(["species", "island"]).aggregate(mean_bill_length)

In [None]:
# pandas: group on the same two keys; the keyword names the output column and
# the tuple gives (source column, reduction).
penguins_by_species_island = penguins_pdf.groupby(["species", "island"])
penguins_by_species_island.agg(mean_bill_length_mm=("bill_length_mm", "mean"))