## Quick start

Installation instructions: https://narwhals-dev.github.io/narwhals/installation/

In [2]:
from __future__ import annotations
import pandas as pd
import polars as pl
import pyarrow as pa
import narwhals as nw
from narwhals.typing import IntoFrame

In [3]:
def agnostic_get_columns(df_native: IntoFrame) -> list[str]:
    """Return the column names of ``df_native``.

    The native object is wrapped with ``nw.from_native`` first, so the same
    code path serves every dataframe library Narwhals accepts.
    """
    return nw.from_native(df_native).columns

In [12]:
# One plain-Python dataset, loaded into each backend so the same
# dataframe-agnostic function can be exercised against all three.
data = dict(a=[1, 2, 3], b=[4, 5, 6])
df_pandas = pd.DataFrame(data)
df_polars = pl.DataFrame(data)
table_pa = pa.table(data)

In [5]:
# Call the same function on each backend and show what it returns.
for label, frame in (
    ("pandas", df_pandas),
    ("Polars", df_polars),
    ("PyArrow", table_pa),
):
    print(f"{label} output")
    print(agnostic_get_columns(frame))

pandas output
['a', 'b']
Polars output
['a', 'b']
PyArrow output
['a', 'b']


This is the simplest possible example of a dataframe-agnostic function — as we'll soon see, we can do much more advanced things.

## DataFrame

To write a dataframe-agnostic function, the steps you'll want to follow are:

1. Initialise a Narwhals DataFrame or LazyFrame by passing your dataframe to `nw.from_native`. All the calculations stay lazy if we start with a lazy dataframe - Narwhals will never automatically trigger computation without you asking it to.

    Note: if you need eager execution, make sure to pass `eager_only=True` to `nw.from_native`.

2. Express your logic using the subset of the Polars API supported by Narwhals.

3. If you need to return a dataframe to the user in its original library, call `nw.to_native`.

Steps 1 and 3 are so common that we provide a utility decorator, `@nw.narwhalify`, which handles them for you so that you only need to write step 2 explicitly.

Let's explore this with some simple examples.



### Example 1: descriptive statistics

Just like in Polars, we can pass expressions to `DataFrame.select` or `LazyFrame.select`.

In [8]:
from narwhals.typing import IntoFrameT

def func(df: IntoFrameT) -> IntoFrameT:
    """Summarise column ``a`` with its sum, mean and standard deviation.

    The input is wrapped with ``nw.from_native``, reduced to the columns
    ``a_sum``, ``a_mean`` and ``a_std`` via ``select``, and converted back
    with ``to_native`` before being returned.
    """
    col_a = nw.col("a")
    frame = nw.from_native(df)
    summary = frame.select(
        a_sum=col_a.sum(),
        a_mean=col_a.mean(),
        a_std=col_a.std(),
    )
    return summary.to_native()

In [None]:
# Run the descriptive-statistics function on a pandas DataFrame.
df = pd.DataFrame({"a": [1, 1, 2]})
print(func(df))

   a_sum    a_mean    a_std
0      4  1.333333  0.57735


In [None]:
# Run the descriptive-statistics function on a Polars DataFrame.
df = pl.DataFrame({"a": [1, 1, 2]})
print(func(df))

shape: (1, 3)
┌───────┬──────────┬─────────┐
│ a_sum ┆ a_mean   ┆ a_std   │
│ ---   ┆ ---      ┆ ---     │
│ i64   ┆ f64      ┆ f64     │
╞═══════╪══════════╪═════════╡
│ 4     ┆ 1.333333 ┆ 0.57735 │
└───────┴──────────┴─────────┘


In [None]:
# Run the descriptive-statistics function on a PyArrow Table.
table = pa.table({"a": [1, 1, 2]})
print(func(table))

pyarrow.Table
a_sum: int64
a_mean: double
a_std: double
----
a_sum: [[4]]
a_mean: [[1.3333333333333333]]
a_std: [[0.5773502691896257]]


### Example 2: group-by and mean


Just like in Polars, we can pass expressions to `GroupBy.agg`.

In [13]:
def func(df: IntoFrameT) -> IntoFrameT:
    """Return the mean of ``b`` for each value of ``a``, sorted by ``a``.

    The input is wrapped with ``nw.from_native`` and the result is converted
    back with ``to_native`` before being returned.
    """
    frame = nw.from_native(df)
    means = frame.group_by("a").agg(nw.col("b").mean())
    # Sort explicitly so the printed row order is the same for every backend.
    return means.sort("a").to_native()

In [None]:
# Run the group-by function on a pandas DataFrame.
df = pd.DataFrame(
    {
        "a": [1, 1, 2],
        "b": [4, 5, 6],
    }
)
print(func(df))

   a    b
0  1  4.5
1  2  6.0


In [None]:
# check in polars
df = pl.DataFrame({"a": [1, 1, 2], "b": [4, 5, 6]})
print(func(df))

shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 1   ┆ 4.5 │
│ 2   ┆ 6.0 │
└─────┴─────┘


In [17]:
# Run the group-by function on a PyArrow Table.
table = pa.table(
    {
        "a": [1, 1, 2],
        "b": [4, 5, 6],
    }
)
print(func(table))

pyarrow.Table
a: int64
b: double
----
a: [[1,2]]
b: [[4.5,6]]
