# Top-level function/constructor

In [12]:
import narwhals as  nw

# struct() constructor function as per user use
nw.struct("a", "b", "c")

struct(a, b, c)

In [13]:
# struct() constructor function
# to be saved in narwhals/functions.py
from typing import Iterable
from narwhals.typing import IntoExpr
from narwhals._expression_parsing import parse_expr
from narwhals._expression import Expr, ExprNode
from narwhals._utils import flatten

def struct(
    exprs: IntoExpr | Iterable[IntoExpr],
    *more_exprs: IntoExpr,
) -> Expr:
    """
    Horizontally combine multiple columns into a single struct column.

    Arguments: (removed concat_struct)
        exprs: One or more expressions to combine into a struct. Strings are treated as column names.
        *more_exprs: Additional columns or expressions, passed as positional arguments.

    Returns:
        An expression that produces a single struct column containing the given fields.

    Example:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [1, 2, 3],
        ...     "b": ["dogs", "cats", None],
        ...     "c": ["play", "swim", "walk"],
        ... }
        >>> df_native = pd.DataFrame(data)
        >>> (
        ...     nw.from_native(df_native).select(
        ...         nw.struct("a", "b").alias("my_struct")
        ...     )
        ... )
        ┌──────────────────────────┐
        |     Narwhals DataFrame   |
        |--------------------------|
        |     my_struct            |
        | 0  {'a': 1, 'b': 'dogs'} |
        | 1  {'a': 2, 'b': 'cats'} |
        | 2  {'a': 3, 'b': None}   |
        └──────────────────────────┘
    """
    flat_exprs = flatten([*flatten([exprs]), *more_exprs])
    parsed_exprs = [parse_expr(e) for e in flat_exprs]
    node = ExprNode(kind="struct", exprs=parsed_exprs) # Build a symbolic ExprNode representing "make a struct"
    return Expr([node]) # Return an Expr object wrapping the node

ImportError: cannot import name 'parse_expr' from 'narwhals._expression_parsing' (/Users/maria/Documents/OpenSource/Narwhals/narwhals/narwhals/_expression_parsing.py)

# Backend implementation of nw.struct(...)

## A. Pandas

Based on MarcoGorelli suggestions:

Pandas doesn’t support struct columns natively, but you we can emulate it by using Arrow-backed dataframes in Pandas (via `convert_dtypes(dtype_backend='pyarrow')`)

In [None]:
import pandas as pd
import pyarrow.compute as pc
import pyarrow as pa

# Backend implementation of nw.struct(...) in Pandas

def struct(
        self, *exprs: PandasLikeExpr
    ) -> PandasLikeExpr:
        import pyarrow.compute as pc
        import pandas as pd

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            # Evaluate each expression to a PandasLikeSeries
            series_list = [s for _expr in exprs for s in _expr(df)]
            if not series_list:
                msg = "At least one expression must be passed to `struct`"
                raise ValueError(msg)

            # Horizontally concatenate the series into a native DataFrame.
            # Removed concat_struct usage
                (s.to_frame() for s in series_list), how="horizontal"
            )._native_frame
            df_arrow = df.convert_dtypes(dtype_backend="pyarrow")
            arrays = [df_arrow[col].array._pa_array for col in df.columns]
            struct_array = pc.make_struct(*arrays, field_names=df.columns)
            struct_series = struct_array.to_pandas(
                types_mapper=lambda x: pd.ArrowDtype(x)
            )
            result = PandasLikeSeries(
                struct_series, implementation=self._implementation, version=self._version
            ).alias("struct")
            return [result]

        return self._expr._from_callable(
            func=func,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )
# Example
df = pd.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"],
    "c": [True, False, True],
})

struct_df = make_struct_df_pd(df, columns=["a", "b", "c"], struct_col_name="my_struct")

print(struct_df)
print(struct_df.dtypes)

                        my_struct
0   {'a': 1, 'b': 'x', 'c': True}
1  {'a': 2, 'b': 'y', 'c': False}
2   {'a': 3, 'b': 'z', 'c': True}
my_struct    struct<a: int64, b: string, c: bool>[pyarrow]
dtype: object


## B. Polars

Polars natively supports struct expressions and has a built-in pl.struct(...) function:

In [11]:
import polars as pl

# Backend implementation of nw.struct(...) in Polars

def struct(self, *exprs: PolarsExpr) -> PolarsExpr:
        pl_exprs = [expr._native_expr for expr in exprs]
        return self._expr(pl.struct(pl_exprs), version=self._version)


# Example

df = pl.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"],
    "c": [True, False, True],
})

struct_df = make_struct_df_pl(df, columns=["a", "b", "c"], struct_col_name="my_struct")

print(struct_df)
print(struct_df.dtypes)

NameError: name 'PolarsExpr' is not defined

## C. Arrow

Unlike Pandas and Polars, Arrow doesn’t use “dataframes” in the same way — instead it uses **pyarrow.Tables** and **pyarrow.Arrays**.

In [None]:
import pyarrow as pa
import pyarrow.compute as pc

# Backend implementation of nw.struct(...) in Arrow

def make_struct_table_pa(table: pa.Table, columns: list[str], struct_col_name: str = "struct") -> pa.Table:
    arrays = [table[column].combine_chunks() for column in columns] # Combine each column into a single Arrow Array
    struct_array = pc.make_struct(*arrays, field_names=columns) # Unpack the arrays into make_struct
    return pa.table({struct_col_name: struct_array}) # Return a new Table with only the struct column

# Example
table = pa.table({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"],
    "c": [True, False, True],
})

struct_table = make_struct_table_pa(table, columns=["a", "b"], struct_col_name="my_struct")


print(struct_table.to_pydict()) # Only for visualization
print(struct_table)

{'my_struct': [{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'y'}, {'a': 3, 'b': 'z'}]}
pyarrow.Table
my_struct: struct<a: int64, b: string>
  child 0, a: int64
  child 1, b: string
----
my_struct: [
  -- is_valid: all not null
  -- child 0 type: int64
[1,2,3]
  -- child 1 type: string
["x","y","z"]]


In [None]:
import pyarrow as pa
import narwhals as nw

table = pa.table({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"]
})

df = nw.from_native(table)

result = df.select(
    nw.struct("a", "b").alias("my_struct")
)

print(result.to_native())

pyarrow.Table
my_struct: struct<a: int64, b: string>
  child 0, a: int64
  child 1, b: string
----
my_struct: [
  -- is_valid: all not null
  -- child 0 type: int64
[1,2,3]
  -- child 1 type: string
["x","y","z"]]


In [None]:
import narwhals as nw
from narwhals import Expr

assert isinstance(nw.struct(["a", "b"]), Expr)

In [None]:
expr = nw.struct(["a", "b"])
print(type(expr))

<class 'narwhals.expr.Expr'>


In [None]:
import polars as pl
df = pl.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"],
    "c": [True, False, True],
})

df.select(nw.struct(["a", "b"]).alias("my_struct"))

TypeError: cannot create expression literal for value of type Expr.

Hint: Pass `allow_object=True` to accept any value and create a literal of type Object.