# nw.struct function

## Top-level function

In [3]:
def struct(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    r"""Horizontally combine multiple columns into a single struct column.

    Arguments:
        exprs: One or more expressions to combine into a struct. Strings are treated as column names.
        *more_exprs: Additional columns or expressions, passed as positional arguments.

    Returns:
        An expression that produces a single struct column containing the given fields.

    Example:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [1, 2, 3],
        ...     "b": ["dogs", "cats", None],
        ...     "c": ["play", "swim", "walk"],
        ... }
        >>> df_native = pd.DataFrame(data)
        >>> (
        ...     nw.from_native(df_native).select(
        ...         nw.struct([nw.col("a") * 2, nw.col("b"), nw.col("c")]).alias(
        ...             "my_struct"
        ...         )
        ...     )
        ... )
        ┌──────────────────────────┐
        |     Narwhals DataFrame   |
        |--------------------------|
        |     my_struct            |
        | 0  {'a': 1, 'b': 'dogs'} |
        | 1  {'a': 2, 'b': 'cats'} |
        | 2  {'a': 3, 'b': None}   |
        └──────────────────────────┘
    """
    flat_exprs = flatten([*flatten([exprs]), *more_exprs])
    return _expr_with_horizontal_op("struct", *flat_exprs)

NameError: name 'IntoExpr' is not defined

## Backends

### A. Pandas

In [None]:
# --- all imports from namespace = pandas_like ---

from __future__ import annotations

import operator
import warnings
from functools import reduce
from itertools import chain
from typing import TYPE_CHECKING, Any, Literal, Protocol, overload

from narwhals._compliant import EagerNamespace
from narwhals._expression_parsing import (
    combine_alias_output_names,
    combine_evaluate_output_names,
)
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.selectors import PandasSelectorNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.typing import NativeDataFrameT, NativeSeriesT
from narwhals._pandas_like.utils import is_non_nullable_boolean
from narwhals._utils import zip_strict

if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

    from typing_extensions import TypeAlias

    from narwhals._utils import Implementation, Version
    from narwhals.typing import IntoDType, NonNestedLiteral

# --- def struct() ---

def struct(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        import pandas as pd
        import pyarrow.compute as pc

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            # Evaluate each expression to a PandasLikeSeries
            series_list = [s for _expr in exprs for s in _expr(df)]

            # Horizontally concatenate the series into a native DataFrame.
            df = self.concat(
                (s.to_frame() for s in series_list), how="horizontal"
            )._native_frame
            df_arrow = df.convert_dtypes(dtype_backend="pyarrow")
            arrays = [df_arrow[col].array._pa_array for col in df.columns]
            struct_array = pc.make_struct(*arrays, field_names=df.columns)
            struct_series = struct_array.to_pandas(
                types_mapper=lambda x: pd.ArrowDtype(x)
            )
            result = PandasLikeSeries(
                struct_series, implementation=self._implementation, version=self._version
            ).alias("struct")
            return [result]

        return self._expr._from_callable(
            func=func,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )


### B. Polars

In [None]:
# --- all imports from namespace = polars ---

from __future__ import annotations

import operator
from typing import TYPE_CHECKING, Any, Literal, cast, overload

import polars as pl

from narwhals._polars.expr import PolarsExpr
from narwhals._polars.series import PolarsSeries
from narwhals._polars.utils import extract_args_kwargs, narwhals_to_native_dtype
from narwhals._utils import Implementation, requires, zip_strict
from narwhals.dependencies import is_numpy_array_2d
from narwhals.dtypes import DType

if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence
    from datetime import timezone

    from typing_extensions import TypeIs

    from narwhals._compliant import CompliantSelectorNamespace
    from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame
    from narwhals._polars.typing import FrameT
    from narwhals._utils import Version, _LimitedContext
    from narwhals.typing import Into1DArray, IntoDType, IntoSchema, TimeUnit, _2DArray


# --- def struct() ---

def struct(self, *exprs: PolarsExpr) -> PolarsExpr:
    pl_exprs = [expr._native_expr for expr in exprs]
    return self._expr(pl.struct(pl_exprs), version=self._version)

### C. Arrow

In [None]:
# --- all imports from namespace = arrow ---

from __future__ import annotations

import operator
from functools import reduce
from itertools import chain
from typing import TYPE_CHECKING, Literal

import pyarrow as pa
import pyarrow.compute as pc

from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.expr import ArrowExpr
from narwhals._arrow.selectors import ArrowSelectorNamespace
from narwhals._arrow.series import ArrowSeries
from narwhals._arrow.utils import cast_to_comparable_string_types
from narwhals._compliant import EagerNamespace
from narwhals._expression_parsing import (
    combine_alias_output_names,
    combine_evaluate_output_names,
)
from narwhals._utils import Implementation

if TYPE_CHECKING:
    from collections.abc import Iterator, Sequence

    from narwhals._arrow.typing import ChunkedArrayAny, Incomplete, ScalarAny
    from narwhals._utils import Version
    from narwhals.typing import IntoDType, NonNestedLiteral


# --- def struct() ---

def struct(self, *exprs: ArrowExpr) -> ArrowExpr:
    def func(df: ArrowDataFrame) -> list[ArrowSeries]:
        series = list(chain.from_iterable(expr(df) for expr in exprs))
        arrays = [s._native_series.combine_chunks() for s in series]
        name = series[0].name
        struct_array = pc.make_struct(*arrays, field_names=[s.name for s in series])
        return [self._series(struct_array, name=name, version=self._version)]

    return self._expr._from_callable(
        func=func,
        evaluate_output_names=combine_evaluate_output_names(*exprs),
        alias_output_names=combine_alias_output_names(*exprs),
        context=self,
    )

## In use

After `nw.form_native(df)`, by using `.select()` or `.with_columns()` we get a new df only with the struct column or the original df with a new struct column at the end.

### Pandas

In [6]:
import narwhals as nw
import pandas as pd

df_native_pd = pd.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"],
    "c": [True, False, True],
})

column_struct_pd = nw.from_native(df_native_pd).select(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))
df_struct_pd = nw.from_native(df_native_pd).with_columns(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))

print(column_struct_pd)
print(df_struct_pd)

┌─────────────────────────────────┐
|       Narwhals DataFrame        |
|---------------------------------|
|                                t|
|0   {'a': 1, 'b': 'x', 'c': True}|
|1  {'a': 2, 'b': 'y', 'c': False}|
|2   {'a': 3, 'b': 'z', 'c': True}|
└─────────────────────────────────┘
┌──────────────────────────────────────────────┐
|              Narwhals DataFrame              |
|----------------------------------------------|
|   a  b      c                               t|
|0  1  x   True   {'a': 1, 'b': 'x', 'c': True}|
|1  2  y  False  {'a': 2, 'b': 'y', 'c': False}|
|2  3  z   True   {'a': 3, 'b': 'z', 'c': True}|
└──────────────────────────────────────────────┘


### Polars

In [15]:
import narwhals as nw
import polars as pl

df_native_pl = pl.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"],
    "c": [True, False, True],
})

column_struct_pl = nw.from_native(df_native_pl).select(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))
df_struct_pl = nw.from_native(df_native_pl).with_columns(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))


print(column_struct_pl)
print(df_struct_pl)

┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|shape: (3, 1)     |
|┌───────────────┐ |
|│ t             │ |
|│ ---           │ |
|│ struct[3]     │ |
|╞═══════════════╡ |
|│ {1,"x",true}  │ |
|│ {2,"y",false} │ |
|│ {3,"z",true}  │ |
|└───────────────┘ |
└──────────────────┘
┌─────────────────────────────────────┐
|         Narwhals DataFrame          |
|-------------------------------------|
|shape: (3, 4)                        |
|┌─────┬─────┬───────┬───────────────┐|
|│ a   ┆ b   ┆ c     ┆ t             │|
|│ --- ┆ --- ┆ ---   ┆ ---           │|
|│ i64 ┆ str ┆ bool  ┆ struct[3]     │|
|╞═════╪═════╪═══════╪═══════════════╡|
|│ 1   ┆ x   ┆ true  ┆ {1,"x",true}  │|
|│ 2   ┆ y   ┆ false ┆ {2,"y",false} │|
|│ 3   ┆ z   ┆ true  ┆ {3,"z",true}  │|
|└─────┴─────┴───────┴───────────────┘|
└─────────────────────────────────────┘


### Arrow

In [1]:
import narwhals as nw
import pyarrow as pa

table_native_pa = pa.table({
    "a": [1, 2, 3],
    "b": ["x", "y", "z"],
    "c": [True, False, True],
})

column_struct_pa = nw.from_native(table_native_pa).select(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))
df_struct_pa = nw.from_native(table_native_pa).with_columns(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))

print(column_struct_pa)
print(df_struct_pa)

┌───────────────────────────────────────┐
|          Narwhals DataFrame           |
|---------------------------------------|
|pyarrow.Table                          |
|t: struct<a: int64, b: string, c: bool>|
|  child 0, a: int64                    |
|  child 1, b: string                   |
|  child 2, c: bool                     |
|----                                   |
|t: [                                   |
|  -- is_valid: all not null            |
|  -- child 0 type: int64               |
|[1,2,3]                                |
|  -- child 1 type: string              |
|["x","y","z"]                          |
|  -- child 2 type: bool                |
|[true,false,true]]                     |
└───────────────────────────────────────┘
┌───────────────────────────────────────┐
|          Narwhals DataFrame           |
|---------------------------------------|
|pyarrow.Table                          |
|a: int64                               |
|b: string                        

## Raise errors

Struct must have the same type of variable per column in each row. Original df cannot contain mixed types.

### A. Pandas

AttributeError: 'NumpyExtensionArray' object has no attribute '_pa_array'

1. Narwhals’ Pandas backend attempts to create a struct column by first coverting each DataFrame to an Arrow table.
2. If a column contains mixed types, it becomes **"object" dtype**, which is **not Arrow-compatible** and cannot create an arrow array.


In [4]:
import narwhals as nw
import pandas as pd

df_native_pd = pd.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", 2, "z"],
    "c": [True, False, True],
})

column_struct_pd = nw.from_native(df_native_pd).select(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))

print(column_struct_pd)

TypeError: unexpected value while building Series of type str; found value of type int: 2

Hint: ensure all values in each column have the same dtype.

We will raise an error to explain that each column must contain a single dtype:

In [None]:
# Check for consistent types within each column
            for col in df.columns:
                values = df[col].tolist()
                non_null_values = [v for v in values if v is not None]
                if not non_null_values:
                    continue  # all nulls, skip
                first_type = type(non_null_values[0])
                for v in non_null_values[1:]:
                    if type(v) != first_type:
                        raise TypeError(
                            f"unexpected value while building Series of type {first_type.__name__}; "
                            f"found value of type {type(v).__name__}: {v}\n\n"
                            f"Hint: ensure all values in each column have the same dtype."
                        )

### B. Polars

TypeError: unexpected value while building Series of type String; found value of type Int64: 2

Hint: Try setting `strict=False` to allow passing data with mixed types.

**Polars requires each column to have a consistent type.**

In [23]:
import narwhals as nw
import polars as pl

df_native_pl = pl.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", 2, "z"],
    "c": [True, False, True],
})

column_struct_pl = nw.from_native(df_native_pl).select(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))

print(column_struct_pl)

TypeError: unexpected value while building Series of type String; found value of type Int64: 2

Hint: Try setting `strict=False` to allow passing data with mixed types.

Now let's add the `strict = False` while creating the polars df:
- It creates the df with mixed types (OK)
- **Column contains both ints and strings → coerced to str (Attention!!)**
- So it runs struct() without raising error

Polars documentation on Dataframe:

*Throw an error if any data value does not exactly match the given or inferred data type for that column. If set to False, values that do not match the data type are cast to that data type or, if casting is not possible, set to null instead.*

In [40]:
import narwhals as nw
import polars as pl

df_native_pl = pl.DataFrame({
    "a": [1, 2, 3],
    "b": ["x", 2, "z"],
    "c": [True, False, True],
}, strict = False)

column_struct_pl = nw.from_native(df_native_pl).select(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))

print(df_native_pl)
print(column_struct_pl)

shape: (3, 3)
┌─────┬─────┬───────┐
│ a   ┆ b   ┆ c     │
│ --- ┆ --- ┆ ---   │
│ i64 ┆ str ┆ bool  │
╞═════╪═════╪═══════╡
│ 1   ┆ x   ┆ true  │
│ 2   ┆ 2   ┆ false │
│ 3   ┆ z   ┆ true  │
└─────┴─────┴───────┘
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|shape: (3, 1)     |
|┌───────────────┐ |
|│ t             │ |
|│ ---           │ |
|│ struct[3]     │ |
|╞═══════════════╡ |
|│ {1,"x",true}  │ |
|│ {2,"2",false} │ |
|│ {3,"z",true}  │ |
|└───────────────┘ |
└──────────────────┘


### C. Arrow

In [2]:
import narwhals as nw
import pyarrow as pa

table_native_pa = pa.table({
    "a": [1, 2, 3],
    "b": ["x", 2, "z"],
    "c": [True, False, True],
})

column_struct_pa = nw.from_native(table_native_pa).select(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))
df_struct_pa = nw.from_native(table_native_pa).with_columns(nw.struct([nw.col("a"), nw.col("b"), nw.col("c")]).alias("t"))

print(column_struct_pa)
print(df_struct_pa)

ArrowTypeError: Expected bytes, got a 'int' object