In [1]:
import numpy as np
import pandas as pd
import polars as pl


# One column per behaviour explored in this notebook. Column names mirror the
# dtype (or the kind of missing value) that each column is meant to showcase.
data = dict(
    int64=[0, 1, 2, 3, 500],
    float64=[6.1, 7.2, 8.3, 9.4, 10.5],
    bool=[True, False, False, True, True],
    # Python None is stored as a polars null.
    null=[5, 10, None, None, 30],
    # NaN is written both the NumPy way and the pure-Python way.
    nan=[11.1, np.nan, float("nan"), 33.3, 22.2],
)
df = pl.DataFrame(data)
print(df)

shape: (5, 5)
┌───────┬─────────┬───────┬──────┬──────┐
│ int64 ┆ float64 ┆ bool  ┆ null ┆ nan  │
│ ---   ┆ ---     ┆ ---   ┆ ---  ┆ ---  │
│ i64   ┆ f64     ┆ bool  ┆ i64  ┆ f64  │
╞═══════╪═════════╪═══════╪══════╪══════╡
│ 0     ┆ 6.1     ┆ true  ┆ 5    ┆ 11.1 │
│ 1     ┆ 7.2     ┆ false ┆ 10   ┆ NaN  │
│ 2     ┆ 8.3     ┆ false ┆ null ┆ NaN  │
│ 3     ┆ 9.4     ┆ true  ┆ null ┆ 33.3 │
│ 500   ┆ 10.5    ┆ true  ┆ 30   ┆ 22.2 │
└───────┴─────────┴───────┴──────┴──────┘


## pl.Int & pl.UInt

In [2]:
# Cast the i64 column to a narrower signed and an unsigned integer type.
# Keyword arguments to select() name the resulting columns.
print(
    df.select(
        "int64",
        Int32=pl.col("int64").cast(pl.Int32),
        UInt32=pl.col("int64").cast(pl.UInt32),
    )
)

shape: (5, 3)
┌───────┬───────┬────────┐
│ int64 ┆ Int32 ┆ UInt32 │
│ ---   ┆ ---   ┆ ---    │
│ i64   ┆ i32   ┆ u32    │
╞═══════╪═══════╪════════╡
│ 0     ┆ 0     ┆ 0      │
│ 1     ┆ 1     ┆ 1      │
│ 2     ┆ 2     ┆ 2      │
│ 3     ┆ 3     ┆ 3      │
│ 500   ┆ 500   ┆ 500    │
└───────┴───────┴────────┘


In [3]:
# The value 500 cannot be represented as Int8, so the cast raises instead of
# producing a result. Catch the error and show its message so the notebook
# still runs top to bottom while demonstrating the failure.
try:
    print(df.select("int64", pl.col("int64").cast(pl.Int8).alias("Int8")))
except pl.exceptions.InvalidOperationError as exc:
    # Expected message: conversion from `i64` to `i8` failed in column 'int64'
    # for 1 out of 5 values: [500]
    print(f"InvalidOperationError: {exc}")

In [4]:
# Casting an integer to Date treats the value as a day offset from 1970-01-01.
print(
    df.select(
        "int64",
        date=pl.col("int64").cast(pl.Date),
    )
)

shape: (5, 2)
┌───────┬────────────┐
│ int64 ┆ date       │
│ ---   ┆ ---        │
│ i64   ┆ date       │
╞═══════╪════════════╡
│ 0     ┆ 1970-01-01 │
│ 1     ┆ 1970-01-02 │
│ 2     ┆ 1970-01-03 │
│ 3     ┆ 1970-01-04 │
│ 500   ┆ 1971-05-16 │
└───────┴────────────┘


## pl.Float

In [5]:
# Show the f64 column next to its f32 down-cast.
print(df.select("float64", float32=pl.col("float64").cast(pl.Float32)))

shape: (5, 2)
┌─────────┬─────────┐
│ float64 ┆ float32 │
│ ---     ┆ ---     │
│ f64     ┆ f32     │
╞═════════╪═════════╡
│ 6.1     ┆ 6.1     │
│ 7.2     ┆ 7.2     │
│ 8.3     ┆ 8.3     │
│ 9.4     ┆ 9.4     │
│ 10.5    ┆ 10.5    │
└─────────┴─────────┘


In [6]:
# True division of an integer column by itself yields f64; the 0 / 0 row
# comes out as NaN rather than raising or producing null.
print(
    df.select(
        "int64",
        (pl.col("int64") / pl.col("int64")).alias("int64 / int64"),
    )
)

shape: (5, 2)
┌───────┬───────────────┐
│ int64 ┆ int64 / int64 │
│ ---   ┆ ---           │
│ i64   ┆ f64           │
╞═══════╪═══════════════╡
│ 0     ┆ NaN           │
│ 1     ┆ 1.0           │
│ 2     ┆ 1.0           │
│ 3     ┆ 1.0           │
│ 500   ┆ 1.0           │
└───────┴───────────────┘


### NaN

In [7]:
# Detect NaN values and replace them, first with a constant and then with a
# value computed per row from another column (int64 * 100).
print(
    df.select(
        "nan",
        is_nan=pl.col("nan").is_nan(),
        fill_10=pl.col("nan").fill_nan(10),
        fill_expr=pl.col("nan").fill_nan(pl.col("int64") * 100),
    )
)

shape: (5, 4)
┌──────┬────────┬─────────┬───────────┐
│ nan  ┆ is_nan ┆ fill_10 ┆ fill_expr │
│ ---  ┆ ---    ┆ ---     ┆ ---       │
│ f64  ┆ bool   ┆ f64     ┆ f64       │
╞══════╪════════╪═════════╪═══════════╡
│ 11.1 ┆ false  ┆ 11.1    ┆ 11.1      │
│ NaN  ┆ true   ┆ 10.0    ┆ 100.0     │
│ NaN  ┆ true   ┆ 10.0    ┆ 200.0     │
│ 33.3 ┆ false  ┆ 33.3    ┆ 33.3      │
│ 22.2 ┆ false  ┆ 22.2    ┆ 22.2      │
└──────┴────────┴─────────┴───────────┘


## pl.Boolean

In [8]:
# Boolean column: logical negation, numeric cast (true -> 1, false -> 0), and
# sum(), which counts the true values and is broadcast to every row.
print(
    df.select(
        "bool",
        (~pl.col("bool")).alias("~bool"),
        bool_to_uint8=pl.col("bool").cast(pl.UInt8),
        sum=pl.col("bool").sum(),
    )
)

shape: (5, 4)
┌───────┬───────┬───────────────┬─────┐
│ bool  ┆ ~bool ┆ bool_to_uint8 ┆ sum │
│ ---   ┆ ---   ┆ ---           ┆ --- │
│ bool  ┆ bool  ┆ u8            ┆ u32 │
╞═══════╪═══════╪═══════════════╪═════╡
│ true  ┆ false ┆ 1             ┆ 3   │
│ false ┆ true  ┆ 0             ┆ 3   │
│ false ┆ true  ┆ 0             ┆ 3   │
│ true  ┆ false ┆ 1             ┆ 3   │
│ true  ┆ false ┆ 1             ┆ 3   │
└───────┴───────┴───────────────┴─────┘


## null (missing values)

In [9]:
# Ways to fill nulls: a constant, another column, the previous or next
# non-null value, and interpolation between the neighbouring values.
print(
    df.select(
        "null",
        "float64",
        fill_10=pl.col("null").fill_null(10),
        from_float64=pl.col("null").fill_null(pl.col("float64")),
        forward=pl.col("null").fill_null(strategy="forward"),
        backward=pl.col("null").fill_null(strategy="backward"),
        interpolate=pl.col("null").interpolate(),
    )
)

shape: (5, 7)
┌──────┬─────────┬─────────┬──────────────┬─────────┬──────────┬─────────────┐
│ null ┆ float64 ┆ fill_10 ┆ from_float64 ┆ forward ┆ backward ┆ interpolate │
│ ---  ┆ ---     ┆ ---     ┆ ---          ┆ ---     ┆ ---      ┆ ---         │
│ i64  ┆ f64     ┆ i64     ┆ f64          ┆ i64     ┆ i64      ┆ f64         │
╞══════╪═════════╪═════════╪══════════════╪═════════╪══════════╪═════════════╡
│ 5    ┆ 6.1     ┆ 5       ┆ 5.0          ┆ 5       ┆ 5        ┆ 5.0         │
│ 10   ┆ 7.2     ┆ 10      ┆ 10.0         ┆ 10      ┆ 10       ┆ 10.0        │
│ null ┆ 8.3     ┆ 10      ┆ 8.3          ┆ 10      ┆ 30       ┆ 16.666667   │
│ null ┆ 9.4     ┆ 10      ┆ 9.4          ┆ 10      ┆ 30       ┆ 23.333333   │
│ 30   ┆ 10.5    ┆ 30      ┆ 30.0         ┆ 30      ┆ 30       ┆ 30.0        │
└──────┴─────────┴─────────┴──────────────┴─────────┴──────────┴─────────────┘


## null vs. NaN in aggregations

In [10]:
# mean() skips nulls, but a single NaN makes the whole result NaN.
print(
    df.select(
        pl.col("null").mean(),
        pl.col("nan").mean(),
    )
)

shape: (1, 2)
┌──────┬─────┐
│ null ┆ nan │
│ ---  ┆ --- │
│ f64  ┆ f64 │
╞══════╪═════╡
│ 15.0 ┆ NaN │
└──────┴─────┘


In [11]:
# Turn NaN into null before aggregating so that mean() ignores those rows.
print(
    df.select(
        pl.col("null").mean(),
        pl.col("nan").fill_nan(None).mean(),
    )
)

shape: (1, 2)
┌──────┬──────┐
│ null ┆ nan  │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 15.0 ┆ 22.2 │
└──────┴──────┘


In [12]:
# Rich display of the frame. `df` has not been reassigned since it was
# created, so this is still the original data (NaN values included).
df

int64,float64,bool,null,nan
i64,f64,bool,i64,f64
0,6.1,True,5.0,11.1
1,7.2,False,10.0,
2,8.3,False,,
3,9.4,True,,33.3
500,10.5,True,30.0,22.2


In [13]:
# NaN has no integer representation, so casting the column to Int32 raises.
# Catch the error and show its message so the notebook still runs top to
# bottom while demonstrating the failure.
try:
    df.select(pl.col("nan").cast(pl.Int32))
except pl.exceptions.InvalidOperationError as exc:
    # Expected message: conversion from `f64` to `i32` failed in column 'nan'
    # for 2 out of 5 values: [NaN, NaN]
    print(f"InvalidOperationError: {exc}")

## codepanda

In [14]:
df_pd = pd.DataFrame({"v0_int64": [0]}, dtype="int64").assign(
    v1_Int64=lambda df_: df_.v0_int64.astype({"v0_int64": "Int64"}),
    v2_int64pyarrow=lambda df_: df_.v0_int64.astype(
        {"v0_int64": "int64[pyarrow]"}
    ),
)

print(df_pd.dtypes)

v0_int64                    int64
v1_Int64                    Int64
v2_int64pyarrow    int64[pyarrow]
dtype: object
