In [1]:
import zoneinfo
from datetime import date, datetime, time, timedelta

import pandas as pd
import polars as pl

In [2]:
# Raises ColumnNotFoundError: string arguments to `pl.date_range` are parsed
# as column names, so pass Python `date` objects instead.
# df = pl.dataframe(
#     pl.date_range("2025-01-01", "2025-12-31", interval="1mo", eager=True)
# )

In [3]:
# Six-row demo frame with one column per temporal dtype used in this notebook.
df = pl.DataFrame(
    dict(
        # Month starts, January through June 2025 (both ends included).
        date=pl.date_range(
            start=date(2025, 1, 1),
            end=date(2025, 6, 1),
            interval="1mo",
            eager=True,
        ),
        # ISO-formatted date strings, parsed later with `str.to_date()`.
        date_str=[
            "2025-07-05",
            "2025-08-05",
            "2025-09-10",
            "2025-10-10",
            "2025-11-20",
            "2025-12-20",
        ],
        # Every 4 hours on 2025-01-01; `closed="left"` drops the end bound.
        datetime=pl.datetime_range(
            start=datetime(2025, 1, 1),
            end=datetime(2025, 1, 2),
            interval="4h",
            closed="left",
            eager=True,
        ),
        # Time-zone-aware (UTC) month starts.
        datetime_utc=pl.datetime_range(
            start=datetime(2025, 1, 1),
            end=datetime(2025, 6, 1),
            interval="1mo",
            time_zone="UTC",
            eager=True,
        ),
        # 13:00 through 13:25 in 5-minute steps.
        time=pl.time_range(
            start=time(13, 0, 0),
            end=time(13, 25, 0),
            interval=timedelta(minutes=5),
            eager=True,
        ),
    )
)
print(df)

shape: (6, 5)
┌────────────┬────────────┬─────────────────────┬─────────────────────────┬──────────┐
│ date       ┆ date_str   ┆ datetime            ┆ datetime_utc            ┆ time     │
│ ---        ┆ ---        ┆ ---                 ┆ ---                     ┆ ---      │
│ date       ┆ str        ┆ datetime[μs]        ┆ datetime[μs, UTC]       ┆ time     │
╞════════════╪════════════╪═════════════════════╪═════════════════════════╪══════════╡
│ 2025-01-01 ┆ 2025-07-05 ┆ 2025-01-01 00:00:00 ┆ 2025-01-01 00:00:00 UTC ┆ 13:00:00 │
│ 2025-02-01 ┆ 2025-08-05 ┆ 2025-01-01 04:00:00 ┆ 2025-02-01 00:00:00 UTC ┆ 13:05:00 │
│ 2025-03-01 ┆ 2025-09-10 ┆ 2025-01-01 08:00:00 ┆ 2025-03-01 00:00:00 UTC ┆ 13:10:00 │
│ 2025-04-01 ┆ 2025-10-10 ┆ 2025-01-01 12:00:00 ┆ 2025-04-01 00:00:00 UTC ┆ 13:15:00 │
│ 2025-05-01 ┆ 2025-11-20 ┆ 2025-01-01 16:00:00 ┆ 2025-05-01 00:00:00 UTC ┆ 13:20:00 │
│ 2025-06-01 ┆ 2025-12-20 ┆ 2025-01-01 20:00:00 ┆ 2025-06-01 00:00:00 UTC ┆ 13:25:00 │
└────────────┴────────────┴──

## to_string & dt

In [4]:
# Format, extract and cast temporal columns; keyword arguments name the outputs.
print(
    df.select(
        "date",
        strftime=pl.col("date").dt.strftime("%Y/%m/%d"),
        month=pl.col("date").dt.month(),  # a method call, not the attribute `dt.month`
        date_in_days=pl.col("date").cast(pl.Int32),
        datetime_in_microsecs=pl.col("datetime").cast(pl.Int64),
    )
)

shape: (6, 5)
┌────────────┬────────────┬───────┬──────────────┬───────────────────────┐
│ date       ┆ strftime   ┆ month ┆ date_in_days ┆ datetime_in_microsecs │
│ ---        ┆ ---        ┆ ---   ┆ ---          ┆ ---                   │
│ date       ┆ str        ┆ i8    ┆ i32          ┆ i64                   │
╞════════════╪════════════╪═══════╪══════════════╪═══════════════════════╡
│ 2025-01-01 ┆ 2025/01/01 ┆ 1     ┆ 20089        ┆ 1735689600000000      │
│ 2025-02-01 ┆ 2025/02/01 ┆ 2     ┆ 20120        ┆ 1735704000000000      │
│ 2025-03-01 ┆ 2025/03/01 ┆ 3     ┆ 20148        ┆ 1735718400000000      │
│ 2025-04-01 ┆ 2025/04/01 ┆ 4     ┆ 20179        ┆ 1735732800000000      │
│ 2025-05-01 ┆ 2025/05/01 ┆ 5     ┆ 20209        ┆ 1735747200000000      │
│ 2025-06-01 ┆ 2025/06/01 ┆ 6     ┆ 20240        ┆ 1735761600000000      │
└────────────┴────────────┴───────┴──────────────┴───────────────────────┘


## to_date, duration & dt 

In [5]:
# Show the demo frame again (notebook rich display) for reference.
df

date,date_str,datetime,datetime_utc,time
date,str,datetime[μs],"datetime[μs, UTC]",time
2025-01-01,"""2025-07-05""",2025-01-01 00:00:00,2025-01-01 00:00:00 UTC,13:00:00
2025-02-01,"""2025-08-05""",2025-01-01 04:00:00,2025-02-01 00:00:00 UTC,13:05:00
2025-03-01,"""2025-09-10""",2025-01-01 08:00:00,2025-03-01 00:00:00 UTC,13:10:00
2025-04-01,"""2025-10-10""",2025-01-01 12:00:00,2025-04-01 00:00:00 UTC,13:15:00
2025-05-01,"""2025-11-20""",2025-01-01 16:00:00,2025-05-01 00:00:00 UTC,13:20:00
2025-06-01,"""2025-12-20""",2025-01-01 20:00:00,2025-06-01 00:00:00 UTC,13:25:00


In [6]:
# Parse the string column to dates, subtract to get a duration, then express
# that duration as whole days and whole hours.
print(
    df.with_columns(to_date=pl.col("date_str").str.to_date())
    # `duration` needs `to_date`, so it is computed in a second step.
    .with_columns(duration=pl.col("date") - pl.col("to_date"))
    .select(
        "date_str",
        "to_date",
        "duration",
        duration_days=pl.col("duration").dt.total_days(),
        duration_hours=pl.col("duration").dt.total_hours(),
    )
)

shape: (6, 5)
┌────────────┬────────────┬──────────────┬───────────────┬────────────────┐
│ date_str   ┆ to_date    ┆ duration     ┆ duration_days ┆ duration_hours │
│ ---        ┆ ---        ┆ ---          ┆ ---           ┆ ---            │
│ str        ┆ date       ┆ duration[μs] ┆ i64           ┆ i64            │
╞════════════╪════════════╪══════════════╪═══════════════╪════════════════╡
│ 2025-07-05 ┆ 2025-07-05 ┆ -185d        ┆ -185          ┆ -4440          │
│ 2025-08-05 ┆ 2025-08-05 ┆ -185d        ┆ -185          ┆ -4440          │
│ 2025-09-10 ┆ 2025-09-10 ┆ -193d        ┆ -193          ┆ -4632          │
│ 2025-10-10 ┆ 2025-10-10 ┆ -192d        ┆ -192          ┆ -4608          │
│ 2025-11-20 ┆ 2025-11-20 ┆ -203d        ┆ -203          ┆ -4872          │
│ 2025-12-20 ┆ 2025-12-20 ┆ -202d        ┆ -202          ┆ -4848          │
└────────────┴────────────┴──────────────┴───────────────┴────────────────┘


## pl.Expr.dt.combine

In [7]:
# Merge the `date` and `time` columns into a single datetime column, and show
# the integer form of `time` next to it.
print(
    df.select(
        pl.col("date", "time"),
        combined_datetime=pl.col("date").dt.combine(pl.col("time")),
        time_in_nanosecs=pl.col("time").cast(pl.Int64),
    )
)

shape: (6, 4)
┌────────────┬──────────┬─────────────────────┬──────────────────┐
│ date       ┆ time     ┆ combined_datetime   ┆ time_in_nanosecs │
│ ---        ┆ ---      ┆ ---                 ┆ ---              │
│ date       ┆ time     ┆ datetime[μs]        ┆ i64              │
╞════════════╪══════════╪═════════════════════╪══════════════════╡
│ 2025-01-01 ┆ 13:00:00 ┆ 2025-01-01 13:00:00 ┆ 46800000000000   │
│ 2025-02-01 ┆ 13:05:00 ┆ 2025-02-01 13:05:00 ┆ 47100000000000   │
│ 2025-03-01 ┆ 13:10:00 ┆ 2025-03-01 13:10:00 ┆ 47400000000000   │
│ 2025-04-01 ┆ 13:15:00 ┆ 2025-04-01 13:15:00 ┆ 47700000000000   │
│ 2025-05-01 ┆ 13:20:00 ┆ 2025-05-01 13:20:00 ┆ 48000000000000   │
│ 2025-06-01 ┆ 13:25:00 ┆ 2025-06-01 13:25:00 ┆ 48300000000000   │
└────────────┴──────────┴─────────────────────┴──────────────────┘


## timezone

In [8]:
# https://docs.pola.rs/api/python/stable/reference/api/polars.datatypes.Datetime.html
# List the IANA time zone keys known to `zoneinfo` on this system.
zoneinfo.available_timezones()

{'Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome',
 'Africa/Timbuktu',
 'Africa/

In [9]:
# `convert_time_zone` shifts the displayed wall-clock time to the new zone;
# `replace_time_zone` keeps the wall-clock time and only swaps the zone label.
print(
    df.select(
        "datetime_utc",
        convert_tz_tpe=pl.col("datetime_utc").dt.convert_time_zone(
            "Asia/Taipei"
        ),
        replace_tz_tpe=pl.col("datetime_utc").dt.replace_time_zone(
            "Asia/Taipei"
        ),
    )
)

shape: (6, 3)
┌─────────────────────────┬───────────────────────────┬───────────────────────────┐
│ datetime_utc            ┆ convert_tz_tpe            ┆ replace_tz_tpe            │
│ ---                     ┆ ---                       ┆ ---                       │
│ datetime[μs, UTC]       ┆ datetime[μs, Asia/Taipei] ┆ datetime[μs, Asia/Taipei] │
╞═════════════════════════╪═══════════════════════════╪═══════════════════════════╡
│ 2025-01-01 00:00:00 UTC ┆ 2025-01-01 08:00:00 CST   ┆ 2025-01-01 00:00:00 CST   │
│ 2025-02-01 00:00:00 UTC ┆ 2025-02-01 08:00:00 CST   ┆ 2025-02-01 00:00:00 CST   │
│ 2025-03-01 00:00:00 UTC ┆ 2025-03-01 08:00:00 CST   ┆ 2025-03-01 00:00:00 CST   │
│ 2025-04-01 00:00:00 UTC ┆ 2025-04-01 08:00:00 CST   ┆ 2025-04-01 00:00:00 CST   │
│ 2025-05-01 00:00:00 UTC ┆ 2025-05-01 08:00:00 CST   ┆ 2025-05-01 00:00:00 CST   │
│ 2025-06-01 00:00:00 UTC ┆ 2025-06-01 08:00:00 CST   ┆ 2025-06-01 00:00:00 CST   │
└─────────────────────────┴───────────────────────────┴───────

In [10]:
# Passing `None` to `replace_time_zone` drops the zone, giving a naive datetime.
print(
    df.select(
        "datetime_utc",
        no_tz=pl.col("datetime_utc").dt.replace_time_zone(None),
    )
)

shape: (6, 2)
┌─────────────────────────┬─────────────────────┐
│ datetime_utc            ┆ no_tz               │
│ ---                     ┆ ---                 │
│ datetime[μs, UTC]       ┆ datetime[μs]        │
╞═════════════════════════╪═════════════════════╡
│ 2025-01-01 00:00:00 UTC ┆ 2025-01-01 00:00:00 │
│ 2025-02-01 00:00:00 UTC ┆ 2025-02-01 00:00:00 │
│ 2025-03-01 00:00:00 UTC ┆ 2025-03-01 00:00:00 │
│ 2025-04-01 00:00:00 UTC ┆ 2025-04-01 00:00:00 │
│ 2025-05-01 00:00:00 UTC ┆ 2025-05-01 00:00:00 │
│ 2025-06-01 00:00:00 UTC ┆ 2025-06-01 00:00:00 │
└─────────────────────────┴─────────────────────┘


## group_by_dynamic()

In [11]:
# Bucket rows into 2-month windows on `date` and collect `time` per window.
print(
    df.group_by_dynamic(index_column="date", every="2mo").agg("time")
)

shape: (3, 2)
┌────────────┬──────────────────────┐
│ date       ┆ time                 │
│ ---        ┆ ---                  │
│ date       ┆ list[time]           │
╞════════════╪══════════════════════╡
│ 2025-01-01 ┆ [13:00:00, 13:05:00] │
│ 2025-03-01 ┆ [13:10:00, 13:15:00] │
│ 2025-05-01 ┆ [13:20:00, 13:25:00] │
└────────────┴──────────────────────┘


In [12]:
# A 3-month `period` with a 2-month `every` step makes neighbouring windows
# overlap, so some rows appear in two groups.
print(
    df.group_by_dynamic(
        index_column="date",
        every="2mo",
        period="3mo",
    ).agg("time")
)

shape: (3, 2)
┌────────────┬────────────────────────────────┐
│ date       ┆ time                           │
│ ---        ┆ ---                            │
│ date       ┆ list[time]                     │
╞════════════╪════════════════════════════════╡
│ 2025-01-01 ┆ [13:00:00, 13:05:00, 13:10:00] │
│ 2025-03-01 ┆ [13:10:00, 13:15:00, 13:20:00] │
│ 2025-05-01 ┆ [13:20:00, 13:25:00]           │
└────────────┴────────────────────────────────┘


In [13]:
# Additionally group by the last two characters of `date_str` before windowing.
print(
    df.group_by_dynamic(
        index_column="date",
        every="2mo",
        group_by=pl.col("date_str").str.slice(-2),
    ).agg("time")
)

shape: (3, 3)
┌──────────┬────────────┬──────────────────────┐
│ date_str ┆ date       ┆ time                 │
│ ---      ┆ ---        ┆ ---                  │
│ str      ┆ date       ┆ list[time]           │
╞══════════╪════════════╪══════════════════════╡
│ 05       ┆ 2025-01-01 ┆ [13:00:00, 13:05:00] │
│ 10       ┆ 2025-03-01 ┆ [13:10:00, 13:15:00] │
│ 20       ┆ 2025-05-01 ┆ [13:20:00, 13:25:00] │
└──────────┴────────────┴──────────────────────┘


## df.upsample()

In [14]:
# Upsample the 4-hourly series to 2-hourly; the inserted rows hold nulls.
# `tbl_rows=20` lets the whole result print without truncation.
with pl.Config(tbl_rows=20):
    print(
        df.select(pl.col("datetime", "date_str")).upsample(
            time_column="datetime", every="2h"
        )
    )

shape: (11, 2)
┌─────────────────────┬────────────┐
│ datetime            ┆ date_str   │
│ ---                 ┆ ---        │
│ datetime[μs]        ┆ str        │
╞═════════════════════╪════════════╡
│ 2025-01-01 00:00:00 ┆ 2025-07-05 │
│ 2025-01-01 02:00:00 ┆ null       │
│ 2025-01-01 04:00:00 ┆ 2025-08-05 │
│ 2025-01-01 06:00:00 ┆ null       │
│ 2025-01-01 08:00:00 ┆ 2025-09-10 │
│ 2025-01-01 10:00:00 ┆ null       │
│ 2025-01-01 12:00:00 ┆ 2025-10-10 │
│ 2025-01-01 14:00:00 ┆ null       │
│ 2025-01-01 16:00:00 ┆ 2025-11-20 │
│ 2025-01-01 18:00:00 ┆ null       │
│ 2025-01-01 20:00:00 ┆ 2025-12-20 │
└─────────────────────┴────────────┘


In [15]:
# Same upsample as before, then forward-fill the nulls in the inserted rows.
with pl.Config(tbl_rows=20):
    print(
        df.select(pl.col("datetime", "date_str"))
        .upsample(time_column="datetime", every="2h")
        .fill_null(strategy="forward")
    )

shape: (11, 2)
┌─────────────────────┬────────────┐
│ datetime            ┆ date_str   │
│ ---                 ┆ ---        │
│ datetime[μs]        ┆ str        │
╞═════════════════════╪════════════╡
│ 2025-01-01 00:00:00 ┆ 2025-07-05 │
│ 2025-01-01 02:00:00 ┆ 2025-07-05 │
│ 2025-01-01 04:00:00 ┆ 2025-08-05 │
│ 2025-01-01 06:00:00 ┆ 2025-08-05 │
│ 2025-01-01 08:00:00 ┆ 2025-09-10 │
│ 2025-01-01 10:00:00 ┆ 2025-09-10 │
│ 2025-01-01 12:00:00 ┆ 2025-10-10 │
│ 2025-01-01 14:00:00 ┆ 2025-10-10 │
│ 2025-01-01 16:00:00 ┆ 2025-11-20 │
│ 2025-01-01 18:00:00 ┆ 2025-11-20 │
│ 2025-01-01 20:00:00 ┆ 2025-12-20 │
└─────────────────────┴────────────┘


## codepanda

In [16]:
# Daily DatetimeIndex from 2025-08-01 through 2025-09-15 (both ends included).
idx = pd.date_range(start="2025-08-01", end="2025-09-15")
# Number the days 1..N and index the frame by date.
df_pd = pd.DataFrame({"n": range(1, len(idx) + 1)}).set_index(idx)
print(df_pd)

             n
2025-08-01   1
2025-08-02   2
2025-08-03   3
2025-08-04   4
2025-08-05   5
2025-08-06   6
2025-08-07   7
2025-08-08   8
2025-08-09   9
2025-08-10  10
2025-08-11  11
2025-08-12  12
2025-08-13  13
2025-08-14  14
2025-08-15  15
2025-08-16  16
2025-08-17  17
2025-08-18  18
2025-08-19  19
2025-08-20  20
2025-08-21  21
2025-08-22  22
2025-08-23  23
2025-08-24  24
2025-08-25  25
2025-08-26  26
2025-08-27  27
2025-08-28  28
2025-08-29  29
2025-08-30  30
2025-08-31  31
2025-09-01  32
2025-09-02  33
2025-09-03  34
2025-09-04  35
2025-09-05  36
2025-09-06  37
2025-09-07  38
2025-09-08  39
2025-09-09  40
2025-09-10  41
2025-09-11  42
2025-09-12  43
2025-09-13  44
2025-09-14  45
2025-09-15  46


In [17]:
# Weekly bins ending on Wednesday; report the maximum `n` in each bin.
print(
    df_pd.resample(rule="W-WED").max()
)

             n
2025-08-06   6
2025-08-13  13
2025-08-20  20
2025-08-27  27
2025-09-03  34
2025-09-10  41
2025-09-17  46


## ref

In [18]:
# Reference table of the duration-string units used for `interval`, `every`
# and `period` in this notebook. The rule literals carry no trailing
# whitespace, so they can be copied directly into those arguments.
# `tbl_rows=20` lets all 12 rows print without truncation.
with pl.Config(tbl_rows=20):
    print(
        pl.DataFrame(
            {
                "rule": [
                    "1ns",
                    "1us",
                    "1ms",
                    "1s",
                    "1m",
                    "1h",
                    "1d",
                    "1w",
                    "1mo",
                    "1q",
                    "1y",
                    "1i",
                ],
                "representation": [
                    "1 nanosecond",
                    "1 microsecond",
                    "1 millisecond",
                    "1 second",
                    "1 minute",
                    "1 hour",
                    "1 calendar day",
                    "1 calendar week",
                    "1 calendar month",
                    "1 calendar quarter",
                    "1 calendar year",
                    "1 index count",
                ],
            }
        )
    )

shape: (12, 2)
┌──────┬────────────────────┐
│ rule ┆ representation     │
│ ---  ┆ ---                │
│ str  ┆ str                │
╞══════╪════════════════════╡
│ 1ns  ┆ 1 nanosecond       │
│ 1us  ┆ 1 microsecond      │
│ 1ms  ┆ 1 millisecond      │
│ 1s   ┆ 1 second           │
│ 1m   ┆ 1 minute           │
│ 1h   ┆ 1 hour             │
│ 1d   ┆ 1 calendar day     │
│ 1w   ┆ 1 calendar week    │
│ 1mo  ┆ 1 calendar month   │
│ 1q   ┆ 1 calendar quarter │
│ 1y   ┆ 1 calendar year    │
│ 1i   ┆ 1 index count      │
└──────┴────────────────────┘
