In [18]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

In [19]:
# Obtain (or reuse) a Spark session that runs locally on all available cores.
spark = (
    SparkSession.builder
    .master("local[*]")
    .getOrCreate()
)

In [20]:
# Single-row demo frame. The value is supplied as a string and then cast,
# so the "timestamp" column holds a real timestamp rather than text.
df = (
    spark.createDataFrame([("2025-03-15 14:30:45",)], ["timestamp"])
    .withColumn("timestamp", F.col("timestamp").cast("timestamp"))
)

## Extracting Date & Time Components

In [21]:
# Show the original timestamp, one column per extracted component, and a
# date-only rendering. Each (label, function) pair below becomes one output
# column, in the order listed.
df.select(
    "timestamp",
    *(
        extract("timestamp").alias(label)
        for label, extract in (
            ("year", F.year),
            ("month", F.month),
            ("day", F.dayofmonth),
            ("hour", F.hour),
            ("minute", F.minute),
            ("second", F.second),
        )
    ),
    F.date_format("timestamp", "yyyy-MM-dd").alias("formatted_date"),
).show()

+-------------------+----+-----+---+----+------+------+--------------+
|          timestamp|year|month|day|hour|minute|second|formatted_date|
+-------------------+----+-----+---+----+------+------+--------------+
|2025-03-15 14:30:45|2025|    3| 15|  14|    30|    45|    2025-03-15|
+-------------------+----+-----+---+----+------+------+--------------+



In [22]:
# Show the current date and timestamp; truncate=False prints the values
# in full instead of shortening long cells.
df.select(
    F.current_date().alias("current_date"),
    F.current_timestamp().alias("current_timestamp"),
).show(truncate=False)

+------------+--------------------------+
|current_date|current_timestamp         |
+------------+--------------------------+
|2025-03-15  |2025-03-15 02:06:32.817587|
+------------+--------------------------+



## Date Arithmetic


In [23]:
# Name today's date once as a column, then derive the shifted dates from
# that column by name: +/- 7 days and + 2 calendar months.
(
    df.select(F.current_date().alias("today"))
    .select(
        "today",
        F.date_add("today", 7).alias("next_week"),
        F.date_sub("today", 7).alias("last_week"),
        F.add_months("today", 2).alias("two_months_later"),
    )
    .show()
)

+----------+----------+----------+----------------+
|     today| next_week| last_week|two_months_later|
+----------+----------+----------+----------------+
|2025-03-15|2025-03-22|2025-03-08|      2025-05-15|
+----------+----------+----------+----------------+



## Date Difference & Date Operations


In [24]:
# One-row frame holding a start/end pair; both string inputs are cast to
# real dates in a single projection.
df2 = spark.createDataFrame(
    [("2025-03-15", "2025-05-15")],
    ["start_date", "end_date"],
).select(
    F.col("start_date").cast("date").alias("start_date"),
    F.col("end_date").cast("date").alias("end_date"),
)

# datediff and months_between both take (end, start) here, so the
# differences come out positive when end_date is the later date.
df2.select(
    "start_date",
    "end_date",
    F.datediff("end_date", "start_date").alias("days_difference"),
    F.months_between("end_date", "start_date").alias("months_difference"),
).show()

+----------+----------+---------------+-----------------+
|start_date|  end_date|days_difference|months_difference|
+----------+----------+---------------+-----------------+
|2025-03-15|2025-05-15|             61|              2.0|
+----------+----------+---------------+-----------------+



## Formatting Dates

In [25]:
# Render today's date with two different datetime patterns next to the
# unformatted value; each (label, pattern) pair yields one output column.
df.select(
    F.current_date().alias("original_date"),
    *(
        F.date_format(F.current_date(), pattern).alias(label)
        for label, pattern in (
            ("formatted_date", "yyyy/MM/dd"),
            ("long_format", "EEEE, MMMM dd yyyy"),
        )
    ),
).show(truncate=False)

+-------------+--------------+-----------------------+
|original_date|formatted_date|long_format            |
+-------------+--------------+-----------------------+
|2025-03-15   |2025/03/15    |Saturday, March 15 2025|
+-------------+--------------+-----------------------+



## Converting Between Date, Timestamp, and String

In [26]:
# Rebind df to a one-row frame whose only column is a timestamp kept as text.
df = spark.createDataFrame(
    [("2025-03-15 14:30:45",)],
    ["string_timestamp"],
)

# Parse the same string twice: once down to a date, once to a full timestamp.
df.select(
    "string_timestamp",
    F.to_date("string_timestamp").alias("as_date"),
    F.to_timestamp("string_timestamp").alias("as_timestamp"),
).show(truncate=False)

+-------------------+----------+-------------------+
|string_timestamp   |as_date   |as_timestamp       |
+-------------------+----------+-------------------+
|2025-03-15 14:30:45|2025-03-15|2025-03-15 14:30:45|
+-------------------+----------+-------------------+



## Extracting Weekday & Quarter

In [27]:
# Name today's date as a column first, then read the weekday number and
# the quarter from it by column name.
(
    df.select(F.current_date().alias("date"))
    .select(
        "date",
        F.dayofweek("date").alias("day_of_week"),  # Sunday=1, Monday=2, etc.
        F.quarter("date").alias("quarter_of_year"),
    )
    .show()
)

+----------+-----------+---------------+
|      date|day_of_week|quarter_of_year|
+----------+-----------+---------------+
|2025-03-15|          7|              1|
+----------+-----------+---------------+



## Handling Null Dates

In [28]:
# Two-row frame with one missing value; the string column is cast to a real date.
df = spark.createDataFrame([(None,), ("2025-03-15",)], ["date_col"])
df = df.withColumn("date_col", F.col("date_col").cast("date"))

# Replace missing dates with a sentinel value.
# The fallback literal is cast to date so both coalesce() inputs share the
# same type. With a bare string literal Spark has to reconcile date vs.
# string, and under the default (non-ANSI) coercion rules that promotes the
# result to a string column, so "filled_date" would no longer be a date.
df.select(
    F.col("date_col"),
    F.coalesce(
        F.col("date_col"),
        F.lit("1900-01-01").cast("date"),
    ).alias("filled_date"),
).show()

+----------+-----------+
|  date_col|filled_date|
+----------+-----------+
|      NULL| 1900-01-01|
|2025-03-15| 2025-03-15|
+----------+-----------+

