<a href="https://colab.research.google.com/github/nitiksha/PySpark_code_practice/blob/main/date_func.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DateType, TimestampType
from datetime import date, datetime
import pyspark.sql.functions as F

# Create (or reuse) the Spark session that backs this notebook
spark = (
    SparkSession.builder
    .appName('DateFunctionPractice')
    .getOrCreate()
)

# Explicit schema: a string, a date and a timestamp column, all nullable
schema = (
    StructType()
    .add("name", StringType(), True)
    .add("hire_date", DateType(), True)          # Date column
    .add("last_login", TimestampType(), True)    # Timestamp column
)

# Sample rows: (name, hire_date, last_login)
data = [
    ("Alice", date(2025, 8, 21), datetime(2025, 8, 21, 10, 0, 0)),
    ("Bob", date(2025, 2, 15), datetime(2025, 2, 15, 15, 30, 0)),
    ("Cathy", date(2024, 12, 25), datetime(2024, 12, 25, 18, 45, 0)),
    ("David", date(2023, 1, 1), datetime(2023, 1, 1, 23, 59, 59)),
]

df = spark.createDataFrame(data, schema=schema)

# Print the untruncated rows, then the column types
df.show(truncate=False)
df.printSchema()



+-----+----------+-------------------+
|name |hire_date |last_login         |
+-----+----------+-------------------+
|Alice|2025-08-21|2025-08-21 10:00:00|
|Bob  |2025-02-15|2025-02-15 15:30:00|
|Cathy|2024-12-25|2024-12-25 18:45:00|
|David|2023-01-01|2023-01-01 23:59:59|
+-----+----------+-------------------+

root
 |-- name: string (nullable = true)
 |-- hire_date: date (nullable = true)
 |-- last_login: timestamp (nullable = true)



In [11]:
# Attach today's date to every row; `df` is reassigned so the column persists.
today = F.current_date()
df = df.withColumn("current_date", today)

# Preview today shifted five days forward, then five days back (one table each).
for col_name, shifted in (
    ("add_date", F.date_add(today, 5)),
    ("sub_date", F.date_sub(today, 5)),
):
    df.withColumn(col_name, shifted).show()

+-----+----------+-------------------+------------+----------+
| name| hire_date|         last_login|current_date|  add_date|
+-----+----------+-------------------+------------+----------+
|Alice|2025-08-21|2025-08-21 10:00:00|  2025-08-23|2025-08-28|
|  Bob|2025-02-15|2025-02-15 15:30:00|  2025-08-23|2025-08-28|
|Cathy|2024-12-25|2024-12-25 18:45:00|  2025-08-23|2025-08-28|
|David|2023-01-01|2023-01-01 23:59:59|  2025-08-23|2025-08-28|
+-----+----------+-------------------+------------+----------+

+-----+----------+-------------------+------------+----------+
| name| hire_date|         last_login|current_date|  sub_date|
+-----+----------+-------------------+------------+----------+
|Alice|2025-08-21|2025-08-21 10:00:00|  2025-08-23|2025-08-18|
|  Bob|2025-02-15|2025-02-15 15:30:00|  2025-08-23|2025-08-18|
|Cathy|2024-12-25|2024-12-25 18:45:00|  2025-08-23|2025-08-18|
|David|2023-01-01|2023-01-01 23:59:59|  2025-08-23|2025-08-18|
+-----+----------+-------------------+------------+----------+

In [15]:
# Extract the calendar year from a fixed date literal and show it beside every row.
year_of_literal = F.year(F.lit('2025-08-28'))
df.withColumn("year", year_of_literal).show()

+-----+----------+-------------------+------------+----+
| name| hire_date|         last_login|current_date|year|
+-----+----------+-------------------+------------+----+
|Alice|2025-08-21|2025-08-21 10:00:00|  2025-08-23|2025|
|  Bob|2025-02-15|2025-02-15 15:30:00|  2025-08-23|2025|
|Cathy|2024-12-25|2024-12-25 18:45:00|  2025-08-23|2025|
|David|2023-01-01|2023-01-01 23:59:59|  2025-08-23|2025|
+-----+----------+-------------------+------------+----+



In [17]:
# Days from the literal date to today (end minus start), so the value is
# negative while today is still before the literal date.
reference_day = F.lit('2025-08-28')
days_since_reference = F.datediff(F.current_date(), reference_day)
df.withColumn("date_diff", days_since_reference).show()

+-----+----------+-------------------+------------+---------+
| name| hire_date|         last_login|current_date|date_diff|
+-----+----------+-------------------+------------+---------+
|Alice|2025-08-21|2025-08-21 10:00:00|  2025-08-23|       -5|
|  Bob|2025-02-15|2025-02-15 15:30:00|  2025-08-23|       -5|
|Cathy|2024-12-25|2024-12-25 18:45:00|  2025-08-23|       -5|
|David|2023-01-01|2023-01-01 23:59:59|  2025-08-23|       -5|
+-----+----------+-------------------+------------+---------+

