# DATE FUNCTION 

In [None]:
# 📅 PySpark Date Functions – Complete Table

| Function                       | Type           | Description                          | Example                                      |
| ------------------------------ | -------------- | ------------------------------------ | -------------------------------------------- |
| `current_date()`               | Current        | Returns today's date                 | `df.select(current_date())`                  |
| `current_timestamp()`          | Current        | Returns current timestamp            | `df.select(current_timestamp())`             |
| `date_format(col, fmt)`        | Format         | Converts date to custom format       | `df.select(date_format("dt", "yyyy-MM-dd"))` |
| `to_date(col)`                 | Conversion     | Converts string to date              | `df.select(to_date("str_date"))`             |
| `to_timestamp(col)`            | Conversion     | Converts string to timestamp         | `df.select(to_timestamp("str_ts"))`          |
| `year(col)`                    | Extract        | Extracts year                        | `df.select(year("dt"))`                      |
| `month(col)`                   | Extract        | Extracts month number                | `df.select(month("dt"))`                     |
| `dayofmonth(col)`              | Extract        | Extracts day of month                | `df.select(dayofmonth("dt"))`                |
| `dayofyear(col)`               | Extract        | Extracts day of year                 | `df.select(dayofyear("dt"))`                 |
| `dayofweek(col)`               | Extract        | Extracts weekday (1=Sun)             | `df.select(dayofweek("dt"))`                 |
| `weekofyear(col)`              | Extract        | ISO week number                      | `df.select(weekofyear("dt"))`                |
| `hour(col)`                    | Extract (time) | Extract hour from timestamp          | `df.select(hour("ts"))`                      |
| `minute(col)`                  | Extract (time) | Extract minute                       | `df.select(minute("ts"))`                    |
| `second(col)`                  | Extract (time) | Extract second                       | `df.select(second("ts"))`                    |
| `add_months(col, n)`           | Arithmetic     | Adds n months to date                | `df.select(add_months("dt", 2))`             |
| `date_add(col, n)`             | Arithmetic     | Adds n days                          | `df.select(date_add("dt", 5))`               |
| `date_sub(col, n)`             | Arithmetic     | Subtracts n days                     | `df.select(date_sub("dt", 5))`               |
| `datediff(end, start)`         | Arithmetic     | Days between two dates               | `df.select(datediff("end_dt", "start_dt"))`  |
| `months_between(end, start)`   | Arithmetic     | Months difference                    | `df.select(months_between("d1", "d2"))`      |
| `next_day(col, day)`           | Utility        | Returns next given weekday           | `df.select(next_day("dt", "Monday"))`        |
| `last_day(col)`                | Utility        | Last day of month                    | `df.select(last_day("dt"))`                  |
| `trunc(col, fmt)`              | Utility        | Truncate date to month/year          | `df.select(trunc("dt", "MM"))`               |
| `date_trunc(fmt, ts)`          | Utility        | Truncate timestamp to hour/day/month | `df.select(date_trunc("hour", "ts"))`        |
| `from_utc_timestamp(ts, zone)` | Timezone       | Converts UTC to timezone             | `df.select(from_utc_timestamp("ts", "IST"))` |
| `to_utc_timestamp(ts, zone)`   | Timezone       | Converts timezone to UTC             | `df.select(to_utc_timestamp("ts", "IST"))`   |


# current_date()

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# Create (or reuse) the Spark session used by the date-function examples.
spark = SparkSession.builder.appName("date").getOrCreate()

# Read the products CSV, treating the first row as the header and letting
# Spark infer the column types.
df = (
    spark.read.format("csv")
    .options(inferSchema=True, header=True)
    .load("C:/Git files/My git files/PySpark/files/products.csv")
)

# Add today's date and the last day of its month, then display both columns.
df = (
    df.withColumn("current_date", current_date())
    .withColumn("last_date", last_day("current_date"))
)
df.select("current_date", "last_date").show()

# date_add()

In [None]:
# Add a column seven days after today, then preview the first five rows.
df = df.withColumn("week_after", date_add(col("current_date"), 7))
df.select(col("current_date"), col("last_date"), col("week_after")).show(n=5)

# date_sub()

In [None]:
# Compute "seven days before today" two ways -- date_sub() with 7 and
# date_add() with -7 -- so the two results can be compared side by side.
df = (
    df.withColumn("week_before", date_sub(col("current_date"), 7))
    .withColumn("week_neg_before", date_add(col("current_date"), -7))
)
df.select(
    "current_date",
    "last_date",
    "week_after",
    "week_before",
    "week_neg_before",
).show(n=5)

# Date Diff

In [None]:
# Number of days from week_before to week_after (end date first, start date
# second). datediff() is used rather than its newer alias date_diff() so the
# cell also runs on PySpark releases older than 3.5; the result is the same.
df = df.withColumn("date_diff", datediff(col("week_after"), col("week_before")))
df.select("week_after", "week_before", "date_diff").show(5)

# Date format change

In [22]:
# Render week_before as a dd-MM-yyyy string next to the original date column.
df = df.withColumn(
    "week_before_formate_change",
    date_format(col("week_before"), "dd-MM-yyyy"),
)
df.select("week_before_formate_change", "week_before").show(n=5)

+--------------------------+-----------+
|week_before_formate_change|week_before|
+--------------------------+-----------+
|                23-11-2025| 2025-11-23|
|                23-11-2025| 2025-11-23|
|                23-11-2025| 2025-11-23|
|                23-11-2025| 2025-11-23|
|                23-11-2025| 2025-11-23|
+--------------------------+-----------+
only showing top 5 rows

