## Introduction to PySpark DateTime Functions
PySpark date and time functions are built-in functions in the `pyspark.sql.functions` module that efficiently handle date and time operations and conversions on DataFrame columns.

%md
### Links and Resources
- [Datetime Functions](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/functions.html#datetime-functions)
- [Datetime Patterns](https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html)

In [0]:
from pyspark.sql.functions import col

# Sample employee records. hire_date starts out as a plain string in
# dd-MM-yyyy form (see the schema), not as a date type.
schema = "name string, hire_date string"

data = [
    ("Alice", "15-06-2020"),
    ("Bob", "25-09-2018"),
    ("Charlie", "05-12-2022"),
]

df = spark.createDataFrame(data, schema)

# printSchema is a method and must be called; a bare `df.printSchema`
# only references the bound method and prints nothing.
df.printSchema()

df.show()

+-------+----------+
|   name| hire_date|
+-------+----------+
|  Alice|15-06-2020|
|    Bob|25-09-2018|
|Charlie|05-12-2022|
+-------+----------+



In [0]:
from pyspark.sql.functions import to_date, to_timestamp, date_format, curdate, now, datediff, months_between

In [0]:
# to_date
# Parse the dd-MM-yyyy strings in hire_date into a date column, replacing
# the string column of the same name.
# NOTE(review): df is overwritten in place, so re-running this cell without
# first re-running the cell that creates df would re-parse an already
# converted column — confirm whether a separate name is preferable.

df = df.withColumn(
    "hire_date",
    to_date(col("hire_date"), "dd-MM-yyyy"),
)

df.display()

name,hire_date
Alice,2020-06-15
Bob,2018-09-25
Charlie,2022-12-05


In [0]:
# to_timestamp
# Add hire_timestamp, a timestamp derived from hire_date. No format string is
# passed, so the default conversion is used.

df = df.withColumn(
    "hire_timestamp",
    to_timestamp(col("hire_date")),
)

df.display()

name,hire_date,hire_timestamp
Alice,2020-06-15,2020-06-15T00:00:00Z
Bob,2018-09-25,2018-09-25T00:00:00Z
Charlie,2022-12-05,2022-12-05T00:00:00Z


In [0]:
# date_format
# Render hire_date as text using the pattern "dd, MMMM yyyy"
# (two-digit day, full month name, four-digit year). The result is only
# displayed; df itself is not reassigned.
# NOTE(review): the column is called "year_month" although the pattern also
# emits the day — confirm the intended column name.

df.withColumn(
    "year_month",
    date_format(col("hire_date"), "dd, MMMM yyyy"),
).display()

name,hire_date,hire_timestamp,year_month
Alice,2020-06-15,2020-06-15T00:00:00Z,"15, June 2020"
Bob,2018-09-25,2018-09-25T00:00:00Z,"25, September 2018"
Charlie,2022-12-05,2022-12-05T00:00:00Z,"05, December 2022"


In [0]:
# curdate and now
# Append the current date and the current timestamp as two extra columns.
# The result is only displayed; df itself is not reassigned.

(
    df
    .withColumn("current_date", curdate())
    .withColumn("current_timestamp", now())
    .display()
)

name,hire_date,hire_timestamp,current_date,current_timestamp
Alice,2020-06-15,2020-06-15T00:00:00Z,2025-03-04,2025-03-04T10:12:23.24Z
Bob,2018-09-25,2018-09-25T00:00:00Z,2025-03-04,2025-03-04T10:12:23.24Z
Charlie,2022-12-05,2022-12-05T00:00:00Z,2025-03-04,2025-03-04T10:12:23.24Z


In [0]:
# datediff
# Number of days from hire_date (start) up to the current moment (end).
# The result is only displayed; df itself is not reassigned.

df.withColumn(
    "days_since_hired",
    datediff(end=now(), start="hire_date"),
).display()

name,hire_date,hire_timestamp,days_since_hired
Alice,2020-06-15,2020-06-15T00:00:00Z,1723
Bob,2018-09-25,2018-09-25T00:00:00Z,2352
Charlie,2022-12-05,2022-12-05T00:00:00Z,820


In [0]:
# months_between
# Number of months from hire_date up to the current moment; the value is
# fractional when the two do not fall on the same day of the month.
# The result is only displayed; df itself is not reassigned.

df.withColumn(
    "months_since_hired",
    months_between(date1=now(), date2="hire_date"),
).display()

name,hire_date,hire_timestamp,months_since_hired
Alice,2020-06-15,2020-06-15T00:00:00Z,56.6589378
Bob,2018-09-25,2018-09-25T00:00:00Z,77.33635715
Charlie,2022-12-05,2022-12-05T00:00:00Z,26.98151844
