In [5]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local[1]").appName("Examples.com").getOrCreate()

from pyspark.sql.functions import *

df=spark.createDataFrame(
        data = [ ("1","2019-06-24 12:01:19.000")],
        schema=["id","input_timestamp"])
df.printSchema()

#Timestamp String to DateType
df.withColumn("timestamp",to_timestamp("input_timestamp")).show(truncate=False)
# Using Cast to convert TimestampType to DateType
df.withColumn('timestamp_string',to_timestamp('input_timestamp').cast('string')).show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)

+---+-----------------------+-------------------+
|id |input_timestamp        |timestamp          |
+---+-----------------------+-------------------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|
+---+-----------------------+-------------------+

+---+-----------------------+-------------------+
|id |input_timestamp        |timestamp_string   |
+---+-----------------------+-------------------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|
+---+-----------------------+-------------------+



In [8]:
df.select(to_timestamp(lit('06-24-2019 12:01:19.000'),'MM-dd-yyyy HH:mm:ss.SSSS').alias('time')).show()

+-------------------+
|               time|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+



In [10]:
spark.sql("select to_timestamp('2019-06-24 12:01:19.000') as timestamp").show()
#SQL CAST timestamp string to TimestampType
spark.sql("select timestamp('2019-06-24 12:01:19.000') as timestamp").show()
#SQL Custom string to TimestampType
spark.sql("select to_timestamp('06-24-2019 12:01:19.000','MM-dd-yyyy HH:mm:ss.SSSS') as timestamp").show()

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+

+-------------------+
|          timestamp|
+-------------------+
|2019-06-24 12:01:19|
+-------------------+



In [11]:
df=spark.createDataFrame(
        data = [ ("1","2019-06-24 12:01:19.000")],
        schema=["id","input_timestamp"])
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- input_timestamp: string (nullable = true)



In [16]:
from pyspark.sql.functions import *

#Timestamp String to DateType
df.withColumn("date_type",to_date("input_timestamp")).show(truncate=False)

#Timestamp Type to DateType
df.withColumn("date_type",to_date(current_timestamp())).show(truncate=False)

df.select(to_date(lit('06-24-2019 12:01:19.000'),'MM-dd-yyyy HH:mm:ss.SSSS').alias('time')).show()

+---+-----------------------+----------+
|id |input_timestamp        |date_type |
+---+-----------------------+----------+
|1  |2019-06-24 12:01:19.000|2019-06-24|
+---+-----------------------+----------+

+---+-----------------------+----------+
|id |input_timestamp        |date_type |
+---+-----------------------+----------+
|1  |2019-06-24 12:01:19.000|2023-09-19|
+---+-----------------------+----------+

+----------+
|      time|
+----------+
|2019-06-24|
+----------+



In [17]:
df.withColumn("ts",to_timestamp(col("input_timestamp"))).withColumn("datetype",to_date(col("ts"))) \
  .show(truncate=False)

+---+-----------------------+-------------------+----------+
|id |input_timestamp        |ts                 |datetype  |
+---+-----------------------+-------------------+----------+
|1  |2019-06-24 12:01:19.000|2019-06-24 12:01:19|2019-06-24|
+---+-----------------------+-------------------+----------+



In [18]:
df.withColumn('date_type', col('input_timestamp').cast('date')).show(truncate=False)
df.withColumn('date_type',to_timestamp('input_timestamp').cast('date')).show(truncate=False)

+---+-----------------------+----------+
|id |input_timestamp        |date_type |
+---+-----------------------+----------+
|1  |2019-06-24 12:01:19.000|2019-06-24|
+---+-----------------------+----------+

+---+-----------------------+----------+
|id |input_timestamp        |date_type |
+---+-----------------------+----------+
|1  |2019-06-24 12:01:19.000|2019-06-24|
+---+-----------------------+----------+



In [25]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName('com').getOrCreate()
from pyspark.sql.functions import *

df=spark.createDataFrame([["1"]],["id"])
df.select(current_date().alias("current_date"),
          date_format(current_date(),"yyyy MM dd").alias("yyyy MM dd"),
          date_format(current_timestamp(),"MM/dd/yyyy hh:mm").alias("MM/dd/yyyy"),
          date_format(current_timestamp(),"yyyy MMM dd").alias("yyyy MMMM dd"),
          date_format(current_timestamp(),"yyyy MMMM dd E").alias("yyyy MMMM dd E")).show()

#SQL
spark.sql("select current_date() as current_date, "+
      "date_format(current_timestamp(),'yyyy MM dd') as yyyy_MM_dd, "+
      "date_format(current_timestamp(),'MM/dd/yyyy hh:mm') as MM_dd_yyyy, "+
      "date_format(current_timestamp(),'yyyy MMM dd') as yyyy_MMMM_dd, "+
      "date_format(current_timestamp(),'yyyy MMMM dd E') as yyyy_MMMM_dd_E").show()


+------------+----------+----------------+------------+--------------------+
|current_date|yyyy MM dd|      MM/dd/yyyy|yyyy MMMM dd|      yyyy MMMM dd E|
+------------+----------+----------------+------------+--------------------+
|  2023-09-19|2023 09 19|09/19/2023 04:15| 2023 Sep 19|2023 September 19...|
+------------+----------+----------------+------------+--------------------+

+------------+----------+----------------+------------+--------------------+
|current_date|yyyy_MM_dd|      MM_dd_yyyy|yyyy_MMMM_dd|      yyyy_MMMM_dd_E|
+------------+----------+----------------+------------+--------------------+
|  2023-09-19|2023 09 19|09/19/2023 04:15| 2023 Sep 19|2023 September 19...|
+------------+----------+----------------+------------+--------------------+



In [26]:
# datediff
from pyspark.sql.functions import *
data = [("1","2019-07-01"),("2","2019-06-24"),("3","2019-08-24")]
df=spark.createDataFrame(data=data,schema=["id","date"])

df.select(col("date"),current_date().alias("current_date"),
      datediff(current_date(),col("date")).alias("datediff")).show()

+----------+------------+--------+
|      date|current_date|datediff|
+----------+------------+--------+
|2019-07-01|  2023-09-19|    1541|
|2019-06-24|  2023-09-19|    1548|
|2019-08-24|  2023-09-19|    1487|
+----------+------------+--------+



In [29]:
#months between
from pyspark.sql.functions import *
df.withColumn("datesDiff", datediff(current_date(),col("date"))) \
  .withColumn("montsDiff", months_between(current_date(),col("date"))) \
  .withColumn("montsDiff_round",round(months_between(current_date(),col("date")),2)) \
  .withColumn("yearsDiff",months_between(current_date(),col("date"))/lit(12)) \
  .withColumn("yearsDiff_round",round(months_between(current_date(),col("date"))/lit(12),2)) \
  .show()

+---+----------+---------+-----------+---------------+-----------------+---------------+
| id|      date|datesDiff|  montsDiff|montsDiff_round|        yearsDiff|yearsDiff_round|
+---+----------+---------+-----------+---------------+-----------------+---------------+
|  1|2019-07-01|     1541|50.58064516|          50.58|4.215053763333334|           4.22|
|  2|2019-06-24|     1548|50.83870968|          50.84|       4.23655914|           4.24|
|  3|2019-08-24|     1487|48.83870968|          48.84|4.069892473333334|           4.07|
+---+----------+---------+-----------+---------------+-----------------+---------------+



In [30]:
spark.sql("select round(months_between('2019-07-01',current_date())/12,2) as years_diff").show()

+----------+
|years_diff|
+----------+
|     -4.22|
+----------+

