In [6]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName('date-format').getOrCreate()

In [7]:
from pyspark.sql.functions import *

df=spark.createDataFrame([["1"]],["id"])
df.select(current_date().alias("current_date"), \
      date_format(current_timestamp(),"yyyy MM dd").alias("yyyy MM dd"), \
      date_format(current_timestamp(),"MM/dd/yyyy hh:mm").alias("MM/dd/yyyy"), \
      date_format(current_timestamp(),"MM/dd/yyyy hh:mm:SS").alias("MM/dd/yyyy"), \
      date_format(current_timestamp(),"yyyy MMM dd").alias("yyyy MMMM dd"), \
      date_format(current_timestamp(),"yyyy MMMM dd E").alias("yyyy MMMM dd E") \
   ).show()

+------------+----------+----------------+-------------------+------------+------------------+
|current_date|yyyy MM dd|      MM/dd/yyyy|         MM/dd/yyyy|yyyy MMMM dd|    yyyy MMMM dd E|
+------------+----------+----------------+-------------------+------------+------------------+
|  2025-08-09|2025 08 09|08/09/2025 04:03|08/09/2025 04:03:34| 2025 Aug 09|2025 August 09 Sat|
+------------+----------+----------------+-------------------+------------+------------------+



In [8]:
df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
df.select(to_date(df.t, format='yyyy-MM-dd HH:mm:ss').alias('date')).collect()

[Row(date=datetime.date(1997, 2, 28))]

In [9]:
spark.createDataFrame(
    [("12/11/2022 01:15:00",), ("2021-12-11 11:15:12",), ("05/12/2022 17:35:22",), ("07/12/2022 17:35:22",)], ["strt_tm"]
).createOrReplaceTempView("table1")

spark.table("table1").show()

spark.sql("""
select coalesce(
            to_timestamp(strt_tm, 'dd/MM/yyyy HH:mm:ss'), 
            to_timestamp(strt_tm, 'yyyy-MM-dd HH:mm:ss')
        ) as start_time,
        coalesce(
            to_date(strt_tm, 'dd/MM/yyyy HH:mm:ss'), 
            to_date(strt_tm, 'yyyy-MM-dd HH:mm:ss')
        ) as start_date
from table1
""")

+-------------------+
|            strt_tm|
+-------------------+
|12/11/2022 01:15:00|
|2021-12-11 11:15:12|
|05/12/2022 17:35:22|
|07/12/2022 17:35:22|
+-------------------+



DataFrame[start_time: timestamp, start_date: date]

In [10]:
df.select(to_timestamp(lit('06-24-2019 12:01:19.000'),'MM-dd-yyyy HH:mm:ss.SSSS')).show()

+---------------------------------------------------------------+
|to_timestamp(06-24-2019 12:01:19.000, MM-dd-yyyy HH:mm:ss.SSSS)|
+---------------------------------------------------------------+
|                                            2019-06-24 12:01:19|
+---------------------------------------------------------------+



In [11]:
df.select(to_timestamp(lit('06/24/2019 12:01:19.000'),'MM/dd/yyyy HH:mm:ss.SSSS')).show()

+---------------------------------------------------------------+
|to_timestamp(06/24/2019 12:01:19.000, MM/dd/yyyy HH:mm:ss.SSSS)|
+---------------------------------------------------------------+
|                                            2019-06-24 12:01:19|
+---------------------------------------------------------------+



### Reading different date time formats

In [48]:
t1 = spark.createDataFrame(
    [
        ("19994557","05/12/2022 17:35:22","08/10/2017 10:07:22"),
        ("19994557","04/08/2017 10:07:42","08/10/2017 12:37:04"),
    ], 
     ["Id","TransactionDate","UpdateDate"]
)

t1.show(truncate=False)

+--------+-------------------+-------------------+
|Id      |TransactionDate    |UpdateDate         |
+--------+-------------------+-------------------+
|19994557|05/12/2022 17:35:22|08/10/2017 10:07:22|
|19994557|04/08/2017 10:07:42|08/10/2017 12:37:04|
+--------+-------------------+-------------------+



In [49]:
from pyspark.sql import functions as F

df_t2 = t1.withColumn('TransactionDate_2',
                      F.when(F.to_timestamp(F.col('TransactionDate'),'M/d/y H:m:s').isNotNull(),
                             F.regexp_replace(F.to_timestamp(F.col('TransactionDate'),'M/d/y H:m:s'), r"T"," "))
                      .otherwise(F.when(F.to_timestamp(F.col('TransactionDate'),'y-M-d H:m:s.SSS').isNotNull(),
                                        F.regexp_replace(F.to_timestamp(F.col('TransactionDate'),'y-M-d H:m:s.SSS'), r"T"," "))
                                .otherwise(F.when(F.to_timestamp(F.col('TransactionDate'),'y-M-d').isNotNull(),
                                        F.regexp_replace(F.to_timestamp(F.col('TransactionDate'),'y-M-d'), r"T"," ")))))                                        

# df_t2.show(truncate=True)

df_t3 = df_t2.withColumn('UpdateDate_new',
                      F.when(F.to_timestamp(F.col('UpdateDate'),'M/d/y H:m:s').isNotNull(),
                             F.regexp_replace(F.to_timestamp(F.col('UpdateDate'),'M/d/y H:m:s'), r"T"," "))
                      .otherwise(F.when(F.to_timestamp(F.col('UpdateDate'),'y-M-d H:m:s.SSS').isNotNull(),
                                        F.regexp_replace(F.to_timestamp(F.col('UpdateDate'),'y-M-d H:m:s.SSS'), r"T"," "))
                                .otherwise(F.when(F.to_timestamp(F.col('UpdateDate'),'y-M-d').isNotNull(),
                                        F.regexp_replace(F.to_timestamp(F.col('UpdateDate'),'y-M-d'), r"T"," ")))))

df_t3.show(truncate=False)

+--------+-------------------+-------------------+-------------------+-------------------+
|Id      |TransactionDate    |UpdateDate         |TransactionDate_2  |UpdateDate_new     |
+--------+-------------------+-------------------+-------------------+-------------------+
|19994557|05/12/2022 17:35:22|08/10/2017 10:07:22|2022-05-12 17:35:22|2017-08-10 10:07:22|
|19994557|04/08/2017 10:07:42|08/10/2017 12:37:04|2017-04-08 10:07:42|2017-08-10 12:37:04|
+--------+-------------------+-------------------+-------------------+-------------------+

