In [0]:
from pyspark.sql.functions import *

### 1. Convert a String Column (yyyy-MM-dd) to a Date Type Column

In [0]:
# Two sample rows; each is a one-element tuple holding a yyyy-MM-dd string.
data = [
    ("2024-02-09",),
    ("2023-12-31",),
]
# One column, named date_str.
df = spark.createDataFrame(data, schema=["date_str"])
df.display()

date_str
2024-02-09
2023-12-31


In [0]:
# Parse the yyyy-MM-dd strings in date_str into a new column, date_col.
df = df.withColumn(
    "date_col",
    to_date("date_str", "yyyy-MM-dd"),
)
df.display()

date_str,date_col
2024-02-09,2024-02-09
2023-12-31,2023-12-31


### 2. Extract Year, Month, and Day from a Date Column

In [0]:
# Append the year, month and day-of-month of date_col as separate columns.
(
    df
    .withColumn("Year", year("date_col"))
    .withColumn("Month", month("date_col"))
    .withColumn("Day", dayofmonth("date_col"))
    .display()
)

date_str,date_col,Year,Month,Day
2024-02-09,2024-02-09,2024,2,9
2023-12-31,2023-12-31,2023,12,31


### 3. Find the Difference (in Days) Between Two Date Columns

In [0]:
# One row with two yyyy-MM-dd strings; both columns are parsed with to_date
# in place, so date1 and date2 keep their names.
data = [("2024-02-09", "2023-12-31")]
df = (
    spark.createDataFrame(data, schema=["date1", "date2"])
    .withColumn("date1", to_date("date1", "yyyy-MM-dd"))
    .withColumn("date2", to_date("date2", "yyyy-MM-dd"))
)
df.display()

date1,date2
2024-02-09,2023-12-31


In [0]:
# datediff(end, start): number of days from date2 up to date1.
(
    df
    .withColumn("days_diff", datediff("date1", "date2"))
    .display()
)

date1,date2,days_diff
2024-02-09,2023-12-31,40


### 4. Add or Subtract n Days to/from a Date Column

In [0]:
# Shift date1 forward and backward by four days.
(
    df
    .withColumn("4daysadd", date_add("date1", 4))
    .withColumn("4dayssub", date_sub("date1", 4))
    .display()
)

date1,date2,4daysadd,4dayssub
2024-02-09,2023-12-31,2024-02-13,2024-02-05


### 5. Convert a Timestamp Column to a Date Column

In [0]:
# to_date with no format keeps only the date part of its input.
# NOTE: date1 was already produced by to_date when df was built, so here
# date_only simply repeats it; with a timestamp input the time-of-day would
# be dropped.
(
    df
    .withColumn("date_only", to_date("date1"))
    .display()
)

date1,date2,date_only
2024-02-09,2023-12-31,2024-02-09


### 6. Find the First and Last Day of the Month

In [0]:
# trunc() snaps date1 back to the start of the given unit (month / year);
# last_day() gives the final day of date1's month.
(
    df
    .withColumn("firstdateofmonth", trunc("date1", "month"))
    .withColumn("lastdate", last_day("date1"))
    .withColumn("firstdateofyear", trunc("date1", "year"))
    .display()
)

date1,date2,firstdateofmonth,lastdate,firstdateofyear
2024-02-09,2023-12-31,2024-02-01,2024-02-29,2024-01-01


### 7. Get Current Date and Current Timestamp

In [0]:
# Attach the current date and the current timestamp to every row.
# These values change on each run, so the displayed output is not reproducible.
(
    df
    .withColumn("current_date", current_date())
    .withColumn("current_timestamp", current_timestamp())
    .display()
)

date1,date2,current_date,current_timestamp
2024-02-09,2023-12-31,2025-02-10,2025-02-10T17:43:46.731+0000


### 8. Calculate Months Between Two Dates

In [0]:
# Fractional number of months from date2 to date1, kept to 2 decimal places.
# `round` here is the Spark column function brought in by the wildcard import
# from pyspark.sql.functions, not Python's built-in round.
(
    df
    .withColumn("months_diff", round(months_between("date1", "date2"), 2))
    .display()
)

date1,date2,months_diff
2024-02-09,2023-12-31,1.29


### 9. Convert a Date Column to a Specific Format (MM/dd/yyyy)

In [0]:
# Render date1 as text using the MM/dd/yyyy pattern.
(
    df
    .withColumn("formatted_date", date_format("date1", "MM/dd/yyyy"))
    .display()
)

date1,date2,formatted_date
2024-02-09,2023-12-31,02/09/2024


### 10. Extract the Week Number from a Date Column

In [0]:
# Week number of the year that date1 falls in.
(
    df
    .withColumn("weekofyear", weekofyear("date1"))
    .display()
)

date1,date2,weekofyear
2024-02-09,2023-12-31,6


### 11. Find the Day of the Week (Monday, Tuesday, etc.) from a Date Column

In [0]:
# The EEEE pattern formats date1 as the full weekday name (e.g. Friday).
(
    df
    .withColumn("day_of_week", date_format("date1", "EEEE"))
    .display()
)

date1,date2,day_of_week
2024-02-09,2023-12-31,Friday


### 12. Check if a Given Year is a Leap Year

In [0]:
# Gregorian leap-year rule applied to the year of date1:
# divisible by 4, and either not divisible by 100 or divisible by 400.
# The two adjacent literals concatenate into a single SQL expression string.
df.withColumn(
    "is_leap_year",
    expr(
        "year(date1) % 4 = 0 AND "
        "(year(date1) % 100 <> 0 OR year(date1) % 400 = 0)"
    ),
).display()

date1,date2,is_leap_year
2024-02-09,2023-12-31,True


### 13. Filter Records Based on a Specific Date Range

In [0]:
# Keep rows whose date1 lies in 2024; between() is inclusive at both ends,
# i.e. date1 >= lower AND date1 <= upper.
in_2024 = col("date1").between("2024-01-01", "2024-12-31")
df.filter(in_2024).display()

date1,date2
2024-02-09,2023-12-31


### 14. Find the Difference (in Hours, Minutes, Seconds) Between Two Timestamp Columns

In [0]:
# Difference between date1 and date2 in epoch seconds, then scaled down to
# minutes and hours. Each later column is derived from the previous one.
(
    df
    .withColumn(
        "time_diff_seconds",
        unix_timestamp("date1") - unix_timestamp("date2"),
    )
    .withColumn("time_diff_minutes", col("time_diff_seconds") / 60)
    .withColumn("time_diff_hours", col("time_diff_minutes") / 60)
    .display()
)

date1,date2,time_diff_seconds,time_diff_minutes,time_diff_hours
2024-02-09,2023-12-31,3456000,57600.0,960.0


### 15. Convert UTC Timestamp to a Different Timezone

In [0]:
# Interpret date1 as a UTC instant and shift it to America/New_York wall time.
(
    df
    .withColumn("date_in_EST", from_utc_timestamp("date1", "America/New_York"))
    .display()
)

date1,date2,date_in_EST
2024-02-09,2023-12-31,2024-02-08T19:00:00.000+0000


### 16. Fill Null Values in a Date Column with the Current Date

In [0]:
# Fill missing date1 values with today's date.
# coalesce() returns its first non-null argument, which is exactly the
# "use date1 unless it is null, otherwise current_date()" rule and replaces
# the longer when(...isNull()).otherwise(...) spelling.
(
    df
    .withColumn("datenew", coalesce(col("date1"), current_date()))
    .display()
)

date1,date2,datenew
2024-02-09,2023-12-31,2024-02-09


### 17. Find the First Monday (or Any Specific Weekday) After a Given Date

In [0]:
# next_day returns the first date strictly after date1 that falls on the
# named weekday (so a Friday date1 maps to the following Friday).
(
    df
    .withColumn("next_monday", expr("next_day(date1,'Monday')"))
    .withColumn("next_friday", expr("next_day(date1,'Friday')"))
    .display()
)

date1,date2,next_monday,next_friday
2024-02-09,2023-12-31,2024-02-12,2024-02-16


### 18. Generate a Sequence of Dates Between Two Given Dates

In [0]:
# Build an array of every date from date2 to date1, stepping one day at a time.
df.withColumn(
    "date_seq",
    sequence("date2", "date1", expr("interval 1 day")),
).display()

date1,date2,date_seq
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)"


In [0]:
# Same daily sequence as above, then explode() emits one output row per
# array element in a new `date` column (date_seq is kept alongside it).
(
    df
    .withColumn("date_seq", sequence("date2", "date1", expr("interval 1 day")))
    .withColumn("date", explode("date_seq"))
    .display()
)

date1,date2,date_seq,date
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2023-12-31
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-01
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-02
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-03
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-04
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-05
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-06
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-07
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-08
2024-02-09,2023-12-31,"List(2023-12-31, 2024-01-01, 2024-01-02, 2024-01-03, 2024-01-04, 2024-01-05, 2024-01-06, 2024-01-07, 2024-01-08, 2024-01-09, 2024-01-10, 2024-01-11, 2024-01-12, 2024-01-13, 2024-01-14, 2024-01-15, 2024-01-16, 2024-01-17, 2024-01-18, 2024-01-19, 2024-01-20, 2024-01-21, 2024-01-22, 2024-01-23, 2024-01-24, 2024-01-25, 2024-01-26, 2024-01-27, 2024-01-28, 2024-01-29, 2024-01-30, 2024-01-31, 2024-02-01, 2024-02-02, 2024-02-03, 2024-02-04, 2024-02-05, 2024-02-06, 2024-02-07, 2024-02-08, 2024-02-09)",2024-01-09


In [0]:
# One row per calendar day between date2 and date1, sorted by day.
# The sequence is exploded directly, so no intermediate array column has to
# be created and dropped; the resulting columns are date1, date2, date.
(
    df
    .withColumn("date", explode(sequence("date2", "date1", expr("interval 1 day"))))
    .orderBy("date")
    .display()
)

date1,date2,date
2024-02-09,2023-12-31,2023-12-31
2024-02-09,2023-12-31,2024-01-01
2024-02-09,2023-12-31,2024-01-02
2024-02-09,2023-12-31,2024-01-03
2024-02-09,2023-12-31,2024-01-04
2024-02-09,2023-12-31,2024-01-05
2024-02-09,2023-12-31,2024-01-06
2024-02-09,2023-12-31,2024-01-07
2024-02-09,2023-12-31,2024-01-08
2024-02-09,2023-12-31,2024-01-09


### 19. Find How Many Days Are Left Until the End of the Year

In [0]:
# December 31st of date1's year, assembled with the SQL make_date function.
df.withColumn(
    "last_date_of_year",
    expr("make_date(year(date1), 12, 31)"),
).display()

date1,date2,last_date_of_year
2024-02-09,2023-12-31,2024-12-31


In [0]:
# Days remaining from date1 until December 31st of the same year:
# datediff(end, start) with the year end as `end`.
df.withColumn(
    "days_to_year_end",
    datediff(expr("make_date(year(date1), 12, 31)"), "date1"),
).display()

date1,date2,days_to_year_end
2024-02-09,2023-12-31,326


### 20. Round a Timestamp Column to the Nearest Hour

In [0]:
# Round to the NEAREST hour.
# date_trunc("hour", ...) on its own always rounds down (10:59 -> 10:00).
# Adding 30 minutes before truncating makes values at or past the half hour
# land on the next hour (10:30 -> 11:00) while earlier ones stay put
# (10:29 -> 10:00).
# The explicit cast makes the timestamp conversion visible; date1 is a date,
# so it becomes midnight and is already on an hour boundary.
(
    df
    .withColumn(
        "rounded_hour",
        date_trunc(
            "hour",
            col("date1").cast("timestamp") + expr("INTERVAL 30 MINUTES"),
        ),
    )
    .display()
)

date1,date2,rounded_hour
2024-02-09,2023-12-31,2024-02-09T00:00:00.000+0000
