In [0]:
from pyspark.sql.functions import *

### 1. Convert a String Column (yyyy-MM-dd) to a Date Type Column

In [0]:
# Sample input: dates held as plain 'yyyy-MM-dd' strings, one per row.
data = [
    ("2024-02-09",),
    ("2023-12-31",),
]
df = spark.createDataFrame(data, schema=["date_str"])
df.display()

In [0]:
# Parse the string column into a date column using an explicit pattern.
df = df.withColumn(
    "date_col",
    to_date("date_str", "yyyy-MM-dd"),
)
df.display()

### 2. Extract Year, Month, and Day from a Date Column

In [0]:
# Pull the calendar components of date_col out into their own columns.
(
    df
    .withColumn("Year", year("date_col"))
    .withColumn("Month", month("date_col"))
    .withColumn("Day", dayofmonth("date_col"))
    .display()
)

### 3. Find the Difference (in Days) Between Two Date Columns

In [0]:
# Rebuild df as a single row with two date columns. Both are parsed from
# 'yyyy-MM-dd' strings, so the cells below operate on date values.
data = [("2024-02-09", "2023-12-31")]
df = (
    spark.createDataFrame(data, schema=["date1", "date2"])
    .withColumn("date1", to_date("date1", "yyyy-MM-dd"))
    .withColumn("date2", to_date("date2", "yyyy-MM-dd"))
)
df.display()

In [0]:
# datediff(end, start): number of days from date2 up to date1.
(
    df
    .withColumn("days_diff", datediff("date1", "date2"))
    .display()
)

### 4. Add or Subtract n Days to/from a Date Column

In [0]:
# Shift date1 four days forward and four days backward.
(
    df
    .withColumn("4daysadd", date_add("date1", 4))
    .withColumn("4dayssub", date_sub("date1", 4))
    .display()
)

### 5. Convert a Timestamp Column to a Date Column

In [0]:
# to_date() keeps only the calendar date and drops any time-of-day part.
# NOTE(review): date1 was already parsed with to_date() when df was built, so
# on this sample frame the new column simply mirrors date1.
(
    df
    .withColumn("date_only", to_date("date1"))
    .display()
)

### 6. Find the First and Last Day of the Month

In [0]:
# trunc() snaps a date back to the start of the given unit;
# last_day() returns the final day of the date's month.
(
    df
    .withColumn("firstdateofmonth", trunc("date1", "month"))
    .withColumn("lastdate", last_day("date1"))
    .withColumn("firstdateofyear", trunc("date1", "year"))
    .display()
)

### 7. Get Current Date and Current Timestamp

In [0]:
# Attach the current date and timestamp as extra columns.
(
    df
    .withColumn("current_date", current_date())
    .withColumn("current_timestamp", current_timestamp())
    .display()
)

### 8. Calculate Months Between Two Dates

In [0]:
# months_between(date1, date2) yields a fractional month count; keep two decimals.
# `round` here is the Spark column function brought in by the star import,
# not the Python builtin.
(
    df
    .withColumn(
        "months_diff",
        round(months_between("date1", "date2"), 2),
    )
    .display()
)

### 9. Convert a Date Column to a Specific Format (MM/dd/yyyy)

In [0]:
# Render date1 as a string in US-style month/day/year order.
(
    df
    .withColumn("formatted_date", date_format("date1", "MM/dd/yyyy"))
    .display()
)

### 10. Extract the Week Number from a Date Column

In [0]:
# Week number of the year that date1 falls in.
(
    df
    .withColumn("weekofyear", weekofyear("date1"))
    .display()
)

### 11. Find the Day of the Week (Monday, Tuesday, etc.) from a Date Column

In [0]:
# The 'EEEE' pattern formats the date as its full weekday name.
(
    df
    .withColumn("day_of_week", date_format("date1", "EEEE"))
    .display()
)

### 12. Check if a Given Year is a Leap Year

In [0]:
# Gregorian leap-year rule: divisible by 4, and not a century year unless
# it is also divisible by 400.
(
    df
    .withColumn(
        "is_leap_year",
        expr("year(date1) % 4 = 0 AND (year(date1) % 100 <> 0 OR year(date1) % 400 = 0)"),
    )
    .display()
)

### 13. Filter Records Based on a Specific Date Range

In [0]:
# Keep rows whose date1 lies inside calendar year 2024 (both bounds inclusive).
(
    df
    .where(col("date1").between("2024-01-01", "2024-12-31"))
    .display()
)

### 14. Find the Difference (in Hours, Minutes, Seconds) Between Two Timestamp Columns

In [0]:
# unix_timestamp() gives epoch seconds, so the subtraction is the gap in seconds.
# Minutes and hours are each derived from the column added just before.
(
    df
    .withColumn("time_diff_seconds", unix_timestamp("date1") - unix_timestamp("date2"))
    .withColumn("time_diff_minutes", col("time_diff_seconds") / 60)
    .withColumn("time_diff_hours", col("time_diff_minutes") / 60)
    .display()
)

### 15. Convert UTC Timestamp to a Different Timezone

In [0]:
# Interpret date1 as UTC and express it in the America/New_York time zone.
(
    df
    .withColumn("date_in_EST", from_utc_timestamp("date1", "America/New_York"))
    .display()
)

### 16. Fill Null Values in a Date Column with the Current Date

In [0]:
# coalesce() returns the first non-null argument: date1 when it is present,
# otherwise today's date.
(
    df
    .withColumn("datenew", coalesce(col("date1"), current_date()))
    .display()
)

### 17. Find the First Monday (or Any Specific Weekday) After a Given Date

In [0]:
# next_day() returns the first date strictly after date1 that falls on the
# named weekday.
(
    df
    .withColumn("next_monday", expr("next_day(date1,'Monday')"))
    .withColumn("next_friday", expr("next_day(date1,'Friday')"))
    .display()
)

### 18. Generate a Sequence of Dates Between Two Given Dates

In [0]:
# Build an array holding every day from date2 through date1, one-day step.
(
    df
    .withColumn(
        "date_seq",
        sequence("date2", "date1", expr("interval 1 day")),
    )
    .display()
)

In [0]:
# Same daily sequence as an array, then explode() fans it out to one row per day.
(
    df
    .withColumn(
        "date_seq",
        sequence("date2", "date1", expr("interval 1 day")),
    )
    .withColumn("date", explode("date_seq"))
    .display()
)

In [0]:
# One row per day between date2 and date1: build the array, explode it,
# drop the helper array column, and sort chronologically.
(
    df
    .withColumn(
        "date_seq",
        sequence("date2", "date1", expr("interval 1 day")),
    )
    .withColumn("date", explode("date_seq"))
    .drop("date_seq")
    .orderBy("date")
    .display()
)

### 19. Find How Many Days Are Left Until the End of the Year

In [0]:
# Construct December 31st of date1's year.
(
    df
    .withColumn(
        "last_date_of_year",
        expr("make_date(year(date1), 12, 31)"),
    )
    .display()
)

In [0]:
# Days remaining in the year: December 31st of date1's year minus date1.
(
    df
    .withColumn(
        "days_to_year_end",
        datediff(expr("make_date(year(date1), 12, 31)"), "date1"),
    )
    .display()
)

### 20. Round a Timestamp Column to the Nearest Hour

In [0]:
# Round to the NEAREST hour rather than flooring: date_trunc("hour", ...) on its
# own always rounds down, so shift the value forward by half an hour before
# truncating (e.g. 10:29 -> 10:00, 10:30 -> 11:00).
# The explicit cast makes the interval arithmetic operate on a timestamp even
# though date1 in this sample frame was parsed as a date; for such midnight
# values the result is unchanged from plain truncation.
(
    df
    .withColumn(
        "rounded_hour",
        date_trunc(
            "hour",
            col("date1").cast("timestamp") + expr("INTERVAL 30 MINUTES"),
        ),
    )
    .display()
)