### DATE FUNCTIONS

In [0]:
from pyspark.sql import functions as F
# Load the banking CSV export into a DataFrame.
# - header=true: the first line of the file supplies the column names.
# - inferSchema=true: Spark samples the data to guess column types instead of
#   reading everything as strings (this costs an extra pass over the file).
df = (
    spark.read
        .options(inferSchema="true", header="true")
        .csv("/Volumes/raw-data/banking/csv/Banking_Database.csv")
)

# Show the inferred column names and types so the date columns used by the
# cells below can be checked.
df.printSchema()

# Preview the first five rows. The row cap is applied with limit(); the
# previous `df.display(5)` passed 5 as a positional argument, which is not a
# documented row limit for display().
display(df.limit(5))

In [0]:
from pyspark.sql import functions as F

# Column handles shared by the examples in this cell.
txn_date = F.col("Transaction Date")
opened_on = F.col("Date Of Account Opening")
last_txn = F.col("Last Transaction Date")

# Calendar components of the transaction date: year, month and day of month.
date_parts = df.select(
    txn_date,
    F.year(txn_date).alias("Year"),
    F.month(txn_date).alias("Month"),
    F.dayofmonth(txn_date).alias("Day"),
)
display(date_parts)

# Account opening date shifted forward by 7 days.
opened_plus_week = df.select(
    opened_on,
    F.date_add(opened_on, 7).alias("OpenDate_Plus7Days"),
)
display(opened_plus_week)

# Last transaction date shifted back by 3 days.
last_txn_minus_three = df.select(
    last_txn,
    F.date_sub(last_txn, 3).alias("LastTxn_Minus3Days"),
)
display(last_txn_minus_three)

# Number of days from account opening to the last transaction.
# datediff takes (end, start), so the later date goes first.
account_age_days = df.select(
    last_txn,
    opened_on,
    F.datediff(last_txn, opened_on).alias("Days_Between"),
)
display(account_age_days)

# Current date and timestamp, selected against df so one row is produced
# per input row.
now_columns = df.select(
    F.current_date().alias("Current_Date"),
    F.current_timestamp().alias("Current_Timestamp"),
)
display(now_columns)

# Transaction date rendered as a string using the pattern yyyy/MM/dd.
formatted_txn = df.select(
    txn_date,
    F.date_format(txn_date, "yyyy/MM/dd").alias("TxnDate_Formatted"),
)
display(formatted_txn)

# Transaction date truncated at "month" and at "year" granularity.
truncated_txn = df.select(
    txn_date,
    F.trunc(txn_date, "month").alias("TxnDate_Month"),
    F.trunc(txn_date, "year").alias("TxnDate_Year"),
)
display(truncated_txn)

# Week-of-year and day-of-week numbers for the transaction date.
# NOTE: per the Spark docs, dayofweek numbers days 1 (Sunday) to 7 (Saturday).
week_fields = df.select(
    txn_date,
    F.weekofyear(txn_date).alias("WeekOfYear"),
    F.dayofweek(txn_date).alias("DayOfWeek"),
)
display(week_fields)

In [0]:
from pyspark.sql import functions as F

# Name of the weekday on which each transaction took place.
weekday_names = df.select(
    F.col("Transaction Date"),
    F.dayname(F.col("Transaction Date")).alias("Weekday_Name"),
)
display(weekday_names)

In [0]:
from pyspark.sql import functions as F

# Numeric weekday of each transaction date.
# NOTE: per the Spark docs, weekday numbers days 0 (Monday) to 6 (Sunday),
# which differs from dayofweek's 1 (Sunday) to 7 (Saturday).
weekday_numbers = df.select(
    F.col("Transaction Date"),
    F.weekday(F.col("Transaction Date")).alias("Weekday_Num"),
)
display(weekday_numbers)