In [0]:
from pyspark.sql.functions import to_date, date_format, split

# Example data
data = [
    (1, "School Re-opens 21-05-2024"),
    (2, "Went for a trip 18-08-2023"),
    (3, "No event 12-12-2022"),
    (4, "No dates are mentioned here"),
]

# Create DataFrame
df = spark.createDataFrame(data, ["id", "event"])
df.display()

id,event
1,School Re-opens 21-05-2024
2,Went for a trip 18-08-2023
3,No event 12-12-2022
4,No dates are mentioned here


In [0]:
df.display()

id,event,extracted_date
1,School Re-opens 21-05-2024,"List(School, Re-opens, 21-05-2024)"
2,Went for a trip 18-08-2023,"List(Went, for, a, trip, 18-08-2023)"
3,No event 12-12-2022,"List(No, event, 12-12-2022)"
4,No dates are mentioned here,"List(No, dates, are, mentioned, here)"


In [0]:
from pyspark.sql.functions import regexp_extract, to_date, date_format, concat_ws, lit

# Extract date from event column using regex
date_pattern = r"(\d{2}-\d{2}-\d{4})"
df_with_date = df.withColumn("date", regexp_extract("event", date_pattern, 1))

# Convert the extracted date into a valid date format
df_with_date = df_with_date.withColumn(
    "formatted_date", to_date(df_with_date["date"], "dd-MM-yyyy")
)

# Get the day of the week from the formatted date
df_with_day = df_with_date.withColumn(
    "day_of_week", date_format("formatted_date", "EEEE")
)

# Append the day of the week with the original event column
df_final = df_with_day.withColumn(
    "updated_sentence",
    concat_ws(
        " ", df_with_day["event"], lit("("), df_with_day["day_of_week"], lit(")")
    ),
)

# Select the required columns and display the result
df_final.select("id", "updated_sentence").display()

id,updated_sentence
1,School Re-opens 21-05-2024 ( Tuesday )
2,Went for a trip 18-08-2023 ( Friday )
3,No event 12-12-2022 ( Monday )
4,No dates are mentioned here ( )


In [0]:
from datetime import datetime

# List of events with dates in string format
events = [
    ("School Re-opens", "21-05-2024"),
    ("Went for a trip", "18-08-2023"),
    ("No event", "12-12-2022"),
]

# Function to add day of the week to each event
def add_day_of_week(events):
    result = []
    for event, date_str in events:
        # Convert the string date to datetime object
        date_obj = datetime.strptime(date_str, "%d-%m-%Y")
        # Get the day of the week (e.g., Monday, Tuesday)
        day_of_week = date_obj.strftime("%A")
        result.append(f"{event} {date_str} ({day_of_week})")
    return result


# Output the events with the day of the week
updated_events = add_day_of_week(events)
for event in updated_events:
    print(event)

School Re-opens 21-05-2024 (Tuesday)
Went for a trip 18-08-2023 (Friday)
No event 12-12-2022 (Monday)


In [0]:
from datetime import datetime

def is_valid_date(date_str, date_format="%d-%m-%Y"):
    try:
        # Try to parse the date using the provided format
        datetime.strptime(date_str, date_format)
        return True
    except ValueError:
        # If parsing fails, it's not a valid date
        return False

# Example usage
date_str = "04-12-2024"
print(is_valid_date(date_str))  # True if the date is valid, False if not


True
