# In [0]:
from datetime import datetime

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum, month, year, lit
from pyspark.sql.utils import AnalysisException

# Monthly attendance summary job.
#
# Loads the cleaned attendance Delta table, keeps only rows whose "Date" falls
# in the current calendar month, and produces two Delta outputs:
#   * /delta/attendance_summary  - summed "HoursWorked" per "EmployeeID"
#   * /delta/attendance_overtime - rows where "HoursWorked" exceeds 8
# Both results are also printed to stdout via DataFrame.show().

spark = SparkSession.builder.appName("AttendanceSummary").getOrCreate()

# "Current month" is derived from the driver's local clock at run time.
current_date = datetime.now()
current_year = current_date.year
current_month = current_date.month

delta_table_path = "/delta/attendance_data_cleaned"

# Only the load is guarded. Keeping the rest of the pipeline outside the `try`
# ensures that unrelated analysis errors (e.g. a missing column or a failed
# write) surface with their real message instead of being reported as a
# missing table.
try:
    attendance_df = spark.read.format("delta").load(delta_table_path)
except AnalysisException:
    print("Delta Table not found")
else:
    # Restrict to rows dated in the current year and month.
    attendance_current_month_df = attendance_df.filter(
        (year(col("Date")) == lit(current_year))
        & (month(col("Date")) == lit(current_month))
    )

    # NOTE: `sum` here is pyspark.sql.functions.sum (imported at file top),
    # not the Python builtin.
    total_hours_df = attendance_current_month_df.groupBy("EmployeeID").agg(
        sum("HoursWorked").alias("TotalHoursWorked")
    )

    print("Total Hours Worked by Each Employee for Current Month:")
    total_hours_df.show()

    # Overtime is evaluated per row: any single record with more than 8 hours.
    overtime_df = attendance_current_month_df.filter(
        col("HoursWorked") > 8
    ).select("EmployeeID", "Date", "HoursWorked")

    print("Employees Who Worked Overtime:")
    overtime_df.show()

    # Each run replaces the previous contents of both output tables.
    total_hours_df.write.format("delta").mode("overwrite").save(
        "/delta/attendance_summary"
    )
    overtime_df.write.format("delta").mode("overwrite").save(
        "/delta/attendance_overtime"
    )
