In [0]:
"""
https://www.youtube.com/watch?v=u3W_Op3FTVA

Write SQL (solved below with the PySpark DataFrame API) that outputs these details for each employee:
employeeid, employee_default_phone_number, total_entry, total_login, total_logout, last_login, last_logout
"""

from pyspark.sql.functions import *
from pyspark.sql.window import Window
from pyspark.sql import SparkSession

# Reuse the active Spark session if there is one, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

# Sample check-in events: one row per login/logout, timestamp given as text.
checkin_columns = ["employeeid", "entry_details", "timestamp_details"]
checkin_rows = [
    (1000, 'login', '2023-06-16 01:00:15.34'),
    (1000, 'login', '2023-06-16 02:00:15.34'),
    (1000, 'login', '2023-06-16 03:00:15.34'),
    (1000, 'logout', '2023-06-16 12:00:15.34'),
    (1001, 'login', '2023-06-16 01:00:15.34'),
    (1001, 'login', '2023-06-16 02:00:15.34'),
    (1001, 'login', '2023-06-16 03:00:15.34'),
    (1001, 'logout', '2023-06-16 12:00:15.34'),
]

# Sample phone numbers: an employee can have several, "isdefault" is the
# string 'true' / 'false' (not a boolean).
phone_columns = ["employeeid", "phone_number", "isdefault"]
phone_rows = [
    (1001, 9999, 'false'),
    (1001, 1111, 'false'),
    (1001, 2222, 'true'),
    (1003, 3333, 'false'),
]

# Build the check-in frame and immediately convert the textual timestamps
# into a real timestamp column.
employee_checkin_details = spark.createDataFrame(checkin_rows, checkin_columns).withColumn(
    "timestamp_details",
    to_timestamp(col("timestamp_details")),
)

employee_details = spark.createDataFrame(phone_rows, phone_columns)

employee_checkin_details.show(truncate=False)
employee_details.show()

+----------+-------------+----------------------+
|employeeid|entry_details|timestamp_details     |
+----------+-------------+----------------------+
|1000      |login        |2023-06-16 01:00:15.34|
|1000      |login        |2023-06-16 02:00:15.34|
|1000      |login        |2023-06-16 03:00:15.34|
|1000      |logout       |2023-06-16 12:00:15.34|
|1001      |login        |2023-06-16 01:00:15.34|
|1001      |login        |2023-06-16 02:00:15.34|
|1001      |login        |2023-06-16 03:00:15.34|
|1001      |logout       |2023-06-16 12:00:15.34|
+----------+-------------+----------------------+

+----------+------------+---------+
|employeeid|phone_number|isdefault|
+----------+------------+---------+
|      1001|        9999|    false|
|      1001|        1111|    false|
|      1001|        2222|     true|
|      1003|        3333|    false|
+----------+------------+---------+



In [0]:
# Split the single timestamp column into one column per event type: each new
# column carries the timestamp only on rows of its own event type and is null
# everywhere else.
is_login = col("entry_details") == "login"
is_logout = col("entry_details") == "logout"
event_time = col("timestamp_details")

employee_checkin_refined = (
    employee_checkin_details
    .withColumn("logins", when(is_login, event_time).otherwise(None))
    .withColumn("logouts", when(is_logout, event_time).otherwise(None))
)

employee_checkin_refined.show(truncate=False)

+----------+-------------+----------------------+----------------------+----------------------+
|employeeid|entry_details|timestamp_details     |logins                |logouts               |
+----------+-------------+----------------------+----------------------+----------------------+
|1000      |login        |2023-06-16 01:00:15.34|2023-06-16 01:00:15.34|null                  |
|1000      |login        |2023-06-16 02:00:15.34|2023-06-16 02:00:15.34|null                  |
|1000      |login        |2023-06-16 03:00:15.34|2023-06-16 03:00:15.34|null                  |
|1000      |logout       |2023-06-16 12:00:15.34|null                  |2023-06-16 12:00:15.34|
|1001      |login        |2023-06-16 01:00:15.34|2023-06-16 01:00:15.34|null                  |
|1001      |login        |2023-06-16 02:00:15.34|2023-06-16 02:00:15.34|null                  |
|1001      |login        |2023-06-16 03:00:15.34|2023-06-16 03:00:15.34|null                  |
|1001      |logout       |2023-06-16 12:

In [0]:
# Per-employee check-in summary.
# count("*") counts every row, while count(<column>) skips nulls, so counting
# the "logins" / "logouts" columns gives the number of events of each type.
# max() likewise ignores nulls and returns the most recent timestamp.
# NOTE: `max` here is pyspark.sql.functions.max (pulled in by the wildcard
# import at the top of the notebook), not the Python builtin.
checkin_summary = employee_checkin_refined.groupBy("employeeid").agg(
    count("*").alias("total_entry"),
    count("logins").alias("total_logins"),
    count("logouts").alias("total_logouts"),
    max("logins").alias("latest_login"),
    max("logouts").alias("latest_logouts"),
)

# Reduce the phone table to exactly what the report needs *before* joining:
# only default numbers, already under their final column name. This avoids
# dropping columns after the join; DataFrame.drop() with several Column
# objects raises TypeError on PySpark versions older than 3.4.
default_phone_numbers = employee_details.filter(col("isdefault") == "true").select(
    "employeeid",
    col("phone_number").alias("employee_default_phone_number"),
)

# Joining on the column *name* keeps a single "employeeid" column and does not
# rely on column references taken from frames other than the ones being joined.
# The left join keeps employees without a default phone number (null phone).
checkin_summary.join(default_phone_numbers, on="employeeid", how="left").show(truncate=False)

+----------+-----------+------------+-------------+----------------------+----------------------+-----------------------------+
|employeeid|total_entry|total_logins|total_logouts|latest_login          |latest_logouts        |employee_default_phone_number|
+----------+-----------+------------+-------------+----------------------+----------------------+-----------------------------+
|1000      |4          |3           |1            |2023-06-16 03:00:15.34|2023-06-16 12:00:15.34|null                         |
|1001      |4          |3           |1            |2023-06-16 03:00:15.34|2023-06-16 12:00:15.34|2222                         |
+----------+-----------+------------+-------------+----------------------+----------------------+-----------------------------+

