### Calculate each employee's salary as a percentage of their department's total salary.

In [0]:
from pyspark.sql import Window
from pyspark.sql.functions import sum, col, round

In [0]:
# Sample employee records; each tuple follows the order given in `columns`.
columns = ["Employee_ID", "Name", "Department", "Salary"]

data = [
    (1, "Alice", "HR", 5000),
    (2, "Bob", "HR", 7000),
    (3, "Charlie", "IT", 10000),
    (4, "David", "IT", 8000),
    (5, "Eve", "IT", 6000),
]

# Build the DataFrame from the rows, using `columns` as the column names,
# then print it.
# NOTE(review): `spark` is not defined in this file; presumably the notebook
# runtime provides the active SparkSession -- confirm.
df = spark.createDataFrame(data, schema=columns)
df.show()

+-----------+-------+----------+------+
|Employee_ID|   Name|Department|Salary|
+-----------+-------+----------+------+
|          1|  Alice|        HR|  5000|
|          2|    Bob|        HR|  7000|
|          3|Charlie|        IT| 10000|
|          4|  David|        IT|  8000|
|          5|    Eve|        IT|  6000|
+-----------+-------+----------+------+



In [0]:
# Attach each department's total salary to every employee row.
# A window partitioned by Department keeps one output row per employee
# (unlike groupBy, nothing is collapsed) while the aggregate spans the
# whole department.
window_specs = Window.partitionBy("Department")

# `sum` here is pyspark.sql.functions.sum (imported above), not the builtin.
department_total = sum("Salary").over(window_specs)
grouped_df = df.withColumn("Total_Salary", department_total)
grouped_df.show()

+-----------+-------+----------+------+------------+
|Employee_ID|   Name|Department|Salary|Total_Salary|
+-----------+-------+----------+------+------------+
|          1|  Alice|        HR|  5000|       12000|
|          2|    Bob|        HR|  7000|       12000|
|          3|Charlie|        IT| 10000|       24000|
|          4|  David|        IT|  8000|       24000|
|          5|    Eve|        IT|  6000|       24000|
+-----------+-------+----------+------+------------+



In [0]:
# Express each employee's salary as a percentage of their department's
# total salary, rounded to 2 decimal places.
# `round` here is pyspark.sql.functions.round (imported above), not the builtin.
# NOTE(review): a department whose salaries sum to 0 would make this a
# division by zero; the outcome presumably depends on Spark's ANSI-mode
# setting -- confirm whether that case can occur in real data.
salary_share = col("Salary") / col("Total_Salary")
result_df = grouped_df.withColumn(
    "Salary_Percentage",
    round(salary_share * 100, 2),
)
result_df.show()

+-----------+-------+----------+------+------------+-----------------+
|Employee_ID|   Name|Department|Salary|Total_Salary|Salary_Percentage|
+-----------+-------+----------+------+------------+-----------------+
|          1|  Alice|        HR|  5000|       12000|            41.67|
|          2|    Bob|        HR|  7000|       12000|            58.33|
|          3|Charlie|        IT| 10000|       24000|            41.67|
|          4|  David|        IT|  8000|       24000|            33.33|
|          5|    Eve|        IT|  6000|       24000|             25.0|
+-----------+-------+----------+------+------------+-----------------+

