In [0]:
"""
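Generate a salary report.

For every employee, each income and deduction component is computed as a
percentage of base_salary. Gross is the sum of the income components,
Total_deductions is the sum of the deduction components, and
Net_Pay = Gross - Total_deductions.
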
Input 1
+------+--------+-----------+
|emp_id|emp_name|base_salary|
+------+--------+-----------+
|     1|   Rohan|       5000|
|     2|    Alex|       6000|
|     3|  Maryam|       7000|
+------+--------+-----------+

Input 2
+---+---------+----------+
| id|   income|percentage|
+---+---------+----------+
|  1|    Basic|       100|
|  2|Allowance|         4|
|  3|   Others|         6|
+---+---------+----------+

Input 3
+---+---------+----------+
| id|deduction|percentage|
+---+---------+----------+
|  1|Insurance|         5|
|  2|   Health|         6|
|  3|    House|         4|
+---+---------+----------+


Output
+--------+------+---------+------+---------+------+-----+------+----------------+-------+
|emp_name| Basic|Allowance|Others|Insurance|Health|House| Gross|Total_deductions|Net_Pay|
+--------+------+---------+------+---------+------+-----+------+----------------+-------+
|   Rohan|5000.0|    200.0| 300.0|    250.0| 300.0|200.0|5500.0|           750.0| 4750.0|
|    Alex|6000.0|    240.0| 360.0|    300.0| 360.0|240.0|6600.0|           900.0| 5700.0|
|  Maryam|7000.0|    280.0| 420.0|    350.0| 420.0|280.0|7700.0|          1050.0| 6650.0|
+--------+------+---------+------+---------+------+-----+------+----------------+-------+

"""


# --- Employees: one row per employee together with the base salary ---
salary_schema = "emp_id int, emp_name string, base_salary int"
salary_data = [
    (1, 'Rohan', 5000),
    (2, 'Alex', 6000),
    (3, 'Maryam', 7000),
]
salary_df = spark.createDataFrame(salary_data, salary_schema)
salary_df.show()

# --- Income components: name plus the percentage attached to it ---
income_schema = "id int, income string, percentage int"
income_data = [
    (1, 'Basic', 100),
    (2, 'Allowance', 4),
    (3, 'Others', 6),
]
income_df = spark.createDataFrame(income_data, income_schema)
income_df.show()

# --- Deduction components: name plus the percentage attached to it ---
deduction_schema = "id int, deduction string, percentage int"
deduction_data = [
    (1, 'Insurance', 5),
    (2, 'Health', 6),
    (3, 'House', 4),
]
deduction_df = spark.createDataFrame(deduction_data, deduction_schema)
deduction_df.show()

+------+--------+-----------+
|emp_id|emp_name|base_salary|
+------+--------+-----------+
|     1|   Rohan|       5000|
|     2|    Alex|       6000|
|     3|  Maryam|       7000|
+------+--------+-----------+

+---+---------+----------+
| id|   income|percentage|
+---+---------+----------+
|  1|    Basic|       100|
|  2|Allowance|         4|
|  3|   Others|         6|
+---+---------+----------+

+---+---------+----------+
| id|deduction|percentage|
+---+---------+----------+
|  1|Insurance|         5|
|  2|   Health|         6|
|  3|    House|         4|
+---+---------+----------+



In [0]:
from pyspark.sql.functions import *

# Pair every employee with every income component. The two frames share no
# key, so an explicit crossJoin states the intent directly instead of an inner
# join on an always-true predicate.
salary_income_df = (
    salary_df.crossJoin(income_df)
    # percentage is expressed in percent of base_salary; dividing by 100.0
    # yields a floating-point amount.
    .withColumn("amount", (col("base_salary") * col("percentage")) / 100.0)
    # Long format: one row per (employee, component), with the component name
    # exposed as trans_type.
    .select("emp_id", "emp_name", col("income").alias("trans_type"), "amount")
)

salary_income_df.show()

+------+--------+----------+------+
|emp_id|emp_name|trans_type|amount|
+------+--------+----------+------+
|     1|   Rohan|     Basic|5000.0|
|     1|   Rohan| Allowance| 200.0|
|     1|   Rohan|    Others| 300.0|
|     2|    Alex|     Basic|6000.0|
|     2|    Alex| Allowance| 240.0|
|     2|    Alex|    Others| 360.0|
|     3|  Maryam|     Basic|7000.0|
|     3|  Maryam| Allowance| 280.0|
|     3|  Maryam|    Others| 420.0|
+------+--------+----------+------+



In [0]:
from pyspark.sql.functions import *

# Pair every employee with every deduction component. The two frames share no
# key, so an explicit crossJoin states the intent directly instead of an inner
# join on an always-true predicate.
salary_deductions_df = (
    salary_df.crossJoin(deduction_df)
    # percentage is expressed in percent of base_salary; dividing by 100.0
    # yields a floating-point amount.
    .withColumn("amount", (col("base_salary") * col("percentage")) / 100.0)
    # Long format: one row per (employee, component), with the component name
    # exposed as trans_type.
    .select("emp_id", "emp_name", col("deduction").alias("trans_type"), "amount")
)

salary_deductions_df.show()

+------+--------+----------+------+
|emp_id|emp_name|trans_type|amount|
+------+--------+----------+------+
|     1|   Rohan| Insurance| 250.0|
|     1|   Rohan|    Health| 300.0|
|     1|   Rohan|     House| 200.0|
|     2|    Alex| Insurance| 300.0|
|     2|    Alex|    Health| 360.0|
|     2|    Alex|     House| 240.0|
|     3|  Maryam| Insurance| 350.0|
|     3|  Maryam|    Health| 420.0|
|     3|  Maryam|     House| 280.0|
+------+--------+----------+------+



In [0]:
# Stack income rows and deduction rows into one long table.
# unionByName matches columns by name rather than by position, so the result
# stays correct even if the column order of either input changes.
salary_income_deductions_df = salary_income_df.unionByName(salary_deductions_df)

salary_income_deductions_df.show()

+------+--------+----------+------+
|emp_id|emp_name|trans_type|amount|
+------+--------+----------+------+
|     1|   Rohan|     Basic|5000.0|
|     1|   Rohan| Allowance| 200.0|
|     1|   Rohan|    Others| 300.0|
|     2|    Alex|     Basic|6000.0|
|     2|    Alex| Allowance| 240.0|
|     2|    Alex|    Others| 360.0|
|     3|  Maryam|     Basic|7000.0|
|     3|  Maryam| Allowance| 280.0|
|     3|  Maryam|    Others| 420.0|
|     1|   Rohan| Insurance| 250.0|
|     1|   Rohan|    Health| 300.0|
|     1|   Rohan|     House| 200.0|
|     2|    Alex| Insurance| 300.0|
|     2|    Alex|    Health| 360.0|
|     2|    Alex|     House| 240.0|
|     3|  Maryam| Insurance| 350.0|
|     3|  Maryam|    Health| 420.0|
|     3|  Maryam|     House| 280.0|
+------+--------+----------+------+



In [0]:
# Explicit namespace import: F.max is unambiguously Spark's aggregate, not the
# Python builtin max that a star import shadows.
from pyspark.sql import functions as F

# Every trans_type value that becomes its own column in the report.
component_types = ["Basic", "Allowance", "Others", "Insurance", "Health", "House"]

# For each component, keep the amount on rows of that trans_type and 0 on all
# other rows; the max per employee then yields that component's amount.
component_aggs = [
    F.max(
        F.when(F.col("trans_type") == name, F.col("amount")).otherwise(F.lit(0))
    ).alias(name)
    for name in component_types
]

salary_report_df = (
    salary_income_deductions_df
    # Group by emp_id as well as emp_name so that two employees who share a
    # name are not merged into a single row.
    .groupBy("emp_id", "emp_name")
    .agg(*component_aggs)
    .withColumn("Gross", F.col("Basic") + F.col("Allowance") + F.col("Others"))
    .withColumn("Total_deductions", F.col("Insurance") + F.col("Health") + F.col("House"))
    .withColumn("Net_Pay", F.col("Gross") - F.col("Total_deductions"))
    # emp_id was only needed as a grouping key; the report starts at emp_name.
    .drop("emp_id")
)

salary_report_df.show()

+--------+------+---------+------+---------+------+-----+------+----------------+-------+
|emp_name| Basic|Allowance|Others|Insurance|Health|House| Gross|Total_deductions|Net_Pay|
+--------+------+---------+------+---------+------+-----+------+----------------+-------+
|   Rohan|5000.0|    200.0| 300.0|    250.0| 300.0|200.0|5500.0|           750.0| 4750.0|
|    Alex|6000.0|    240.0| 360.0|    300.0| 360.0|240.0|6600.0|           900.0| 5700.0|
|  Maryam|7000.0|    280.0| 420.0|    350.0| 420.0|280.0|7700.0|          1050.0| 6650.0|
+--------+------+---------+------+---------+------+-----+------+----------------+-------+

