### Problem Statement
Find Employees with Minimum and Maximum Salaries in Each Department
Given a dataset containing employee names, department IDs, and their corresponding salaries, the goal is to determine the following for each department:

The name of the employee with the minimum salary.
The name of the employee with the maximum salary.
Input:
A table or dataset containing the following columns:

emp_name (String): Name of the employee.

dept_id (Integer): ID of the department the employee belongs to.

salary (Integer): Salary of the employee.

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql import functions as F
from pyspark.sql import Window

# Obtain the notebook's Spark session (an already-running session is reused)
spark = (
    SparkSession.builder
    .appName("CreateTableExample")
    .getOrCreate()
)

# Employee schema, built field by field; the third argument marks each column nullable
schema = (
    StructType()
    .add("emp_name", StringType(), True)
    .add("dept_id", IntegerType(), True)
    .add("salary", IntegerType(), True)
)

# Sample rows laid out as (emp_name, dept_id, salary)
data = [
    ("Siva", 1, 30000),
    ("Ravi", 2, 40000),
    ("Prasad", 1, 50000),
    ("Sai", 2, 20000),
    ("Anna", 2, 10000),
]

# Materialise the rows under the explicit schema
emps_df = spark.createDataFrame(data=data, schema=schema)

# Render the DataFrame as the cell output
emps_df.display()



emp_name,dept_id,salary
Siva,1,30000
Ravi,2,40000
Prasad,1,50000
Sai,2,20000
Anna,2,10000


In [0]:
# Expose the DataFrame to Spark SQL under the view name used by the %sql cell
emps_df.createOrReplaceTempView("emps_wipro")

# Spark SQL

In [0]:
%sql
-- For each department, report the lowest-paid and the highest-paid employee.
--
-- Both columns use first_value over an explicit full-partition frame, so the
-- result does not depend on the implicit default window frame.
--   * "nulls last" keeps employees with an unknown (NULL) salary from being
--     reported as the minimum earner (ascending order sorts NULLs first by
--     default) or as the maximum earner.
--   * emp_name is a secondary sort key so salary ties resolve the same way
--     on every run.
with ranked as (
  select
    dept_id,
    first_value(emp_name) over (
      partition by dept_id
      order by
        salary asc nulls last,
        emp_name
      rows between unbounded preceding
        and unbounded following
    ) as min_sal_empname,
    first_value(emp_name) over (
      partition by dept_id
      order by
        salary desc nulls last,
        emp_name
      rows between unbounded preceding
        and unbounded following
    ) as max_sal_empname
  from
    emps_wipro
)
-- Every row of a department carries the same pair of names; collapse to one row
select
  distinct dept_id,
  min_sal_empname,
  max_sal_empname
from
  ranked
order by
  dept_id;

dept_id,min_sal_empname,max_sal_empname
1,Siva,Prasad
2,Anna,Ravi


# PySpark

In [0]:
# Goal: one row per department naming its lowest- and highest-paid employee.
#
# Ordering notes:
#   * Ascending sorts place NULLs first by default, so a plain orderBy("salary")
#     would report an employee with an unknown salary as the minimum earner.
#     asc_nulls_last()/desc_nulls_last() keep such rows behind every real salary.
#   * emp_name is a secondary sort key so salary ties resolve the same way on
#     every run (row_number alone picks an arbitrary row among ties).
dept_window = Window.partitionBy("dept_id")

# Ascending salary window: row 1 is the lowest-paid employee of the department
window_spec = dept_window.orderBy(
    F.col("salary").asc_nulls_last(), F.col("emp_name")
)

# Descending salary window: row 1 is the highest-paid employee of the department
max_window_spec = dept_window.orderBy(
    F.col("salary").desc_nulls_last(), F.col("emp_name")
)

# Get employee with minimum salary in each department
min_sal_df = (
    emps_df.withColumn("row_num", F.row_number().over(window_spec))
    .filter(F.col("row_num") == 1)
    .select("dept_id", F.col("emp_name").alias("min_sal_empname"))
)

# Get employee with maximum salary in each department
max_sal_df = (
    emps_df.withColumn("row_num", F.row_number().over(max_window_spec))
    .filter(F.col("row_num") == 1)
    .select("dept_id", F.col("emp_name").alias("max_sal_empname"))
)

# Both frames hold exactly one row per department, so the inner join on dept_id
# pairs them one-to-one; sort so the displayed order is stable
result_df = min_sal_df.join(max_sal_df, on="dept_id", how="inner").orderBy("dept_id")

# Show the result
result_df.display()

dept_id,min_sal_empname,max_sal_empname
1,Siva,Prasad
2,Anna,Ravi
