In [None]:
!pip install pandas
!pip install pyspark
!pip install pyarrow

In [None]:
from pyspark.sql import SparkSession
 
# Create (or reuse) the SparkSession for this notebook, named "pandas to spark".
spark = (
    SparkSession.builder
    .appName("pandas to spark")
    .getOrCreate()
)

# Turn on Arrow-based columnar transfer for pandas <-> Spark conversions.
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

In [3]:
import pandas as pd

# Employee fixture rows, each laid out as [id, name, salary, departmentId].
data = [
    [1, 'Joe', 85000, 1],
    [2, 'Henry', 80000, 2],
    [3, 'Sam', 60000, 2],
    [4, 'Max', 90000, 1],
    [5, 'Janet', 69000, 1],
    [6, 'Randy', 85000, 1],
    [7, 'Will', 70000, 1],
]
# The dtype mapping doubles as the column order (dicts keep insertion order).
employee_dtypes = {
    'id': 'Int64',
    'name': 'object',
    'salary': 'Int64',
    'departmentId': 'Int64',
}
employee = pd.DataFrame(data, columns=list(employee_dtypes)).astype(employee_dtypes)

# Department fixture rows, each laid out as [id, name].
# `data` is rebound here, so after this cell it holds the department rows.
data = [[1, 'IT'], [2, 'Sales']]
department_dtypes = {'id': 'Int64', 'name': 'object'}
department = pd.DataFrame(data, columns=list(department_dtypes)).astype(department_dtypes)

In [4]:
# Convert the pandas frames to Spark DataFrames, keeping the same names.
# The isinstance guards make this cell safe to re-run: once a name already
# refers to a Spark DataFrame, passing it to spark.createDataFrame again
# would raise, so the conversion is skipped.
if isinstance(employee, pd.DataFrame):
    employee = spark.createDataFrame(employee)
employee.show()

if isinstance(department, pd.DataFrame):
    department = spark.createDataFrame(department)
department.show()


+---+-----+------+------------+
| id| name|salary|departmentId|
+---+-----+------+------------+
|  1|  Joe| 85000|           1|
|  2|Henry| 80000|           2|
|  3|  Sam| 60000|           2|
|  4|  Max| 90000|           1|
|  5|Janet| 69000|           1|
|  6|Randy| 85000|           1|
|  7| Will| 70000|           1|
+---+-----+------+------------+

+---+-----+
| id| name|
+---+-----+
|  1|   IT|
|  2|Sales|
+---+-----+



In [None]:
from pyspark.sql.functions import dense_rank, desc
from pyspark.sql.window import Window

# How many of the highest distinct salary levels to keep per department.
TOP_N = 3

# Rank salaries within each department, highest first. dense_rank gives tied
# salaries the same rank and leaves no gaps, so "rank <= TOP_N" keeps every
# employee who earns one of the TOP_N highest distinct salaries.
window = Window.partitionBy('departmentId').orderBy(desc('salary'))

top_earners = (
    employee
    .withColumn('salary_rank', dense_rank().over(window))
    # dense_rank starts at 1, so an upper bound alone is sufficient.
    .where(f'salary_rank <= {TOP_N}')
    .withColumnRenamed('name', 'employee')
)

# Rename the department columns so the join key matches the employee side and
# the department's 'name' becomes the 'department' output column.
department_names = (
    department
    .withColumnRenamed('id', 'departmentId')
    .withColumnRenamed('name', 'department')
)

(
    top_earners
    .join(department_names, 'departmentId', 'inner')
    .select('department', 'employee', 'salary')
    # Sort explicitly: row order after a join is not guaranteed, and the
    # employee name breaks ties between equal salaries.
    .orderBy('department', desc('salary'), 'employee')
    .show()
)

+----------+--------+------+
|department|employee|salary|
+----------+--------+------+
|        IT|     Max| 90000|
|        IT|     Joe| 85000|
|        IT|   Randy| 85000|
|        IT|    Will| 70000|
|     Sales|   Henry| 80000|
|     Sales|     Sam| 60000|
+----------+--------+------+

