In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructField, StructType,IntegerType,FloatType
from pyspark.sql.functions import col,dense_rank
from pyspark.sql.window import Window

# Get (or reuse) a SparkSession named "app" that runs locally with 3 worker threads.
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[3]")
    .getOrCreate()
)

In [0]:
# Explicit schema: an integer id and a float score, both declared non-nullable.
schema = StructType(
    [
        StructField("id", IntegerType(), nullable=False),
        StructField("score", FloatType(), nullable=False),
    ]
)

# Sample rows as (id, score) tuples; scores 4.00 and 3.65 each appear twice
# so that tie handling in the ranking can be observed.
data = [
    (1, 3.50),
    (2, 3.65),
    (3, 4.00),
    (4, 3.85),
    (5, 4.00),
    (6, 3.65),
]

# Build the DataFrame from the rows and schema above, then print it.
scores = spark.createDataFrame(data, schema=schema)
scores.show()

+---+-----+
| id|score|
+---+-----+
|  1|  3.5|
|  2| 3.65|
|  3|  4.0|
|  4| 3.85|
|  5|  4.0|
|  6| 3.65|
+---+-----+



In [0]:
# Write a solution to find the rank of the scores. The ranking should be calculated according to the following rules:
# * The scores should be ranked from the highest to the lowest.
# * If there is a tie between two scores, both should have the same ranking.
# * After a tie, the next ranking number should be the next consecutive integer value. In other words, there should be no holes between ranks.
# Return the result table ordered by score in descending order.

# Global window (no partitionBy) ordered by score, highest first.
# dense_rank() gives tied scores the same rank and leaves no gaps after a tie.
window_spec = Window.orderBy(col("score").desc())
(
    scores
    .withColumn("rank", dense_rank().over(window_spec))
    .select("score", "rank")
    # The window's ordering only drives the rank computation; it does not
    # guarantee the order of the returned rows. Sort explicitly so the result
    # is ordered by score descending, as the task requires.
    .orderBy(col("score").desc())
    .show()
)

+-----+----+
|score|rank|
+-----+----+
|  4.0|   1|
|  4.0|   1|
| 3.85|   2|
| 3.65|   3|
| 3.65|   3|
|  3.5|   4|
+-----+----+



In [0]:
# Register the DataFrame as a temp view so it can be queried with Spark SQL.
scores.createOrReplaceTempView("scores")

# Same ranking expressed in SQL. The ORDER BY inside OVER(...) only controls how
# dense_rank() is computed; the outer ORDER BY is what guarantees the result
# rows come back sorted by score descending, as the task requires.
spark.sql(
    """
    select score,
           dense_rank() over (order by score desc) as `rank`
    from scores
    order by score desc
    """
).show()

+-----+----+
|score|rank|
+-----+----+
|  4.0|   1|
|  4.0|   1|
| 3.85|   2|
| 3.65|   3|
| 3.65|   3|
|  3.5|   4|
+-----+----+



In [0]:
# Stop the SparkSession now that all queries have run.
spark.stop()