In [1]:
import pyspark
from pyspark.sql import SparkSession
# Build the Spark session for this notebook; getOrCreate() hands back an
# already-active session if one exists instead of starting a second one.
spark = (
    SparkSession.builder
    .appName('Test Windows')
    .getOrCreate()
)

In [2]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Sample marks, one row per student result: [Subject, Program, Marks].
# Duplicate rows are intentional -- they produce ties for the ranking cells below.
input_data = [
    ['Data Structures', 'AIML', 48],
    ['Data Structures', 'AIML', 34],
    ['Data Structures', 'AIML', 43],
    ['Machine Learning', 'BDA', 47],
    ['Machine Learning', 'BDA', 47],
    ['Data Structures', 'ES', 49],
    ['Data Structures', 'ES', 24],
    ['Data Structures', 'BDA', 45],
    ['Data Structures', 'BDA', 49],
    ['Data Structures', 'BDA', 48],
    ['ABD', 'AIML', 47],
    ['ABD', 'BDA', 42],
    ['ABD', 'BDA', 37],
    ['ABD', 'BDA', 41],
    ['ABD', 'BDA', 43],
    ['ABD', 'BDA', 40],
    ['ABD', 'BDA', 49],
    ['ABD', 'BDA', 37],
    ['ABD', 'BDA', 29],
    ['ABD', 'BDA', 46],
    ['Machine Learning', 'BDA', 47],
    ['Machine Learning', 'BDA', 42],
    ['Machine Learning', 'BDA', 37],
    ['Machine Learning', 'BDA', 47],
    ['Data Structures', 'ES', 48],
    ['Data Structures', 'ES', 49],
    ['Data Structures', 'ES', 46],
    ['Data Structures', 'ES', 44],
]

# DDL-style schema string: column names and types in row order.
schema = "`Subject` STRING, `Program` STRING, `Marks` INTEGER"

prog_df = spark.createDataFrame(input_data, schema=schema)
prog_df.printSchema()
prog_df.show(50)

root
 |-- Subject: string (nullable = true)
 |-- Program: string (nullable = true)
 |-- Marks: integer (nullable = true)

+----------------+-------+-----+
|         Subject|Program|Marks|
+----------------+-------+-----+
| Data Structures|   AIML|   48|
| Data Structures|   AIML|   34|
| Data Structures|   AIML|   43|
|Machine Learning|    BDA|   47|
|Machine Learning|    BDA|   47|
| Data Structures|     ES|   49|
| Data Structures|     ES|   24|
| Data Structures|    BDA|   45|
| Data Structures|    BDA|   49|
| Data Structures|    BDA|   48|
|             ABD|   AIML|   47|
|             ABD|    BDA|   42|
|             ABD|    BDA|   37|
|             ABD|    BDA|   41|
|             ABD|    BDA|   43|
|             ABD|    BDA|   40|
|             ABD|    BDA|   49|
|             ABD|    BDA|   37|
|             ABD|    BDA|   29|
|             ABD|    BDA|   46|
|Machine Learning|    BDA|   47|
|Machine Learning|    BDA|   42|
|Machine Learning|    BDA|   37|
|Machine Learning|  

In [4]:
# Rank marks within each (Program, Subject) group, highest mark first.
# dense_rank() gives tied marks the same rank and continues with the next
# consecutive integer, so there are no gaps after a tie.
winSpec = (
    Window
    .partitionBy('Program', 'Subject')
    .orderBy(desc('Marks'))
)
branch_df = prog_df.withColumn(
    'Rank',
    dense_rank().over(winSpec),
)
branch_df.show(50)

+----------------+-------+-----+----+
|         Subject|Program|Marks|Rank|
+----------------+-------+-----+----+
| Data Structures|    BDA|   49|   1|
| Data Structures|    BDA|   48|   2|
| Data Structures|    BDA|   45|   3|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   42|   2|
|Machine Learning|    BDA|   37|   3|
| Data Structures|     ES|   49|   1|
| Data Structures|     ES|   49|   1|
| Data Structures|     ES|   48|   2|
| Data Structures|     ES|   46|   3|
| Data Structures|     ES|   44|   4|
| Data Structures|     ES|   24|   5|
|             ABD|    BDA|   49|   1|
|             ABD|    BDA|   46|   2|
|             ABD|    BDA|   43|   3|
|             ABD|    BDA|   42|   4|
|             ABD|    BDA|   41|   5|
|             ABD|    BDA|   40|   6|
|             ABD|    BDA|   37|   7|
|             ABD|    BDA|   37|   7|
|           

In [5]:
# Rank marks within each Subject across all programs, highest mark first.
# rank() gives tied marks the same rank but skips the following positions
# (e.g. 1, 1, 1, 4), unlike dense_rank().
winSpec = (
    Window
    .partitionBy('Subject')
    .orderBy(desc('Marks'))
)
subject_df = prog_df.withColumn(
    'Rank',
    rank().over(winSpec),
)
subject_df.show(50)

+----------------+-------+-----+----+
|         Subject|Program|Marks|Rank|
+----------------+-------+-----+----+
|             ABD|    BDA|   49|   1|
|             ABD|   AIML|   47|   2|
|             ABD|    BDA|   46|   3|
|             ABD|    BDA|   43|   4|
|             ABD|    BDA|   42|   5|
|             ABD|    BDA|   41|   6|
|             ABD|    BDA|   40|   7|
|             ABD|    BDA|   37|   8|
|             ABD|    BDA|   37|   8|
|             ABD|    BDA|   29|  10|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   47|   1|
|Machine Learning|    BDA|   42|   5|
|Machine Learning|    BDA|   37|   6|
| Data Structures|     ES|   49|   1|
| Data Structures|    BDA|   49|   1|
| Data Structures|     ES|   49|   1|
| Data Structures|   AIML|   48|   4|
| Data Structures|    BDA|   48|   4|
| Data Structures|     ES|   48|   4|
| Data Structures|     ES|   46|   7|
| Data Struc

In [None]:
# Subject toppers across branches: keep rows ranked 1 or 2 within their subject.
# Rank here comes from rank(), so a tie at rank 1 can leave a subject with no
# rank-2 row at all.
subject_df.where(subject_df['Rank'] <= 2).show()

In [None]:
# Subject topper within each branch: rank 1 per (Program, Subject) group.
# Students tied on the top mark all share rank 1, so all of them are shown.
branch_df.where(branch_df['Rank'] <= 1).show()