# Window Functions
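- Window functions are useful when you need to perform a calculation across a set of rows related to the current row (a "window"), such as ranking employees by salary within each department.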

In [87]:
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
from pyspark.sql.functions import rank, col

# Get (or create) the Spark session used by this notebook.
spark = (
    SparkSession.builder
    .appName("WindowFunctions")
    .getOrCreate()
)

# Load the employee CSV: the first line is the header, and column types
# are inferred from the data.
df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('./resources/7_employee.csv')
)
df.show()

# One window per department, ordered by salary from highest to lowest.
window_specification = (
    Window
    .partitionBy('department')
    .orderBy(col('salary').desc())
)

# Add a 'rank' column computed over each department's window.
df_ranked = df.withColumn(
    'rank',
    rank().over(window_specification),
)
df_ranked.show()

+-----------+---------+----------+------+
|employee_id|     name|department|salary|
+-----------+---------+----------+------+
|          1|     John|        HR| 55000|
|          2|     Jane|   Finance| 80000|
|          3|    James|        HR| 60000|
|          4|     Anna|   Finance| 90000|
|          5|      Bob| Marketing| 75000|
|          6|    Emily| Marketing| 82000|
|          7|    David|        HR| 65000|
|          8|   George|   Finance| 95000|
|          9|   Olivia| Marketing| 68000|
|         10|     Liam|        HR| 54000|
|         11|   Sophia|   Finance| 85000|
|         12|    Lucas| Marketing| 78000|
|         13| Isabella|   Finance| 92000|
|         14|    Mason|        HR| 63000|
|         15|   Amelia| Marketing| 79000|
|         16|    Ethan|        HR| 67000|
|         17|  Abigail|   Finance| 87000|
|         18|    Aiden|        HR| 56000|
|         19|Charlotte| Marketing| 81000|
|         20|     Jack|        HR| 69000|
+-----------+---------+----------+------+

### Join With Aggregations
 - perform aggregations on joined DataFrames

In [92]:
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
from pyspark.sql.functions import rank, col

# Get (or create) the Spark session for this cell.
spark = (
    SparkSession.builder
    .appName('TestApp')
    .getOrCreate()
)

# Read the employee CSV with a header row and inferred column types.
df = spark.read.csv(
    './resources/7_employee.csv',
    header=True,
    inferSchema=True,
)

# Window per department, ordered by salary ascending (lowest salary first).
window_spec = Window.partitionBy('department').orderBy('salary')

# Add a 'rank' column computed over each department's window, then display it.
df_ranked1 = df.withColumn('rank', rank().over(window_spec))
df_ranked1.show()

+-----------+---------+----------+------+----+
|employee_id|     name|department|salary|rank|
+-----------+---------+----------+------+----+
|          2|     Jane|   Finance| 80000|   1|
|         11|   Sophia|   Finance| 85000|   2|
|         17|  Abigail|   Finance| 87000|   3|
|          4|     Anna|   Finance| 90000|   4|
|         13| Isabella|   Finance| 92000|   5|
|          8|   George|   Finance| 95000|   6|
|         10|     Liam|        HR| 54000|   1|
|          1|     John|        HR| 55000|   2|
|         18|    Aiden|        HR| 56000|   3|
|          3|    James|        HR| 60000|   4|
|         14|    Mason|        HR| 63000|   5|
|          7|    David|        HR| 65000|   6|
|         16|    Ethan|        HR| 67000|   7|
|         20|     Jack|        HR| 69000|   8|
|          9|   Olivia| Marketing| 68000|   1|
|          5|      Bob| Marketing| 75000|   2|
|         12|    Lucas| Marketing| 78000|   3|
|         15|   Amelia| Marketing| 79000|   4|
|         19|