In [0]:
from pyspark.sql import Row

# Monthly sales figures: one Row per (month, item) pair, three months of four
# items each. The tuples below are expanded into Rows in the order listed.
data = [
    Row(date=month, item=item, sales=sales)
    for month, item, sales in [
        ("2022-01", "apple", 100),
        ("2022-01", "banana", 200),
        ("2022-01", "grapes", 200),
        ("2022-01", "orange", 300),
        ("2022-02", "apple", 150),
        ("2022-02", "banana", 250),
        ("2022-02", "orange", 350),
        ("2022-02", "grapes", 420),
        ("2022-03", "apple", 200),
        ("2022-03", "banana", 300),
        ("2022-03", "grapes", 110),
        ("2022-03", "orange", 400),
    ]
]

# Build the DataFrame; column names and types are inferred from the Row fields.
df = spark.createDataFrame(data)

In [0]:
# Render the sales DataFrame built above so the raw input is visible before any
# window function is applied.
display(df)

In [0]:
from pyspark.sql.window import Window

In [0]:
from pyspark.sql.functions import row_number

# Number every row of the DataFrame in date order.
# Each date is shared by several rows, so "item" is added as a secondary sort
# key: without a tie-breaker the numbering inside one month is arbitrary and
# can differ between runs.
# NOTE: a window with no partitionBy makes Spark evaluate all rows in a single
# partition; that is acceptable only because this demo frame is tiny.
windowSpec = Window.orderBy("date", "item")
display(df.withColumn("row_number", row_number().over(windowSpec)))

In [0]:
from pyspark.sql.functions import rank

# Rank the items inside each month by ascending sales. With rank(), rows that
# tie on sales share a rank and the next rank is skipped.
windowSpec = Window.partitionBy("date").orderBy("sales")
display(
    df.withColumn(
        "rank",
        rank().over(windowSpec),
    )
)

In [0]:
from pyspark.sql.functions import dense_rank

# Dense-rank the items inside each month by ascending sales. Unlike rank(),
# dense_rank() leaves no gap after a tie.
# The output column is named after the function (as the row_number cell does)
# so it is not mistaken for the plain rank() result.
windowSpec = Window.partitionBy("date").orderBy("sales")
display(df.withColumn("dense_rank", dense_rank().over(windowSpec)))

In [0]:
from pyspark.sql.functions import percent_rank

# Relative rank of each item inside its month by ascending sales, expressed by
# percent_rank() as a value between 0 and 1.
# The output column is named after the function (as the row_number cell does)
# so it is not mistaken for the plain rank() result.
windowSpec = Window.partitionBy("date").orderBy("sales")
display(df.withColumn("percent_rank", percent_rank().over(windowSpec)))

In [0]:
from pyspark.sql.functions import lag, lead, col

# One window per item, rows ordered chronologically. The lag/lead cells below
# reuse this spec, so the name must stay `windowSpec`.
windowSpec = (
    Window
    .partitionBy("item")
    .orderBy("date")
)

In [0]:
# Sales of the same item one row earlier in its date-ordered window; null on
# the first row of each item, which has no predecessor.
display(
    df.withColumn(
        "prev_sales",
        lag("sales", 1).over(windowSpec),
    )
)

In [0]:
# Sales of the same item one row later in its date-ordered window; null on the
# last row of each item, which has no successor.
display(
    df.withColumn(
        "next_sales",
        lead("sales", 1).over(windowSpec),
    )
)

In [0]:
# Relative change in sales against the item's previous row:
#     (current - previous) / previous
# The result is a fraction (0.5 means +50%) and is null on each item's first
# row, where no previous value exists.
prev_sales = lag(col("sales"), 1).over(windowSpec)
display(
    df.withColumn("sales_pct_change", (col("sales") - prev_sales) / prev_sales)
)

In [0]:
# Second demo dataset: (name, age, city) tuples. Seattle and Chicago each
# appear twice, so partitioning by city yields some multi-row groups.
# `data` and `df` are rebound here on purpose; the cells below read this `df`.
data = [
    ("Alice", 25, "NYC"),
    ("Bob", 30, "LA"),
    ("Charlie", 35, "Chicago"),
    ("Dave", 40, "Boston"),
    ("Eve", 45, "Seattle"),
    ("Poly", 24, "Seattle"),
    ("Hepty", 29, "Chicago"),
]

df = spark.createDataFrame(data, schema=["name", "age", "city"])

In [0]:
# Render the people DataFrame (name, age, city) before applying first()/last().
display(df)

In [0]:
from pyspark.sql.functions import desc

# One window per city, oldest person first. The first()/last() cell below
# reuses this spec, so the name must stay `windowSpec`.
windowSpec = (
    Window
    .partitionBy("city")
    .orderBy(desc("age"))
)

In [0]:
from pyspark.sql.functions import first, last

# first(): with an ordered window the default frame runs from the start of the
# partition to the current row, so first() already returns the first name in
# each city's age-descending ordering (the oldest person).
display(df.select("*", first("name").over(windowSpec).alias("first_func")))

# last(): under that same default frame the "last" row is the current row, so
# every row would simply echo its own name. Widen the frame to the whole
# partition so last() returns the final name in each city's ordering (the
# youngest person), and label the column "last_func" rather than "first_func".
display(
    df.select(
        "*",
        last("name")
        .over(
            windowSpec.rowsBetween(
                Window.unboundedPreceding, Window.unboundedFollowing
            )
        )
        .alias("last_func"),
    )
)