# Task 1 and Task 2

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, count, min, max, sum, row_number, lag, lead, first, last
from pyspark.sql.window import Window
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, DateType
from datetime import date

# Sample transactions as (AccountId, TranDate, TranAmt) tuples. Each account's
# amounts fall on consecutive days starting 2024-01-01.
data = [
    (account_id, date(2024, 1, day_of_month), amount)
    for account_id, amounts in (
        ("A1", (100.0, 150.0, 200.0, 250.0)),
        ("A2", (300.0, 350.0, 400.0)),
    )
    for day_of_month, amount in enumerate(amounts, start=1)
]

# Explicit schema for the sample rows; every column is declared nullable.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("AccountId", StringType()),
        ("TranDate", DateType()),
        ("TranAmt", DoubleType()),
    )
])

# `spark` is not defined anywhere in this file. getOrCreate() returns the
# session that is already active (e.g. the one a notebook provides) or starts
# a new one, so the script no longer depends on a pre-injected global.
spark = SparkSession.builder.getOrCreate()

df = spark.createDataFrame(data, schema)

# Per-account sliding frame ordered by date: the current row plus up to two
# preceding rows.
window_slide = Window.partitionBy("AccountId").orderBy("TranDate").rowsBetween(-2, 0)
# Per-account ordering by date with no explicit frame; used for ranking,
# lag/lead and first/last.
window_order = Window.partitionBy("AccountId").orderBy("TranDate")

# Append one window column per (alias, expression, window) entry, in this
# order, after all original columns.
# NOTE: avg/count/min/max/sum here are the pyspark.sql.functions imports, not
# the Python builtins of the same name.
# NOTE: "RN" and "RowNumber" are both row_number() over the same window, so
# they carry identical values.
# NOTE(review): in the printed output LastTranAmt equals the row's own TranAmt;
# presumably the ordered window's default frame ends at the current row.
# Confirm whether the partition-wide last value was intended.
df_with_windows = df.select(
    "*",
    *(
        expression.over(frame).alias(column_name)
        for column_name, expression, frame in (
            ("SlideAvg", avg("TranAmt"), window_slide),
            ("SlideQty", count("*"), window_slide),
            ("SlideMin", min("TranAmt"), window_slide),
            ("SlideMax", max("TranAmt"), window_slide),
            ("SlideTotal", sum("TranAmt"), window_slide),
            ("RN", row_number(), window_order),
            ("PrevTranAmt", lag("TranAmt", 1), window_order),
            ("NextTranAmt", lead("TranAmt", 1), window_order),
            ("FirstTranAmt", first("TranAmt"), window_order),
            ("LastTranAmt", last("TranAmt"), window_order),
            ("RowNumber", row_number(), window_order),
        )
    ),
)

df_with_windows.show()

+---------+----------+-------+--------+--------+--------+--------+----------+---+-----------+-----------+------------+-----------+---------+
|AccountId|  TranDate|TranAmt|SlideAvg|SlideQty|SlideMin|SlideMax|SlideTotal| RN|PrevTranAmt|NextTranAmt|FirstTranAmt|LastTranAmt|RowNumber|
+---------+----------+-------+--------+--------+--------+--------+----------+---+-----------+-----------+------------+-----------+---------+
|       A1|2024-01-01|  100.0|   100.0|       1|   100.0|   100.0|     100.0|  1|       null|      150.0|       100.0|      100.0|        1|
|       A1|2024-01-02|  150.0|   125.0|       2|   100.0|   150.0|     250.0|  2|      100.0|      200.0|       100.0|      150.0|        2|
|       A1|2024-01-03|  200.0|   150.0|       3|   100.0|   200.0|     450.0|  3|      150.0|      250.0|       100.0|      200.0|        3|
|       A1|2024-01-04|  250.0|   200.0|       3|   150.0|   250.0|     600.0|  4|      200.0|       null|       100.0|      250.0|        4|
|       A2|20