🚀 Getting the Latest Transactions in Microsoft Fabric using PySpark 🧾🔥


Working with transaction data? Two of the most common tasks are retrieving:

✅ The most recent transaction overall

✅ The latest transaction per customer

In [9]:
from pyspark.sql.functions import col, max
from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType, TimestampType
import datetime


# Build a small in-memory transactions DataFrame to demonstrate the queries.
# Every column is declared nullable (third StructField argument is True).
schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("txn_id", IntegerType()),
        ("customer_id", IntegerType()),
        ("amount", DoubleType()),
        ("txn_date", TimestampType()),
    )
])

# One tuple per transaction, in schema order:
# (txn_id, customer_id, amount, txn_date)
data = [
    (1, 101, 250.0, datetime.datetime(2024, 6, 10, 12, 0)),
    (2, 102, 400.0, datetime.datetime(2024, 6, 11, 13, 30)),
    (3, 101, 300.0, datetime.datetime(2024, 6, 12, 9, 0)),
    (4, 103, 150.0, datetime.datetime(2024, 6, 9, 10, 15)),
    (5, 101, 500.0, datetime.datetime(2024, 6, 15, 17, 45)),
]

df = spark.createDataFrame(data=data, schema=schema)

# ✅ Get Latest Transaction Overall
# Sort newest-first and keep a single row. txn_id is used as a secondary
# sort key so the result is deterministic when several transactions share the
# same newest txn_date (the highest txn_id wins). Ordering by txn_date alone
# leaves the choice among tied rows unspecified.
# NOTE(review): assumes txn_id is unique per transaction -- confirm for real data.
latest_txn = df.orderBy(col("txn_date").desc(), col("txn_id").desc()).limit(1)
latest_txn.show()  # Use display() in Fabric notebooks


# ✅ Latest Transaction Per Customer
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number

# Rank each customer's transactions newest-first. txn_id is a secondary sort
# key so that row_number() assigns rank 1 deterministically when a customer
# has several transactions with the same txn_date (the highest txn_id wins);
# with txn_date alone, the numbering among tied rows is arbitrary and may
# differ between runs.
# NOTE(review): assumes txn_id is unique per transaction -- confirm for real data.
window_spec = Window.partitionBy("customer_id").orderBy(
    col("txn_date").desc(),
    col("txn_id").desc(),
)

# Keep only the top-ranked row per customer, then drop the helper column so
# the result has the same columns as df.
df_with_rank = df.withColumn("row_num", row_number().over(window_spec))
latest_per_customer = df_with_rank.filter(col("row_num") == 1).drop("row_num")

latest_per_customer.show()


StatementMeta(, ded16785-e89f-4b77-8d72-d26d0fe42da6, 11, Finished, Available, Finished)

+------+-----------+------+-------------------+
|txn_id|customer_id|amount|           txn_date|
+------+-----------+------+-------------------+
|     5|        101| 500.0|2024-06-15 17:45:00|
+------+-----------+------+-------------------+

+------+-----------+------+-------------------+
|txn_id|customer_id|amount|           txn_date|
+------+-----------+------+-------------------+
|     5|        101| 500.0|2024-06-15 17:45:00|
|     2|        102| 400.0|2024-06-11 13:30:00|
|     4|        103| 150.0|2024-06-09 10:15:00|
+------+-----------+------+-------------------+

