In [0]:
from pyspark.sql.types import  StructType,StructField, StringType, IntegerType,DayTimeIntervalType,TimestampType
from pyspark.sql import functions as SF

# Column names and the timestamp pattern shared by the cells of this notebook.
# NOTE(review): the transaction id column is spelled "tranaction_id" (sic);
# the spelling is kept because it is the runtime column name.
transaction_id = "tranaction_id"
merchant_id = "merchant_id"
credit_card_id = "credit_card_id"
amount = "amount"
transaction_timestamp = "transaction_timestamp"
date_format = "MM/dd/yyyy HH:mm:ss"

# Sample transactions, one tuple per row in schema order:
# (transaction id, merchant id, credit card id, amount, timestamp string).
transactionData = [
    (1, 101, 1, 100, "09/25/2022 12:00:00"),
    (2, 101, 1, 100, "09/25/2022 12:08:00"),
    (3, 101, 1, 100, "09/25/2022 12:28:00"),
    (4, 102, 2, 300, "09/25/2022 12:00:00"),
    (6, 102, 2, 400, "09/25/2022 14:00:00"),
]

# Schema: four nullable integer columns followed by the timestamp, which is
# loaded as a string and parsed into a real timestamp further down.
transactionSchema = StructType(
    [
        StructField(column_name, IntegerType(), True)
        for column_name in (transaction_id, merchant_id, credit_card_id, amount)
    ]
    + [StructField(transaction_timestamp, StringType(), True)]
)

# Parse the string column using `date_format`; the result replaces the original
# string column under the same name.
timeStamp = SF.to_timestamp(
    SF.col(transaction_timestamp), date_format
).cast(TimestampType())

transaction_df = spark.createDataFrame(
    data=transactionData, schema=transactionSchema
).withColumn(transaction_timestamp, timeStamp)

transaction_df.show()


+-------------+-----------+--------------+------+---------------------+
|tranaction_id|merchant_id|credit_card_id|amount|transaction_timestamp|
+-------------+-----------+--------------+------+---------------------+
|            1|        101|             1|   100|  2022-09-25 12:00:00|
|            2|        101|             1|   100|  2022-09-25 12:08:00|
|            3|        101|             1|   100|  2022-09-25 12:28:00|
|            4|        102|             2|   300|  2022-09-25 12:00:00|
|            6|        102|             2|   400|  2022-09-25 14:00:00|
+-------------+-----------+--------------+------+---------------------+



In [0]:
from pyspark.sql.window import Window


# Transactions are compared only with others that share the same merchant,
# credit card and amount, in chronological order.
windowSpec = Window.partitionBy(merchant_id, credit_card_id, amount).orderBy(
    transaction_timestamp
)

# Gap between a transaction and the previous one in its window. Despite the
# name, this expression is the difference of the two timestamps cast to long
# (seconds); it is divided by 60 below to obtain minutes.
diff_min_col = (
    SF.col(transaction_timestamp).cast("long")
    - SF.col("previous_transaction").cast("long")
)

# Keep only transactions that follow an identical one within 10 minutes.
# The first transaction of each window has no predecessor, so its gap is null
# and the comparison drops it.
# NOTE(review): this rebinds `transaction_df` to the filtered result, so
# re-running this cell without re-running the previous one operates on
# already-filtered data.
transaction_df = (
    transaction_df
    .withColumn("previous_transaction", SF.lag(transaction_timestamp, 1).over(windowSpec))
    .withColumn("diff_in_minutes", diff_min_col / 60)
    .where(SF.col("diff_in_minutes") <= 10)
)

transaction_df.show()
transaction_df.printSchema()

+-------------+-----------+--------------+------+---------------------+--------------------+---------------+
|tranaction_id|merchant_id|credit_card_id|amount|transaction_timestamp|previous_transaction|diff_in_minutes|
+-------------+-----------+--------------+------+---------------------+--------------------+---------------+
|            2|        101|             1|   100|  2022-09-25 12:08:00| 2022-09-25 12:00:00|            8.0|
+-------------+-----------+--------------+------+---------------------+--------------------+---------------+

root
 |-- tranaction_id: integer (nullable = true)
 |-- merchant_id: integer (nullable = true)
 |-- credit_card_id: integer (nullable = true)
 |-- amount: integer (nullable = true)
 |-- transaction_timestamp: timestamp (nullable = true)
 |-- previous_transaction: timestamp (nullable = true)
 |-- diff_in_minutes: double (nullable = true)

