In [1]:
from pyspark.sql import SparkSession, functions as F
from pyspark.sql.window import Window

# Attach to the already-running Spark session if there is one; otherwise start a new one.
spark = (
    SparkSession.builder
    .getOrCreate()
)

In [2]:
# Load the CSV: the first row supplies the column names, and Spark infers each
# column's type from the data instead of reading everything as strings.
df = spark.read.csv("/content/FPT.csv", header=True, inferSchema=True)

In [4]:
# Two aliased views of the same DataFrame so that a self-join can tell its two
# sides apart: columns are then addressed as "o.<name>" and "r.<name>".
o, r = df.alias("o"), df.alias("r")

In [None]:
# Self-join with fully qualified column references. An "r" row matches an "o" row when:
#   - its route is the reverse of the "o" route (r.dest == o.orig, r.orig == o.dest),
#   - it has the same acft_regs_cde, and
#   - it departs no earlier than the "o" arrival and no later than 2 days after it.
# A left join keeps every "o" row, with null "r" columns when nothing matches.
joined = o.join(
    r,
    on=(
        (F.col("r.dest") == F.col("o.orig"))
        & (F.col("r.orig") == F.col("o.dest"))
        & (F.col("r.acft_regs_cde") == F.col("o.acft_regs_cde"))
        & F.col("r.actl_dep_lcl_tms").between(
            F.col("o.actl_arr_lcl_tms"),
            F.col("o.actl_arr_lcl_tms") + F.expr("INTERVAL 2 DAYS"),
        )
    ),
    how="left",
)

In [None]:
# Window for ranking: within each (o.acft_regs_cde, o.actl_arr_lcl_tms) partition,
# order the joined "r" candidates by departure time, earliest first.
# "r.id" is a secondary sort key so that two candidates sharing the same departure
# timestamp get distinct ranks instead of tying; without it, tied candidates would
# both receive rank 1 and the "o" row would be duplicated by a rank == 1 filter.
# NOTE(review): this assumes `id` is unique per row -- confirm against the data.
window_spec = (
    Window
    .partitionBy(F.col("o.acft_regs_cde"), F.col("o.actl_arr_lcl_tms"))
    .orderBy(F.col("r.actl_dep_lcl_tms"), F.col("r.id"))
)

In [8]:
# Attach a "rnk" column holding each row's rank inside its window partition,
# following the ordering defined by window_spec (tied rows share a rank).
with_rank = joined.withColumn(
    colName="rnk",
    col=F.rank().over(window_spec),
)

In [9]:
# Keep only the top-ranked candidate per partition, project the "o" columns plus
# the matched "r" id (exposed as next_flight_id), and sort by the "o" id.
result = (
    with_rank
    .where(F.col("rnk") == 1)
    .select(
        *[
            F.col(f"o.{name}")
            for name in (
                "orig",
                "dest",
                "id",
                "actl_dep_lcl_tms",
                "actl_arr_lcl_tms",
                "flight_num",
                "flights",
                "acft_regs_cde",
                "airborne_lcl_tms",
                "landing_lcl_tms",
            )
        ],
        F.col("r.id").alias("next_flight_id"),
    )
    .orderBy(F.col("o.id"))
)

In [10]:
# Print the leading rows of the result as a text table (show's defaults, spelled out).
result.show(n=20, truncate=True)

+----+----+---+-------------------+-------------------+----------+-------+-------------+-------------------+-------------------+--------------+
|orig|dest| id|   actl_dep_lcl_tms|   actl_arr_lcl_tms|flight_num|flights|acft_regs_cde|   airborne_lcl_tms|    landing_lcl_tms|next_flight_id|
+----+----+---+-------------------+-------------------+----------+-------+-------------+-------------------+-------------------+--------------+
| YYZ| YVR|  1|2022-12-31 20:36:00|2022-12-31 22:28:00|       127|      1|          737|2022-12-31 21:02:00|2022-12-31 22:17:00|          NULL|
| YYZ| YVR|  2|2022-12-31 19:39:00|2022-12-31 21:22:00|       185|      1|          451|2022-12-31 20:05:00|2022-12-31 21:14:00|          NULL|
| YYZ| YVR|  3|2022-12-31 18:53:00|2022-12-31 20:33:00|       123|      1|          843|2022-12-31 19:10:00|2022-12-31 20:22:00|          NULL|
| YYZ| YVR|  4|2022-12-31 17:27:00|2022-12-31 19:00:00|       121|      1|          747|2022-12-31 17:43:00|2022-12-31 18:53:00|        