In [0]:
# Recreate mc.amadeus2.data from one Parquet part file on the volume:
# drop any existing table, load the file, save it as a table, then show it.
spark.sql("DROP TABLE IF EXISTS mc.amadeus2.data")
df = (
    spark.read
    .format("parquet")
    .load("/Volumes/mc/amadeus2/data/part-00000-7118aad9-0739-4860-912f-1a58c85d689c.c000.snappy.parquet")
)
# No write mode is set here; the DROP above is what clears the way for the save.
df.write.saveAsTable("mc.amadeus2.data")
display(spark.table("mc.amadeus2.data"))

In [0]:
# Build mc.amadeus2.data_full_1: every column of mc.amadeus2.data_iata plus the
# city and country found in mc.amadeus2.iata for the trip origin code.
# LEFT JOIN keeps rows whose trip_origin_city has no match in the iata table.
spark.sql(
    """
CREATE OR REPLACE TABLE mc.amadeus2.data_full_1 AS
SELECT
  d.*,
  i.city AS trip_origin_city_full,
  i.country AS trip_origin_country_full
FROM mc.amadeus2.data_iata d
LEFT JOIN mc.amadeus2.iata i
  ON d.trip_origin_city = i.iata
"""
)
display(
    spark.table("mc.amadeus2.data_full_1")
)

In [0]:
# Rename mc.amadeus2.data_full to mc.amadeus2.data_full_1.
# NOTE(review): another cell in this notebook builds mc.amadeus2.data_full_1 with
# CREATE OR REPLACE; presumably only one of the two is meant to run — confirm.
spark.sql(
    "ALTER TABLE mc.amadeus2.data_full "
    "RENAME TO mc.amadeus2.data_full_1"
)

In [0]:
# Build mc.amadeus2.data_full: every column of mc.amadeus2.data_full_1 plus the
# city and country found in mc.amadeus2.iata for the trip DESTINATION code.
# The join key is trip_destination_city; joining on trip_origin_city here would
# fill the *_destination_*_full columns with the origin's city and country.
# LEFT JOIN keeps rows whose destination code has no match in the iata table.
spark.sql("""
CREATE OR REPLACE TABLE mc.amadeus2.data_full AS
SELECT
  d.*,
  i.city AS trip_destination_city_full,
  i.country AS trip_destination_country_full
FROM mc.amadeus2.data_full_1 d
LEFT JOIN mc.amadeus2.iata i
  ON d.trip_destination_city = i.iata
""")
display(spark.table("mc.amadeus2.data_full"))

In [0]:
# Rename mc.amadeus2.data to mc.amadeus2.data_iata.
# NOTE(review): other cells in this notebook still read mc.amadeus2.data —
# confirm the intended run order relative to this rename.
spark.sql(
    "ALTER TABLE mc.amadeus2.data "
    "RENAME TO mc.amadeus2.data_iata"
)

In [0]:
# Rename mc.amadeus2.data_jan2026 to mc.amadeus2.data_jan26.
# NOTE(review): other cells in this notebook still write and query
# mc.amadeus2.data_jan2026 — confirm which name is the intended one.
spark.sql(
    "ALTER TABLE mc.amadeus2.data_jan2026 "
    "RENAME TO mc.amadeus2.data_jan26"
)

In [0]:
# Drop the IATA lookup table if it exists (no error when it is absent).
spark.sql("DROP TABLE IF EXISTS mc.amadeus2.iata")

In [0]:
# Drop mc.amadeus2.data_full if it exists (no error when it is absent).
# NOTE(review): other cells in this notebook read mc.amadeus2.data_full —
# confirm this cell is not meant to run before them.
spark.sql("DROP TABLE IF EXISTS mc.amadeus2.data_full")

In [0]:
# Load the IATA lookup CSV (first line is the header) and store it as
# mc.amadeus2.iata, replacing any existing table of that name; then show it.
df_iata = spark.read.csv(
    "/Volumes/mc/amadeus2/data/iata.csv",
    header=True,
)
(
    df_iata.write
    .mode("overwrite")
    .saveAsTable("mc.amadeus2.iata")
)
display(spark.table("mc.amadeus2.iata"))

In [0]:
# Spot check: show the lookup rows whose country is exactly 'Ethiopia'.
display(
    spark.sql(
        "SELECT * FROM mc.amadeus2.iata "
        "WHERE country = 'Ethiopia'"
    )
)

In [0]:
# List the distinct departure dates present in mc.amadeus2.data.
display(
    spark.sql(
        "SELECT DISTINCT flight_leg_departure_date "
        "FROM mc.amadeus2.data"
    )
)

In [0]:
# List the distinct departure dates present in mc.amadeus2.data_jan2026.
display(
    spark.sql(
        "SELECT DISTINCT flight_leg_departure_date "
        "FROM mc.amadeus2.data_jan2026"
    )
)

In [0]:
from delta.tables import DeltaTable

# Overwrite flight_leg_departure_date in mc.amadeus2.data with '2026-01-01'.
# update() is called without a condition. The value is passed as a SQL
# expression string, hence the inner single quotes around the date.
deltaTable = DeltaTable.forName(spark, "mc.amadeus2.data")
deltaTable.update(set={"flight_leg_departure_date": "'2026-01-01'"})

In [0]:
# One row per calendar day from 2026-01-01 to 2026-01-31 (inclusive), in a
# single column named flight_leg_departure_date.
date_range_df = spark.sql("""
SELECT explode(sequence(to_date('2026-01-01'), to_date('2026-01-31'), interval 1 day)) AS flight_leg_departure_date
""")

# Source rows without their original departure date, so the generated date
# column can take its place under the same name.
df_data = spark.table("mc.amadeus2.data").drop("flight_leg_departure_date")

# Pair every source row with every generated date. The cross join already
# contributes flight_leg_departure_date, so no extra withColumn is needed.
df_expanded = df_data.crossJoin(date_range_df)

# Persist the expanded data, replacing any existing data_jan2026 table.
df_expanded.write.mode("overwrite").saveAsTable("mc.amadeus2.data_jan2026")

display(spark.table("mc.amadeus2.data_jan2026"))

In [0]:
# Replace mc.amadeus2.data with a full copy of mc.amadeus2.data_full.
spark.sql(
    """
CREATE OR REPLACE TABLE mc.amadeus2.data AS
SELECT * FROM mc.amadeus2.data_full
"""
)

In [0]:
%sql
-- Row count per flight-leg origin country, sorted by country.
SELECT
  flight_leg_origin_country,
  COUNT(*) AS count
FROM mc.amadeus2.data
GROUP BY flight_leg_origin_country
ORDER BY flight_leg_origin_country

In [0]:
# Sum flight_leg_total_seats in mc.amadeus2.data for each combination of
# trip origin city/country and trip destination city/country.
display(
    spark.sql(
        """
SELECT trip_origin_city,
       trip_origin_country,
       trip_destination_city,
       trip_destination_country,
       SUM(flight_leg_total_seats) AS total_seats
FROM mc.amadeus2.data
GROUP BY trip_origin_city,
         trip_origin_country,
         trip_destination_city,
         trip_destination_country
"""
    )
)

In [0]:
# Sum flight_leg_total_seats in mc.amadeus2.data_full for each origin/destination
# pair, showing both the code columns and the looked-up *_full name columns.
# Each grouping column is listed once: repeating trip_destination_city and
# trip_destination_country would only add duplicate output column names.
display(spark.sql("""
SELECT trip_origin_city,
       trip_origin_city_full,
       trip_origin_country,
       trip_origin_country_full,
       trip_destination_city,
       trip_destination_city_full,
       trip_destination_country,
       trip_destination_country_full,
       SUM(flight_leg_total_seats) AS total_seats
FROM mc.amadeus2.data_full
GROUP BY trip_origin_city,
         trip_origin_city_full,
         trip_origin_country,
         trip_origin_country_full,
         trip_destination_city,
         trip_destination_city_full,
         trip_destination_country,
         trip_destination_country_full
"""))

In [0]:
# Same per-route seat totals over mc.amadeus2.data_full, restricted to trips
# whose origin country is 'FR' and destination country is 'ES'.
# Each grouping column is listed once: repeating trip_destination_city and
# trip_destination_country would only add duplicate output column names.
display(spark.sql("""
SELECT trip_origin_city,
       trip_origin_city_full,
       trip_origin_country,
       trip_origin_country_full,
       trip_destination_city,
       trip_destination_city_full,
       trip_destination_country,
       trip_destination_country_full,
       SUM(flight_leg_total_seats) AS total_seats
FROM mc.amadeus2.data_full
WHERE trip_origin_country = 'FR' AND trip_destination_country = 'ES'
GROUP BY trip_origin_city,
         trip_origin_city_full,
         trip_origin_country,
         trip_origin_country_full,
         trip_destination_city,
         trip_destination_city_full,
         trip_destination_country,
         trip_destination_country_full
"""))

In [0]:
# Reference mc.amadeus2.data_full as a DataFrame (bare expression, not wrapped
# in display(), unlike the other cells).
spark.table("mc.amadeus2.data_full")

In [0]:
# Show the number of rows in mc.amadeus2.data_full as a one-column result
# named row_count.
display(
    spark.table("mc.amadeus2.data_full")
    .selectExpr("count(*) as row_count")
)