In [16]:
# Amadeus Flight Tracker - Jupyter Notebook Template

import requests
import pandas as pd
from datetime import datetime
from uuid import uuid4
import boto3
import os
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from collections import defaultdict

In [17]:
# === CONFIGURATION ===
# We'll load environment variables from a .env file, then read the Amadeus
# credentials from the environment.
load_dotenv()
# os.getenv returns None when a variable is unset, so a missing credential is
# not detected here; it only surfaces when the token request is made.
client_id = os.getenv("AMADEUS_CLIENT_ID")
client_secret = os.getenv("AMADEUS_CLIENT_SECRET")


# Search parameters and file names shared by the cells below.
DEPARTURE_DATE = "2025-09-15"  # parsed with "%Y-%m-%d" in collect_flight_data
DEPARTURE_DATE02 = "2025-12-05"  # NOTE(review): not referenced in the cells visible here
ORIGIN = "BOS"
DESTINATION = "LAX"
CSV_FILE = "BOS_LAX_tracking.csv"  # history file read by collect_flight_data and appended to by save_to_csv
DYNAMODB_TABLE = "FlightObservations"  # NOTE(review): not referenced in the cells visible here
CHROMEDRIVER_PATH = "/path/to/chromedriver"  # Update this path as needed (not referenced in the cells visible here)


In [None]:
# === Aircraft capacity database ===
# Maps an aircraft type code to its seat count per cabin.  collect_flight_data
# looks a code up with aircraft_capacity_db.get(code, {}) using the
# "aircraft" -> "code" value of an offer segment, so a code missing from this
# table yields an empty capacity dict (and a total of 0 seats).
# Cabin keys here are title-cased ("Economy", "First", "Business", "Premium"),
# unlike the upper-case cabin names used for prices and availability.
# NOTE(review): "320" and "A320" are both present and disagree on the Premium
# count (18 vs 16); confirm which key format the API actually returns and
# whether the "A319"/"A320" entries can ever be matched.
aircraft_capacity_db = {
    "321": {"Economy": 170, "First": 8, "Business": 12, "Premium": 24},
    "738": {"Economy": 160, "First": 16, "Business": 12, "Premium": 20},
    "739": {"Economy": 180, "First": 20, "Business": 16, "Premium": 24},
    "320": {"Economy": 150, "First": 12, "Business": 10, "Premium": 18},
    "757": {"Economy": 180, "First": 16, "Business": 14, "Premium": 22},
    "777": {"Economy": 250, "First": 48, "Business": 40, "Premium": 30},
    "787": {"Economy": 220, "First": 40, "Business": 35, "Premium": 28},
    "A319": {"Economy": 120, "First": 8, "Business": 8, "Premium": 12},
    "A320": {"Economy": 150, "First": 12, "Business": 10, "Premium": 16}
}


In [18]:
# === Authenticate with Amadeus ===
def get_amadeus_token(client_id, client_secret, timeout=30):
    """Request an OAuth2 access token from the Amadeus test environment.

    Args:
        client_id: Amadeus API key.
        client_secret: Amadeus API secret.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout `requests` waits forever, which would hang the kernel.

    Returns:
        The value of the "access_token" field of the JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (via ``raise_for_status``).
        requests.Timeout: if the server does not answer within `timeout`.
        KeyError: if the response JSON has no "access_token" field.
    """
    url = "https://test.api.amadeus.com/v1/security/oauth2/token"
    payload = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret
    }
    # `data=` sends the payload form-encoded, as in the original request.
    r = requests.post(url, data=payload, timeout=timeout)
    r.raise_for_status()
    return r.json()["access_token"]

In [19]:
# Fetch one access token up front; the module-level seat-availability query
# further down passes this `token` to the API.
token = get_amadeus_token(client_id, client_secret)

In [20]:
# === Query Seat Availability API ===
def query_seat_availability(token, origin, destination, date, timeout=30):
    """Query the Amadeus Flight Availabilities Search API for one route and date.

    The Amadeus reference documents this endpoint as a POST that takes a JSON
    body (origin/destination list, travelers, sources).  The previous version
    issued a GET with Flight-Offers-style query parameters (`travelClass`,
    `adults`, `currencyCode`), which this endpoint does not define.  One adult
    traveler is requested, matching the old `adults=1`.

    Args:
        token: OAuth2 bearer token.
        origin: Origin airport/city code, e.g. "BOS".
        destination: Destination airport/city code, e.g. "LAX".
        date: Departure date as "YYYY-MM-DD".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within `timeout`.
    """
    url = "https://test.api.amadeus.com/v1/shopping/availability/flight-availabilities"
    headers = {
        "Authorization": f"Bearer {token}",
        # An explicit Content-Type wins over the default set by `json=`.
        "Content-Type": "application/vnd.amadeus+json",
    }
    body = {
        "originDestinations": [
            {
                "id": "1",
                "originLocationCode": origin,
                "destinationLocationCode": destination,
                "departureDateTime": {"date": date},
            }
        ],
        "travelers": [{"id": "1", "travelerType": "ADULT"}],
        "sources": ["GDS"],
    }
    response = requests.post(url, headers=headers, json=body, timeout=timeout)
    response.raise_for_status()
    return response.json()

In [21]:
# === Summarize Seat Availability ===
def summarize_seats_by_cabin(availability_data):
    """Sum the available seats per cabin over every flight in a response.

    Reads ``availability_data["data"][i]["availabilityClasses"][j]`` and adds
    each entry's "availability" to the running total for its "cabin".

    Missing or null pieces are tolerated instead of raising:
      * a missing/null "data" or "availabilityClasses" list counts as empty;
      * a missing/null "availability" counts as 0;
      * entries without a "cabin" are skipped rather than summed under a
        meaningless ``None`` key.

    NOTE(review): this expects "cabin"/"availability" fields directly on each
    flight's "availabilityClasses" entries -- confirm that this matches the
    payload the availability endpoint actually returns.

    Args:
        availability_data: Decoded JSON response (a dict), or None.

    Returns:
        A plain dict mapping cabin name to the summed availability.
    """
    summary = defaultdict(int)
    for flight in (availability_data or {}).get("data") or []:
        for cls in flight.get("availabilityClasses") or []:
            cabin = cls.get("cabin")
            if cabin is None:
                continue
            summary[cabin] += cls.get("availability") or 0
    return dict(summary)

In [None]:
# Query seat availability for the configured route and date, then summarize it by cabin
# Module-level copies for interactive inspection; collect_flight_data() runs
# its own query and does not read these two variables.
availability_raw = query_seat_availability(token, ORIGIN, DESTINATION, DEPARTURE_DATE)
# seat_summary maps cabin name -> summed availability across all returned flights.
seat_summary = summarize_seats_by_cabin(availability_raw)

In [None]:
# === Fetch and Format Flight Offers ===
# Columns derived from previously saved observations, in the order they are
# added to each row.  They are always present (None when there is no history)
# so every run produces the same schema and appended CSV rows stay aligned.
_HISTORY_METRIC_KEYS = (
    "price_delta_economy", "price_std_economy",
    "price_delta_first", "price_std_first",
    "price_delta_business", "price_std_business",
    "price_delta_premium", "price_std_premium",
    "Load_Factor_Economy", "Load_Factor_First",
    "Load_Factor_Business", "Load_Factor_Premium",
    "BR_Economy", "BR_First", "BR_Business", "BR_Premium",
)

# Raw columns the history metrics are computed from.
_HISTORY_INPUT_COLUMNS = (
    "Collection_Timestamp", "Total_Aircraft_Seats",
    "PE_Current", "PF_Current", "PB_Current", "PP_Current",
    "AS_Economy", "AS_First", "AS_Business", "AS_Premium",
    "Booked_Seats_Economy", "Booked_Seats_First",
    "Booked_Seats_Business", "Booked_Seats_Premium",
)


def _cheapest_price_by_cabin(offers):
    """Return the lowest offer total seen for each tracked cabin (None if never seen)."""
    best_prices = {k: None for k in ["ECONOMY", "FIRST", "BUSINESS", "PREMIUM_ECONOMY"]}
    for offer in offers:
        try:
            price = float(offer["price"]["total"])
            traveler = offer.get("travelerPricings", [])
            if not traveler:
                continue
            for fare in traveler[0].get("fareDetailsBySegment", []):
                cabin = fare.get("cabin")
                if cabin in best_prices and (best_prices[cabin] is None or price < best_prices[cabin]):
                    best_prices[cabin] = price
        except Exception as e:
            # Best effort: one malformed offer must not abort the whole scan.
            print("Error parsing offer for price class:", e)
    return best_prices


def _booked_seats(capacity, seat_summary, capacity_key, cabin_key):
    """Return capacity minus availability for one cabin, or None if either side is unknown."""
    total = capacity.get(capacity_key)
    available = seat_summary.get(cabin_key)
    if total is None or available is None:
        return None
    return total - available


def _load_history_records():
    """Return the latest saved observations from CSV_FILE as dicts, or None without usable history."""
    if not os.path.exists(CSV_FILE):
        return None
    try:
        history_df = pd.read_csv(CSV_FILE)
        history_df["Collection_Timestamp"] = pd.to_datetime(history_df["Collection_Timestamp"])
    except (pd.errors.EmptyDataError, KeyError, ValueError) as e:
        print("Ignoring unreadable history file:", e)
        return None
    # Only the metrics of the newest row are used; diff() needs one earlier row
    # and rolling(window=5) needs four, so the last four rows are sufficient.
    recent = (
        history_df.sort_values("Collection_Timestamp")
        .tail(4)
        .reindex(columns=list(_HISTORY_INPUT_COLUMNS))
    )
    return recent.to_dict("records")


def _history_metrics(row, history_records, capacity):
    """Compute price deltas/volatility, load factors and booking rates for `row` against history."""
    columns = list(_HISTORY_INPUT_COLUMNS)
    current = {col: row.get(col) for col in columns}
    # History timestamps are parsed datetimes; parse the new one too so that
    # diff() subtracts like from like.
    current["Collection_Timestamp"] = pd.to_datetime(current["Collection_Timestamp"])
    temp_df = pd.DataFrame(history_records + [current], columns=columns)
    for col in columns:
        if col != "Collection_Timestamp":
            # None / blank cells become NaN so arithmetic never sees objects.
            temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")

    elapsed_hours = temp_df["Collection_Timestamp"].diff().dt.total_seconds().div(3600)

    metrics = {}
    for key, label in [("PE_Current", "economy"), ("PF_Current", "first"),
                       ("PB_Current", "business"), ("PP_Current", "premium")]:
        metrics[f"price_delta_{label}"] = temp_df[key].diff().iloc[-1]
        metrics[f"price_std_{label}"] = temp_df[key].rolling(window=5).std().iloc[-1]

    # NOTE(review): economy is divided by the total aircraft seats while the
    # other cabins are divided by their own cabin capacity -- kept as in the
    # original; confirm this asymmetry is intended.
    metrics["Load_Factor_Economy"] = (
        100 * (1 - temp_df["AS_Economy"] / temp_df["Total_Aircraft_Seats"].ffill())
    ).iloc[-1]
    for cabin in ("First", "Business", "Premium"):
        metrics[f"Load_Factor_{cabin}"] = (
            100 * (1 - temp_df[f"AS_{cabin}"] / capacity.get(cabin, 1))
        ).iloc[-1]

    for cabin in ("Economy", "First", "Business", "Premium"):
        metrics[f"BR_{cabin}"] = (temp_df[f"Booked_Seats_{cabin}"].diff() / elapsed_hours).iloc[-1]
    return metrics


def collect_flight_data(token, timeout=30):
    """Fetch flight offers for ORIGIN -> DESTINATION on DEPARTURE_DATE and build one row per offer.

    Each row combines fields of the offer's first itinerary, the cheapest
    price per cabin across all offers, the route-wide seat availability
    summary, and (when CSV_FILE exists) metrics relative to the most recent
    saved observations.

    Changes from the previous version:
      * history rows are combined without ``DataFrame.append`` (removed in
        pandas 2.0); its AttributeError used to be swallowed per offer, so
        every row was dropped once a history file existed;
      * the new row's timestamp is parsed before differencing;
      * an aircraft code missing from ``aircraft_capacity_db`` now yields
        ``None`` booked-seat values instead of dropping the row;
      * the history file is read once, not once per offer;
      * the history-derived columns are always present (None without history);
      * the HTTP request has a timeout.

    NOTE(review): capacity and Total_Aircraft_Seats come from the first
    offer's aircraft for every row, the per-cabin prices are the cheapest
    across all offers, and history metrics compare against the latest saved
    rows regardless of flight -- kept as in the original; confirm intent.

    Args:
        token: OAuth2 bearer token.
        timeout: Seconds to wait for the flight-offers request.

    Returns:
        A pandas DataFrame with one row per successfully parsed offer.

    Raises:
        requests.HTTPError: if the flight-offers request (or the seat
            availability request it triggers) returns an error status.
    """
    # Local import: the notebook's import cell only brings in `datetime`.
    from datetime import timezone

    headers = {"Authorization": f"Bearer {token}"}
    url = "https://test.api.amadeus.com/v2/shopping/flight-offers"
    params = {
        "originLocationCode": ORIGIN,
        "destinationLocationCode": DESTINATION,
        "departureDate": DEPARTURE_DATE,
        "adults": 1,
        "max": 50
    }

    r = requests.get(url, headers=headers, params=params, timeout=timeout)
    r.raise_for_status()
    offers = r.json().get("data") or []
    # Naive UTC, like the deprecated datetime.utcnow(), so timestamps stay
    # comparable with those already stored in the history file.
    collection_time = datetime.now(timezone.utc).replace(tzinfo=None)

    # Query seat availability and summarize
    availability_raw = query_seat_availability(token, ORIGIN, DESTINATION, DEPARTURE_DATE)
    seat_summary = summarize_seats_by_cabin(availability_raw)

    best_prices = _cheapest_price_by_cabin(offers)

    aircraft_code = offers[0]["itineraries"][0]["segments"][0]["aircraft"]["code"] if offers else "321"
    capacity = aircraft_capacity_db.get(aircraft_code, {})
    print(f"Aircraft Code: {aircraft_code}, Capacity: {capacity}")
    total_seats = sum(capacity.values())

    # Loop-invariant values, computed once.
    departure_day = datetime.strptime(DEPARTURE_DATE, "%Y-%m-%d")
    days_to_departure = (departure_day - collection_time).days
    history_records = _load_history_records()

    rows = []
    for offer in offers:
        try:
            segments = offer["itineraries"][0]["segments"]
            segment = segments[0]
            airline = offer["validatingAirlineCodes"][0]
            flight_number = segment["number"]
            departure_time = segment["departure"]["at"]
            origin_code = segment["departure"]["iataCode"]
            dest_code = segments[-1]["arrival"]["iataCode"]
            aircraft_type = segment["aircraft"]["code"]

            row = {
                "Observation_ID": str(uuid4()),
                "Flight_Unique_ID": f"{airline}_{flight_number}_{DEPARTURE_DATE}",
                "Collection_Timestamp": collection_time.isoformat(),
                "Departure_DateTime": departure_time,
                "Airline_Code": airline,
                "Route": f"{ORIGIN}-{DESTINATION}",
                "Origin_Airport_Code": origin_code,
                "Destination_Airport_Code": dest_code,
                "Flight_Number": flight_number,
                "Aircraft_Type_Code": aircraft_type,
                "Total_Aircraft_Seats": total_seats,
                "Number_of_Stops": len(segments) - 1,
                "PE_Current": best_prices["ECONOMY"],
                "PF_Current": best_prices["FIRST"],
                "PB_Current": best_prices["BUSINESS"],
                "PP_Current": best_prices["PREMIUM_ECONOMY"],
                "AS_Economy": seat_summary.get("ECONOMY"),
                "AS_First": seat_summary.get("FIRST"),
                "AS_Business": seat_summary.get("BUSINESS"),
                "AS_Premium": seat_summary.get("PREMIUM_ECONOMY"),
                "Booked_Seats_Economy": _booked_seats(capacity, seat_summary, "Economy", "ECONOMY"),
                "Booked_Seats_First": _booked_seats(capacity, seat_summary, "First", "FIRST"),
                "Booked_Seats_Business": _booked_seats(capacity, seat_summary, "Business", "BUSINESS"),
                "Booked_Seats_Premium": _booked_seats(capacity, seat_summary, "Premium", "PREMIUM_ECONOMY"),
                "CP_Cheapest_Flight_Price": None,
                "CPV_Lowest_Price_Volatility": None,
                "DT_Days_to_Departure": days_to_departure,
                # Hour is sliced out of the "at" string ("YYYY-MM-DDTHH:MM:SS").
                "Departure_Hour_UTC": int(departure_time[11:13]),
                "Departure_Day_of_Week": departure_day.weekday(),
                "RC_Num_Competitors": None,
                "Temp_Dest_C": None,
                "Oil_Price_USD": None,
                "Interest_Limit_Score": None
            }

            # === Add historical context for volatility/pressure/booking ===
            row.update(dict.fromkeys(_HISTORY_METRIC_KEYS))
            if history_records is not None:
                try:
                    row.update(_history_metrics(row, history_records, capacity))
                except Exception as e:
                    # Keep the observation even if its history metrics fail.
                    print("Error computing history metrics:", e)

            rows.append(row)
        except Exception as e:
            # Best effort: skip offers that lack the expected fields.
            print("Error parsing offer:", e)
    return pd.DataFrame(rows)


In [None]:
# === Collect observations and write a snapshot CSV ===
# Requests a fresh token, builds one row per offer, and writes the result to
# flight_prices.csv (the file is overwritten on each run).
# NOTE(review): no DynamoDB write happens in this cell, and the snapshot goes
# to "flight_prices.csv" rather than CSV_FILE, which is the file
# collect_flight_data reads its history from -- confirm whether the rows
# should also be appended to CSV_FILE (see save_to_csv).
df = collect_flight_data(get_amadeus_token(client_id, client_secret))
df.to_csv("flight_prices.csv", index=False)

In [None]:
# === Save to CSV ===
def save_to_csv(df):
    """Append the rows of `df` to CSV_FILE, keeping the file's columns aligned.

    * A missing or zero-byte file is created with a header row.
    * If the file's header matches `df`'s columns exactly, the rows are
      appended without a header (the original behaviour).
    * If the column sets or their order differ, a blind append would put
      values under the wrong headers.  The file is instead rewritten with the
      union of the columns (existing columns first); cells a row has no value
      for are left blank.  Existing cells are read back as text so their
      content is written out unchanged.
    * A frame with no rows is ignored, so no stray blank line is written.

    Args:
        df: pandas DataFrame of observations to persist.
    """
    if df.empty:
        return

    has_content = os.path.exists(CSV_FILE) and os.path.getsize(CSV_FILE) > 0
    if not has_content:
        df.to_csv(CSV_FILE, mode="a", header=True, index=False)
        return

    # nrows=0 reads just the header line.
    existing_columns = list(pd.read_csv(CSV_FILE, nrows=0).columns)
    if existing_columns == list(df.columns):
        df.to_csv(CSV_FILE, mode="a", header=False, index=False)
        return

    existing = pd.read_csv(CSV_FILE, dtype=str, keep_default_na=False)
    merged = pd.concat([existing, df], ignore_index=True, sort=False)
    merged.to_csv(CSV_FILE, index=False)