In [2]:
import csv
import time

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# (CSV column header, CSS selector) pairs, in output column order.
# The selectors depend on Expedia's current HTML structure; inspect the page in
# your browser's dev tools and update them if the scrape returns blank columns.
_FLIGHT_FIELDS = (
    ("Airline", "[data-test-id='airline-name']"),
    ("Departure Time", "[data-test-id='departure-time']"),
    ("Arrival Time", "[data-test-id='arrival-time']"),
    ("Duration", "[data-test-id='duration']"),
    ("Price", "[data-test-id='listing-price-dollars']"),
)


def _text_or_empty(card, selector):
    """Return the text of the first element under ``card`` matching ``selector``, or "" if absent."""
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text
    except (NoSuchElementException, StaleElementReferenceException):
        # A missing (or re-rendered) field is tolerated: keep the row, leave the cell blank.
        # Anything else (e.g. a dead browser session) is a real failure and propagates.
        return ""


def scrape_flights(origin, destination, depart_date, return_date, output_csv):
    """
    Scrape round-trip flight listings from Expedia and save them to a CSV file.

    Args:
        origin: Origin location, interpolated verbatim into the search URL.
        destination: Destination location, interpolated verbatim into the search URL.
        depart_date: Outbound date string, interpolated verbatim into the search URL.
        return_date: Return date string, interpolated verbatim into the search URL.
        output_csv: Path of the CSV file to write (overwritten if it exists).

    Returns:
        None. The file gets a header row (Airline, Departure Time, Arrival Time,
        Duration, Price) followed by one row per listing found; a field that is
        not present on a listing is written as an empty string.

    Raises:
        Any error raised while driving the browser, other than a missing or
        stale field element, propagates to the caller. The browser is always
        closed first, and the CSV file is not written in that case.
    """

    # Construct the URL with given parameters
    # This URL format may change over time. You should verify the parameters and URL structure.
    url = (
        f"https://www.expedia.com/Flights-Search?"
        f"flight-type=on&mode=search&trip=roundtrip"
        f"&leg1=from:{origin},to:{destination},departure:{depart_date}TANYT,fromType:AIRPORT,toType:METROCODE"
        f"&leg2=from:{destination},to:{origin},departure:{return_date}TANYT,fromType:METROCODE,toType:AIRPORT"
        f"&options=cabinclass:economy"
        f"&fromDate={depart_date}&toDate={return_date}"
        f"&d1={depart_date}&d2={return_date}&passengers=adults:1,infantinlap:N"
    )

    # Set up headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # remove this if you want to see the browser window
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)

        # Fixed wait for the page to load. Adjust if needed, or switch to
        # WebDriverWait for a more robust approach.
        time.sleep(10)

        # Expedia may require scrolling/clicking to load more results, and the
        # listing selector below may need updating to match the current DOM.
        flight_cards = driver.find_elements(By.CSS_SELECTOR, "li[data-test-id='offer-listing']")

        results = [
            [_text_or_empty(card, selector) for _, selector in _FLIGHT_FIELDS]
            for card in flight_cards
        ]
    finally:
        # Always release the browser process, even when loading or scraping fails.
        driver.quit()

    # Save results to CSV
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([header for header, _ in _FLIGHT_FIELDS])
        writer.writerows(results)


if __name__ == "__main__":
    # Demo run: a Seattle -> New York round trip written to a local CSV file.
    # The location strings and the YYYY-MM-DD dates are passed straight into the
    # Expedia search URL, so they must be in a form Expedia accepts.
    output_filename = "expedia_flights.csv"

    departure_date = "2024-12-23"
    return_date = "2024-12-30"

    origin_airport = "Seattle, WA, United States of America (SEA-Seattle - Tacoma Intl.)"
    destination_airport = "New York, NY, United States of America (NYC-All Airports)"

    scrape_flights(
        origin_airport,
        destination_airport,
        departure_date,
        return_date,
        output_filename,
    )
    print("Data saved to {}".format(output_filename))


Data saved to expedia_flights.csv


In [6]:
# Hard-coded flight record as a single comma-separated string.
# NOTE(review): the five fields appear to follow the Airline, Departure Time,
# Arrival Time, Duration, Price column order used elsewhere in this notebook - confirm.
cheapest_flight = ",".join(["Frontier", "06:15", "11:30", "5h 15m", "$190"])

In [None]:
import csv
import os
import openai # type: ignore

# Take the OpenAI API key from the OPENAI_API_KEY environment variable
# (the value is None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")

def read_csv_data(filepath):
    """Read a CSV file and return its data rows, excluding the header row.

    Args:
        filepath: Path of a UTF-8 encoded CSV file whose first row is a header.

    Returns:
        A list of rows, each a list of strings. The list is empty when the file
        is empty or contains only the header row.
    """
    with open(filepath, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        # Skip the header row. The None default matters: on an empty file a bare
        # next(reader) would raise StopIteration instead of yielding no rows.
        next(reader, None)
        return list(reader)

def ask_openai_for_cheapest_flight(flight_data):
    """Ask an OpenAI chat model which flight in ``flight_data`` is the cheapest.

    Args:
        flight_data: Iterable of rows (sequences of field values) WITHOUT a
            header row, in the column order Airline, Departure Time,
            Arrival Time, Duration, Price.

    Returns:
        The model's reply text with surrounding whitespace stripped, or an
        empty string if the reply has no text content.

    Raises:
        Whatever the ``openai`` client raises for request failures.
    """
    import io  # local import: only needed to render the rows as CSV text

    # Render the rows with csv.writer rather than ",".join so that a field
    # containing a comma or quote (e.g. a price of "$1,190") stays one column.
    # lineterminator="\n" keeps plain rows identical to simple comma-joined lines.
    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(["Airline", "Departure Time", "Arrival Time", "Duration", "Price"])
    writer.writerows(flight_data)
    csv_string = buffer.getvalue()

    # Create a prompt
    prompt = (
        "Below is a list of flights in CSV format:\n\n"
        f"{csv_string}\n"
        "From the above data, identify the cheapest flight and provide its details as Airline, Departure Time, Arrival Time, Duration, and Price."
    )

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    # openai>=1.0 removed openai.ChatCompletion and exposes the same call as
    # openai.chat.completions.create; the OpenAI client class only exists in
    # those versions, so its presence tells the two APIs apart.
    if hasattr(openai, "OpenAI"):
        create_completion = openai.chat.completions.create
    else:
        create_completion = openai.ChatCompletion.create

    response = create_completion(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
    )

    # Extract the assistant's answer; content may be None when no text is returned.
    content = response.choices[0].message.content
    return (content or "").strip()

if __name__ == "__main__":
    # Load the flight rows from "expedia_flights.csv" (the file produced by the
    # scraping step, or substitute your own file with the same columns).
    flight_data = read_csv_data("expedia_flights.csv")

    # Let the OpenAI model pick the cheapest flight from those rows.
    cheapest_flight = ask_openai_for_cheapest_flight(flight_data)

    # Report the answer: a heading line followed by the model's reply.
    print("Cheapest flight identified by OpenAI:", cheapest_flight, sep="\n")


Frontier,06:15,11:30,5h 15m,$190
