In [85]:
import requests
import json
import pandas as pd
import os
import time
from openpyxl import load_workbook
import warnings
# Install a blanket "ignore" filter: every warning raised after this line is suppressed.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.filterwarnings(action="ignore")
from dotenv import load_dotenv
# Load environment variables (the API key is later read with os.getenv("API_KEY"))
load_dotenv()

True

### Define some global inputs

In [86]:
# Workbook that receives one row per airport successfully returned by the API
airport_data_excel_file: str = "airport_data.xlsx"
# Workbook that receives the search terms for which no airport could be extracted
failed_airports_excel_file: str = "failed_airports.xlsx"

### Load the flight routes from the Excel file

In [87]:
# Read the route table from the workbook that sits next to the notebook
df_routes = pd.read_excel("flight_routes.xlsx")

# Normalise the headers to snake_case: lower-case them and turn spaces into underscores
snake_case_headers = df_routes.columns.str.lower().str.replace(" ", "_")
df_routes.columns = snake_case_headers

# Leave the frame as the last expression so the notebook renders it
df_routes

Unnamed: 0,departure_city,arrival_city,route_competition_status
0,Aalborg,Kaunas,Monopoly always
1,Aalborg,Kaunas,Monopoly always
2,Aalborg,London (GB),Monopoly not always
3,Aalborg,Stockholm,Monopoly always
4,Aarhus,Faro (PT),Monopoly always
...,...,...,...
5116,Zadar,Paris (FR),1 comp No Wizz
5117,Zadar,Vienna,1 comp No Wizz
5118,Zagreb,Dublin (IE),1 comp No Wizz
5119,Zagreb,Duesseldorf,1 comp No Wizz


In [88]:
# Create a list of all airports from df_routes
# Spellings from the route file mapped to the search term sent to the API instead
# (presumably the raw names on the left returned no airport -- verify against the API).
failed_airports_mapping_dict = {
    "Duesseldorf": "Düsseldorf",
    "Basel/Mulhouse": "Basel",
    "Cologne/Bonn": "Cologne",
    "Karlsruhe/Baden-Baden": "Karlsruhe",
    "Klaipeda/Palanga": "Palanga",
    "Leipzig/Halle": "Leipzig",
    "Lourdes/Tarbes": "Tarbes",
    "Maastricht/Aachen (NL)": "Maastricht",
    "Muenster/Osnabrueck (DE) 00": "Münster",
    "Paderborn/Lippstadt": "Paderborn Lippstadt",
    "Preveza/Lefkada": "Preveza",
}

# Build the list of search terms from both ends of every route.
# The spelling map is applied BEFORE de-duplicating: two raw names can collapse
# onto the same search term, and each term should be queried only once.
# Empty cells are dropped so that NaN is never sent to the API as a query.
airport_list = (
    pd.concat([df_routes["departure_city"], df_routes["arrival_city"]], ignore_index=True)
    .dropna()
    .map(lambda airport: failed_airports_mapping_dict.get(airport, airport))
    .drop_duplicates()
    .sort_values()
    .tolist()
)

### Define a function to extract the airport based on the city name

In [89]:
def extract_airport(city_name, timeout=30):
    """Query the skyscanner50 RapidAPI ``searchAirport`` endpoint for a city.

    Parameters
    ----------
    city_name : str
        Search term sent as the ``query`` parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        single stalled connection would block the calling loop indefinitely.

    Returns
    -------
    The decoded JSON body of the response, exactly as returned by the API.
    The HTTP status is not inspected here, so an error payload is returned
    as-is and the caller has to cope with missing keys.

    Raises
    ------
    RuntimeError
        If the ``API_KEY`` environment variable is missing or empty.
    requests.exceptions.RequestException
        On connection problems or when ``timeout`` is exceeded.
    """
    # Fail fast with a clear message instead of sending an unauthenticated
    # request for every single airport.
    api_key = os.getenv("API_KEY")
    if not api_key:
        raise RuntimeError(
            "API_KEY environment variable is not set; cannot call the airport search API"
        )

    url = "https://skyscanner50.p.rapidapi.com/api/v1/searchAirport"

    querystring = {"query": city_name}

    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "skyscanner50.p.rapidapi.com"
    }

    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)

    return response.json()

### Call the airport data endpoint to retrieve information about the airports found in the cities

In [90]:
def _checkpoint_to_excel(frame, path):
    """Overwrite ``path`` with ``frame`` as a single 'Sheet1' worksheet, header included."""
    frame.to_excel(path, sheet_name="Sheet1", index=False, engine="openpyxl")


list_airport_data = []
list_failed_airports = []
for idx, i in enumerate(airport_list):
    # Print a status message
    print(f"Extracting the airport data of airport {i}, which is airport number {idx + 1} out of {len(airport_list)}")

    # Get the airport data and create a data frame out of it.
    # Only the API lookup sits inside the try block, so an error raised while
    # writing the workbook is never mistaken for a failed airport.
    try:
        output_extract_airport = pd.json_normalize(extract_airport(i)["data"][0])
    # KeyError: the response has no "data" key
    # IndexError: "data" is present but holds no result at index 0
    except (KeyError, IndexError):
        list_failed_airports.append(i)

        # Checkpoint all failures collected so far. Rewriting the whole sheet
        # needs no pre-existing workbook and always carries a header row.
        _checkpoint_to_excel(
            pd.DataFrame({"failed_airport": list_failed_airports}),
            failed_airports_excel_file,
        )
    else:
        output_extract_airport["search_term"] = i

        # Append the result to list_airport_data
        list_airport_data.append(output_extract_airport)

        # Checkpoint all results collected so far. Concatenating first keeps the
        # columns aligned even when responses differ in the fields they carry.
        _checkpoint_to_excel(
            pd.concat(list_airport_data, ignore_index=True),
            airport_data_excel_file,
        )

    # Introduce a sleep of 0.5 second between each call and the next
    time.sleep(0.5)

# Change list_airport_data to a dataframe (empty frame when every lookup failed,
# because pd.concat raises on an empty list)
if list_airport_data:
    df_airport_data = pd.concat(list_airport_data).reset_index(drop=True)
else:
    df_airport_data = pd.DataFrame()

Extracting the airport data of airport Aalborg, which is airport number 1 out of 229
Extracting the airport data of airport Aarhus, which is airport number 2 out of 229
Extracting the airport data of airport Aberdeen (GB), which is airport number 3 out of 229
Extracting the airport data of airport Agadir, which is airport number 4 out of 229
Extracting the airport data of airport Alghero, which is airport number 5 out of 229
Extracting the airport data of airport Alicante, which is airport number 6 out of 229
Extracting the airport data of airport Almeria, which is airport number 7 out of 229
Extracting the airport data of airport Amman, which is airport number 8 out of 229
Extracting the airport data of airport Amsterdam, which is airport number 9 out of 229
Extracting the airport data of airport Ancona, which is airport number 10 out of 229
Extracting the airport data of airport Aqaba, which is airport number 11 out of 229
Extracting the airport data of airport Asturias, which is air