In [2]:
import warnings
# NOTE(review): with no category or module argument this suppresses every warning
# for the rest of the session, including any raised by later pandas operations;
# consider narrowing the filter once the noisy warning is identified.
warnings.filterwarnings("ignore")

In [4]:
import pandas as pd
import requests
import json
from datetime import datetime, date, timedelta
from pytz import timezone
import os
from dotenv import load_dotenv

ModuleNotFoundError: ignored

In [5]:
# Load settings via python-dotenv (presumably from a local .env file — confirm its
# location); the API key is read from the environment in the following cell.
load_dotenv()

NameError: ignored

In [6]:
# RapidAPI key for AeroDataBox, taken from the "aero_api" environment variable
# (None when the variable is not set)
API_key = os.environ.get("aero_api")

In [7]:
# load the city table (the cities we want airport data for) and discard the
# "Unnamed: 0" column that comes along with the CSV
city_data = pd.read_csv("city_data.csv").drop(columns = ["Unnamed: 0"])
city_data

FileNotFoundError: ignored

In [None]:
# city latitudes as a plain Python list, to be passed to the airport lookup
lat = city_data.loc[:, "city_latitude"].tolist()
lat

In [None]:
# city longitudes as a plain Python list, to be passed to the airport lookup
lon = city_data.loc[:, "city_longitude"].tolist()
lon

In [None]:
# single sample request: airports around one fixed coordinate pair
# (path ends in km/100/16: search radius in km, then the number of results)
headers = {
    "X-RapidAPI-Key": API_key,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com",
}
querystring = {"withFlightInfoOnly": "true"}
url = "https://aerodatabox.p.rapidapi.com/airports/search/location/51.511142/-0.103869/km/100/16"
response = requests.get(url, params = querystring, headers = headers)
response.json()

In [None]:
# loop through the list of cities and make API calls to get airport data and concatenate them into one dataframe
# the query string and headers are the same for every city, so they are built once
querystring = {"withFlightInfoOnly":"true"}
headers = {
    "X-RapidAPI-Key": API_key,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}
airport_list = []
for city_lat, city_lon in zip(lat, lon):
    url = f"https://aerodatabox.p.rapidapi.com/airports/search/location/{city_lat}/{city_lon}/km/100/20"
    # the timeout keeps one stalled connection from hanging the whole notebook
    response = requests.get(url, headers = headers, params = querystring, timeout = 30)
    # report a failed call with its HTTP status instead of a KeyError on "items"
    response.raise_for_status()
    airport_df = pd.json_normalize(response.json()["items"])
    airport_list.append(airport_df)
# pd.concat raises on an empty list, so fall back to an empty frame when there are no cities
airports_df = pd.concat(airport_list, ignore_index = True) if airport_list else pd.DataFrame()
airports_df

In [None]:
# rewrite the code above as a function so it can be reused elsewhere
def icao_airport_code(latitude, longitude, radius_km = 50, limit = 10, timeout = 30):
    """Fetch the airports around each (latitude, longitude) pair from AeroDataBox.

    One request per coordinate pair is sent to the
    ``/airports/search/location`` endpoint with ``withFlightInfoOnly=true``.
    The ``items`` of each response are flattened with ``pd.json_normalize``
    and all results are concatenated into a single frame.

    Parameters
    ----------
    latitude, longitude : sequence of float
        Coordinates, paired by position; both must have the same length.
    radius_km : int, default 50
        Search radius in kilometres placed in the request path.
    limit : int, default 10
        Number of results requested per location.
    timeout : float, default 30
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame or int
        The concatenated airport records (an empty DataFrame when there is
        nothing to return), or ``-1`` as soon as any request answers with a
        non-2xx status code.

    Raises
    ------
    AssertionError
        If ``latitude`` and ``longitude`` differ in length.
    """
    # check the length of the latitude and longitude lists to make sure they are equal
    assert len(latitude) == len(longitude), "latitude and longitude must have the same length"
    # identical for every request, so build them once
    querystring = {"withFlightInfoOnly":"true"}
    headers = {
        "X-RapidAPI-Key": API_key,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }
    airport_list = []
    for lat_value, lon_value in zip(latitude, longitude):
        url = (
            "https://aerodatabox.p.rapidapi.com/airports/search/location/"
            f"{lat_value}/{lon_value}/km/{radius_km}/{limit}"
        )
        response = requests.get(url, headers = headers, params = querystring, timeout = timeout)
        # every response is validated (no separate probe call that only spends quota);
        # -1 stays the failure signal callers already know
        if not 200 <= response.status_code <= 299:
            return -1
        # 204 No Content carries no JSON body to parse
        if response.status_code == 204:
            continue
        items = response.json().get("items", [])
        if items:
            airport_list.append(pd.json_normalize(items))
    # pd.concat raises on an empty list (no coordinates given or no airports found)
    if not airport_list:
        return pd.DataFrame()
    return pd.concat(airport_list, ignore_index = True)

In [None]:
# fetch the airports around every city; icao_airport_code returns -1 instead of a
# DataFrame when the API request fails, so check the output before continuing
airport_data = icao_airport_code(lat, lon)
airport_data

In [None]:
# remove all air bases from the result
# na = False counts a missing airport name as "not an air base", so the mask stays
# purely boolean and can be inverted with ~; .copy() makes the filtered frame
# independent of the original so later in-place edits do not act on a slice
airport_data = airport_data[
    ~airport_data["name"].str.contains("Air Base", case = False, na = False)
].copy()

In [None]:
# remove all duplicate icao
# rebind rather than use inplace = True: airport_data comes from a boolean filter, and
# mutating such a frame in place can raise SettingWithCopyWarning
airport_data = airport_data.drop_duplicates(subset = "icao")

In [None]:
# drop unnecessary columns
# rebind rather than use inplace = True: airport_data comes from a boolean filter, and
# mutating such a frame in place can raise SettingWithCopyWarning
airport_data = airport_data.drop(columns = ["shortName", "localCode"])

In [None]:
# rename columns with more intuitive names and get rid of dots to avoid problems in MySQL
# rebind rather than use inplace = True: airport_data comes from a boolean filter, and
# mutating such a frame in place can raise SettingWithCopyWarning
airport_data = airport_data.rename(columns = {"name": "airport_name",
                                              "municipalityName": "municipality_name",
                                              "countryCode": "country_code",
                                              "location.lat": "airport_latitude",
                                              "location.lon": "airport_longitude"})

In [None]:
# align the municipality spelling with the city table so the later merge can match it
# NOTE(review): row label 5 is hard-coded and depends on the order of the API results;
# if that label is absent this assignment adds a new row instead — confirm the target row
airport_data.loc[5, "municipality_name"] = "Düsseldorf"

In [None]:
# align the municipality spelling with the city table so the later merge can match it
# NOTE(review): row label 6 is hard-coded and depends on the order of the API results;
# if that label is absent this assignment adds a new row instead — confirm the target row
airport_data.loc[6, "municipality_name"] = "Frankfurt am Main"

In [None]:
# attach city_id to every airport by matching municipality_name against the city names;
# the left join keeps airports that have no matching city
airport_data = pd.merge(
    airport_data,
    city_data.loc[:, ["city_id", "city"]],
    how = "left",
    left_on = "municipality_name",
    right_on = "city",
)

In [None]:
# the "city" column was only needed as the merge key
del airport_data["city"]

In [None]:
# renumber the rows 0..n-1, discarding the previous index
airport_data.index = pd.RangeIndex(len(airport_data))

In [None]:
# show the airport table after filtering, renaming and adding city_id
airport_data

In [None]:
# persist the airport table; the row index is written too (as the first, unnamed CSV column)
airport_data.to_csv("airport_data.csv", index = True)

In [None]:
# ICAO codes of the airports, used as input for the flight queries
icao = airport_data.loc[:, "icao"].tolist()
len(icao)

In [None]:
# sample query: arrivals at EHAM for tomorrow's 10:00-22:00 window
# (tomorrow is derived from the current date in the Europe/Berlin time zone)
today = datetime.now().astimezone(timezone("Europe/Berlin")).date()
tomorrow = today + timedelta(days = 1)
headers = {
    "X-RapidAPI-Key": API_key,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com",
}
querystring = {
    "withLeg": "false",
    "direction": "Arrival",
    "withCancelled": "false",
    "withCodeshared": "true",
    "withCargo": "false",
    "withPrivate": "false",
    "withLocation": "false",
}
url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/EHAM/{tomorrow}T10:00/{tomorrow}T22:00"
response = requests.get(url, params = querystring, headers = headers)
pd.json_normalize(response.json()["arrivals"])

In [None]:
# loop through the list of airport icao codes and make API calls to get flight arrival data and concatenate
# them into one dataframe
# query tomorrow's 10:00-22:00 window (Europe/Berlin date), like the EHAM example,
# instead of a fixed calendar date that goes stale
arrival_date = datetime.now().astimezone(timezone("Europe/Berlin")).date() + timedelta(days = 1)
# the query string and headers are the same for every airport, so they are built once
querystring = {"withLeg":"false","direction":"Arrival","withCancelled":"false","withCodeshared":"true",
               "withCargo":"false","withPrivate":"false","withLocation":"false"}
headers = {
    "X-RapidAPI-Key": API_key,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}
arrival_list = []
for code in icao:
    url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{code}/{arrival_date}T10:00/{arrival_date}T22:00"
    # the timeout keeps one stalled connection from hanging the whole notebook
    response = requests.get(url, headers = headers, params = querystring, timeout = 30)
    # report a failed call with its HTTP status instead of a KeyError on "arrivals"
    response.raise_for_status()
    # 204 No Content carries no JSON body to parse
    if response.status_code == 204:
        continue
    arrival_df = pd.json_normalize(response.json()["arrivals"])
    arrival_list.append(arrival_df)
# pd.concat raises on an empty list, so fall back to an empty frame when nothing was collected
arrivals_df = pd.concat(arrival_list, ignore_index = True) if arrival_list else pd.DataFrame()
arrivals_df

In [None]:
# rewrite the code above as a function
def get_flight_data(icao, flight_date = None, timeout = 30):
    """Collect arrivals for each airport in ``icao`` from AeroDataBox.

    For every ICAO code one request is sent to the
    ``/flights/airports/icao/{code}/{date}T10:00/{date}T22:00`` endpoint with
    ``direction=Arrival`` (cancelled, cargo and private flights excluded,
    codeshares included). The ``arrivals`` of each response are flattened
    with ``pd.json_normalize``, tagged with the queried airport in an
    ``arrival_icao`` column, and concatenated.

    Parameters
    ----------
    icao : iterable of str
        ICAO codes of the arrival airports to query.
    flight_date : datetime.date or str, optional
        Day to query, rendered as ``YYYY-MM-DD`` in the request path.
        Defaults to tomorrow's date in the Europe/Berlin time zone.
    timeout : float, default 30
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame or int
        The concatenated arrival records (an empty DataFrame when there is
        nothing to return), or ``-1`` as soon as any request answers with a
        non-2xx status code.
    """
    if flight_date is None:
        # tomorrow's date as seen from the Europe/Berlin time zone
        flight_date = datetime.now(timezone("Europe/Berlin")).date() + timedelta(days = 1)
    # identical for every request, so build them once
    querystring = {"withLeg":"false","direction":"Arrival","withCancelled":"false","withCodeshared":"true",
                   "withCargo":"false","withPrivate":"false","withLocation":"false"}
    headers = {
        "X-RapidAPI-Key": API_key,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }
    arrival_list = []
    for code in icao:
        # NOTE(review): the 10:00-22:00 window is presumably airport-local time — confirm against the API docs
        url = (
            "https://aerodatabox.p.rapidapi.com/flights/airports/icao/"
            f"{code}/{flight_date}T10:00/{flight_date}T22:00"
        )
        response = requests.get(url, headers = headers, params = querystring, timeout = timeout)
        # every response is validated (no separate probe call that only spends quota);
        # -1 stays the failure signal callers already know
        if not 200 <= response.status_code <= 299:
            return -1
        # 204 No Content carries no JSON body to parse
        if response.status_code == 204:
            continue
        arrivals = response.json().get("arrivals", [])
        if not arrivals:
            continue
        arrival_df = pd.json_normalize(arrivals)
        # remember which airport these arrivals belong to
        arrival_df["arrival_icao"] = code
        arrival_list.append(arrival_df)
    # pd.concat raises on an empty list (no codes given or no arrivals found)
    if not arrival_list:
        return pd.DataFrame()
    return pd.concat(arrival_list, ignore_index = True)

In [None]:
# fetch the arrivals for every airport code; get_flight_data returns -1 instead of a
# DataFrame when the API request fails, so check the output before continuing
flight_data = get_flight_data(icao)
flight_data.head()

In [None]:
# drop the columns that are not needed downstream
# errors = "ignore": json_normalize only creates a column for keys that occur in the
# fetched records, so a field missing from every record must not abort the cleanup
# with a KeyError
flight_data = flight_data.drop(
    columns = ["codeshareStatus", "isCargo", "movement.scheduledTimeUtc", "movement.quality",
               "aircraft.reg", "aircraft.modeS", "callSign", "movement.actualTimeLocal",
               "movement.actualTimeUtc", "movement.gate", "movement.baggageBelt"],
    errors = "ignore",
)

In [None]:
# give the flight columns dot-free, descriptive names
flight_data.rename(
    mapper = {
        "number": "flight_number",
        "movement.airport.icao": "departure_icao",
        "movement.airport.iata": "departure_iata",
        "movement.airport.name": "departure_airport",
        "movement.scheduledTimeLocal": "scheduled_time",
        "movement.terminal": "terminal",
        "aircraft.model": "aircraft_model",
        "airline.name": "airline",
    },
    axis = "columns",
    inplace = True,
)

In [None]:
# inspect the flight table after dropping and renaming columns
flight_data

In [None]:
# check the column dtypes before scheduled_time is converted with pd.to_datetime
flight_data.dtypes

In [None]:
# parse the scheduled_time values into datetimes, then show the resulting dtypes
flight_data["scheduled_time"] = flight_data["scheduled_time"].pipe(pd.to_datetime)
flight_data.dtypes


In [None]:
# persist the flight table; the row index is written too (as the first, unnamed CSV column)
flight_data.to_csv("flight_data.csv", index = True)