In [25]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone
from dotenv import load_dotenv
import os

# using a .env file (loaded with python-dotenv) to store our passwords and keys

# Making the API call and viewing the json

In [26]:
load_dotenv()  # Load environment variables from .env file

# The API key is read from the environment so it never appears in the notebook.
api_key = os.getenv('aerodatabox_api_key')
icao = "EDDB"
# Take today's date on the Berlin calendar rather than from a naive
# datetime.now(): the naive version follows the clock of whatever machine
# runs the notebook, which can be a day off around midnight.
date = datetime.now(timezone('Europe/Berlin')).date()
# Morning half of the day; the request below covers time_1 .. time_2.
time_1 = "00:00"
time_2 = "11:59"

url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{date}T{time_1}/{date}T{time_2}"

querystring = {"withLeg":"true",
               "direction":"Arrival",
               "withCancelled":"false",
               "withCodeshared":"true",
               "withCargo":"false",
               "withPrivate":"false"}

headers = {
    'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
    'x-rapidapi-key': api_key
    }

# A timeout keeps the cell from hanging forever if the service does not answer.
response = requests.get(url,
                        headers = headers,
                        params = querystring,
                        timeout = 30)

# Fail right here with a clear HTTP error (bad key, quota exceeded, ...)
# instead of a confusing KeyError further down the notebook.
response.raise_for_status()

flights_json = response.json()

flights_json

{'arrivals': [{'departure': {'airport': {'icao': 'EDDK',
     'iata': 'CGN',
     'name': 'Cologne',
     'timeZone': 'Europe/Berlin'},
    'quality': []},
   'arrival': {'scheduledTime': {'utc': '2024-11-05 04:30Z',
     'local': '2024-11-05 05:30+01:00'},
    'revisedTime': {'utc': '2024-11-05 04:30Z',
     'local': '2024-11-05 05:30+01:00'},
    'quality': ['Basic', 'Live']},
   'number': 'DJ 6228',
   'callSign': 'SRR6228',
   'status': 'Approaching',
   'codeshareStatus': 'IsOperator',
   'isCargo': False,
   'aircraft': {'reg': 'OY-SRM', 'modeS': '45CE4D', 'model': 'Boeing 767'},
   'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}},
  {'departure': {'airport': {'icao': 'OTHH',
     'iata': 'DOH',
     'name': 'Doha',
     'timeZone': 'Asia/Qatar'},
    'scheduledTime': {'utc': '2024-11-04 23:40Z',
     'local': '2024-11-05 02:40+03:00'},
    'revisedTime': {'utc': '2024-11-04 23:39Z',
     'local': '2024-11-05 02:39+03:00'},
    'runwayTime': {'utc': '2024-11-04 2

# Exploring the json

In [27]:
# Show the top-level keys of the parsed response.
flights_json.keys()

dict_keys(['arrivals'])

In [28]:
# Display the value stored under the "arrivals" key.
flights_json["arrivals"]

[{'departure': {'airport': {'icao': 'EDDK',
    'iata': 'CGN',
    'name': 'Cologne',
    'timeZone': 'Europe/Berlin'},
   'quality': []},
  'arrival': {'scheduledTime': {'utc': '2024-11-05 04:30Z',
    'local': '2024-11-05 05:30+01:00'},
   'revisedTime': {'utc': '2024-11-05 04:30Z',
    'local': '2024-11-05 05:30+01:00'},
   'quality': ['Basic', 'Live']},
  'number': 'DJ 6228',
  'callSign': 'SRR6228',
  'status': 'Approaching',
  'codeshareStatus': 'IsOperator',
  'isCargo': False,
  'aircraft': {'reg': 'OY-SRM', 'modeS': '45CE4D', 'model': 'Boeing 767'},
  'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}},
 {'departure': {'airport': {'icao': 'OTHH',
    'iata': 'DOH',
    'name': 'Doha',
    'timeZone': 'Asia/Qatar'},
   'scheduledTime': {'utc': '2024-11-04 23:40Z',
    'local': '2024-11-05 02:40+03:00'},
   'revisedTime': {'utc': '2024-11-04 23:39Z',
    'local': '2024-11-05 02:39+03:00'},
   'runwayTime': {'utc': '2024-11-04 23:53Z',
    'local': '2024-11-05 02:53

The square brackets at the beginning of `flights_json["arrivals"]` indicate that it is a list (the outer `flights_json` itself is a dictionary with the single key `arrivals`). Since we're dealing with a list, we can iterate through its elements to access and process the data. Let's start by examining the first element in the list.

In [29]:
# Look at the first element of the "arrivals" list only.
flights_json["arrivals"][0]

{'departure': {'airport': {'icao': 'EDDK',
   'iata': 'CGN',
   'name': 'Cologne',
   'timeZone': 'Europe/Berlin'},
  'quality': []},
 'arrival': {'scheduledTime': {'utc': '2024-11-05 04:30Z',
   'local': '2024-11-05 05:30+01:00'},
  'revisedTime': {'utc': '2024-11-05 04:30Z',
   'local': '2024-11-05 05:30+01:00'},
  'quality': ['Basic', 'Live']},
 'number': 'DJ 6228',
 'callSign': 'SRR6228',
 'status': 'Approaching',
 'codeshareStatus': 'IsOperator',
 'isCargo': False,
 'aircraft': {'reg': 'OY-SRM', 'modeS': '45CE4D', 'model': 'Boeing 767'},
 'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}}

In [30]:
# List the keys available on that first element.
flights_json["arrivals"][0].keys()

dict_keys(['departure', 'arrival', 'number', 'callSign', 'status', 'codeshareStatus', 'isCargo', 'aircraft', 'airline'])

Looking at the first element of the json and the available keys, we can select the information we think would be important for our dataframe.
- Departure airport icao
- scheduled arrival time, local
- flight number

# Using for loops

## Making the DataFrame

In [39]:
# Collect one flat dict per arrival, then build the DataFrame in a single step.
flight_items = []

for item in flights_json["arrivals"]:
  # Nested objects are not guaranteed to be complete (the sample response
  # contains a movement without "scheduledTime"), so every level is read
  # defensively and a missing value becomes None instead of a KeyError.
  departure_airport = (item.get("departure") or {}).get("airport") or {}
  scheduled_arrival = (item.get("arrival") or {}).get("scheduledTime") or {}

  flight_item = {
      "arrival_airport_icao": icao,
      "departure_airport_icao": departure_airport.get("icao", None),
      "scheduled_arrival_time": scheduled_arrival.get("local", None),
      "flight_number": item.get("number", None)
  }

  flight_items.append(flight_item)

flights_df = pd.DataFrame(flight_items)

flights_df.head()

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number
0,EDDB,EDDK,2024-11-05 05:30+01:00,DJ 6228
1,EDDB,OTHH,2024-11-05 06:55+01:00,QR 79
2,EDDB,LFSB,2024-11-05 07:40+01:00,U2 1185
3,EDDB,LFSB,2024-11-05 07:40+01:00,DS 1185
4,EDDB,EDDS,2024-11-05 07:45+01:00,EW 2000


Let's get rid of the `+01:00` from `scheduled_arrival_time`.

In [32]:
# Drop the last six characters (the "+01:00" suffix) from every value.
# Careful: this overwrites the column, so running the cell a second time
# would cut off six more characters.
arrival_without_offset = flights_df["scheduled_arrival_time"].str.slice(stop=-6)
flights_df["scheduled_arrival_time"] = arrival_without_offset
flights_df.head()

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number
0,EDDB,EDDK,2024-11-05 05:30,DJ 6228
1,EDDB,OTHH,2024-11-05 06:55,QR 79
2,EDDB,LFSB,2024-11-05 07:40,U2 1185
3,EDDB,LFSB,2024-11-05 07:40,DS 1185
4,EDDB,EDDS,2024-11-05 07:45,EW 2000


While string slicing provides a quick solution to correcting the `scheduled_arrival_time` column, it's not the most robust approach. This is because it assumes that every cell in the column has the `+01:00` time zone offset. If there are cells without this offset, slicing would remove part of the time value, leading to inaccurate results.

A more robust solution would involve using the `re.sub()` function from the re module. Feel free to look into this if you have extra time and are curious.

## Creating a function for multiple cities

In [38]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone

def get_flight_data(icao_list):
  """Fetch tomorrow's scheduled arrivals for several airports as one DataFrame.

  "Tomorrow" is the day after the current date in the Europe/Berlin time zone.
  The API only accepts windows of up to 12 hours, so each airport is queried
  twice (00:00-11:59 and 12:00-23:59) and the results are combined.

  Args:
    icao_list: Iterable of airport ICAO codes, e.g. ``["EDDB", "EDDF"]``.

  Returns:
    pandas.DataFrame with the columns ``arrival_airport_icao``,
    ``departure_airport_icao``, ``scheduled_arrival_time``, ``flight_number``
    and ``data_retrieved_at``. ``scheduled_arrival_time`` is the API's "local"
    time with the UTC offset removed, parsed to a datetime; values missing in
    the response become ``None``/``NaT``. The frame is empty (but still has all
    columns) when no flights were returned.

  Raises:
    requests.HTTPError: if the API answers with a 4xx/5xx status.
  """
  api_key = os.getenv('aerodatabox_api_key')

  berlin_timezone = timezone('Europe/Berlin')
  today = datetime.now(berlin_timezone).date()
  tomorrow = (today + timedelta(days=1))

  # Two 12 hour windows make a full day: a morning call and an afternoon call.
  time_windows = [["00:00", "11:59"],
                  ["12:00", "23:59"]]

  # Request settings are identical for every call, so build them only once.
  querystring = {"withLeg":"true",
                 "direction":"Arrival",
                 "withCancelled":"false",
                 "withCodeshared":"true",
                 "withCargo":"false",
                 "withPrivate":"false"}

  headers = {
      'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
      'x-rapidapi-key': api_key
      }

  # Fixing the columns up front keeps the result well-formed even with no rows.
  columns = ["arrival_airport_icao",
             "departure_airport_icao",
             "scheduled_arrival_time",
             "flight_number",
             "data_retrieved_at"]

  flight_items = []

  for icao in icao_list:
    for window_start, window_end in time_windows:
      url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"

      # The timeout prevents a stalled connection from blocking the whole run.
      response = requests.get(url,
                              headers = headers,
                              params = querystring,
                              timeout = 30)

      # Surface HTTP problems (bad key, quota, ...) as a clear error instead
      # of a KeyError on the missing "arrivals" key.
      response.raise_for_status()

      # An empty body (e.g. HTTP 204) cannot be parsed as JSON; treat it as
      # "no flights in this window".
      if response.status_code == 204 or not response.content:
        continue

      flights_json = response.json()

      retrieval_time = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

      for item in flights_json.get("arrivals") or []:
        # Nested objects may be incomplete, so read each level defensively.
        departure_airport = (item.get("departure") or {}).get("airport") or {}
        scheduled_arrival = (item.get("arrival") or {}).get("scheduledTime") or {}

        flight_items.append({
            "arrival_airport_icao": icao,
            "departure_airport_icao": departure_airport.get("icao", None),
            "scheduled_arrival_time": scheduled_arrival.get("local", None),
            "flight_number": item.get("number", None),
            "data_retrieved_at": retrieval_time
        })

  flights_df = pd.DataFrame(flight_items, columns=columns)

  # Remove only a genuine trailing UTC offset such as "+01:00" or "-05:00";
  # values without an offset are left untouched (blind slicing would eat
  # part of the time).
  local_times = flights_df["scheduled_arrival_time"].str.replace(r"[+-]\d{2}:\d{2}$", "", regex=True)
  flights_df["scheduled_arrival_time"] = pd.to_datetime(local_times)
  flights_df["data_retrieved_at"] = pd.to_datetime(flights_df["data_retrieved_at"])

  return flights_df

In [34]:
# Airports to query, given as ICAO codes.
icao_list = ["EDDB", "EDDF"]

# The returned DataFrame is the cell's last expression, so it is displayed.
get_flight_data(icao_list)

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number,data_retrieved_at
0,EDDB,ZBAA,2024-11-06 06:40:00,HU 489,2024-11-05 16:34:20
1,EDDB,OTHH,2024-11-06 06:55:00,QR 79,2024-11-05 16:34:20
2,EDDB,EDDS,2024-11-06 07:35:00,EW 8001,2024-11-05 16:34:20
3,EDDB,EDDK,2024-11-06 07:40:00,EW 2,2024-11-05 16:34:20
4,EDDB,KEWR,2024-11-06 07:55:00,UA 962,2024-11-05 16:34:20
...,...,...,...,...,...
2218,EDDF,LPPT,2024-11-06 22:40:00,LH 6957,2024-11-05 16:34:21
2219,EDDF,LPPT,2024-11-06 22:40:00,TP 574,2024-11-05 16:34:21
2220,EDDF,LPPT,2024-11-06 22:40:00,AC 2660,2024-11-05 16:34:21
2221,EDDF,DTTJ,2024-11-06 22:45:00,X3 6573,2024-11-05 16:34:21


Your flight function can now be incorporated with your other functions to send and receive data from your SQL database.

# Using json_normalize

In [37]:
def tomorrows_flight_arrivals(icao_list):
    """Fetch tomorrow's arrivals for several airports using ``pd.json_normalize``.

    "Tomorrow" is the day after the current date in the Europe/Berlin time
    zone. Each airport is queried in two windows (00:00-11:59 and 12:00-23:59)
    and the flattened responses are concatenated.

    Args:
        icao_list: Iterable of airport ICAO codes, e.g. ``["EDDF", "EDDB"]``.

    Returns:
        pandas.DataFrame with the columns ``arrival_airport_icao``,
        ``flight_number``, ``airline``, ``arrival_time``, ``arrival_terminal``,
        ``departure_city``, ``departure_airport_icao`` and
        ``data_retrieved_on``. ``arrival_time`` is the API's local time string
        with the UTC offset removed. Fields missing from the response are NaN.
        The frame is empty (but still has all columns) when nothing was
        returned.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    api_key = os.getenv('aerodatabox_api_key')

    berlin_timezone = timezone('Europe/Berlin')
    today = datetime.now(berlin_timezone).date()
    tomorrow = (today + timedelta(days=1))

    time_windows = [["00:00","11:59"],["12:00","23:59"]]

    # Request settings are identical for every call, so build them only once.
    querystring = {"direction":"Arrival","withCancelled":"false"}

    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
        }

    # Flattened JSON path -> column name used in the result.
    column_names = {
        "number": "flight_number",
        "airline.name": "airline",
        "movement.scheduledTime.local": "arrival_time",
        "movement.terminal": "arrival_terminal",
        "movement.airport.name": "departure_city",
        "movement.airport.icao": "departure_airport_icao",
    }

    output_columns = ["arrival_airport_icao", "flight_number", "airline", "arrival_time", "arrival_terminal", "departure_city", "departure_airport_icao", "data_retrieved_on"]

    list_for_arrivals_df = []

    for icao in icao_list:

        for window_start, window_end in time_windows:
            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"

            # The timeout prevents a stalled connection from blocking the run.
            response = requests.get(url, headers=headers, params=querystring, timeout=30)

            # Surface HTTP problems as a clear error instead of a KeyError.
            response.raise_for_status()

            # An empty body (e.g. HTTP 204) cannot be parsed as JSON; treat it
            # as "no flights in this window".
            if response.status_code == 204 or not response.content:
                continue

            arrivals = response.json().get("arrivals") or []
            if not arrivals:
                continue

            # reindex (rather than [[...]] selection) keeps a column as NaN
            # when no flight in this window carries that field, instead of
            # raising a KeyError.
            arrivals_df = (
                pd.json_normalize(arrivals)
                .reindex(columns=list(column_names))
                .rename(columns=column_names)
            )
            arrivals_df["arrival_airport_icao"] = icao
            arrivals_df["data_retrieved_on"] = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")
            arrivals_df = arrivals_df.reindex(columns=output_columns)

            # fixing arrival_time: strip a trailing UTC offset of either sign
            # ("+01:00" as well as "-05:00"). astype(object) keeps the .str
            # accessor usable even when every value in the window is missing
            # (an all-NaN column would otherwise be float64).
            arrivals_df["arrival_time"] = (
                arrivals_df["arrival_time"]
                .astype(object)
                .str.replace(r"[+-]\d{2}:\d{2}$", "", regex=True)
            )

            list_for_arrivals_df.append(arrivals_df)

    # pd.concat raises on an empty list, so return a well-formed empty frame.
    if not list_for_arrivals_df:
        return pd.DataFrame(columns=output_columns)

    return pd.concat(list_for_arrivals_df, ignore_index=True)

In [36]:
# Airports to query, given as ICAO codes.
icao_list = ["EDDF", "EDDB"]

# The returned DataFrame is the cell's last expression, so it is displayed.
tomorrows_flight_arrivals(icao_list)

Unnamed: 0,arrival_airport_icao,flight_number,airline,arrival_time,arrival_terminal,departure_city,departure_airport_icao,data_retrieved_on
0,EDDF,4Y 133,Discover Airlines,2024-11-06 05:20,1,Windhoek,FYWH,2024-11-05 16:34:21
1,EDDF,OS 8460,Austrian,2024-11-06 05:20,1,Windhoek,FYWH,2024-11-05 16:34:21
2,EDDF,LX 9309,SWISS,2024-11-06 05:20,1,Windhoek,FYWH,2024-11-05 16:34:21
3,EDDF,LH 4357,Lufthansa,2024-11-06 05:20,1,Windhoek,FYWH,2024-11-05 16:34:21
4,EDDF,OS 7316,Austrian,2024-11-06 05:20,1,Jo'anna,FAOR,2024-11-05 16:34:21
...,...,...,...,...,...,...,...,...
2218,EDDB,EC 5150,EasyJet Europe,2024-11-06 22:55,1,Paris,LFPG,2024-11-05 16:34:23
2219,EDDB,FR 2419,Ryanair,2024-11-06 22:55,2,Tenerife Island,GCTS,2024-11-05 16:34:23
2220,EDDB,IB 1809,Iberia,2024-11-06 23:10,1,Madrid,LEMD,2024-11-05 16:34:23
2221,EDDB,I2 1809,Iberia Express,2024-11-06 23:10,1,Madrid,LEMD,2024-11-05 16:34:23
