In [40]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone
from dotenv import load_dotenv
import os

# using a `.env` file (loaded with `python-dotenv`) to keep our passwords and keys out of the notebook

# Making the API call and viewing the json

In [41]:
load_dotenv()  # Load environment variables from .env file

# The key is read from the environment so it never appears in the notebook.
api_key = os.getenv('aerodatabox_api_key')
icao = "EDDB"
# Take "today" in the Berlin timezone (same convention as the functions further
# down) so the requested day does not depend on the clock of the machine
# running the kernel.
date = datetime.now(timezone('Europe/Berlin')).date()
time_1 = "00:00"
time_2 = "11:59"

url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{date}T{time_1}/{date}T{time_2}"

querystring = {"withLeg":"true",
               "direction":"Arrival",
               "withCancelled":"false",
               "withCodeshared":"true",
               "withCargo":"false",
               "withPrivate":"false"}

headers = {
    'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
    'x-rapidapi-key': api_key
    }

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url,
                        headers = headers,
                        params = querystring,
                        timeout = 30)

# Fail here, with the HTTP status, instead of later with a confusing KeyError
# when the body is an error message rather than flight data.
response.raise_for_status()

flights_json = response.json()

flights_json

{'arrivals': [{'departure': {'airport': {'icao': 'EDDK',
     'iata': 'CGN',
     'name': 'Cologne',
     'timeZone': 'Europe/Berlin'},
    'scheduledTime': {'utc': '2024-11-06 03:47Z',
     'local': '2024-11-06 04:47+01:00'},
    'revisedTime': {'utc': '2024-11-06 03:47Z',
     'local': '2024-11-06 04:47+01:00'},
    'runwayTime': {'utc': '2024-11-06 03:47Z',
     'local': '2024-11-06 04:47+01:00'},
    'runway': '14L',
    'quality': ['Basic', 'Live']},
   'arrival': {'scheduledTime': {'utc': '2024-11-06 04:33Z',
     'local': '2024-11-06 05:33+01:00'},
    'revisedTime': {'utc': '2024-11-06 04:33Z',
     'local': '2024-11-06 05:33+01:00'},
    'quality': ['Basic', 'Live']},
   'number': 'DJ 6228',
   'callSign': 'SRR6228',
   'status': 'Approaching',
   'codeshareStatus': 'IsOperator',
   'isCargo': False,
   'aircraft': {'reg': 'OY-SRJ', 'modeS': '45CE4A', 'model': 'Boeing 767-200'},
   'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}},
  {'departure': {'airport': {

# Exploring the json

In [42]:
# Inspect the top-level keys of the parsed response.
flights_json.keys()

dict_keys(['arrivals'])

In [43]:
# Display the value stored under the "arrivals" key.
flights_json["arrivals"]

[{'departure': {'airport': {'icao': 'EDDK',
    'iata': 'CGN',
    'name': 'Cologne',
    'timeZone': 'Europe/Berlin'},
   'scheduledTime': {'utc': '2024-11-06 03:47Z',
    'local': '2024-11-06 04:47+01:00'},
   'revisedTime': {'utc': '2024-11-06 03:47Z',
    'local': '2024-11-06 04:47+01:00'},
   'runwayTime': {'utc': '2024-11-06 03:47Z',
    'local': '2024-11-06 04:47+01:00'},
   'runway': '14L',
   'quality': ['Basic', 'Live']},
  'arrival': {'scheduledTime': {'utc': '2024-11-06 04:33Z',
    'local': '2024-11-06 05:33+01:00'},
   'revisedTime': {'utc': '2024-11-06 04:33Z',
    'local': '2024-11-06 05:33+01:00'},
   'quality': ['Basic', 'Live']},
  'number': 'DJ 6228',
  'callSign': 'SRR6228',
  'status': 'Approaching',
  'codeshareStatus': 'IsOperator',
  'isCargo': False,
  'aircraft': {'reg': 'OY-SRJ', 'modeS': '45CE4A', 'model': 'Boeing 767-200'},
  'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}},
 {'departure': {'airport': {'icao': 'ZBAA',
    'iata': 'PEK',
  

The square bracket at the beginning of `flights_json["arrivals"]` indicates that it is a list. Since we're dealing with a list, we can iterate through its elements to access and process the data. Let's start by examining the first element in the list.

In [44]:
# Look at the first element of the "arrivals" list.
flights_json["arrivals"][0]

{'departure': {'airport': {'icao': 'EDDK',
   'iata': 'CGN',
   'name': 'Cologne',
   'timeZone': 'Europe/Berlin'},
  'scheduledTime': {'utc': '2024-11-06 03:47Z',
   'local': '2024-11-06 04:47+01:00'},
  'revisedTime': {'utc': '2024-11-06 03:47Z',
   'local': '2024-11-06 04:47+01:00'},
  'runwayTime': {'utc': '2024-11-06 03:47Z',
   'local': '2024-11-06 04:47+01:00'},
  'runway': '14L',
  'quality': ['Basic', 'Live']},
 'arrival': {'scheduledTime': {'utc': '2024-11-06 04:33Z',
   'local': '2024-11-06 05:33+01:00'},
  'revisedTime': {'utc': '2024-11-06 04:33Z',
   'local': '2024-11-06 05:33+01:00'},
  'quality': ['Basic', 'Live']},
 'number': 'DJ 6228',
 'callSign': 'SRR6228',
 'status': 'Approaching',
 'codeshareStatus': 'IsOperator',
 'isCargo': False,
 'aircraft': {'reg': 'OY-SRJ', 'modeS': '45CE4A', 'model': 'Boeing 767-200'},
 'airline': {'name': 'Star Air A/S', 'iata': 'DJ', 'icao': 'SRR'}}

In [45]:
# List the keys available on the first element of the "arrivals" list.
flights_json["arrivals"][0].keys()

dict_keys(['departure', 'arrival', 'number', 'callSign', 'status', 'codeshareStatus', 'isCargo', 'aircraft', 'airline'])

Looking at the first element of the JSON and its available keys, we can select the information we think will be important for our DataFrame:
- departure airport ICAO code
- scheduled arrival time (local)
- flight number

# Using for loops

## Making the DataFrame

In [46]:
# One dictionary per flight; the DataFrame is built once from the whole list.
flight_items = []

for item in flights_json["arrivals"]:
  # Leaf values are already treated as optional below; treat the intermediate
  # levels the same way so one incomplete record does not abort the whole loop.
  departure_airport = (item.get("departure") or {}).get("airport") or {}
  scheduled_arrival = (item.get("arrival") or {}).get("scheduledTime") or {}

  flight_item = {
      "arrival_airport_icao": icao,
      "departure_airport_icao": departure_airport.get("icao", None),
      "scheduled_arrival_time": scheduled_arrival.get("local", None),
      "flight_number": item.get("number", None)
  }

  flight_items.append(flight_item)

flights_df = pd.DataFrame(flight_items)

flights_df.head()

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number
0,EDDB,EDDK,2024-11-06 05:33+01:00,DJ 6228
1,EDDB,ZBAA,2024-11-06 06:40+01:00,HU 489
2,EDDB,OTHH,2024-11-06 06:55+01:00,QR 79
3,EDDB,EDDK,2024-11-06 07:24+01:00,4U 1YX
4,EDDB,EDDK,2024-11-06 07:40+01:00,EW 2


Let's get rid of the `+01:00` from `scheduled_arrival_time`.

In [47]:
# Drop the last six characters (the "+01:00" offset) from every timestamp string.
# Note: this overwrites the column, so running the cell again trims six more characters.
flights_df["scheduled_arrival_time"] = (
    flights_df["scheduled_arrival_time"].str.slice(stop=-6)
)
flights_df.head()

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number
0,EDDB,EDDK,2024-11-06 05:33,DJ 6228
1,EDDB,ZBAA,2024-11-06 06:40,HU 489
2,EDDB,OTHH,2024-11-06 06:55,QR 79
3,EDDB,EDDK,2024-11-06 07:24,4U 1YX
4,EDDB,EDDK,2024-11-06 07:40,EW 2


While string slicing provides a quick solution to correcting the `scheduled_arrival_time` column, it's not the most robust approach. It assumes that every cell in the column ends with a six-character time zone offset such as `+01:00`. If a cell has no offset (or if the cell above is run a second time), slicing removes part of the time value itself, leading to inaccurate results.

A more robust solution removes the offset only when it is actually present, using a regular expression: for example the `re.sub()` function from the `re` module, or pandas' vectorised `Series.str.replace(..., regex=True)`. Feel free to look into this if you have extra time and are curious.

## Creating a function for multiple cities

In [48]:
import pandas as pd
from datetime import datetime, timedelta
import requests
from pytz import timezone

def get_flight_data(icao_list):
  """Collect tomorrow's scheduled arrivals for the given airports.

  "Tomorrow" is computed in the Europe/Berlin timezone. For every airport two
  requests are sent (00:00-11:59 and 12:00-23:59) and the results are combined.

  Parameters
  ----------
  icao_list : iterable of str
      ICAO codes of the arrival airports to query.

  Returns
  -------
  pandas.DataFrame
      Columns: arrival_airport_icao, departure_airport_icao,
      scheduled_arrival_time (datetime, offset removed), flight_number,
      data_retrieved_at (datetime). Empty, but with the same columns, when no
      flights were collected.

  Raises
  ------
  requests.HTTPError
      If the API answers with an error status.
  """
  icao_codes = list(icao_list)
  if not icao_codes:
    # Nothing to query: skip the credential lookup and the network entirely.
    return _flight_items_to_frame([])

  api_key = os.getenv('aerodatabox_api_key')

  berlin_timezone = timezone('Europe/Berlin')
  tomorrow = datetime.now(berlin_timezone).date() + timedelta(days=1)

  # the api can only make 12 hour calls, therefore, two 12 hour calls make a full day:
  # a morning call and an afternoon call
  time_windows = [("00:00", "11:59"),
                  ("12:00", "23:59")]

  # Identical for every request, so built once outside the loops.
  querystring = {"withLeg":"true",
                 "direction":"Arrival",
                 "withCancelled":"false",
                 "withCodeshared":"true",
                 "withCargo":"false",
                 "withPrivate":"false"}

  headers = {
      'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
      'x-rapidapi-key': api_key
      }

  flight_items = []

  for icao in icao_codes:
    for window_start, window_end in time_windows:
      url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"

      # The timeout prevents a stalled connection from blocking forever.
      response = requests.get(url,
                              headers = headers,
                              params = querystring,
                              timeout = 30)
      # Surface HTTP errors directly instead of failing later on a missing key.
      response.raise_for_status()

      # An empty body cannot be parsed as JSON; treat it as "no flights".
      if not response.content:
        continue

      flights_json = response.json()

      retrieval_time = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

      for item in flights_json.get("arrivals", []):
        # Intermediate levels are treated as optional, like the leaf values.
        departure_airport = (item.get("departure") or {}).get("airport") or {}
        scheduled_arrival = (item.get("arrival") or {}).get("scheduledTime") or {}

        flight_items.append({
            "arrival_airport_icao": icao,
            "departure_airport_icao": departure_airport.get("icao", None),
            "scheduled_arrival_time": scheduled_arrival.get("local", None),
            "flight_number": item.get("number", None),
            "data_retrieved_at": retrieval_time
        })

  return _flight_items_to_frame(flight_items)


def _flight_items_to_frame(flight_items):
  """Turn the collected flight dictionaries into a typed DataFrame."""
  # Naming the columns explicitly keeps them present when the list is empty.
  flights_df = pd.DataFrame(flight_items,
                            columns=["arrival_airport_icao",
                                     "departure_airport_icao",
                                     "scheduled_arrival_time",
                                     "flight_number",
                                     "data_retrieved_at"])
  # Remove a trailing UTC offset ("+01:00", "-05:00" or "Z") only when it is
  # present, so values without an offset are not truncated.
  local_times = flights_df["scheduled_arrival_time"].str.replace(
      r"(?:Z|[+-]\d{2}:\d{2})$", "", regex=True)
  flights_df["scheduled_arrival_time"] = pd.to_datetime(local_times)
  flights_df["data_retrieved_at"] = pd.to_datetime(flights_df["data_retrieved_at"])

  return flights_df

In [49]:
# Airports to query, passed to the function as a list of ICAO codes.
icao_list = ["EDDB", "EDDF"]

# Fetch the arrivals for every listed airport and display the combined DataFrame.
get_flight_data(icao_list)

Unnamed: 0,arrival_airport_icao,departure_airport_icao,scheduled_arrival_time,flight_number,data_retrieved_at
0,EDDB,OTHH,2024-11-07 06:55:00,QR 79,2024-11-06 13:08:31
1,EDDB,LIMC,2024-11-07 07:30:00,FR 9,2024-11-06 13:08:31
2,EDDB,LBSF,2024-11-07 07:30:00,FR 1149,2024-11-06 13:08:31
3,EDDB,EDDS,2024-11-07 07:35:00,EW 2000,2024-11-06 13:08:31
4,EDDB,LFSB,2024-11-07 07:40:00,U2 1185,2024-11-06 13:08:31
...,...,...,...,...,...
2262,EDDF,LEMD,2024-11-07 22:35:00,VY 5520,2024-11-06 13:08:33
2263,EDDF,LPPT,2024-11-07 22:40:00,S4 8762,2024-11-06 13:08:33
2264,EDDF,LPPT,2024-11-07 22:40:00,TP 574,2024-11-06 13:08:33
2265,EDDF,LPPT,2024-11-07 22:40:00,AC 2660,2024-11-06 13:08:33


Your flight function can now be incorporated with your other functions to send and receive data from your SQL database.

# Using json_normalize

In [50]:
def tomorrows_flight_arrivals(icao_list):
    """Collect tomorrow's arrivals for the given airports using `pd.json_normalize`.

    "Tomorrow" is computed in the Europe/Berlin timezone. For every airport two
    requests are sent (00:00-11:59 and 12:00-23:59) and the results are combined.

    Parameters
    ----------
    icao_list : iterable of str
        ICAO codes of the arrival airports to query.

    Returns
    -------
    pandas.DataFrame
        Columns: arrival_airport_icao, flight_number, airline, arrival_time
        (string, offset removed), arrival_terminal, departure_city,
        departure_airport_icao, data_retrieved_on. Empty, but with the same
        columns, when no flights were collected.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    output_columns = ["arrival_airport_icao", "flight_number", "airline", "arrival_time",
                      "arrival_terminal", "departure_city", "departure_airport_icao",
                      "data_retrieved_on"]

    icao_codes = list(icao_list)
    if not icao_codes:
        # Nothing to query: skip the credential lookup and the network entirely.
        return pd.DataFrame(columns=output_columns)

    api_key = os.getenv('aerodatabox_api_key')

    berlin_timezone = timezone('Europe/Berlin')
    tomorrow = datetime.now(berlin_timezone).date() + timedelta(days=1)

    # Flattened JSON column -> name used in the returned DataFrame.
    column_names = {"number": "flight_number",
                    "airline.name": "airline",
                    "movement.scheduledTime.local": "arrival_time",
                    "movement.terminal": "arrival_terminal",
                    "movement.airport.name": "departure_city",
                    "movement.airport.icao": "departure_airport_icao"}

    # Two half-day windows per airport cover the full day.
    time_windows = [("00:00", "11:59"), ("12:00", "23:59")]

    # Identical for every request, so built once outside the loops.
    querystring = {"direction":"Arrival","withCancelled":"false"}

    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
        }

    list_for_arrivals_df = []

    for icao in icao_codes:

        for window_start, window_end in time_windows:
            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"

            # The timeout prevents a stalled connection from blocking forever.
            response = requests.get(url, headers=headers, params=querystring, timeout=30)
            # Surface HTTP errors directly instead of failing later on a missing key.
            response.raise_for_status()

            # An empty body cannot be parsed as JSON; treat it as "no flights".
            if not response.content:
                continue

            arrivals = response.json().get("arrivals", [])
            if not arrivals:
                continue

            # reindex (rather than [...] selection) keeps going when a column such as
            # "movement.terminal" is absent from this batch; it is filled with NaN.
            arrivals_df = (
                pd.json_normalize(arrivals)
                .reindex(columns=list(column_names))
                .rename(columns=column_names)
            )
            arrivals_df["arrival_airport_icao"] = icao
            arrivals_df["data_retrieved_on"] = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

            # fixing arrival_time: remove a trailing UTC offset ("+01:00", "-05:00" or "Z").
            # The object cast keeps the .str accessor usable if the column is all-NaN.
            arrivals_df["arrival_time"] = (
                arrivals_df["arrival_time"]
                .astype(object)
                .str.replace(r"(?:Z|[+-]\d{2}:\d{2})$", "", regex=True)
            )

            list_for_arrivals_df.append(arrivals_df[output_columns])

    if not list_for_arrivals_df:
        # pd.concat raises on an empty list, so return an empty frame explicitly.
        return pd.DataFrame(columns=output_columns)

    return pd.concat(list_for_arrivals_df, ignore_index=True)

In [51]:
# Airports to query, passed to the function as a list of ICAO codes.
icao_list = ["EDDF", "EDDB"]

# Fetch the arrivals for every listed airport and display the combined DataFrame.
tomorrows_flight_arrivals(icao_list)

Unnamed: 0,arrival_airport_icao,flight_number,airline,arrival_time,arrival_terminal,departure_city,departure_airport_icao,data_retrieved_on
0,EDDF,LH 4305,Lufthansa,2024-11-07 05:20,1,Victoria Falls,FVFA,2024-11-06 13:08:33
1,EDDF,OS 8500,Austrian,2024-11-07 05:20,1,Victoria Falls,FVFA,2024-11-06 13:08:33
2,EDDF,LX 9349,SWISS,2024-11-07 05:20,1,Victoria Falls,FVFA,2024-11-06 13:08:33
3,EDDF,SN 7207,Brussels,2024-11-07 05:20,1,Jo'anna,FAOR,2024-11-06 13:08:33
4,EDDF,OS 7316,Austrian,2024-11-07 05:20,1,Jo'anna,FAOR,2024-11-06 13:08:33
...,...,...,...,...,...,...,...,...
2262,EDDB,FR 1145,Ryanair,2024-11-07 22:55,2,Manchester,EGCC,2024-11-06 13:08:35
2263,EDDB,EW 8983,Eurowings,2024-11-07 23:10,1,Hurghada,HEGN,2024-11-06 13:08:35
2264,EDDB,IB 1809,Iberia,2024-11-07 23:10,1,Madrid,LEMD,2024-11-06 13:08:35
2265,EDDB,I2 1809,Iberia Express,2024-11-07 23:10,1,Madrid,LEMD,2024-11-06 13:08:35
