In [1]:
from datetime import datetime, timezone, timedelta
import logging
from typing import Optional, Tuple

def date_string_to_day_range_epoch(date_string: str, date_format: str = "%Y-%m-%d") -> Optional[Tuple[int, int, str, str, str]]:
    if not date_string:
        logging.error("Input date string is empty or None.")
        raise ValueError("A date string must be provided for processing.")
        
    try:
        dt_start_of_day = datetime.strptime(date_string, date_format).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )

        dt_end_of_day_exclusive_boundary = dt_start_of_day + timedelta(days=1)
        dt_end_of_day_inclusive = dt_end_of_day_exclusive_boundary - timedelta(minutes=1)
        dt_midday = dt_start_of_day + timedelta(hours=12)

        start_epoch = int(dt_start_of_day.timestamp())
        end_epoch_exclusive = int(dt_end_of_day_exclusive_boundary.timestamp()) - 1

        API_TIME_FORMAT = "%Y-%m-%dT%H:%M"
        start_str = dt_start_of_day.strftime(API_TIME_FORMAT)
        midday_str = dt_midday.strftime(API_TIME_FORMAT)
        end_str_inclusive = dt_end_of_day_inclusive.strftime(API_TIME_FORMAT)

        return start_epoch, end_epoch_exclusive, start_str, midday_str, end_str_inclusive

    except ValueError as e:
        logging.error(f"Date conversion failed for string '{date_string}' with format '{date_format}': {e}")
        return None
    except Exception as e:
        logging.error(f"An unexpected error occurred during day range conversion: {e}")
        return None


In [2]:
start_epoch, end_epoch, start_str, mid_str, end_str = date_string_to_day_range_epoch("2025-01-06")
print(start_epoch, end_epoch, start_str, mid_str, end_str)

1736121600 1736207999 2025-01-06T00:00 2025-01-06T12:00 2025-01-06T23:59


Accessing OpenSky Network token for sending API requests

In [3]:
import requests
import json
import logging
from datetime import datetime

credentials_file_path = "credentials/opensky_credentials.json"

try:
    with open(credentials_file_path, 'r', encoding='utf-8') as f:
        credentials = json.load(f)
        logging.info("Successfully loaded credentials attributes.")
except FileNotFoundError:
    logging.error(f"Error: The file '{credentials_file_path}' was not found. Please check the path.")
except json.JSONDecodeError:
    logging.error(f"Error: The file '{credentials_file_path}' is not valid JSON.")
except Exception as e:
    logging.error(f"An unexpected error occurred: {e}")

# 1. Your OpenSky API Client Credentials
CLIENT_ID = credentials['clientId']
CLIENT_SECRET = credentials['clientSecret']

AUTH_URL = "https://auth.opensky-network.org/auth/realms/opensky-network/protocol/openid-connect/token"

def get_access_token(client_id, client_secret):
    """Requests a new access token from the OpenSky auth server."""
    logging.info("Requesting new Access Token...")
    
    headers = {
        "Content-Type": "application/x-www-form-urlencoded"
    }
    
    data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret
    }
    
    try:
        response = requests.post(AUTH_URL, headers=headers, data=data)
        response.raise_for_status()  
        token_data = response.json()
        
        # The token is valid for 'expires_in' seconds (usually 1800 seconds or 30 minutes)
        access_token = token_data.get("access_token")
        #expires_in = token_data.get("expires_in", 1800)
        
        logging.info(f"Successfully retrieved Access Token.")
        return access_token
        
    except requests.exceptions.RequestException as e:
        logging.error(f"Error requesting token: {e}")
        raise Exception

Function to send request for real-time arrivals and departures

In [4]:
def make_OpenSky_request(API_BASE_URL, endpoint, date, token):
    """Makes an API request using the Bearer Token."""
    if not token:
        logging.error("Error: No valid token available.")
        raise "notValidTokenError"
        
    url = f"{API_BASE_URL}{endpoint}"
    logging.info(f"\nMaking API request to {url}...")
    
    begin_ts, end_ts, _, _, _ = date_string_to_day_range_epoch(date)
    
    params = {
        "begin": begin_ts,
        "end": end_ts
    }
    
    logging.info(f"params: {params}")
    
    headers = {
        "Authorization": f"Bearer {token}"
    }
    
    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status() 
        return response
        
    except requests.exceptions.RequestException as e:
        logging.error(f"Error making API request: {e}")
        raise e

Function to request for arrivals, departure and individual aircrafts' routes

In [20]:
def make_OpenSky_request_with_param(API_BASE_URL, endpoint, param, airport_or_icao24_value, date, token):
    """Makes an API request using the Bearer Token."""
    if not token:
        logging.error("Error: No valid token available.")
        raise "notValidTokenError"
        
    url = f"{API_BASE_URL}{endpoint}"
    logging.info(f"\nMaking API request to {url}...")
    
    begin_ts, end_ts, _, _, _ = date_string_to_day_range_epoch(date)
    
    # As this function works for two REST API endpoints, depending on the param provided the params were selected  
    if param == "airport":
        params = {
            "airport": airport_or_icao24_value,
            "begin": begin_ts,
            "end": end_ts
        }
    else:
        params = {
            "icao24": airport_or_icao24_value,
            "begin": begin_ts,
            "end": end_ts
        }
    
    logging.info(f"Param:'{param}' is selected, therefore params: {params}")
    
    headers = {
        "Authorization": f"Bearer {token}"
    }
    
    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status() 
        return response
        
    except requests.exceptions.RequestException as e:
        logging.error(f"Error making API request: {e}")
        raise e

Read airport icao from airports table and apply for loop in the airflow dag.

In [27]:

token = get_access_token(CLIENT_ID, CLIENT_SECRET)

API_BASE_URL = "https://opensky-network.org/api"

columns = [
    'icao24', 'firstSeen', 'estDepartureAirport', 'lastSeen',
    'estArrivalAirport', 'callsign',
    'estDepartureAirportHorizDistance', 'estDepartureAirportVertDistance',
    'estArrivalAirportHorizDistance', 'estArrivalAirportVertDistance',
    'departureAirportCandidatesCount', 'arrivalAirportCandidatesCount'
]

airports_icao = ['VABB']

all_records = []

for icao in airports_icao:
    
    MAX_RETRIES=2
    retry = 0
    
    while retry < MAX_RETRIES:
        
        response = make_OpenSky_request_with_param(API_BASE_URL, "/flights/arrival", "airport", icao, "2025-02-28", token)

        if response.status_code == 200:
            data = response.json()
            logging.info(f"Successfully retrieved Aircraft vector records.")
            ingestion_timestamp = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
            columns = columns + ['airport_icao', 'ingestion_timestamp'] # icao and timestamp not in the response
            
            records = [tuple(item.get(col) for col in columns[0:-2]) + (icao, ingestion_timestamp) for item in data]
            
            all_records.extend(records)
            
            retry = 2

        elif response.status_code == 401:
            logging.warn("Token might have expired. Sending request to get new token...")
            # Retrieve the token
            token = get_access_token(CLIENT_ID, CLIENT_SECRET)
            
            retry += 1
            
            if retry == 2:
                logging.error(f"Already accessed token twice for this icao: {icao} request.")
                raise Exception
            
        else:
            logging.error(f"Error while retrieving the data. Status Code: {response.status_code}")
            raise Exception
        
print(all_records)

[('801546', 1740775591, None, 1740783589, 'VABB', 'AKJ560  ', None, None, 18571, 1946, 0, 1, 'VABB', '2025-11-25 07:38:18'), ('8015c1', 1740775802, None, 1740783550, 'VABB', 'AIC2256 ', None, None, 11123, 551, 0, 1, 'VABB', '2025-11-25 07:38:18'), ('8940c5', 1740774004, None, 1740783151, 'VABB', 'GFA056  ', None, None, 7046, 346, 0, 1, 'VABB', '2025-11-25 07:38:18'), ('800446', 1740777052, 'VIDP', 1740782951, 'VABB', 'BDA201  ', 2593, 83, 10724, 536, 1, 1, 'VABB', '2025-11-25 07:38:18'), ('8015c7', 1740773911, 'VECC', 1740782805, 'VABB', 'IGO5065 ', 161, 25, 8131, 399, 0, 1, 'VABB', '2025-11-25 07:38:18'), ('800b05', 1740772728, 'VIDP', 1740782545, 'VABB', 'BDA481  ', 2599, 67, 15581, 780, 1, 1, 'VABB', '2025-11-25 07:38:18'), ('8005f0', 1740774249, 'OMDB', 1740782474, 'VABB', 'AIC984  ', 2577, 171, 71, 26, 0, 1, 'VABB', '2025-11-25 07:38:18'), ('074047', 1740745658, 'FAOR', 1740782287, 'VABB', 'SEY260  ', 3796, 203, 4179, 201, 2, 1, 'VABB', '2025-11-25 07:38:18'), ('8013b2', 174077599

  ingestion_timestamp = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")


In [5]:
token = get_access_token(CLIENT_ID, CLIENT_SECRET)

API_BASE_URL = "https://opensky-network.org/api"

In [6]:
response = make_OpenSky_request(API_BASE_URL, "/flights/all", "2025-01-0", token)

In [7]:
data = response.json()

In [15]:
remaining_credits = response.headers.get('X-Rate-Limit-Remaining')
print(f"API Request Successful. Remaining Credits: {remaining_credits}")

API Request Successful. Remaining Credits: 2920


In [8]:
import pandas as pd

df = pd.DataFrame(data)

In [None]:
df[df['estArrivalAirport']=='VIDP']

Unnamed: 0,icao24,firstSeen,estDepartureAirport,lastSeen,estArrivalAirport,callsign,estDepartureAirportHorizDistance,estDepartureAirportVertDistance,estArrivalAirportHorizDistance,estArrivalAirportVertDistance,departureAirportCandidatesCount,arrivalAirportCandidatesCount
370,801524,1736136057,VECC,1736156114,VIDP,AKJ117C,815.0,20.0,1756.0,0.0,0,2
371,801645,1736167778,VNKT,1736172208,VIDP,AIC220,1586.0,147.0,3155.0,37.0,0,1
373,801645,1736143986,VECC,1736151149,VIDP,AIC769,903.0,33.0,3054.0,22.0,0,1
379,800d4f,1736168538,VIDP,1736186986,VIDP,IGO2326,1008.0,37.0,2219.0,22.0,1,2
380,800d4f,1736161208,,1736163342,VIDP,IGO2609,,,1803.0,7.0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...
89286,8013b1,1736143892,,1736145620,VIDP,AIC2430,,,3028.0,7.0,0,1
89290,80169e,1736156468,,1736179979,VIDP,AKJ112J,,,1971.0,0.0,0,2
89292,80169e,1736121612,,1736129209,VIDP,AKJ946W,,,1726.0,0.0,0,2
89300,800c64,1736166540,VOMM,1736175089,VIDP,AIC2468,679.0,121.0,3076.0,22.0,1,1


In [1]:
import requests
import urllib.parse
import json
import logging
import datetime
import pandas as pd

aerodatabox_api_key_file_path = "credentials/aerodatabox_api_key.json"

try:
    with open(aerodatabox_api_key_file_path, 'r', encoding='utf-8') as f:
        credentials = json.load(f)
        logging.info("Successfully loaded credentials attributes.")
except FileNotFoundError:
    logging.error(f"Error: The file '{aerodatabox_api_key_file_path}' was not found. Please check the path.")
except json.JSONDecodeError:
    logging.error(f"Error: The file '{aerodatabox_api_key_file_path}' is not valid JSON.")
except Exception as e:
    logging.error(f"An unexpected error occurred: {e}")


delays_base_url = "https://prod.api.market/api/v1/aedbx/aerodatabox"
codetype = "icao"
code = "KJFK"
endpoint = "airports/delays"
TIME_FROM = "2025-11-02T00:00"
TIME_TO = "2025-11-02T00:15"

# 1. URL-encode the time strings
encoded_from = urllib.parse.quote(TIME_FROM)
encoded_to = urllib.parse.quote(TIME_TO)

headers = {
    "accept": "application/json",
    "x-api-market-key": credentials['key'],
}

params = {
    "withLeg" : True
}


try:
    # Use the full_url for the request
    response = requests.get("https://prod.api.market/api/v1/aedbx/aerodatabox/airports/delays/2025-05-01", headers=headers)
    
    # Check for HTTP errors before trying to parse JSON
    response.raise_for_status() 
    
    data = response.json()
    
    df = pd.json_normalize(data)
    
except requests.exceptions.HTTPError as errh:
    logging.error(f"Http Error: {errh}")
except requests.exceptions.ConnectionError as errc:
    logging.error(f"Error Connecting: {errc}")
except requests.exceptions.Timeout as errt:
    logging.error(f"Timeout Error: {errt}")
except requests.exceptions.RequestException as e:
    # This catches the original exception and others not caught above
    logging.error(f"An unexpected API request error occurred: {e}")
except Exception as e:
    # Catches non-request errors, like JSON decoding failure
    logging.error(f"An unexpected error occurred: {e}")

In [5]:
df['from_utc'] = df['from'].apply(lambda x: x['utc'])
df['to_utc'] = df['to'].apply(lambda x: x['utc'])

# --- Extracting 'local' times ---
df['from_local'] = df['from'].apply(lambda x: x['local'])
df['to_local'] = df['to'].apply(lambda x: x['local'])

In [6]:
df

Unnamed: 0,airportIcao,from,to,departuresDelayInformation,arrivalsDelayInformation,from_utc,to_utc,from_local,to_local
0,KDAL,"{'utc': '2025-04-30 22:00Z', 'local': '2025-04...","{'utc': '2025-05-01 00:00Z', 'local': '2025-04...","{'numTotal': 26, 'numQualifiedTotal': 21, 'num...","{'numTotal': 49, 'numQualifiedTotal': 46, 'num...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-04-30 17:00-05:00,2025-04-30 19:00-05:00
1,EPKK,"{'utc': '2025-04-30 22:00Z', 'local': '2025-05...","{'utc': '2025-05-01 00:00Z', 'local': '2025-05...","{'numTotal': 0, 'numQualifiedTotal': 0, 'numCa...","{'numTotal': 9, 'numQualifiedTotal': 9, 'numCa...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-05-01 00:00+02:00,2025-05-01 02:00+02:00
2,KDFW,"{'utc': '2025-04-30 22:00Z', 'local': '2025-04...","{'utc': '2025-05-01 00:00Z', 'local': '2025-04...","{'numTotal': 141, 'numQualifiedTotal': 70, 'nu...","{'numTotal': 178, 'numQualifiedTotal': 109, 'n...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-04-30 17:00-05:00,2025-04-30 19:00-05:00
3,KEWR,"{'utc': '2025-04-30 22:00Z', 'local': '2025-04...","{'utc': '2025-05-01 00:00Z', 'local': '2025-04...","{'numTotal': 85, 'numQualifiedTotal': 70, 'num...","{'numTotal': 55, 'numQualifiedTotal': 45, 'num...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-04-30 18:00-04:00,2025-04-30 20:00-04:00
4,LPPT,"{'utc': '2025-04-30 22:00Z', 'local': '2025-04...","{'utc': '2025-05-01 00:00Z', 'local': '2025-05...","{'numTotal': 18, 'numQualifiedTotal': 18, 'num...","{'numTotal': 26, 'numQualifiedTotal': 26, 'num...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-04-30 23:00+01:00,2025-05-01 01:00+01:00
...,...,...,...,...,...,...,...,...,...
1457,USTR,"{'utc': '2025-04-30 22:00Z', 'local': '2025-05...","{'utc': '2025-05-01 00:00Z', 'local': '2025-05...","{'numTotal': 0, 'numQualifiedTotal': 0, 'numCa...","{'numTotal': 6, 'numQualifiedTotal': 6, 'numCa...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-05-01 03:00+05:00,2025-05-01 05:00+05:00
1458,YPAD,"{'utc': '2025-04-30 22:00Z', 'local': '2025-05...","{'utc': '2025-05-01 00:00Z', 'local': '2025-05...","{'numTotal': 22, 'numQualifiedTotal': 22, 'num...","{'numTotal': 14, 'numQualifiedTotal': 14, 'num...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-05-01 07:30+09:30,2025-05-01 09:30+09:30
1459,KSFO,"{'utc': '2025-04-30 22:00Z', 'local': '2025-04...","{'utc': '2025-05-01 00:00Z', 'local': '2025-04...","{'numTotal': 64, 'numQualifiedTotal': 64, 'num...","{'numTotal': 61, 'numQualifiedTotal': 61, 'num...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-04-30 15:00-07:00,2025-04-30 17:00-07:00
1460,SAEZ,"{'utc': '2025-04-30 22:00Z', 'local': '2025-04...","{'utc': '2025-05-01 00:00Z', 'local': '2025-04...","{'numTotal': 4, 'numQualifiedTotal': 4, 'numCa...","{'numTotal': 8, 'numQualifiedTotal': 7, 'numCa...",2025-04-30 22:00Z,2025-05-01 00:00Z,2025-04-30 19:00-03:00,2025-04-30 21:00-03:00


In [4]:
df[df[['airportIcao', 'from','to']]]

ValueError: Boolean array expected for the condition, not object

Extracting Arrivals and Departures from AeroDataBox API

In [None]:
import requests
import urllib.parse
import json
import logging
import datetime

aerodatabox_api_key_file_path = "credentials/aerodatabox_api_key.json"

try:
    with open(aerodatabox_api_key_file_path, 'r', encoding='utf-8') as f:
        credentials = json.load(f)
        logging.info("Successfully loaded credentials attributes.")
except FileNotFoundError:
    logging.error(f"Error: The file '{aerodatabox_api_key_file_path}' was not found. Please check the path.")
except json.JSONDecodeError:
    logging.error(f"Error: The file '{aerodatabox_api_key_file_path}' is not valid JSON.")
except Exception as e:
    logging.error(f"An unexpected error occurred: {e}")


delays_base_url = "https://prod.api.market/api/v1/aedbx/aerodatabox"
codetype = "icao"
code = "KJFK"
endpoint = f"flights/airports/{codetype}/{code}"
TIME_FROM = "2025-11-02T00:00"
TIME_TO = "2025-11-02T00:15"

# 1. URL-encode the time strings
encoded_from = urllib.parse.quote(TIME_FROM)
encoded_to = urllib.parse.quote(TIME_TO)

headers = {
    "accept": "application/json",
    "x-api-market-key": credentials['key'],
}

params = {
    "withLeg" : True
}

# Combine the base URL and the endpoint for the final request URL
full_url = f"{delays_base_url}/{endpoint}/{encoded_from}/{encoded_to}"

try:
    # Use the full_url for the request
    response = requests.get(full_url, params=params, headers=headers)
    
    # Check for HTTP errors before trying to parse JSON
    response.raise_for_status() 
    
    print(response.json())
    
except requests.exceptions.HTTPError as errh:
    logging.error(f"Http Error: {errh}")
except requests.exceptions.ConnectionError as errc:
    logging.error(f"Error Connecting: {errc}")
except requests.exceptions.Timeout as errt:
    logging.error(f"Timeout Error: {errt}")
except requests.exceptions.RequestException as e:
    # This catches the original exception and others not caught above
    logging.error(f"An unexpected API request error occurred: {e}")
except Exception as e:
    # Catches non-request errors, like JSON decoding failure
    logging.error(f"An unexpected error occurred: {e}")

In [4]:
data = response.json()

In [7]:
departures = data['departures']
arrivals = data['arrivals']

The below one process can be used to flatten the json and assign the keys. And also another method is to use schema mapping config using yaml.

In [13]:
def flatten_json(nested_json, parent_key='', sep='_'):
    items = []
    for k, v in nested_json.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_json(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

In [14]:
flattened_departures = [flatten_json(rec) for rec in departures]

# Collect all keys from all records
all_keys = sorted({key for rec in flattened_departures for key in rec.keys()})

# Align records so that missing keys are filled with None
aligned_records = [
    tuple(rec.get(k) for k in all_keys)
    for rec in flattened_departures
]

print(aligned_records)

At present we follow the manual process which give us more grip on attributes

In [16]:
def get_value(data, path, default=None):
    """Safely get a nested value from a dict using dot notation."""
    keys = path.split('.')
    for key in keys:
        if isinstance(data, dict):
            data = data.get(key, default)
        else:
            return default
    return data

In [None]:
from datetime import datetime
departure_records=[]
for departure in departures:
    record = {
    "flight_number": get_value(departure, "number"),
    "flight_date": "2025-05-02",  # can be dynamic later
    "callsign": get_value(departure, "callSign"),
    "status": get_value(departure, "codeshareStatus"),
    "iscargo": get_value(departure, "isCargo"),
    "aircraft_reg": get_value(departure, "aircraft.reg"),
    "aircraft_modeS": get_value(departure, "aircraft.modeS"),
    "aircraft_model": get_value(departure, "aircraft.model"),
    "airline_name": get_value(departure, "airline.name"),
    "airline_iata": get_value(departure, "airline.iata"),
    "airline_icao": get_value(departure, "airline.icao"),
    "airport_icao": "icao",  # dynamic later
    "departure_scheduledtime_utc": get_value(departure, "departure.scheduledTime.utc"),
    "departure_scheduledtime_local": get_value(departure, "departure.scheduledTime.local"),
    "departure_revisedtime_utc": get_value(departure, "departure.revisedTime.utc"),
    "departure_revisedtime_local": get_value(departure, "departure.revisedTime.local"),
    "departure_runwaytime_utc": get_value(departure, "departure.runwayTime.utc"),
    "departure_runwaytime_local": get_value(departure, "departure.runwayTime.local"),
    "departure_terminal": get_value(departure, "departure.terminal"),
    "departure_quality": get_value(departure, "departure.quality"),
    "arrival_airport_icao": get_value(departure, "arrival.airport.icao"),
    "arrival_airport_iata": get_value(departure, "arrival.airport.iata"),
    "arrival_airport_name": get_value(departure, "arrival.airport.name"),
    "arrival_airport_timezone": get_value(departure, "arrival.airport.timeZone"),  # note: correct key name
    "arrival_scheduledtime_utc": get_value(departure, "arrival.scheduledTime.utc"),
    "arrival_scheduledtime_local": get_value(departure, "arrival.scheduledTime.local"),
    "arrival_revisedtime_utc": get_value(departure, "arrival.revisedTime.utc"),
    "arrival_revisedtime_local": get_value(departure, "arrival.revisedTime.local"),
    "arrival_runwaytime_utc": get_value(departure, "arrival.runwayTime.utc"),
    "arrival_runwaytime_local": get_value(departure, "arrival.runwayTime.local"),
    "arrival_terminal": get_value(departure, "arrival.terminal"),
    "arrival_gate": get_value(departure, "arrival.gate"),
    "arrival_baggagebelt": get_value(departure, "arrival.baggageBelt"),
    "arrival_quality": get_value(departure, "arrival.quality"),
    "ingestion_timestamp": datetime.utcnow().isoformat(),
    "data_source": "AeroDataBox"
    }
    
    departure_records.append(record)

columns = list(record.keys())

In [26]:
all_departures = [tuple(item.get(col) for col in columns) for item in departure_records]

In [72]:


def make_aerodatabox_request(api_key, BASE_URL, endpoint, code_type, code, TIME_FROM, TIME_TO):
    
    # 1. URL-encode the time strings
    encoded_from = urllib.parse.quote(TIME_FROM)
    encoded_to = urllib.parse.quote(TIME_TO)

    headers = {
        "accept": "application/json",
        "x-api-market-key": api_key,
    }
    
    params = {
    "withLeg" : True
    }
    
    full_url = f"{BASE_URL}/{endpoint}/{code_type}/{code}/{encoded_from}/{encoded_to}"

    logging.info(f"Sending api request for the icao: {code}.....")
    logging.info(f"URL: {full_url}")
    
    print(f"Sending api request for the icao: {code}.....")
    print(f"URL: {full_url}")
    
    try:
        response = requests.get(full_url, params=params, headers=headers)
        # Check for HTTP errors before trying to parse JSON
        response.raise_for_status() 
        
        print(response)
        
        return response
        
    except requests.exceptions.HTTPError as errh:
        logging.error(f"Http Error: {errh}")
        raise Exception
    except requests.exceptions.ConnectionError as errc:
        logging.error(f"Error Connecting: {errc}")
        raise Exception
    except requests.exceptions.Timeout as errt:
        logging.error(f"Timeout Error: {errt}")
        raise Exception
    except requests.exceptions.RequestException as e:
        logging.error(f"An unexpected API request error occurred: {e}")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        
def get_value(data, path, default=None):
    """Safely get a nested value from a dict using dot notation."""
    keys = path.split('.')
    for key in keys:
        if isinstance(data, dict):
            data = data.get(key, default)
        else:
            return default
    return data

import logging
from datetime import datetime

def json_reader(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            credentials = json.load(f)
            logging.info("Successfully loaded credentials.")
        return credentials
    except FileNotFoundError:
        logging.error(f"Error: The file '{file_path}' was not found. Please check the path.")
    except json.JSONDecodeError:
        logging.error(f"Error: The file '{file_path}' is not valid JSON.")
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        
def fetch_arrivals_departures_data(api_key_file_path: str, base_url: str, endpoint: str, airport_icao: str, date: str):
    """
    Fetch arrivals and departures data from AeroDataBox for a given airport and date.
    Returns tuples of (departures, arrivals) with column order preserved.
    """

    # --- Helper functions ---

    def base_flight_fields(record: dict) -> dict:
        """Fields shared by both arrivals and departures."""
        return {
            "flight_number": get_value(record, "number"),
            "flight_date": date,
            "callsign": get_value(record, "callSign"),
            "status": get_value(record, "codeshareStatus"),
            "iscargo": get_value(record, "isCargo"),
            "aircraft_reg": get_value(record, "aircraft.reg"),
            "aircraft_modeS": get_value(record, "aircraft.modeS"),
            "aircraft_model": get_value(record, "aircraft.model"),
            "airline_name": get_value(record, "airline.name"),
            "airline_iata": get_value(record, "airline.iata"),
            "airline_icao": get_value(record, "airline.icao"),
            "airport_icao": airport_icao,
            "ingestion_timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
            "data_source": "AeroDataBox"
        }

    def parse_departure_record(dep: dict) -> dict:
        rec = base_flight_fields(dep)
        rec.update({
            # Current airport = departure
            "departure_scheduledtime_utc": get_value(dep, "departure.scheduledTime.utc"),
            "departure_scheduledtime_local": get_value(dep, "departure.scheduledTime.local"),
            "departure_revisedtime_utc": get_value(dep, "departure.revisedTime.utc"),
            "departure_revisedtime_local": get_value(dep, "departure.revisedTime.local"),
            "departure_runwaytime_utc": get_value(dep, "departure.runwayTime.utc"),
            "departure_runwaytime_local": get_value(dep, "departure.runwayTime.local"),
            "departure_terminal": get_value(dep, "departure.terminal"),
            "departure_runway": get_value(dep, "departure.runway"),
            "departure_quality": get_value(dep, "departure.quality"),
            # Destination airport info
            "arrival_airport_icao": get_value(dep, "arrival.airport.icao"),
            "arrival_airport_iata": get_value(dep, "arrival.airport.iata"),
            "arrival_airport_name": get_value(dep, "arrival.airport.name"),
            "arrival_airport_timezone": get_value(dep, "arrival.airport.timeZone"),
            "arrival_scheduledtime_utc": get_value(dep, "arrival.scheduledTime.utc"),
            "arrival_scheduledtime_local": get_value(dep, "arrival.scheduledTime.local"),
            "arrival_revisedtime_utc": get_value(dep, "arrival.revisedTime.utc"),
            "arrival_revisedtime_local": get_value(dep, "arrival.revisedTime.local"),
            "arrival_runwaytime_utc": get_value(dep, "arrival.runwayTime.utc"),
            "arrival_runwaytime_local": get_value(dep, "arrival.runwayTime.local"),
            "arrival_terminal": get_value(dep, "arrival.terminal"),
            "arrival_gate": get_value(dep, "arrival.gate"),
            "arrival_baggagebelt": get_value(dep, "arrival.baggageBelt"),
            "arrival_quality": get_value(dep, "arrival.quality")
        })

        return rec

    def parse_arrival_record(arr: dict) -> dict:
        rec = base_flight_fields(arr)
        rec.update({
            # Origin airport info (note: fixed path naming bug)
            "departure_airport_icao": get_value(arr, "departure.airport.icao"),
            "departure_airport_iata": get_value(arr, "departure.airport.iata"),
            "departure_airport_name": get_value(arr, "departure.airport.name"),
            "departure_airport_timezone": get_value(arr, "departure.airport.timeZone"),
            "departure_scheduledtime_utc": get_value(arr, "departure.scheduledTime.utc"),
            "departure_scheduledtime_local": get_value(arr, "departure.scheduledTime.local"),
            "departure_revisedtime_utc": get_value(arr, "departure.revisedTime.utc"),
            "departure_revisedtime_local": get_value(arr, "departure.revisedTime.local"),
            "departure_runwaytime_utc": get_value(arr, "departure.runwayTime.utc"),
            "departure_runwaytime_local": get_value(arr, "departure.runwayTime.local"),
            "departure_terminal": get_value(arr, "departure.terminal"),
            "departure_runway": get_value(arr, "departure.runway"),
            "departure_quality": get_value(arr, "departure.quality"),
            # Current airport = arrival
            "arrival_scheduledtime_utc": get_value(arr, "arrival.scheduledTime.utc"),
            "arrival_scheduledtime_local": get_value(arr, "arrival.scheduledTime.local"),
            "arrival_revisedtime_utc": get_value(arr, "arrival.revisedTime.utc"),
            "arrival_revisedtime_local": get_value(arr, "arrival.revisedTime.local"),
            "arrival_runwaytime_utc": get_value(arr, "arrival.runwayTime.utc"),
            "arrival_runwaytime_local": get_value(arr, "arrival.runwayTime.local"),
            "arrival_terminal": get_value(arr, "arrival.terminal"),
            "arrival_runway": get_value(arr, "arrival.runway"),
            "arrival_gate": get_value(arr, "arrival.gate"),
            "arrival_baggagebelt": get_value(arr, "arrival.baggageBelt"),
            "arrival_quality": get_value(arr, "arrival.quality")
        })
        return rec

    # --- API calls in two halves ---
    halves = [
        ("first_half", start_str, mid_str),
        ("second_half", mid_str, end_str)
    ]
    
    credentials = json_reader(api_key_file_path)
    api_key = credentials['key']
    
    _, _, start_str, mid_str, end_str = date_string_to_day_range_epoch(date)

    departure_records, arrival_records = [], []

    for half_name, time_from, time_to in halves:
        response = make_aerodatabox_request(api_key, base_url, endpoint, "icao", airport_icao, time_from, time_to)
        
        if response.status_code == 200:
            data = response.json()
            logging.info(f"Retrieved flight data for {airport_icao} ({half_name}).")

            departures = data.get("departures", [])
            print(departures)
            arrivals = data.get("arrivals", [])

            departure_records.extend(parse_departure_record(d) for d in departures)
            arrival_records.extend(parse_arrival_record(a) for a in arrivals)

            if not departures:
                logging.warning(f"No departures found for {airport_icao} ({half_name}).")
            if not arrivals:
                logging.warning(f"No arrivals found for {airport_icao} ({half_name}).")

        elif response.status_code == 204:
            logging.warning(f"No content for {airport_icao} in {half_name}.")
            continue
        else:
            logging.error(f"AeroDataBox API error {response.status_code}: {response.text}")
            raise RuntimeError(f"AeroDataBox API error {response.status_code}: {response.text}")

    # Safely derive schema
    departure_columns = list(departure_records[0].keys()) if departure_records else []
    arrival_columns = list(arrival_records[0].keys()) if arrival_records else []

    icao_departures = [tuple(rec.get(col) for col in departure_columns) for rec in departure_records]
    icao_arrivals = [tuple(rec.get(col) for col in arrival_columns) for rec in arrival_records]

    return icao_departures, icao_arrivals

In [None]:
base_url = "https://prod.api.market/api/v1/aedbx/aerodatabox"
codetype = "icao"
code = "EDDN"
endpoint = f"flights/airports/{codetype}/{code}"
TIME_FROM = "2025-11-02T00:00"
TIME_TO = "2025-11-02T00:15"

departures, arrivals = fetch_arrivals_departures_data("credentials/aerodatabox_api_key.json", base_url, "flights/airports", code, "2025-01-02")