## Load API keys

In [3]:
import os
import yaml
import pandas as pd
import requests
import json
import time
import random
from datetime import datetime, timedelta

from IPython.display import clear_output

In [4]:
def load_api_keys(yaml_path=None):
    """
    Load API keys from a YAML file.

    :param yaml_path: Path to the YAML file (optional).
    :return: A dictionary with API keys.
    """
    # Default path to the keys.yml file
    if not yaml_path:
        yaml_path = os.path.expanduser("../project_keys.yml")
    
    try:
        with open(yaml_path, 'r') as file:
            data = yaml.safe_load(file)
            return data.get('api_keys', {})
    except FileNotFoundError:
        raise FileNotFoundError(f"API keys file not found at: {yaml_path}")
    except yaml.YAMLError as e:
        raise Exception(f"Error parsing YAML file: {e}")

In [5]:
# Load API keys
api_keys = load_api_keys()

# Access individual keys
flightradar_key = api_keys.get('Flightradar24_flight-lab-01')

## Flightradar API requests

#### REGA Aircraft List

In [6]:
rega_heli_regs = ['HB-ZRN',
                  'HB-ZRP',
                  'HB-ZRQ',
                  'HB-ZRR',
                  'HB-ZRS',
                  'HB-ZRT',
                  'HB-ZQO',
                  'HB-ZRU',
                  'HB-ZRW',
                  'HB-ZRX',
                  'HB-ZRY',
                  'HB-ZRZ',
                  'HB-ZRJ',
                  'HB-TIB',
                  'HB-ZQI',
                  'HB-ZQJ',
                  'HB-ZQL',
                  'HB-ZQM',
                  'HB-ZQN']

rega_airplane_regs = ['HB-JWC',
                      'HB-JWA',
                      'HB-JWB']

rega_ac_regs = rega_heli_regs + rega_airplane_regs

print(len(rega_heli_regs))
print(len(rega_airplane_regs))
print(len(rega_ac_regs))

rega_ac_regs_string = ''
for reg in rega_ac_regs :
    rega_ac_regs_string += reg + ','

rega_ac_regs_string


19
3
22


'HB-ZRN,HB-ZRP,HB-ZRQ,HB-ZRR,HB-ZRS,HB-ZRT,HB-ZQO,HB-ZRU,HB-ZRW,HB-ZRX,HB-ZRY,HB-ZRZ,HB-ZRJ,HB-TIB,HB-ZQI,HB-ZQJ,HB-ZQL,HB-ZQM,HB-ZQN,HB-JWC,HB-JWA,HB-JWB,'

## Flight positions

- **I have 3'397'535 credits remaining**
- **Run one year every 5 minutes -> Expected queries at 8 credits per query (60/5)x24x365x8 = 840'960 credits**
- **One year at 5m intervals would take approx. 29h**
- **Assess how many individual missions this gets me per year and validate credit usage for the number of queries**
- **See how many years I can do with full mission track data given 2'500'000 credits affords 62'500 missions worth of tracks, approx. 8 per day per registration (62'500 missions would take about 16h to pull once I have the flight IDs)**

### CREDITS NOTE: Consider doing just 2024 in review for REGA and using the rest for Pilatus (considering about 4000 SNs for Pilatus...)

In [7]:
def rega_flight_positions(flightradar_key,rega_ac_regs_string,save_path_root,start_date,end_date,splits_per_hour) :
  # API Parameters
  API_TOKEN = flightradar_key
  url = "https://fr24api.flightradar24.com/api/historic/flight-positions/full"
  
  params = {
    'registrations': rega_ac_regs_string #	Aircraft registration numbers (comma-separated values)
  }

  headers = {
          'Accept': 'application/json',
          'Accept-Version': 'v1',
          'Authorization': f'Bearer {API_TOKEN}',
          }

  # Define Save Path
  progress_save_path = "../data/raw" + "/progress_and_errors" + save_path_root.split("../data/raw")[1]
  last_progress_save = "No progress save yet"

  # Define date ranges
  dates = pd.date_range(start=start_date, end=end_date, freq='h')
  query_no = 0

  # Initialize the DataFrame to store data
  df_flight_position = pd.DataFrame()

  # Loop through the dates and timestamps to search for REGA data in API
  for j, date in enumerate(dates): 
    timestamps = []

    # Generate the timestamps for the hour based on the splits per hour
    for split in range(splits_per_hour):
      minute = int((60/splits_per_hour) * split)
      timestamp = int(datetime(date.year, date.month, date.day, date.hour, minute).timestamp())  # Generate the timestamp for the hour
      timestamps.append(timestamp)
    
    # Loop through the timestamps
    for k, timestamp in enumerate(timestamps): 
        query_no += 1
        time.sleep(0.67) 
        params['timestamp'] = timestamp 
        
        # Debugging line
        clear_output(wait=True)    
        print(f"Date range from {start_date} to {end_date}")
        print(f"Query {query_no} of expected {len(timestamps)*len(dates)}")
        print(last_progress_save)
        print(f"Date {j+1} of {len(dates)} and Timestamp {k+1} of {len(timestamps)}")
        print(f"Requesting on date {date} at timestamp {timestamp}") 

        try:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()
            data = response.json()

            if isinstance(data, dict) and "data" in data and isinstance(data["data"], list) and data["data"]:
              print(json.dumps(data, indent=4))  # Debugging line
              # If data is found, append it to the DataFrame
              df_flight_position = pd.concat([df_flight_position, pd.DataFrame(data["data"])], ignore_index=True)

            else:
              print(f"No data found for date {date} at timestamp {timestamp}")
              
        except requests.exceptions.HTTPError as http_err:
              print(f"HTTP error occurred: {http_err}")
              time_of_error = int(time.time())
              save_path = progress_save_path + str(time_of_error) + "_http_error.csv"
              df_flight_position.to_csv(save_path, index=False)
  
        except Exception as err:
              print(f"An error occurred: {err}")
              time_of_error = int(time.time())
              save_path = progress_save_path + str(time_of_error) + "_error.csv"
              df_flight_position.to_csv(save_path, index=False)
    
    # In loop progress saving for every 7th day in date range
    if (query_no) % (7*24*splits_per_hour) == 0 :
      time_of_progress_save = int(time.time())
      save_path = progress_save_path + str(date) + "_" + str(time_of_progress_save) + ".csv"
      df_flight_position.to_csv(save_path, index=False)
      last_progress_save = f"The last progress save was on {str(date)} at {str(time_of_progress_save)}"

  # Final Saving
  time_of_run = int(time.time())
  save_path = save_path_root + str(splits_per_hour) + "_splitsperhour_final_" + str(time_of_run) + ".csv"
  df_flight_position.to_csv(save_path, index=False)

  print("GREAT SUCCESS!!!!!!!!!!")
  return df_flight_position

In [8]:
save_path_root = "../data/raw/df_flight_position_20221201_20231231_" # No need to end with .csv, leave as "_" for timestamp
start_date = datetime(2023, 11, 16, 0, 0, 0)
end_date = datetime(2023, 12, 31, 23, 59, 59)
splits_per_hour = 12

# rega_flight_positions = rega_flight_positions(flightradar_key,rega_ac_regs_string,save_path_root,start_date,end_date,splits_per_hour) 

In [9]:
rega_flight_positions_full = pd.read_csv("../data/raw/rega_flight_positions_full.csv")

# Drop duplicates based on unique ID 'fr24_id'
rega_flight_positions_full.drop_duplicates(subset=['fr24_id'], inplace=True)

 # Sort by unique ID 'fr24_id'
rega_flight_positions_full.sort_values(by=['fr24_id'], inplace=True)

# Verify the list length
print(len(rega_flight_positions_full))

# Save the cleaned data
rega_flight_positions_full.to_csv("../data/raw/rega_flight_positions_reduced.csv", index=False)


31175


#### Flight tracks

- Need to think of how to organize data, think IGC where each row encodes all information and each column is one flight
- Analyze (time permitting) to extract mission duration information

**NOTE: Squawk 7100 is used for SAR by REGA**

In [10]:
def rega_flight_track(flightradar_key,save_path_root,rega_flight_positions_path) :
  # API Parameters
  API_TOKEN = flightradar_key
  url = "https://fr24api.flightradar24.com/api/flight-tracks"

  headers = {
      'Accept': 'application/json',
      'Accept-Version': 'v1',
      'Authorization': f'Bearer {API_TOKEN}',
      }
  
  # Define Save Path
  progress_save_path = "../data/raw" + "/progress_and_errors" + save_path_root.split("../data/raw")[1]
  last_progress_save = "No progress save yet"

  # Flight ID extraction
  rega_flight_positions = pd.read_csv(rega_flight_positions_path)
  flight_ids = list(rega_flight_positions['fr24_id'].unique())

  # Initialize the DataFrame to store data
  df_flight_track = pd.DataFrame(columns=["flight_id", "mission"]).set_index("flight_id")

  # FR_24 ID Based Request Loop
  for i, flight_id in enumerate(flight_ids) :
    # API Parameterization with FR_24 ID and Wait Time
    time.sleep(0.67) 
    params = {
      'flight_id': flight_id #	Aircraft registration numbers (comma-separated values)
    }

    # Debugging line
    clear_output(wait=True)    
    print(f"Request number {i+1} of {len(flight_ids)}, requesting for flight_id {flight_id}") 
    print(last_progress_save)

    try:  
      response = requests.get(url, headers=headers, params=params)
      response.raise_for_status()
      data = response.json()
      print(json.dumps(data, indent=4))
      
      if isinstance(data[0], dict) and "tracks" in data[0] and isinstance(data[0]["tracks"], list) and data[0]["tracks"] :        
        # If data is found, append it to the DataFrame
        tracks_data = data[0]["tracks"]  
        df_flight_track.loc[flight_id] = [tracks_data]

      else:
        print(f"No data found for flight_id {flight_id}")

    except requests.exceptions.HTTPError as http_err:
      print(f"HTTP error occurred: {http_err}")
      time_of_error = int(time.time())
      save_path = progress_save_path + str(time_of_error) + "_http_error.csv"
      df_flight_track.to_csv(save_path, index=True)

    except Exception as err:
      print(f"An error occurred: {err}")
      time_of_error = int(time.time())
      save_path = progress_save_path + str(time_of_error) + "_error.csv"
      df_flight_track.to_csv(save_path, index=True)

    # In loop progress saving for every 400th mission date range
    if (i+1) % 500 == 0 :
      time_of_progress_save = int(time.time())
      save_path = progress_save_path + "mission_" + str(i+1) + "_of_" + str(len(flight_ids)) + "_" +str(time_of_progress_save) + ".csv"
      df_flight_track.to_csv(save_path, index=True)
      last_progress_save = f"The last progress save was on the {i+1}th mission at {str(time_of_progress_save)}"
      

  # Final Saving
  time_of_run = int(time.time())
  save_path = save_path_root + "final_" + str(time_of_run) + ".csv"
  df_flight_track.to_csv(save_path, index=True)

  print("GREAT SUCCESS!!!!!!!!!!")
  return df_flight_track

In [11]:
rega_flight_positions_path = "../data/raw/rega_flight_positions_reduced.csv"
save_path_root = "../data/raw/df_flight_tracks_20221201_20241231_" # No need to end with .csv, leave as "_" for timestamp
rega_flight_tracks = rega_flight_track(flightradar_key,save_path_root,rega_flight_positions_path)

Request number 31175 of 31175, requesting for flight_id 38f3b814
The last progress save was on the 31000th mission at 1738817100
[
    {
        "fr24_id": "38f3b814",
        "tracks": [
            {
                "timestamp": "2025-01-31T21:47:59Z",
                "lat": 46.52454,
                "lon": 6.64084,
                "alt": 0,
                "gspeed": 0,
                "vspeed": 0,
                "track": 45,
                "squawk": "0",
                "callsign": "RGA04",
                "source": "ADSB"
            },
            {
                "timestamp": "2025-01-31T21:48:48Z",
                "lat": 46.5244,
                "lon": 6.64056,
                "alt": 1800,
                "gspeed": 25,
                "vspeed": 576,
                "track": 263,
                "squawk": "0",
                "callsign": "RGA04",
                "source": "ADSB"
            },
            {
                "timestamp": "2025-01-31T21:48:58Z",
                "

In [12]:
# Load the DataFrame where the index is 'flight_id'
rega_flight_tracks = pd.read_csv("../data/raw/df_flight_tracks_20221201_20241231_final_1738817319.csv", index_col="flight_id")
# Print the number of unique 'flight_id' in the DataFrame
print(f"The number of individual mission tracks is: {len(rega_flight_tracks)}")
rega_flight_tracks 

The number of individual mission tracks is: 29082


Unnamed: 0_level_0,mission
flight_id,Unnamed: 1_level_1
2e638c5c,"[{'timestamp': '2022-11-30T22:34:28Z', 'lat': ..."
2e63ab93,"[{'timestamp': '2022-11-30T23:30:46Z', 'lat': ..."
2e63d85d,"[{'timestamp': '2022-12-01T00:58:57Z', 'lat': ..."
2e63e64c,"[{'timestamp': '2022-12-01T01:27:54Z', 'lat': ..."
2e63f7bd,"[{'timestamp': '2022-12-01T02:06:37Z', 'lat': ..."
...,...
38f387f3,"[{'timestamp': '2025-01-31T20:37:07Z', 'lat': ..."
38f39327,"[{'timestamp': '2025-01-31T20:52:37Z', 'lat': ..."
38f39e8a,"[{'timestamp': '2025-01-31T21:09:23Z', 'lat': ..."
38f3a2e1,"[{'timestamp': '2025-01-31T21:15:40Z', 'lat': ..."


In [13]:
# Extract the json-format list of dictionaries from flight_id '2e638c5c' (flight_id is the index) 
mission_data = rega_flight_tracks.loc['2e638c5c']['mission']
print(mission_data)

[{'timestamp': '2022-11-30T22:34:28Z', 'lat': 42.37343, 'lon': -71.01981, 'alt': 0, 'gspeed': 1, 'vspeed': 0, 'track': 351, 'squawk': '0', 'callsign': 'SAZ73', 'source': 'ADSB'}, {'timestamp': '2022-11-30T22:34:47Z', 'lat': 42.37362, 'lon': -71.0196, 'alt': 0, 'gspeed': 4, 'vspeed': 0, 'track': 45, 'squawk': '3435', 'callsign': 'SAZ73', 'source': 'ADSB'}, {'timestamp': '2022-11-30T22:35:01Z', 'lat': 42.37372, 'lon': -71.01938, 'alt': 0, 'gspeed': 3, 'vspeed': 0, 'track': 64, 'squawk': '3435', 'callsign': 'SAZ73', 'source': 'ADSB'}, {'timestamp': '2022-11-30T22:35:14Z', 'lat': 42.37365, 'lon': -71.01916, 'alt': 0, 'gspeed': 3, 'vspeed': 0, 'track': 118, 'squawk': '3435', 'callsign': 'SAZ73', 'source': 'ADSB'}, {'timestamp': '2022-11-30T22:35:30Z', 'lat': 42.37339, 'lon': -71.01879, 'alt': 0, 'gspeed': 7, 'vspeed': 0, 'track': 135, 'squawk': '3435', 'callsign': 'SAZ73', 'source': 'ADSB'}, {'timestamp': '2022-11-30T22:35:40Z', 'lat': 42.37317, 'lon': -71.01852, 'alt': 0, 'gspeed': 7, 'vsp

In [14]:

# Convert the string to a list of dictionaries
mission_data_list = json.loads(mission_data.replace("'", '"'))

# Convert the list of dictionaries to a DataFrame
mission_data_df = pd.DataFrame(mission_data_list)
mission_data_df

Unnamed: 0,timestamp,lat,lon,alt,gspeed,vspeed,track,squawk,callsign,source
0,2022-11-30T22:34:28Z,42.37343,-71.01981,0,1,0,351,0,SAZ73,ADSB
1,2022-11-30T22:34:47Z,42.37362,-71.01960,0,4,0,45,3435,SAZ73,ADSB
2,2022-11-30T22:35:01Z,42.37372,-71.01938,0,3,0,64,3435,SAZ73,ADSB
3,2022-11-30T22:35:14Z,42.37365,-71.01916,0,3,0,118,3435,SAZ73,ADSB
4,2022-11-30T22:35:30Z,42.37339,-71.01879,0,7,0,135,3435,SAZ73,ADSB
...,...,...,...,...,...,...,...,...,...,...
201,2022-11-30T23:55:00Z,46.45308,-59.89533,34975,508,64,76,3435,SAZ73,ADSB
202,2022-11-30T23:56:01Z,46.48730,-59.69284,34975,510,-64,76,3435,SAZ73,ADSB
203,2022-11-30T23:57:02Z,46.52095,-59.49165,34975,511,-64,76,3435,SAZ73,ADSB
204,2022-11-30T23:58:03Z,46.55457,-59.28832,35000,512,192,76,3435,SAZ73,ADSB
