## Load API keys

In [1]:
import os
import yaml
import pandas as pd
import requests
import json
import time
import random
from datetime import datetime, timedelta

from IPython.display import clear_output

In [2]:
def load_api_keys(yaml_path=None):
    """
    Load API keys from a YAML file.

    The keys are read from the top-level ``api_keys`` entry of the file.

    :param yaml_path: Path to the YAML file (optional). When omitted or empty,
        ``../project_keys.yml`` (relative to the working directory) is used.
    :return: A dictionary with API keys. An empty dictionary is returned when
        the file is empty or its ``api_keys`` entry is missing or null.
    :raises FileNotFoundError: If the YAML file does not exist.
    :raises Exception: If the file cannot be parsed as YAML.
    """
    # Default path to the keys.yml file
    if not yaml_path:
        yaml_path = os.path.expanduser("../project_keys.yml")

    try:
        with open(yaml_path, 'r') as file:
            data = yaml.safe_load(file)
    except FileNotFoundError:
        raise FileNotFoundError(f"API keys file not found at: {yaml_path}")
    except yaml.YAMLError as e:
        # Chain the parser error so the original traceback stays visible
        raise Exception(f"Error parsing YAML file: {e}") from e

    # safe_load yields None for an empty document; treat that like "no keys"
    if data is None:
        return {}

    # `or {}` also covers an `api_keys:` entry that is present but null
    return data.get('api_keys') or {}

In [3]:
# Load API keys; no path is passed, so load_api_keys() falls back to its default file
api_keys = load_api_keys()

# Access individual keys
# .get() returns None (it does not raise) when this entry is absent from the mapping
flightradar_key = api_keys.get('Flightradar24_flight-lab-01')

## Flightradar API requests

#### REGA Aircraft List

In [4]:
# Registrations kept in the helicopter list
rega_heli_regs = [
    'HB-ZRN', 'HB-ZRP', 'HB-ZRQ', 'HB-ZRR', 'HB-ZRS',
    'HB-ZRT', 'HB-ZQO', 'HB-ZRU', 'HB-ZRW', 'HB-ZRX',
    'HB-ZRY', 'HB-ZRZ', 'HB-ZRJ', 'HB-TIB', 'HB-ZQI',
    'HB-ZQJ', 'HB-ZQL', 'HB-ZQM', 'HB-ZQN',
]

# Registrations kept in the airplane list
rega_airplane_regs = ['HB-JWC', 'HB-JWA', 'HB-JWB']

# Combined list: helicopters first, then airplanes
rega_ac_regs = [*rega_heli_regs, *rega_airplane_regs]

# Sanity check of the three list sizes, one per line
print(len(rega_heli_regs), len(rega_airplane_regs), len(rega_ac_regs), sep='\n')

# Comma-separated string of all registrations; every entry, including the last,
# is followed by a comma
rega_ac_regs_string = ''.join(f'{reg},' for reg in rega_ac_regs)

rega_ac_regs_string


19
3
22


'HB-ZRN,HB-ZRP,HB-ZRQ,HB-ZRR,HB-ZRS,HB-ZRT,HB-ZQO,HB-ZRU,HB-ZRW,HB-ZRX,HB-ZRY,HB-ZRZ,HB-ZRJ,HB-TIB,HB-ZQI,HB-ZQJ,HB-ZQL,HB-ZQM,HB-ZQN,HB-JWC,HB-JWA,HB-JWB,'

## Flight positions

- **I have 3'397'535 credits remaining**
- **Querying one year at 5-minute intervals takes (60/5) x 24 x 365 = 105'120 queries; at 8 credits per query that is 840'960 credits**
- **Pulling one year at 5-minute intervals would take approx. 29h**
- **Assess how many individual missions this gets me per year and validate credit usage for the number of queries**
- **See how many years I can do with full mission track data given 2'500'000 credits affords 62'500 missions worth of tracks, approx. 8 per day per registration (62'500 missions would take about 16h to pull once I have the flight IDs)**

### CREDITS NOTE: Consider doing just 2024 in review for REGA and using the rest for Pilatus (considering about 4000 SNs for Pilatus...)

In [5]:
def rega_flight_positions(flightradar_key,rega_ac_regs_string,save_path_root,start_date,end_date,splits_per_hour,
                          request_timeout=30,request_delay=0.67) :
    """
    Query the Flightradar24 historic flight-positions endpoint at evenly spaced timestamps.

    For every hour between ``start_date`` and ``end_date`` the hour is divided into
    ``splits_per_hour`` equal steps and one request is sent per step. All returned
    position records are collected into a single DataFrame, which is also written to CSV:
    a snapshot after each failed request, a progress snapshot every 7 days' worth of
    queries, a snapshot when the run is interrupted, and a final file at the end.

    :param flightradar_key: API token sent as a Bearer token.
    :param rega_ac_regs_string: Comma-separated aircraft registrations to filter on.
    :param save_path_root: Path prefix of the final CSV. Snapshots go to a
        ``progress_and_errors`` folder derived from this prefix.
    :param start_date: First hour to query (inclusive).
    :param end_date: End of the hourly range passed to ``pd.date_range``.
    :param splits_per_hour: Number of queries per hour (12 -> one every 5 minutes).
    :param request_timeout: Seconds to wait for each HTTP response before giving up.
    :param request_delay: Seconds to sleep before each request.
    :return: DataFrame with one row per returned position record.
    :raises ValueError: If ``splits_per_hour`` is smaller than 1.
    """
    if splits_per_hour < 1:
        raise ValueError(f"splits_per_hour must be at least 1, got {splits_per_hour}")

    # API Parameters
    url = "https://fr24api.flightradar24.com/api/historic/flight-positions/full"
    params = {
        'registrations': rega_ac_regs_string  # Aircraft registration numbers (comma-separated values)
    }
    headers = {
        'Accept': 'application/json',
        'Accept-Version': 'v1',
        'Authorization': f'Bearer {flightradar_key}',
    }

    # Define Save Path: snapshots live in a "progress_and_errors" folder
    raw_marker = "../data/raw"
    if raw_marker in save_path_root:
        progress_save_path = raw_marker + "/progress_and_errors" + save_path_root.split(raw_marker)[1]
    else:
        # Roots outside ../data/raw get the folder next to the final file
        root_dir, root_name = os.path.split(save_path_root)
        progress_save_path = os.path.join(root_dir, "progress_and_errors", root_name)

    # Make sure every folder we write to exists before spending any API credits
    for path_prefix in (progress_save_path, save_path_root):
        target_dir = os.path.dirname(path_prefix)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    last_progress_save = "No progress save yet"

    # Define date ranges
    dates = pd.date_range(start=start_date, end=end_date, freq='h')
    minutes = [int((60/splits_per_hour) * split) for split in range(splits_per_hour)]
    expected_queries = len(minutes) * len(dates)
    query_no = 0

    # Responses are kept as a list of frames and merged on demand; concatenating
    # onto one growing DataFrame for every response is quadratic.
    position_chunks = []

    def collected_positions():
        """Return everything collected so far as one DataFrame."""
        if not position_chunks:
            return pd.DataFrame()
        if len(position_chunks) > 1:
            # Keep the merged frame as the only chunk so later merges stay cheap
            position_chunks[:] = [pd.concat(position_chunks, ignore_index=True)]
        return position_chunks[0]

    def save_snapshot(file_suffix):
        """Write the data collected so far to a CSV under the progress prefix."""
        collected_positions().to_csv(progress_save_path + file_suffix, index=False)

    try:
        # Loop through the hours and the timestamps within each hour
        for j, date in enumerate(dates):
            for k, minute in enumerate(minutes):
                # Naive datetime: .timestamp() interprets it in the machine's local time zone
                timestamp = int(datetime(date.year, date.month, date.day, date.hour, minute).timestamp())

                query_no += 1
                time.sleep(request_delay)
                params['timestamp'] = timestamp

                # Progress display (replaces the previous cell output)
                clear_output(wait=True)
                print(f"Date range from {start_date} to {end_date}")
                print(f"Query {query_no} of expected {expected_queries}")
                print(last_progress_save)
                print(f"Date {j+1} of {len(dates)} and Timestamp {k+1} of {len(minutes)}")
                print(f"Requesting on date {date} at timestamp {timestamp}")

                try:
                    # Without a timeout a stalled connection would block the run forever
                    response = requests.get(url, headers=headers, params=params, timeout=request_timeout)
                    response.raise_for_status()
                    data = response.json()

                    records = data.get("data") if isinstance(data, dict) else None
                    if isinstance(records, list) and records:
                        position_chunks.append(pd.DataFrame(records))
                        print(f"Received {len(records)} position record(s)")
                    else:
                        print(f"No data found for date {date} at timestamp {timestamp}")

                except requests.exceptions.HTTPError as http_err:
                    print(f"HTTP error occurred: {http_err}")
                    save_snapshot(str(int(time.time())) + "_http_error.csv")

                except Exception as err:
                    print(f"An error occurred: {err}")
                    save_snapshot(str(int(time.time())) + "_error.csv")

            # In loop progress saving for every 7th day in date range
            if query_no % (7*24*splits_per_hour) == 0:
                time_of_progress_save = int(time.time())
                save_snapshot(str(date) + "_" + str(time_of_progress_save) + ".csv")
                last_progress_save = f"The last progress save was on {str(date)} at {str(time_of_progress_save)}"

    except KeyboardInterrupt:
        # Keep what was already collected when the run is stopped by hand
        save_snapshot(str(int(time.time())) + "_interrupted.csv")
        raise

    # Final Saving
    df_flight_position = collected_positions()
    time_of_run = int(time.time())
    save_path = save_path_root + str(splits_per_hour) + "_splitsperhour_final_" + str(time_of_run) + ".csv"
    df_flight_position.to_csv(save_path, index=False)

    print("GREAT SUCCESS!!!!!!!!!!")
    return df_flight_position

In [6]:
save_path_root = "../data/raw/df_flight_position_20221201_20231231_" # No need to end with .csv, leave as "_" for timestamp
start_date = datetime(2023, 11, 16, 0, 0, 0)
end_date = datetime(2023, 12, 31, 23, 59, 59)
splits_per_hour = 12  # one query every 5 minutes

# Store the result under its own name. Assigning it to `rega_flight_positions` would
# replace the function with a DataFrame, so re-running this cell would fail with
# "'DataFrame' object is not callable".
df_rega_flight_positions = rega_flight_positions(flightradar_key, rega_ac_regs_string, save_path_root, start_date, end_date, splits_per_hour)

Date range from 2023-11-16 00:00:00 to 2023-12-31 23:59:59
Query 177 of expected 13248
No progress save yet
Date 15 of 1104 and Timestamp 9 of 12
Requesting on date 2023-11-16 14:00:00 at timestamp 1700142000


KeyboardInterrupt: 

In [8]:
# CSV snapshots written by the flight-position runs, in the order they are stacked below
position_csv_paths = [
    "../data/raw/df_flight_position_20240101_20241231_2024-05-12 23:00:00_1738403045.csv",
    "../data/raw/df_flight_position_202405013_20241231_2024-11-17 23:00:00_1738489014.csv",
    "../data/raw/df_flight_position_20241118_20250131_12_splitsperhour_final_1738513140.csv",
    "../data/raw/df_flight_position_20221201_20231231_2023-05-10 23:00:00_1738568568.csv",
    "../data/raw/df_flight_position_20221201_20231231_2023-11-15 23:00:00_1738663321.csv",
]
df1, df2, df3, df4, df5 = [pd.read_csv(csv_path) for csv_path in position_csv_paths]

# Stack all five frames into one and renumber the rows from 0
df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 202416 entries, 0 to 202415
Data columns (total 22 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   fr24_id       202416 non-null  object 
 1   flight        185665 non-null  object 
 2   callsign      201967 non-null  object 
 3   lat           202416 non-null  float64
 4   lon           202416 non-null  float64
 5   track         202416 non-null  int64  
 6   alt           202416 non-null  int64  
 7   gspeed        202416 non-null  int64  
 8   vspeed        202416 non-null  int64  
 9   squawk        202416 non-null  int64  
 10  timestamp     202416 non-null  object 
 11  source        202416 non-null  object 
 12  hex           202416 non-null  object 
 13  type          202416 non-null  object 
 14  reg           202416 non-null  object 
 15  painted_as    200513 non-null  object 
 16  operating_as  200513 non-null  object 
 17  orig_iata     171097 non-null  object 
 18  orig

Unnamed: 0,fr24_id,flight,callsign,lat,lon,track,alt,gspeed,vspeed,squawk,...,hex,type,reg,painted_as,operating_as,orig_iata,orig_icao,dest_iata,dest_icao,eta
0,337327d8,RGA2,RGA02,47.65457,7.52488,8,2575,124,-448,7100,...,4B43B1,EC45,HB-ZQN,RGA,RGA,BSL,LFSB,,,
1,337327d8,RGA2,RGA02,47.82994,7.66921,40,2525,154,0,7100,...,4B43B1,EC45,HB-ZQN,RGA,RGA,BSL,LFSB,,,
2,337327d8,RGA2,RGA02,48.00197,7.85825,45,3500,144,576,7100,...,4B43B1,EC45,HB-ZQN,RGA,RGA,BSL,LFSB,,,
3,337327d8,RGA2,RGA02,48.12985,8.04826,213,3200,21,-640,7100,...,4B43B1,EC45,HB-ZQN,RGA,RGA,BSL,LFSB,,,
4,33733dd4,RGA2,RGA02,48.07436,7.92854,233,3025,86,-128,7100,...,4B43B1,EC45,HB-ZQN,RGA,RGA,QCF,EDTF,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202411,32d8b704,SAZ74,SAZ74,41.08882,-75.14168,114,8575,312,-1792,6073,...,4B1A02,CL60,HB-JWC,SAZ,RGA,SLC,KSLC,TEB,KTEB,2023-11-15T22:39:49Z
202412,32d8b704,SAZ74,SAZ74,41.03737,-74.63141,103,3525,291,-896,6073,...,4B1A02,CL60,HB-JWC,SAZ,RGA,SLC,KSLC,TEB,KTEB,2023-11-15T22:38:09Z
202413,32d8b704,SAZ74,SAZ74,41.04739,-74.11976,111,2625,236,-128,6073,...,4B1A02,CL60,HB-JWC,SAZ,RGA,SLC,KSLC,TEB,KTEB,2023-11-15T22:42:39Z
202414,32d8b704,SAZ74,SAZ74,40.86341,-74.05853,183,0,112,-640,6073,...,4B1A02,CL60,HB-JWC,SAZ,RGA,SLC,KSLC,TEB,KTEB,2023-11-15T22:44:33Z


#### Flight tracks

- Need to think of how to organize data, think IGC where each row encodes all information and each column is one flight
- Analyze (time permitting) to extract mission duration information

**NOTE: Squawk 7100 is used for SAR by REGA**

In [1]:
def rega_flight_track(flightradar_key,save_path_root,rega_flight_positions_path,
                      request_timeout=30,request_delay=0.67,progress_every=400) :
    """
    Download the full track of every flight found in a flight-positions CSV.

    The unique ``fr24_id`` values of the CSV are requested one by one from the
    Flightradar24 flight-tracks endpoint. Each non-empty track (a list of point
    dictionaries) is stored in the ``mission`` column of a DataFrame indexed by
    ``flight_id``. The DataFrame is also written to CSV: a snapshot after each failed
    request, a progress snapshot every ``progress_every`` requests, a snapshot when
    the run is interrupted, and a final file at the end.

    :param flightradar_key: API token sent as a Bearer token.
    :param save_path_root: Path prefix of the final CSV. Snapshots go to a
        ``progress_and_errors`` folder derived from this prefix.
    :param rega_flight_positions_path: CSV file with an ``fr24_id`` column.
    :param request_timeout: Seconds to wait for each HTTP response before giving up.
    :param request_delay: Seconds to sleep before each request.
    :param progress_every: Number of requests between progress snapshots
        (0 or None disables them).
    :return: DataFrame indexed by ``flight_id`` with a single ``mission`` column.
    """
    # API Parameters
    url = "https://fr24api.flightradar24.com/api/flight-tracks"
    headers = {
        'Accept': 'application/json',
        'Accept-Version': 'v1',
        'Authorization': f'Bearer {flightradar_key}',
    }

    # Define Save Path: snapshots live in a "progress_and_errors" folder
    raw_marker = "../data/raw"
    if raw_marker in save_path_root:
        progress_save_path = raw_marker + "/progress_and_errors" + save_path_root.split(raw_marker)[1]
    else:
        # Roots outside ../data/raw get the folder next to the final file
        root_dir, root_name = os.path.split(save_path_root)
        progress_save_path = os.path.join(root_dir, "progress_and_errors", root_name)

    # Make sure every folder we write to exists before spending any API credits
    for path_prefix in (progress_save_path, save_path_root):
        target_dir = os.path.dirname(path_prefix)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    last_progress_save = "No progress save yet"

    # Flight ID extraction (rows without an ID cannot be requested)
    positions = pd.read_csv(rega_flight_positions_path)
    flight_ids = list(positions['fr24_id'].dropna().unique())

    # Tracks are gathered in a dict and turned into a DataFrame only when needed
    tracks_by_flight = {}

    def collected_tracks():
        """Return the tracks collected so far as a DataFrame indexed by flight_id."""
        flight_index = pd.Index(list(tracks_by_flight), name="flight_id")
        missions = pd.Series(list(tracks_by_flight.values()), index=flight_index,
                             dtype=object, name="mission")
        return missions.to_frame()

    def save_tracks(save_path):
        """Write the collected tracks to CSV, keeping the flight_id index as a column."""
        # The index holds the flight IDs; index=False would drop them from the file.
        # Each track is written as the text form of its Python list.
        collected_tracks().to_csv(save_path, index=True)

    try:
        # FR_24 ID Based Request Loop
        for i, flight_id in enumerate(flight_ids) :
            # API Parameterization with FR_24 ID and Wait Time
            time.sleep(request_delay)
            params = {
                'flight_id': flight_id  # fr24_id of the flight whose track is requested
            }

            # Progress display (replaces the previous cell output)
            clear_output(wait=True)
            print(f"Request number {i+1} of {len(flight_ids)}, requesting for flight_id {flight_id}")
            print(last_progress_save)

            try:
                # Without a timeout a stalled connection would block the run forever
                response = requests.get(url, headers=headers, params=params, timeout=request_timeout)
                response.raise_for_status()
                data = response.json()

                # The track is read from the first element of the returned list
                first_entry = data[0] if isinstance(data, list) and data else None
                tracks_data = first_entry.get("tracks") if isinstance(first_entry, dict) else None

                if isinstance(tracks_data, list) and tracks_data:
                    tracks_by_flight[flight_id] = tracks_data
                    print(f"Received {len(tracks_data)} track point(s)")
                else:
                    print(f"No data found for flight_id {flight_id}")

            except requests.exceptions.HTTPError as http_err:
                print(f"HTTP error occurred: {http_err}")
                save_tracks(progress_save_path + str(int(time.time())) + "_http_error.csv")

            except Exception as err:
                print(f"An error occurred: {err}")
                save_tracks(progress_save_path + str(int(time.time())) + "_error.csv")

            # In loop progress saving after every `progress_every` requests
            if progress_every and (i+1) % progress_every == 0 :
                time_of_progress_save = int(time.time())
                save_tracks(progress_save_path + str(i+1) + "_missions_" + str(time_of_progress_save) + ".csv")
                last_progress_save = f"The last progress save was on the {i+1}th mission at {str(time_of_progress_save)}"

    except KeyboardInterrupt:
        # Keep what was already collected when the run is stopped by hand
        save_tracks(progress_save_path + str(int(time.time())) + "_interrupted.csv")
        raise

    # Final Saving
    df_flight_track = collected_tracks()
    time_of_run = int(time.time())
    save_path = save_path_root + "final_" + str(time_of_run) + ".csv"
    df_flight_track.to_csv(save_path, index=True)

    print("GREAT SUCCESS!!!!!!!!!!")
    return df_flight_track

In [30]:
# Input CSV of flight positions; rega_flight_track reads its 'fr24_id' column
rega_flight_positions_path = "../data/raw/df_flight_position_start_end_final_12324513.csv"
save_path_root = "../data/raw/df_flight_tracks_start_end_" # No need to end with .csv, leave as "_" for timestamp
# The call below is commented out, so running this cell only sets the two paths
# and does not assign `rega_flight_tracks`.
# rega_flight_tracks = rega_flight_track(flightradar_key,save_path_root,rega_flight_positions_path)

Request number 10 of 10, requesting for flight_id 3877f9db
[
    {
        "fr24_id": "3877f9db",
        "tracks": [
            {
                "timestamp": "2024-12-24T12:05:50Z",
                "lat": 46.2215,
                "lon": 7.34523,
                "alt": 0,
                "gspeed": 5,
                "vspeed": 768,
                "track": 36,
                "squawk": "0",
                "callsign": "RGA18",
                "source": "ADSB"
            },
            {
                "timestamp": "2024-12-24T12:05:58Z",
                "lat": 46.22186,
                "lon": 7.34539,
                "alt": 0,
                "gspeed": 9,
                "vspeed": 704,
                "track": 12,
                "squawk": "0",
                "callsign": "RGA18",
                "source": "ADSB"
            },
            {
                "timestamp": "2024-12-24T12:06:37Z",
                "lat": 46.21939,
                "lon": 7.33183,
                "alt": 0,

In [31]:
# Display the flight-tracks table.
# NOTE(review): the only visible assignment of `rega_flight_tracks` is a commented-out
# call — confirm it is defined before running this cell on a fresh kernel.
rega_flight_tracks

Unnamed: 0_level_0,missions
flight_id,Unnamed: 1_level_1
387701e5,"[{'timestamp': '2024-12-24T02:32:06Z', 'lat': ..."
3877897e,"[{'timestamp': '2024-12-24T07:50:50Z', 'lat': ..."
387795d7,"[{'timestamp': '2024-12-24T08:20:54Z', 'lat': ..."
38779e5a,"[{'timestamp': '2024-12-24T08:47:33Z', 'lat': ..."
3877dab0,"[{'timestamp': '2024-12-24T11:00:36Z', 'lat': ..."
3877e7e1,"[{'timestamp': '2024-12-24T11:27:13Z', 'lat': ..."
3877ec82,"[{'timestamp': '2024-12-24T11:37:37Z', 'lat': ..."
3877efb5,"[{'timestamp': '2024-12-24T11:44:17Z', 'lat': ..."
3877f25b,"[{'timestamp': '2024-12-24T11:49:47Z', 'lat': ..."
3877f9db,"[{'timestamp': '2024-12-24T12:05:50Z', 'lat': ..."


In [34]:
# Pick one flight, take the first value of its row (the list of track points)
# and expand that list into a table with one row per point
example_flight_id = '387701e5'
example_track_points = rega_flight_tracks.loc[example_flight_id].values[0]
single_track_example = pd.DataFrame(example_track_points)
single_track_example

Unnamed: 0,timestamp,lat,lon,alt,gspeed,vspeed,track,squawk,callsign,source
0,2024-12-24T02:32:06Z,46.54710,6.61792,0,0,832,270,0,RGA04,ADSB
1,2024-12-24T02:32:14Z,46.54683,6.61765,0,11,192,210,0,RGA04,ADSB
2,2024-12-24T02:32:21Z,46.54610,6.61732,0,47,128,194,0,RGA04,ADSB
3,2024-12-24T02:32:27Z,46.54362,6.61652,0,76,512,192,0,RGA04,ADSB
4,2024-12-24T02:32:30Z,46.54253,6.61618,0,82,512,190,0,RGA04,ADSB
...,...,...,...,...,...,...,...,...,...,...
129,2024-12-24T02:43:24Z,46.54573,6.61698,1750,39,-320,13,7100,RGA04,ADSB
130,2024-12-24T02:43:27Z,46.54619,6.61719,1750,33,-192,21,7100,RGA04,ADSB
131,2024-12-24T02:43:31Z,46.54660,6.61748,1725,25,-256,30,7100,RGA04,ADSB
132,2024-12-24T02:43:34Z,46.54688,6.61775,1725,17,-192,38,7100,RGA04,ADSB
