## Load API keys

In [44]:
import ast
import json
import os
import random
import time
from datetime import datetime, timedelta

import pandas as pd
import requests
import yaml
from IPython.display import clear_output

In [45]:
def load_api_keys(yaml_path=None):
    """
    Load API keys from a YAML file.

    The keys are read from the top-level ``api_keys`` mapping of the file.

    :param yaml_path: Path to the YAML file (optional). Defaults to
        ``../project_keys.yml``. A leading ``~`` is expanded.
    :return: A dictionary with API keys. Empty when the file is empty or
        when it has no (or a null) ``api_keys`` entry.
    :raises FileNotFoundError: If the file does not exist.
    :raises ValueError: If the top level of the file is not a mapping.
    :raises Exception: If the file cannot be parsed as YAML.
    """
    # Default path to the keys file; expanduser also covers caller-supplied "~/..." paths
    yaml_path = os.path.expanduser(yaml_path or "../project_keys.yml")

    try:
        with open(yaml_path, 'r') as file:
            data = yaml.safe_load(file)
    except FileNotFoundError as e:
        raise FileNotFoundError(f"API keys file not found at: {yaml_path}") from e
    except yaml.YAMLError as e:
        raise Exception(f"Error parsing YAML file: {e}") from e

    # safe_load returns None for an empty document; treat that as "no keys"
    if data is None:
        return {}
    if not isinstance(data, dict):
        raise ValueError(f"Expected a mapping at the top level of {yaml_path}, got {type(data).__name__}")

    # `or {}` also covers an explicit `api_keys: null`
    return data.get('api_keys') or {}

In [46]:
# Read every key configured in the project's YAML file
api_keys = load_api_keys()

# Token for the Flightradar24 API (None if the entry is absent from the file)
flightradar_key = api_keys.get('Flightradar24_flight-lab-01')

## Flightradar API requests

### Swedish Flight Ambulance Service PC-24

In [47]:
# Swedish flight ambulance registrations: SE-RVA through SE-RVF
sva_regs = [f'SE-RV{suffix}' for suffix in 'ABCDEF']

# The API request takes the registrations as one comma-separated string
sva_ac_regs_string = ','.join(sva_regs)
sva_ac_regs_string

'SE-RVA,SE-RVB,SE-RVC,SE-RVD,SE-RVE,SE-RVF'

## Flight positions

- **I have 309'493 credits remaining**
- **2'000 missions cost 80'000 credits for tracks and 16'000 credits for positions, i.e. about 100k credits in total**
- **Budget allows for approx. 6'000 missions**


In [48]:
def sva_flight_positions(flightradar_key, sva_ac_regs_string, save_path_root, start_date, end_date, splits_per_hour,
                         credits_available=218788, credits_per_query=8, credits_per_track=40,
                         missions_already_collected=5457, request_timeout=30, request_pause=0.67):
  """
  Query the historic flight-positions endpoint at regular timestamps and collect the results.

  For every hour between start_date and end_date, `splits_per_hour` evenly spaced
  timestamps are queried for the given registrations. Before each request a credit
  guard checks that the credits left after the positions queries would still cover
  a later track request for every mission found so far; if not, the run stops early.
  Partial results are written to CSV on errors and every 7 days' worth of queries,
  and the full result is written once at the end.

  :param flightradar_key: API token, sent as a Bearer token.
  :param sva_ac_regs_string: Aircraft registrations as one comma-separated string.
  :param save_path_root: Path prefix of the final CSV; the split count, a Unix time and ".csv" are appended.
  :param start_date: First hour to query (passed to pd.date_range).
  :param end_date: Last hour to query (passed to pd.date_range).
  :param splits_per_hour: Number of queries per hour (must be >= 1).
  :param credits_available: Credit balance the guard starts from.
  :param credits_per_query: Credits deducted per positions query in the guard.
  :param credits_per_track: Credits reserved per mission for a later track request.
  :param missions_already_collected: Missions gathered in earlier runs that also need track credits.
  :param request_timeout: Seconds to wait for the API before giving up on a request.
  :param request_pause: Seconds to sleep before every request.
  :return: DataFrame with every position record collected in this run.
  :raises ValueError: If splits_per_hour is smaller than 1.
  """
  if splits_per_hour < 1:
    raise ValueError("splits_per_hour must be at least 1")

  # API Parameters
  url = "https://fr24api.flightradar24.com/api/historic/flight-positions/full"
  params = {
    'registrations': sva_ac_regs_string  # Aircraft registration numbers (comma-separated values)
  }
  headers = {
    'Accept': 'application/json',
    'Accept-Version': 'v1',
    'Authorization': f'Bearer {flightradar_key}',
  }

  # Define Save Path: progress and error dumps go into a "progress_and_errors" folder.
  # Roots under ../data/raw keep their established location; any other root gets the
  # folder next to the final file instead of failing on the split.
  raw_root = "../data/raw"
  if raw_root in save_path_root:
    progress_save_path = raw_root + "/progress_and_errors" + save_path_root.split(raw_root)[1]
  else:
    progress_save_path = os.path.join(os.path.dirname(save_path_root), "progress_and_errors", os.path.basename(save_path_root))
  # Make sure a save can never fail just because a folder is missing
  for folder in (os.path.dirname(progress_save_path), os.path.dirname(save_path_root)):
    if folder:
      os.makedirs(folder, exist_ok=True)
  last_progress_save = "No progress save yet"

  # Define date ranges
  dates = pd.date_range(start=start_date, end=end_date, freq='h')
  expected_queries = len(dates) * splits_per_hour
  query_no = 0

  # Records are collected in a plain list and turned into a DataFrame only when needed
  records = []
  seen_flight_ids = set()
  budget_exhausted = False

  def snapshot():
    # DataFrame of everything collected so far
    return pd.DataFrame(records)

  # Loop through the dates and timestamps to search for position data in the API
  for j, date in enumerate(dates):
    # Generate the timestamps for the hour based on the splits per hour.
    # NOTE: these are naive datetimes, so .timestamp() interprets them in the
    # local timezone of the machine running the notebook.
    timestamps = [
      int(datetime(date.year, date.month, date.day, date.hour, int((60 / splits_per_hour) * split)).timestamp())
      for split in range(splits_per_hour)
    ]

    # Loop through the timestamps
    for k, timestamp in enumerate(timestamps):
      query_no += 1
      time.sleep(request_pause)
      params['timestamp'] = timestamp

      # Credit check for track extraction
      credits_remaining = credits_available - query_no * credits_per_query
      missions_extractable = credits_remaining / credits_per_track
      missions_in_set = len(seen_flight_ids) + missions_already_collected

      # Progress display
      clear_output(wait=True)
      print(f"Date range from {start_date} to {end_date}")
      print(f"Query {query_no} of expected {expected_queries}")
      print(last_progress_save)
      print(f"Date {j+1} of {len(dates)} and Timestamp {k+1} of {len(timestamps)}")
      print(f"Will attempt requesting on date {date} at timestamp {timestamp}")

      if missions_in_set >= missions_extractable:
        budget_exhausted = True
        print(f"WARNING!!! Missions in set {missions_in_set} >= Missions extractable {missions_extractable}, will cancel request!")
        break

      print(f"Missions in set {missions_in_set} < Missions extractable {missions_extractable}, will proceed with request!")
      print(f"Credits Remaining: {credits_remaining}")

      try:
        # Without a timeout a stalled connection would block the whole run indefinitely
        response = requests.get(url, headers=headers, params=params, timeout=request_timeout)
        response.raise_for_status()
        data = response.json()

        if isinstance(data, dict) and isinstance(data.get("data"), list) and data["data"]:
          print(json.dumps(data, indent=4))  # Debugging line
          records.extend(data["data"])
          seen_flight_ids.update(
            rec['fr24_id'] for rec in data["data"] if isinstance(rec, dict) and 'fr24_id' in rec
          )
        else:
          print(f"No data found for date {date} at timestamp {timestamp}")

      except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        save_path = progress_save_path + str(int(time.time())) + "_http_error.csv"
        snapshot().to_csv(save_path, index=False)

      except Exception as err:
        # Best effort: keep what was collected so far and carry on with the next timestamp
        print(f"An error occurred: {err}")
        save_path = progress_save_path + str(int(time.time())) + "_error.csv"
        snapshot().to_csv(save_path, index=False)

    # In loop progress saving for every 7th day in date range
    if query_no % (7 * 24 * splits_per_hour) == 0:
      time_of_progress_save = int(time.time())
      save_path = progress_save_path + str(date) + "_" + str(time_of_progress_save) + ".csv"
      snapshot().to_csv(save_path, index=False)
      last_progress_save = f"The last progress save was on {str(date)} at {str(time_of_progress_save)}"

    if budget_exhausted:
      break

  # Final Saving
  df_flight_position = snapshot()
  save_path = save_path_root + str(splits_per_hour) + "_splitsperhour_final_" + str(int(time.time())) + ".csv"
  df_flight_position.to_csv(save_path, index=False)

  if budget_exhausted:
    print(f"Stopped early after {query_no} queries to protect the credit budget; partial data saved to {save_path}")
  else:
    print("GREAT SUCCESS!!!!!!!!!!")
  return df_flight_position

In [49]:
# Extraction window: 3 Nov 2024 00:00:00 through 31 Dec 2024 23:59:59
start_date = datetime(2024, 11, 3)
end_date = datetime(2024, 12, 31, 23, 59, 59)

# Number of queries issued per hour of the window
splits_per_hour = 2

# Output prefix: no need to end with .csv, leave the trailing "_" for the timestamp
save_path_root = "../data/raw/df_flight_position_20241103_20241231_"

# Left commented out: running it issues live API requests. The result is assigned
# to a name other than the function's so the function stays callable afterwards.
# df_flight_position = sva_flight_positions(flightradar_key,sva_ac_regs_string,save_path_root,start_date,end_date,splits_per_hour)

Date range from 2024-11-03 00:00:00 to 2024-12-31 23:59:59
Query 39 of expected 2832
No progress save yet
Date 20 of 1416 and Timestamp 1 of 2
Will attempt requesting on date 2024-11-03 19:00:00 at timestamp 1730656800
GREAT SUCCESS!!!!!!!!!!


In [53]:
# Load the full positions file, keep one row per unique ID 'fr24_id' and order by that ID.
# NOTE: this name is also the name of the request function defined earlier in the
# notebook; once this cell has run, the name refers to the DataFrame.
sva_flight_positions = (
    pd.read_csv("../data/raw/sva_flight_positions_full.csv")
    .drop_duplicates(subset=['fr24_id'])
    .sort_values(by=['fr24_id'])
)

# Verify the number of remaining rows
print(len(sva_flight_positions))

# Save the cleaned data
sva_flight_positions.to_csv("../data/raw/sva_flight_positions_reduced.csv", index=False)


5462


#### Flight tracks

**NOTE: does SVA use a specific squawk code for emergencies?**

In [54]:
def sva_flight_track(flightradar_key, save_path_root, sva_flight_positions_path,
                     request_timeout=30, request_pause=0.67, progress_every=200):
  """
  Request the flight track for every flight id found in a positions CSV.

  The unique 'fr24_id' values of the CSV are requested one by one from the
  flight-tracks endpoint. Each non-empty track is stored as one row (index
  'flight_id', column 'mission') holding the list of track points. Partial
  results are written to CSV on errors and every `progress_every` requests,
  and the full result is written once at the end.

  :param flightradar_key: API token, sent as a Bearer token.
  :param save_path_root: Path prefix of the final CSV; "final_", a Unix time and ".csv" are appended.
  :param sva_flight_positions_path: CSV file with an 'fr24_id' column.
  :param request_timeout: Seconds to wait for the API before giving up on a request.
  :param request_pause: Seconds to sleep before every request.
  :param progress_every: Write a progress CSV after this many requests (falsy disables it).
  :return: DataFrame indexed by 'flight_id' with a 'mission' column of track lists.
  """
  # API Parameters
  url = "https://fr24api.flightradar24.com/api/flight-tracks"
  headers = {
    'Accept': 'application/json',
    'Accept-Version': 'v1',
    'Authorization': f'Bearer {flightradar_key}',
  }

  # Define Save Path: progress and error dumps go into a "progress_and_errors" folder.
  # Roots under ../data/raw keep their established location; any other root gets the
  # folder next to the final file instead of failing on the split.
  raw_root = "../data/raw"
  if raw_root in save_path_root:
    progress_save_path = raw_root + "/progress_and_errors" + save_path_root.split(raw_root)[1]
  else:
    progress_save_path = os.path.join(os.path.dirname(save_path_root), "progress_and_errors", os.path.basename(save_path_root))
  # Make sure a save can never fail just because a folder is missing
  for folder in (os.path.dirname(progress_save_path), os.path.dirname(save_path_root)):
    if folder:
      os.makedirs(folder, exist_ok=True)
  last_progress_save = "No progress save yet"

  # Flight ID extraction. Ids are read as strings so purely numeric-looking ids are
  # not converted to numbers; rows without an id are skipped.
  positions = pd.read_csv(sva_flight_positions_path, dtype={'fr24_id': str})
  flight_ids = list(positions['fr24_id'].dropna().unique())

  # Tracks are collected per flight id and turned into a DataFrame only when needed
  tracks_by_flight = {}

  def snapshot():
    # DataFrame of everything collected so far: index 'flight_id', column 'mission'
    return pd.DataFrame(
      {"mission": list(tracks_by_flight.values())},
      index=pd.Index(list(tracks_by_flight.keys()), name="flight_id"),
    )

  # FR_24 ID Based Request Loop
  for i, flight_id in enumerate(flight_ids):
    # API Parameterization with FR_24 ID and Wait Time
    time.sleep(request_pause)
    params = {
      'flight_id': flight_id  # FR24 flight id whose track is requested
    }

    # Progress display
    clear_output(wait=True)
    print(f"Request number {i+1} of {len(flight_ids)}, requesting for flight_id {flight_id}")
    print(last_progress_save)

    try:
      # Without a timeout a stalled connection would block the whole run indefinitely
      response = requests.get(url, headers=headers, params=params, timeout=request_timeout)
      response.raise_for_status()
      data = response.json()
      print(json.dumps(data, indent=4))

      # An empty or non-list response means "no track", not an error
      first_entry = data[0] if isinstance(data, list) and data else None
      if isinstance(first_entry, dict) and isinstance(first_entry.get("tracks"), list) and first_entry["tracks"]:
        tracks_by_flight[flight_id] = first_entry["tracks"]
      else:
        print(f"No data found for flight_id {flight_id}")

    except requests.exceptions.HTTPError as http_err:
      print(f"HTTP error occurred: {http_err}")
      save_path = progress_save_path + str(int(time.time())) + "_http_error.csv"
      snapshot().to_csv(save_path, index=True)

    except Exception as err:
      # Best effort: keep what was collected so far and carry on with the next flight id
      print(f"An error occurred: {err}")
      save_path = progress_save_path + str(int(time.time())) + "_error.csv"
      snapshot().to_csv(save_path, index=True)

    # In loop progress saving after every `progress_every` requests
    if progress_every and (i+1) % progress_every == 0:
      time_of_progress_save = int(time.time())
      save_path = progress_save_path + "mission_" + str(i+1) + "_of_" + str(len(flight_ids)) + "_" + str(time_of_progress_save) + ".csv"
      snapshot().to_csv(save_path, index=True)
      last_progress_save = f"The last progress save was on the {i+1}th mission at {str(time_of_progress_save)}"

  # Final Saving
  df_flight_track = snapshot()
  save_path = save_path_root + "final_" + str(int(time.time())) + ".csv"
  df_flight_track.to_csv(save_path, index=True)

  print("GREAT SUCCESS!!!!!!!!!!")
  return df_flight_track

In [55]:
# Output prefix: no need to end with .csv, leave the trailing "_" for the timestamp
save_path_root = "../data/raw/df_flight_tracks_20240101_20241103_"
# De-duplicated positions file that supplies the fr24_id values to request
sva_flight_positions_path = "../data/raw/sva_flight_positions_reduced.csv"

# NOTE: running this cell sends one live API request per flight id in the positions file
rega_flight_tracks = sva_flight_track(flightradar_key, save_path_root, sva_flight_positions_path)

Request number 5462 of 5462, requesting for flight_id 37d4b992
The last progress save was on the 5400th mission at 1738858465
[
    {
        "fr24_id": "37d4b992",
        "tracks": [
            {
                "timestamp": "2024-11-03T14:30:48Z",
                "lat": 63.19818,
                "lon": 14.49448,
                "alt": 0,
                "gspeed": 0,
                "vspeed": 0,
                "track": 295,
                "squawk": "0",
                "callsign": "SWE22V",
                "source": "ADSB"
            },
            {
                "timestamp": "2024-11-03T15:41:09Z",
                "lat": 63.19812,
                "lon": 14.49416,
                "alt": 0,
                "gspeed": 3,
                "vspeed": 0,
                "track": 233,
                "squawk": "0",
                "callsign": "SWE22V",
                "source": "ADSB"
            },
            {
                "timestamp": "2024-11-03T15:41:23Z",
                "lat

In [56]:
# Read the saved track file back in, using 'flight_id' as the index
sva_flight_tracks = pd.read_csv(
    "../data/raw/df_flight_tracks_20240101_20241103_final_1738858556.csv",
    index_col="flight_id",
)

# Report how many rows (one per 'flight_id') were loaded, then show the frame
print(f"The number of individual mission tracks is: {len(sva_flight_tracks)}")
sva_flight_tracks

The number of individual mission tracks is: 5452


Unnamed: 0_level_0,mission
flight_id,Unnamed: 1_level_1
33734149,"[{'timestamp': '2024-01-01T00:26:42Z', 'lat': ..."
33734f15,"[{'timestamp': '2024-01-01T00:58:26Z', 'lat': ..."
33735909,"[{'timestamp': '2024-01-01T01:22:42Z', 'lat': ..."
33737cab,"[{'timestamp': '2024-01-01T02:58:41Z', 'lat': ..."
33738170,"[{'timestamp': '2024-01-01T03:11:58Z', 'lat': ..."
...,...
37d4219d,"[{'timestamp': '2024-11-03T10:45:02Z', 'lat': ..."
37d452e7,"[{'timestamp': '2024-11-03T12:09:18Z', 'lat': ..."
37d47127,"[{'timestamp': '2024-11-03T12:56:55Z', 'lat': ..."
37d48046,"[{'timestamp': '2024-11-03T13:17:45Z', 'lat': ..."


In [60]:
# Pull the stored track for flight_id '33734f15' (flight_id is the index).
# Read back from CSV, the 'mission' cell is a single string.
mission_data = sva_flight_tracks.loc['33734f15', 'mission']
print(mission_data)

[{'timestamp': '2024-01-01T00:58:26Z', 'lat': 63.7957, 'lon': 20.28351, 'alt': 0, 'gspeed': 1, 'vspeed': 0, 'track': 222, 'squawk': '0', 'callsign': 'SWE04W', 'source': 'ADSB'}, {'timestamp': '2024-01-01T01:01:09Z', 'lat': 63.79534, 'lon': 20.28299, 'alt': 0, 'gspeed': 3, 'vspeed': 0, 'track': 165, 'squawk': '6003', 'callsign': 'SWE04W', 'source': 'ADSB'}, {'timestamp': '2024-01-01T01:01:31Z', 'lat': 63.79507, 'lon': 20.28356, 'alt': 0, 'gspeed': 4, 'vspeed': 0, 'track': 135, 'squawk': '6003', 'callsign': 'SWE04W', 'source': 'ADSB'}, {'timestamp': '2024-01-01T01:01:51Z', 'lat': 63.7948, 'lon': 20.28411, 'alt': 0, 'gspeed': 4, 'vspeed': 0, 'track': 137, 'squawk': '6003', 'callsign': 'SWE04W', 'source': 'ADSB'}, {'timestamp': '2024-01-01T01:01:58Z', 'lat': 63.79469, 'lon': 20.28433, 'alt': 0, 'gspeed': 5, 'vspeed': 0, 'track': 140, 'squawk': '6003', 'callsign': 'SWE04W', 'source': 'ADSB'}, {'timestamp': '2024-01-01T01:02:05Z', 'lat': 63.79458, 'lon': 20.28455, 'alt': 0, 'gspeed': 5, 'vsp

In [61]:
# Convert the string to a list of dictionaries.
# The CSV cell holds the Python repr of the list (single-quoted keys and values),
# so it is parsed as a Python literal. Swapping the quotes and calling json.loads
# fails as soon as a value is None/True/False or a string contains an apostrophe.
mission_data_list = ast.literal_eval(mission_data)

# Convert the list of dictionaries to a DataFrame
mission_data_df = pd.DataFrame(mission_data_list)
mission_data_df

Unnamed: 0,timestamp,lat,lon,alt,gspeed,vspeed,track,squawk,callsign,source
0,2024-01-01T00:58:26Z,63.79570,20.28351,0,1,0,222,0,SWE04W,ADSB
1,2024-01-01T01:01:09Z,63.79534,20.28299,0,3,0,165,6003,SWE04W,ADSB
2,2024-01-01T01:01:31Z,63.79507,20.28356,0,4,0,135,6003,SWE04W,ADSB
3,2024-01-01T01:01:51Z,63.79480,20.28411,0,4,0,137,6003,SWE04W,ADSB
4,2024-01-01T01:01:58Z,63.79469,20.28433,0,5,0,140,6003,SWE04W,ADSB
...,...,...,...,...,...,...,...,...,...,...
272,2024-01-01T01:52:37Z,63.79205,20.28662,0,9,0,87,6003,SWE04W,ADSB
273,2024-01-01T01:52:40Z,63.79209,20.28691,0,9,0,67,6003,SWE04W,ADSB
274,2024-01-01T01:53:02Z,63.79288,20.28691,0,9,0,343,6003,SWE04W,ADSB
275,2024-01-01T01:53:08Z,63.79312,20.28678,0,10,0,348,6003,SWE04W,ADSB
