# **Flightradar24 API requests**

#### Import necessary libraries

In [None]:
import os
import yaml
import pandas as pd
import requests
import json
import time
import random
from datetime import datetime, timedelta

#### Create a yaml file with your API keys

To create a 'project_keys.yml' file with the necessary API keys, follow these steps:

1. Open a text editor (e.g., Notepad, VSCode).
2. Copy the following content and paste it into the editor:

api_keys:

  Flightradar24_flight-lab-01: YOUR_API_KEY

3. Replace 'YOUR_API_KEY' with your actual API key.
4. Save the file as 'project_keys.yml' in the desired directory.

Example of the YAML structure:
api_keys:
  Flightradar24_flight-lab-01: YOUR_API_KEY

#### Define function to load API keys from a YAML file

In [3]:
def load_api_keys(yaml_path=None):
    """
    Load API keys from a YAML file.

    :param yaml_path: Path to the YAML file (optional). If not provided, it defaults to "../project_keys.yml".
    :return: A dictionary containing the API keys found under the top-level 'api_keys' entry.
             An empty dictionary is returned when the file is empty or the entry is missing or null.
    :raises FileNotFoundError: If no file exists at the given path.
    :raises Exception: If the file content cannot be parsed as YAML.
    :raises ValueError: If the top level of the YAML document is not a mapping.
    """
    # If no file path is provided, default to "../project_keys.yml"
    if not yaml_path:
        yaml_path = "../project_keys.yml"

    try:
        # Read with an explicit encoding so the result does not depend on the OS locale
        with open(yaml_path, 'r', encoding='utf-8') as file:
            data = yaml.safe_load(file)  # Parse the YAML file

    except FileNotFoundError as err:
        # Re-raise with a message that names the path that was tried
        raise FileNotFoundError(f"API keys file not found at: {yaml_path}") from err

    except yaml.YAMLError as e:
        # Keep the original parser error attached as the cause
        raise Exception(f"Error parsing YAML file: {e}") from e

    # safe_load returns None for an empty document
    if data is None:
        return {}

    if not isinstance(data, dict):
        raise ValueError(
            f"Expected a mapping at the top level of {yaml_path}, got {type(data).__name__}"
        )

    # 'api_keys:' with no value parses to None; normalise that to an empty dictionary
    return data.get('api_keys') or {}


#### Load the routes that will be used as inputs for the API request

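`df_routes` should hold the routes you are interested in. It is read from a CSV file whose first column contains the departure and whose second column contains the arrival of each route.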

In [None]:
# Path to the CSV file that lists the routes of interest (replace with your own file)
file_path = 'yourcsv.csv'

# Read the CSV file into a DataFrame
df_routes = pd.read_csv(file_path)

# Build the 'departure-arrival' strings used as the 'routes' parameter of the API request.
# The columns are addressed by position (first = departure, second = arrival) through
# .iloc, because integer keys on a label-indexed row (row[0]) are deprecated in pandas.
routes = [
    f'{departure}-{arrival}'
    for departure, arrival in zip(df_routes.iloc[:, 0], df_routes.iloc[:, 1])
]

#### Request flight positions for a specific time period and specific routes

This code extracts flight position data from the Flightradar24 API for a given set of routes and dates, using an API token for authentication. For each route and each day in the date range, it generates one timestamp per hour, queries the API with these timestamps in random order, and stores the first non-empty response in a DataFrame. Progress is saved to a CSV file after every route/day combination and whenever a request fails, so that data already collected is not lost. Finally, unnecessary columns are removed and the full dataset is saved to a CSV file.

In [None]:
import pandas as pd
from datetime import datetime
import random
import time
import requests
import json


def _clear_progress_output():
    """Clear the previous notebook cell output; do nothing when IPython is unavailable."""
    try:
        from IPython.display import clear_output
    except ImportError:
        return
    clear_output(wait=True)


def _hourly_timestamps(date):
    """Return one Unix timestamp per hour of the given day, in random order."""
    # NOTE(review): the naive datetime is interpreted in the machine's local time zone,
    # so the hours are local hours - confirm whether UTC days are intended.
    timestamps = [
        int(datetime(date.year, date.month, date.day, hour).timestamp())
        for hour in range(24)
    ]
    # The caller stops at the first timestamp that returns data, so shuffling
    # varies which hour of the day ends up being sampled.
    random.shuffle(timestamps)
    return timestamps


def _save_snapshot(df, label):
    """Write the DataFrame to '../df_flight_position_<label>.csv'."""
    df.to_csv("../df_flight_position_" + label + ".csv", index=False)


def fetch_flight_data(start_date_str, end_date_str, routes):
    """
    Fetch flight data from the Flightradar24 API for a specified date range and routes.

    For every route and every day in the range, the hourly timestamps of that day are
    queried in random order until one request returns data; that result is appended to
    the collected data. Snapshots of the collected data are written to CSV files in the
    parent directory after each route/day combination, after each failed request, and
    once more at the end.

    :param start_date_str: The start date as a string in 'YYYY-MM-DD' format.
    :param end_date_str: The end date as a string in 'YYYY-MM-DD' format.
    :param routes: List of routes to query for.
    :return: DataFrame containing the flight position data, without the columns
             "lat", "lon", "track", "alt", "gspeed", "squawk" and "vspeed".
    :raises ValueError: If the key 'Flightradar24_flight-lab-01' is missing from the API keys file,
                        or if a date string does not match 'YYYY-MM-DD'.
    """

    # Load the Flightradar24 key and fail early instead of sending 'Bearer None'
    api_token = load_api_keys().get('Flightradar24_flight-lab-01')
    if not api_token:
        raise ValueError("API key 'Flightradar24_flight-lab-01' not found in the API keys file")

    # Define the base URL for the Flightradar24 API
    url = "https://fr24api.flightradar24.com/api/historic/flight-positions/full"

    # Set parameters for the API request
    params = {
        'categories': 'P',  # Filter for only passenger flights
        'limit': 1  # Limit to one result per request
    }

    # Define the headers for the API request, including authorization
    headers = {
        'Accept': 'application/json',
        'Accept-Version': 'v1',
        'Authorization': f'Bearer {api_token}',
    }

    # Convert the string inputs into datetime objects
    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')

    # Generate a list of all dates within the range
    dates = pd.date_range(start=start_date, end=end_date, freq='D')

    # One "query" is one route/day combination (it may take several HTTP requests)
    expected_queries = len(routes) * len(dates)
    query_no = 0

    # Create an empty dataframe to store flight position data
    df_flight_position = pd.DataFrame()

    for i, route in enumerate(routes):
        # Set the route in the parameters for each API request
        params['routes'] = route

        for j, date in enumerate(dates):
            query_no += 1

            for timestamp in _hourly_timestamps(date):
                time.sleep(0.67)  # Add a short delay between requests to avoid hitting the API too quickly
                params['timestamp'] = timestamp

                # Replace the previous progress output in the notebook
                _clear_progress_output()

                # Print progress information to keep track of the script's status
                print(f"Date range from {start_date_str} to {end_date_str}")
                print(f"Query {query_no} of expected {expected_queries}")
                print(f"Route {i+1} of {len(routes)} and Date {j+1} of {len(dates)}")
                print(f"Requesting for route {route}, on date {date}, at timestamp {timestamp}")

                try:
                    # A timeout keeps a stalled connection from blocking the run forever;
                    # a timeout error is handled by the generic handler below.
                    response = requests.get(url, headers=headers, params=params, timeout=30)
                    response.raise_for_status()  # Check for HTTP errors

                    # Parse the response JSON data
                    data = response.json()

                    # Check if the response contains the expected flight data
                    records = data.get("data") if isinstance(data, dict) else None
                    if isinstance(records, list) and records:
                        print(json.dumps(data, indent=4))  # Optional debugging line to view raw data
                        df_flight_position = pd.concat(
                            [df_flight_position, pd.DataFrame(records)], ignore_index=True
                        )
                        break  # Data found for this route and day; skip the remaining hours

                    print(f"No data found for route {route} at timestamp {timestamp}")

                except requests.exceptions.HTTPError as http_err:
                    # Handle HTTP errors (e.g., 404, 500) and keep going with the next timestamp
                    print(f"HTTP error occurred: {http_err}")
                    _save_snapshot(df_flight_position, str(int(time.time())) + "_http_error_v2")

                except Exception as err:
                    # Deliberately broad: any other failure (e.g., network issues) must not
                    # abort the whole run; save what was collected and continue.
                    print(f"An error occurred: {err}")
                    _save_snapshot(df_flight_position, str(int(time.time())) + "_error_v2")

            # Save progress after processing each route and date
            _save_snapshot(df_flight_position, str(route) + "_" + str(int(time.time())))

    # Drop unnecessary columns from the final dataset; errors="ignore" avoids a
    # KeyError when no data was collected or a column is absent from the responses
    df_flight_position = df_flight_position.drop(
        columns=["lat", "lon", "track", "alt", "gspeed", "squawk", "vspeed"],
        errors="ignore",
    )

    # Final save of the flight position data after processing all routes and dates
    _save_snapshot(df_flight_position, "final_" + str(int(time.time())))

    # Print a success message
    print("GREAT SUCCESS!!!!!!!!!!")

    return df_flight_position

# Example run: query every day of 2024 for the routes loaded from the CSV file
fetch_flight_data(
    start_date_str='2024-01-01',
    end_date_str='2024-12-31',
    routes=routes,
)
