# Fuentes

Los datos utilizados en este estudio se obtuvieron de Amadeus, una API de datos de vuelos. Incluyen vuelos desde todos los países hacia todos los países, con fechas que van desde junio de 2024 hasta junio de 2025. Debido a las limitaciones de la API, solo se pudo realizar un máximo de 2000 solicitudes a la vez, lo que prolongó el proceso de extracción. En total, se recopilaron más de 24,000 registros de vuelos durante este periodo.

In [28]:
# urllib3 is imported only to silence its InsecureRequestWarning, which is
# emitted for HTTPS requests made with certificate verification turned off
# (the requests calls in this file pass verify=False).
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [29]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import random


Función para obtener el token de acceso a la API de Amadeus, que proporciona la información de los vuelos.

In [52]:
# Amadeus API credentials.
# SECURITY: secrets should not be committed to source. The values are taken
# from the AMADEUS_CLIENT_ID / AMADEUS_CLIENT_SECRET environment variables when
# those are set; the literals below remain only as a backward-compatible
# fallback and should be rotated and then removed.
import os

client_id = os.environ.get('AMADEUS_CLIENT_ID', 'JhoB1Ptw9TZAMtXa7KevcLvcFAMaVbtd')
client_secret = os.environ.get('AMADEUS_CLIENT_SECRET', '38dmfIdj6bIi33DW')

# Function to obtain OAuth token
def get_access_token(client_id, client_secret, timeout=30):
    """Request an OAuth2 access token from the Amadeus test environment.

    Args:
        client_id: API key sent as the ``client_id`` form field.
        client_secret: API secret sent as the ``client_secret`` form field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The ``access_token`` string from the JSON response, or ``None`` when
        the request fails or the response does not contain a token. Failures
        are reported with ``print`` rather than raised.
    """
    url = "https://test.api.amadeus.com/v1/security/oauth2/token"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    data = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret
    }
    try:
        # NOTE(review): verify=False turns off TLS certificate verification
        # while credentials are being sent. Kept to preserve the existing
        # behaviour; confirm whether it is still required.
        response = requests.post(
            url, headers=headers, data=data, verify=False, timeout=timeout
        )
        response.raise_for_status()
        return response.json()['access_token']
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except requests.exceptions.RequestException as req_err:
        print(f"Request exception occurred: {req_err}")
    except (KeyError, TypeError, ValueError) as parse_err:
        # Body was not JSON, not a JSON object, or had no 'access_token' field.
        print(f"Unexpected token response: {parse_err!r}")
    return None


Función para solicitar la información de los vuelos.

In [31]:

# Function to fetch flight offers
def fetch_flight_offers(origin, destination, departure_date, adults=1, timeout=30):
    """Query the Amadeus test flight-offers endpoint for one route and date.

    Args:
        origin: Value sent as ``originLocationCode``.
        destination: Value sent as ``destinationLocationCode``.
        departure_date: Value sent as ``departureDate``.
        adults: Value sent as ``adults``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The non-empty ``data`` entry of the JSON response, or ``None`` when no
        token could be obtained, the request failed, or no offers came back.
        Request failures (including HTTP error statuses) are reported with
        ``print`` and are not raised to the caller.
    """
    # NOTE: a fresh token is requested on every call, so each search costs two
    # HTTP requests.
    access_token = get_access_token(client_id, client_secret)

    if not access_token:
        print("Failed to obtain access token. Unable to fetch flight offers.")
        return None

    url = "https://test.api.amadeus.com/v2/shopping/flight-offers"
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json'
    }
    params = {
        'originLocationCode': origin,
        'destinationLocationCode': destination,
        'departureDate': departure_date,
        'adults': adults,
        'currencyCode': 'USD'
    }

    try:
        # NOTE(review): verify=False turns off TLS certificate verification.
        # Kept to preserve the existing behaviour; confirm it is still needed.
        response = requests.get(
            url, headers=headers, params=params, verify=False, timeout=timeout
        )
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        return None
    except requests.exceptions.RequestException as req_err:
        print(f"Request exception occurred: {req_err}")
        return None
    except ValueError as json_err:
        # Body was not valid JSON.
        print(f"Invalid JSON in flight offers response: {json_err}")
        return None

    # Only a JSON object can carry the 'data' list of offers.
    offers = payload.get('data') if isinstance(payload, dict) else None
    if offers:
        print("Flight offer retrieved.")
        return offers

    print("No flight offers data found.")
    return None

Función para calcular el número de escalas (stops).

In [32]:

def calculate_num_stops(flight):
    """Return the number of stops on the first itinerary of a flight offer.

    Every segment after the first counts as one stop, so an itinerary with a
    single segment has zero stops.
    """
    first_itinerary = flight['itineraries'][0]
    return len(first_itinerary['segments']) - 1


Función para convertir la duración a número de horas.

In [33]:
def parse_duration(duration):
    """Convert a duration string such as ``"PT2H10M"`` to a number of hours.

    Accepted components are days (``D``), hours (``H``), minutes (``M``) and
    seconds (``S``); each is optional, as are the leading ``P`` and the ``T``
    separator. A bare ``M`` is always read as minutes. Years, months and weeks
    are not supported.

    Unlike the previous character-by-character parser, durations with only
    hours (``"PT2H"``) or only minutes (``"PT45M"``) are converted instead of
    yielding ``None``, and a day component is no longer merged into the hour
    digits.

    Args:
        duration: Duration string.

    Returns:
        The duration in hours as a float, or ``None`` if ``duration`` is not a
        string or contains no recognisable component.
    """
    import re  # function-scope import keeps this block self-contained

    if not isinstance(duration, str):
        return None

    match = re.fullmatch(
        r'P?(?:(\d+)D)?T?(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?',
        duration.strip().upper(),
    )
    # The pattern also matches "" / "P" / "PT"; require at least one component.
    if match is None or not any(match.groups()):
        return None

    days, hours, minutes, seconds = (
        float(part) if part else 0.0 for part in match.groups()
    )
    return days * 24.0 + hours + minutes / 60.0 + seconds / 3600.0

Diccionario de países con sus aeropuertos más recurrentes.


In [34]:
# Maps a country name to the IATA code of one airport used to represent it.
# Entry order matters: the collection script resumes by index into the
# product of this dict's items, so entries must not be reordered.
airports_dict = {
    "Afghanistan": "KBL",
    "Albania": "TIA",
    "Algeria": "ALG",
    "Andorra": "LEU",
    "Angola": "LAD",
    "Antigua and Barbuda": "ANU",
    "Argentina": "EZE",
    "Armenia": "EVN",
    "Australia": "SYD",
    "Austria": "VIE",
    "Azerbaijan": "GYD",
    "Bahamas": "NAS",
    "Bahrain": "BAH",
    "Bangladesh": "DAC",
    "Barbados": "BGI",
    "Belarus": "MSQ",
    "Belgium": "BRU",
    "Belize": "BZE",
    "Benin": "COO",
    "Bhutan": "PBH",
    "Bolivia": "VVI",
    "Bosnia and Herzegovina": "SJJ",
    "Botswana": "GBE",
    "Brazil": "GRU",
    "Brunei Darussalam": "BWN",
    "Bulgaria": "SOF",
    "Burkina Faso": "OUA",
    "Burundi": "BJM",
    "Cabo Verde": "SID",
    "Cambodia": "PNH",
    "Cameroon": "DLA",
    "Canada": "YYZ",
    "Central African Republic": "BGF",
    "Chad": "NDJ",
    "Chile": "SCL",
    "China": "PEK",
    "Colombia": "BOG",
    "Comoros": "HAH",
    # NOTE(review): FIH is Kinshasa (DR Congo); Brazzaville would be BZV.
    # A second, identical "Congo": "FIH" line further down was removed: a
    # repeated key in a dict literal only overwrites the first one.
    "Congo": "FIH",
    "Costa Rica": "SJO",
    "Ivory Coast": "ABJ",
    "Croatia": "ZAG",
    "Cuba": "HAV",
    "Cyprus": "LCA",
    "Czech Republic": "PRG",
    # Was keyed "South Korea": FNJ is Pyongyang, and South Korea already has
    # its own entry under "Republic of Korea".
    "North Korea": "FNJ",
    "Denmark": "CPH",
    "Djibouti": "JIB",
    "Dominica": "DOM",
    "Dominican Republic": "SDQ",
    "Ecuador": "UIO",
    "Egypt": "CAI",
    "El Salvador": "SAL",
    "Equatorial Guinea": "SSG",
    "Eritrea": "ASM",
    "Estonia": "TLL",
    "Eswatini": "MTS",
    "Ethiopia": "ADD",
    "Fiji": "NAN",
    "Finland": "HEL",
    "France": "CDG",
    "Gabon": "LBV",
    "Gambia": "BJL",
    "Georgia": "TBS",
    "Germany": "FRA",
    "Ghana": "ACC",
    "Greece": "ATH",
    "Grenada": "GND",
    "Guatemala": "GUA",
    "Guinea": "CKY",
    "Guinea Bissau": "OXB",
    "Guyana": "GEO",
    "Haiti": "PAP",
    "Honduras": "SAP",
    "Hungary": "BUD",
    "Iceland": "KEF",
    "India": "DEL",
    "Indonesia": "CGK",
    "Iran": "THR",
    "Iraq": "BGW",
    "Ireland": "DUB",
    "Israel": "TLV",
    "Italy": "FCO",
    # Was "SIA", which is not a Jamaican airport code; presumably the initials
    # of Sangster International Airport, whose IATA code is MBJ.
    "Jamaica": "MBJ",
    "Japan": "NRT",
    "Jordan": "AMM",
    "Kazakhstan": "ALA",
    "Kenya": "NBO",
    "Kiribati": "TRW",
    "Kuwait": "KWI",
    "Kyrgyzstan": "FRU",
    "Lao": "VTE",
    "Latvia": "RIX",
    "Lebanon": "BEY",
    "Lesotho": "MSU",
    "Liberia": "ROB",
    "Libya": "TIP",
    # Was "LI", which is not a three-letter IATA code. Liechtenstein has no
    # airport; ACH (St. Gallen-Altenrhein) is used as the nearest one.
    # NOTE(review): confirm this choice.
    "Liechtenstein": "ACH",
    "Lithuania": "VNO",
    "Luxembourg": "LUX",
    "Madagascar": "TNR",
    "Malawi": "LLW",
    "Malaysia": "KUL",
    "Maldives": "MLE",
    "Mali": "BKO",
    "Malta": "MLA",
    "Marshall Islands": "MAJ",
    "Mauritania": "NKC",
    "Mauritius": "MRU",
    "Mexico": "MEX",
    "Micronesia": "KSA",
    # Was "MUC" (Munich). MCM is the Monaco heliport.
    # NOTE(review): NCE (Nice) may be preferable if scheduled flights are needed.
    "Monaco": "MCM",
    "Mongolia": "ULN",
    "Montenegro": "TIV",
    "Morocco": "CMN",
    # Was "LUM", which is not an airport in Mozambique; MPM is Maputo.
    "Mozambique": "MPM",
    "Myanmar": "RGN",
    "Namibia": "WDH",
    "Nauru": "INU",
    "Nepal": "LUA",
    "Netherlands": "AMS",
    "New Zealand": "AKL",
    "Nicaragua": "MGA",
    "Niger": "NIM",
    "Nigeria": "LOS",
    "North Macedonia": "SKP",
    "Norway": "OSL",
    "Oman": "MCT",
    "Pakistan": "KHI",
    "Palau": "ROR",
    "Panama": "PTY",
    "Papua New Guinea": "POM",
    "Paraguay": "ASU",
    "Peru": "LIM",
    "Philippines": "MNL",
    "Poland": "WAW",
    "Portugal": "LIS",
    "Qatar": "DOH",
    # Was "IIA", presumably the initials of Incheon International Airport,
    # whose IATA code is ICN.
    "Republic of Korea": "ICN",
    "Republic of Moldova": "KIV",
    "Romania": "OTP",
    "Russian Federation": "SVO",
    "Rwanda": "KGL",
    "Saint Kitts and Nevis": "SKB",
    "Saint Lucia": "UVF",
    "Saint Vincent and the Grenadines": "SVD",
    "Samoa": "APW",
    "San Marino": "RMI",
    "Sao Tome and Principe": "TMS",
    "Saudi Arabia": "DMM",
    "Senegal": "DSS",
    "Serbia": "BEG",
    "Seychelles": "SEZ",
    "Sierra Leone": "FNA",
    "Singapore": "SIN",
    "Slovakia": "BTS",
    "Slovenia": "LJU",
    "Solomon Islands": "HIR",
    "Somalia": "MGQ",
    "South Africa": "JNB",
    "South Sudan": "JUB",
    "Spain": "MAD",
    "Sri Lanka": "CMB",
    "Sudan": "KRT",
    "Suriname": "PBM",
    "Sweden": "ARN",
    "Switzerland": "ZRH",
    "Syrian Arab Republic": "DAM",
    "Tajikistan": "DYU",
    "Thailand": "BKK",
    "Timor-Leste": "DIL",
    "Togo": "LFW",
    "Tonga": "TBU",
    "Trinidad and Tobago": "POS",
    "Tunisia": "TUN",
    "Turkey": "IST",
    "Turkmenistan": "ASB",
    "Tuvalu": "FUN",
    "Uganda": "EBB",
    "Ukraine": "KBP",
    "United Arab Emirates": "DXB",
    "United Kingdom": "LHR",
    "United Republic of Tanzania": "DAR",
    "United States of America": "DEN",
    "Uruguay": "MVD",
    "Uzbekistan": "TAS",
    "Vanuatu": "VLI",
    "Venezuela": "CCS",
    "Viet Nam": "SGN",
    "Yemen": "SAH",
    "Zambia": "LUN",
    "Zimbabwe": "HRE"
}


Se importa `product` de itertools para generar las combinaciones de los países.

In [36]:

import time
import random
import pandas as pd
from itertools import product
from datetime import datetime, timedelta


Función para obtener una fecha aleatoria dentro de un rango.

In [37]:
def get_random_date(start, end):
    """Return ``start`` shifted forward by a random whole number of days.

    The offset is drawn with ``random.randrange`` from the number of whole
    days between ``start`` and ``end``, so it is at least 0 and strictly less
    than that number of days.
    """
    span_in_days = (end - start).days
    offset = timedelta(days=random.randrange(span_in_days))
    return start + offset


Para obtener la información deseada de los vuelos, se implementó un proceso que hace uso de una API de datos de vuelos llamada Amadeus. El proceso realiza las llamadas en lotes de seis para obtener datos como el número de paradas, la fecha y la hora de salida, la duración del vuelo, la aerolínea, el país de salida, el país de destino y el precio. Debido a las restricciones de la API, solo se pueden hacer hasta 2000 solicitudes a la vez, por lo que se dividió la obtención de datos en varias ejecuciones. Después de cada lote de llamadas, se incluyó un periodo de espera (sleep) para evitar sobrecargar la API y cumplir con las políticas de uso establecidas por el proveedor de servicios.

In [53]:
import time
import random
import pandas as pd
from itertools import product
from datetime import datetime, timedelta
import requests

# Number of API calls per batch (a pause follows each batch)
num_api_calls = 6
# Pause after each batch of API calls
sleep_duration = 1  # in seconds
# Total number of API calls allowed in this run
total_api_calls = 2000

# Collected flight records (one dict per sampled offer)
data = []

# API calls made in the current batch
api_call_count = 0

# API calls still available in this run
remaining_api_calls = total_api_calls

# Index into `combinations` at which this run starts
last_processed_index = 26131

# Date range from which departure dates are sampled
start_date = datetime.now() + timedelta(days=3)  # 3 days from today
end_date = datetime(2025, 6, 27)


def get_random_date(start, end):
    """Return `start` plus a random number of whole days, strictly before `end`."""
    delta = end - start
    random_days = random.randrange(delta.days)
    return start + timedelta(days=random_days)


def _build_flight_record(offer, origin_country, destination_country):
    """Flatten one flight offer into the row stored in `data`."""
    first_itinerary = offer['itineraries'][0]
    first_segment = first_itinerary['segments'][0]
    return {
        'Hora de Salida': first_segment['departure']['at'],
        # Duration converted to hours as a float
        'Duración': parse_duration(first_itinerary['duration']),
        'Precio': offer['price']['grandTotal'],
        'Código de Aerolínea': first_segment['carrierCode'],
        'Número de Paradas': calculate_num_stops(offer),
        'País de Origen': origin_country,
        'País de Destino': destination_country,
    }


# Every ordered (origin, destination) pair of (country, airport code) entries
combinations = list(product(airports_dict.items(), airports_dict.items()))

try:
    # random.randrange() rejects an empty range; fail once with a clear message
    # instead of raising on every combination.
    if (end_date - start_date).days <= 0:
        raise ValueError(
            f"El rango de fechas está vacío: start_date ({start_date:%Y-%m-%d}) "
            f"no es anterior a end_date ({end_date:%Y-%m-%d})"
        )

    # A single pass over the remaining combinations. The loop ends when the
    # combinations run out, the call budget is spent, or a 429 is reported, so
    # it cannot spin once the combinations are exhausted.
    for index in range(last_processed_index, len(combinations)):
        if remaining_api_calls <= 0:
            break

        (origin_country, origin_code), (destination_country, destination_code) = combinations[index]
        if origin_country == destination_country:
            last_processed_index = index + 1
            continue

        rate_limited = False
        try:
            # Sample a random departure date within the configured range
            date = get_random_date(start_date, end_date).strftime('%Y-%m-%d')
            flight_offers = fetch_flight_offers(origin_code, destination_code, date)
            # Every request counts against both the run budget and the batch,
            # whether or not it returned offers.
            remaining_api_calls -= 1
            api_call_count += 1

            if flight_offers:
                # Keep one randomly chosen offer per route
                random_flight = random.choice(flight_offers)
                data.append(
                    _build_flight_record(random_flight, origin_country, destination_country)
                )
        except requests.exceptions.HTTPError as http_err:
            # Only reachable if fetch_flight_offers lets HTTP errors propagate.
            status_code = getattr(http_err.response, 'status_code', None)
            if status_code == 429:
                print(f"Error 429 encontrado en el índice de combinación: {index}")
                rate_limited = True
            else:
                print(f"Se produjo un error HTTP: {http_err}")
        except Exception as err:
            print(f"Error encontrado: {err}")

        if rate_limited:
            # Leave last_processed_index unchanged so this pair is retried
            # by the next run.
            break

        last_processed_index = index + 1

        # Pause once a full batch of calls has been made
        if api_call_count >= num_api_calls:
            time.sleep(sleep_duration)
            api_call_count = 0
except Exception as e:
    print(f"Se encontró un error: {e}")

# Build a DataFrame once the calls are finished or an error stopped the run
flights_df = pd.DataFrame(data)
print(flights_df)

# Save the resume index to a file
with open('last_processed_index.txt', 'w') as f:
    f.write(str(last_processed_index))

# Save the remaining API call budget to a file
with open('remaining_api_calls.txt', 'w') as f:
    f.write(str(remaining_api_calls))


No flight offers data found.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
No flight offers data found.
Flight offer retrieved.
Flight offer retrieved.
No flight offers data found.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
No flight offers data found.
No flight offers data found.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
No flight offers data found.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight offer retrieved.
Flight off

Los resultados se exportan de un DataFrame a un archivo CSV.

In [54]:
# Bind this run's results to a second name (same DataFrame object, not a copy).
flightsdf14=flights_df

# Write the results to 'flightsdf14.csv' without the DataFrame index column.
flightsdf14.to_csv('flightsdf14.csv', index=False)


