In [1]:
import requests
import pandas as pd
import joblib
import numpy as np
from datetime import datetime

# ==========================
# 7Ô∏è‚É£  PR√âDICTION EN TEMPS R√âEL (API AviationStack)
# ==========================

API_KEY = "3057f8d1b6d9283ee6a287082a3388af"  
JFK_IATA = "JFK"  # Code IATA pour filtrer les vols au d√©part de JFK

# Charger le mod√®le entra√Æn√©
pipeline = joblib.load("best_flight_delay_model2.pkl")

# üõ´ **1. R√©cup√©ration des vols en temps r√©el**
def fetch_live_flights():
    print("\nüîÑ R√©cup√©ration des vols en temps r√©el...")
    all_flights = []
    limit = 100  # Nombre maximal de r√©sultats par requ√™te
    offset = 0   # D√©but de la pagination

    while True:
        url = f"http://api.aviationstack.com/v1/flights?access_key={API_KEY}&dep_iata={JFK_IATA}&flight_status=active&limit={limit}&offset={offset}"
        response = requests.get(url)

        if response.status_code != 200:
            print(f"‚ö†Ô∏è Erreur API : {response.status_code}")
            break

        data = response.json().get("data", [])

        if not data:  # Arr√™ter la boucle si plus de r√©sultats
            break

        all_flights.extend(data)
        offset += limit  # Passer √† la page suivante
        print(f"‚úÖ {len(all_flights)} vols r√©cup√©r√©s jusqu'√† pr√©sent...")

    print(f"‚úÖ {len(all_flights)} vols au total r√©cup√©r√©s avec succ√®s.")
    return all_flights

# üîÑ **2. Transformation des donn√©es en format utilisable**
def prepare_live_data(flight_data, pipeline):
    if not flight_data:
        return None
    
    df_live = pd.DataFrame(flight_data)
    
    df_live['FL_DATE'] = pd.to_datetime(df_live['flight_date'])
    df_live['YEAR'] = df_live['FL_DATE'].dt.year
    df_live['MONTH'] = df_live['FL_DATE'].dt.month
    df_live['DAY'] = df_live['FL_DATE'].dt.day
    df_live['DAY_OF_WEEK'] = df_live['FL_DATE'].dt.dayofweek
    df_live['IS_WEEKEND'] = df_live['DAY_OF_WEEK'].apply(lambda x: 1 if x in [5, 6] else 0)
    
    df_live['SEASON'] = df_live['MONTH'].map({12: 'Winter', 1: 'Winter', 2: 'Winter',
                                              3: 'Spring', 4: 'Spring', 5: 'Spring',
                                              6: 'Summer', 7: 'Summer', 8: 'Summer',
                                              9: 'Autumn', 10: 'Autumn', 11: 'Autumn'})
    
    df_live['AIRLINE_CODE'] = df_live['airline'].apply(lambda x: x.get('iata', 'UNKNOWN'))
    df_live['DEST'] = df_live['arrival'].apply(lambda x: x.get('iata', 'UNKNOWN'))
    df_live['DEP_DELAY'] = df_live['departure'].apply(lambda x: x.get('delay', 0))
    df_live['HOUR'] = pd.to_datetime(df_live['departure'].apply(lambda x: x.get('scheduled', ''))).dt.hour
    
    def convert_to_hhmm(timestamp):
        if pd.isna(timestamp) or timestamp == "":
            return np.nan
        dt = pd.to_datetime(timestamp, errors='coerce')
        return dt.hour * 100 + dt.minute if not pd.isna(dt) else np.nan
    
    df_live['CRS_DEP_TIME'] = df_live['departure'].apply(lambda x: convert_to_hhmm(x.get('scheduled', '')))
    df_live['DEP_TIME'] = df_live['departure'].apply(lambda x: convert_to_hhmm(x.get('actual', x.get('estimated', x.get('scheduled', 0)))))
    df_live['WHEELS_OFF'] = df_live['departure'].apply(lambda x: convert_to_hhmm(x.get('actual_runway', x.get('estimated_runway', x.get('estimated', x.get('scheduled', 0))))))
    df_live['WHEELS_ON'] = df_live['arrival'].apply(lambda x: convert_to_hhmm(x.get('actual_runway', x.get('estimated_runway', x.get('estimated', x.get('scheduled', 0))))))
    
    def calculate_duration(start, end):
        if pd.isna(start) or pd.isna(end) or start == "" or end == "":
            return np.nan
        return (pd.to_datetime(end) - pd.to_datetime(start)).seconds // 60
    
    df_live['CRS_ELAPSED_TIME'] = df_live.apply(lambda x: calculate_duration(x['departure'].get('scheduled', ''), x['arrival'].get('scheduled', '')), axis=1)
    df_live['ELAPSED_TIME'] = df_live.apply(lambda x: calculate_duration(x['departure'].get('actual', x['departure'].get('estimated', x['departure'].get('scheduled', ''))),
                                                                          x['arrival'].get('actual', x['arrival'].get('estimated', x['arrival'].get('scheduled', '')))), axis=1)
    
    delay_keys = ['DELAY_DUE_CARRIER', 'DELAY_DUE_WEATHER', 'DELAY_DUE_NAS', 'DELAY_DUE_SECURITY', 'DELAY_DUE_LATE_AIRCRAFT']
    for key in delay_keys:
        df_live[key] = 0
    
    nan_columns = df_live.columns[df_live.isna().any()].tolist()
    if nan_columns:
        print(f"‚ö†Ô∏è Colonnes avec NaN apr√®s transformation: {nan_columns}")
    
    df_live.fillna(0, inplace=True)
    return df_live

# üöÄ **3. Pr√©diction des retards en temps r√©el**
def predict_scheduled_flights():
    live_flights = fetch_live_flights()
    if not live_flights:
        return

    live_data = prepare_live_data(live_flights, pipeline)
    if live_data is None:
        return
    
    if live_data.isnull().sum().sum() > 0:
        print("‚ö†Ô∏è Des valeurs NaN subsistent apr√®s le traitement des donn√©es. V√©rifiez l'API.")
        return
    
    predictions = pipeline.predict(live_data)

    print("\nüìä **Pr√©dictions des Retards de Vols SCHEDULED au d√©part de JFK :**\n")
    for i, flight in enumerate(live_flights):
        print(f"‚úàÔ∏è Vol {flight['flight']['iata']} √† destination de {flight['arrival']['iata']}")
        print(f"   üïí D√©part pr√©vu : {flight['departure']['scheduled']}")
        print(f"   üîπ Pr√©diction de retard : {predictions[i]:.2f} min\n")

# Ex√©cuter la pr√©diction en temps r√©el
predict_scheduled_flights()



üîÑ R√©cup√©ration des vols en temps r√©el...
‚úÖ 100 vols SCHEDULED au d√©part de JFK r√©cup√©r√©s avec succ√®s.
‚ö†Ô∏è Colonnes avec NaN apr√®s transformation: ['aircraft', 'live', 'AIRLINE_CODE', 'WHEELS_ON', 'ELAPSED_TIME']

üìä **Pr√©dictions des Retards de Vols SCHEDULED au d√©part de JFK :**

‚úàÔ∏è Vol CA982 √† destination de PEK
   üïí D√©part pr√©vu : 2025-03-17T02:05:00+00:00
   üîπ Pr√©diction de retard : 15.63 min

‚úàÔ∏è Vol SV6869 √† destination de RDU
   üïí D√©part pr√©vu : 2025-03-17T14:55:00+00:00
   üîπ Pr√©diction de retard : 21.83 min

‚úàÔ∏è Vol VS2492 √† destination de SAV
   üïí D√©part pr√©vu : 2025-03-17T13:59:00+00:00
   üîπ Pr√©diction de retard : 16.03 min

‚úàÔ∏è Vol KE7369 √† destination de SAV
   üïí D√©part pr√©vu : 2025-03-17T13:59:00+00:00
   üîπ Pr√©diction de retard : 16.03 min

‚úàÔ∏è Vol AF2386 √† destination de SAV
   üïí D√©part pr√©vu : 2025-03-17T13:59:00+00:00
   üîπ Pr√©diction de retard : 16.03 min

‚úàÔ∏è Vol AD7617 √† destin