In [None]:
import numpy as np
import pandas as pd
import joblib
import warnings
from datetime import datetime
import scrapbook as sb
warnings.filterwarnings('ignore')

In [None]:
# Placeholder globals: both names are reassigned by the model-loading cell
# further down (model from the model pickle, feature_columns from the metadata).
# NOTE(review): presumably this doubles as a parameters cell for external
# injection — confirm with whoever executes the notebook.
feature_columns = []
model = None

In [None]:
# Flight payload consumed by the final prediction cell of this notebook.
# NOTE(review): defaults to None, so it presumably has to be injected (e.g. as
# a notebook parameter) before that last cell can run — confirm.
json_ticket = None

In [9]:
def categorize_period(hour):
    """Return the period-of-day label for an hour.

    Hours in [6, 12) map to 'morning', [12, 18) to 'afternoon' and
    [18, 22) to 'evening'; every other value falls through to 'night'.
    """
    bands = (
        (6, 12, 'morning'),
        (12, 18, 'afternoon'),
        (18, 22, 'evening'),
    )
    for start, stop, label in bands:
        if start <= hour < stop:
            return label
    return 'night'

def get_season(month):
    """Return the season label for a month number.

    12, 1, 2 -> 'winter'; 3, 4, 5 -> 'spring'; 6, 7, 8 -> 'summer';
    any other value -> 'fall'.
    """
    seasons = (
        ('winter', (12, 1, 2)),
        ('spring', (3, 4, 5)),
        ('summer', (6, 7, 8)),
    )
    for name, months in seasons:
        if month in months:
            return name
    return 'fall'

def categorize_distance(distance):
    """Bucket a flight distance into 'short', 'medium' or 'long'.

    Values below 500 are 'short', values below 1500 are 'medium', and
    everything else is 'long'.
    """
    if distance < 500:
        return 'short'
    return 'medium' if distance < 1500 else 'long'

In [10]:
def transform_flight_data(flight_json, feature_columns):
    """
    Turn a single flight record into a one-row feature frame for the model.

    Column names are upper-cased, derived features are added for whichever
    inputs are present, the categorical columns are one-hot encoded, and the
    result is aligned to ``feature_columns``.

    NOTE(review): this is meant to mirror the training-time transformation;
    that code is not visible here, so keep the two in sync when editing.

    Args:
        flight_json: dict with the flight data (key case does not matter).
        feature_columns: ordered list of column names expected by the model.

    Returns:
        DataFrame with one row and exactly the columns of ``feature_columns``,
        in that order. Columns missing after encoding are filled with 0;
        columns not listed in ``feature_columns`` are dropped.
    """
    # Build a one-row frame and normalise the column names
    df = pd.DataFrame([flight_json])
    df.columns = df.columns.str.upper()

    # ========== DERIVED FEATURES ==========

    # 1. DAY_OF_WEEK from the calendar date (pandas convention: Monday=0)
    if all(col in df.columns for col in ['YEAR', 'MONTH', 'DAY']):
        df['DAY_OF_WEEK'] = pd.to_datetime(df[['YEAR', 'MONTH', 'DAY']]).dt.dayofweek

    # 2. HOUR from SCHEDULED_DEPARTURE (HHMM encoded as a number)
    if 'SCHEDULED_DEPARTURE' in df.columns:
        hour = df['SCHEDULED_DEPARTURE'] // 100
        df['HOUR'] = hour

        # PERIOD_OF_DAY
        df['PERIOD_OF_DAY'] = hour.apply(categorize_period)

        # IS_NIGHT_FLIGHT (22:00 - 06:00)
        df['IS_NIGHT_FLIGHT'] = ((hour >= 22) | (hour < 6)).astype(int)

        # IS_RUSH_HOUR (hours 6-9 and 17-20)
        df['IS_RUSH_HOUR'] = hour.isin([6, 7, 8, 9, 17, 18, 19, 20]).astype(int)

    # 3. IS_WEEKEND (Saturday=5, Sunday=6)
    if 'DAY_OF_WEEK' in df.columns:
        df['IS_WEEKEND'] = (df['DAY_OF_WEEK'] >= 5).astype(int)

    # 4. SEASON
    if 'MONTH' in df.columns:
        df['SEASON'] = df['MONTH'].apply(get_season)

    # 5. DISTANCE_CATEGORY
    if 'DISTANCE' in df.columns:
        df['DISTANCE_CATEGORY'] = df['DISTANCE'].apply(categorize_distance)

    # ========== ONE-HOT ENCODING ==========

    categorical_cols = [
        col for col in (
            'AIRLINE', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT',
            'PERIOD_OF_DAY', 'SEASON', 'DISTANCE_CATEGORY',
        )
        if col in df.columns
    ]

    df_encoded = pd.get_dummies(
        df,
        columns=categorical_cols,
        prefix=categorical_cols,
        drop_first=False
    )

    # ========== ALIGN WITH THE TRAINING FEATURES ==========

    # A single reindex adds every missing column (filled with 0), drops the
    # extras and fixes the order. Inserting hundreds of columns one by one
    # fragments the frame and triggers pandas' PerformanceWarning.
    return df_encoded.reindex(columns=feature_columns, fill_value=0)

In [None]:
# Load the trained model and its metadata. Later cells rely on the globals
# `model` and `feature_columns` assigned here.
# NOTE(security): joblib.load unpickles arbitrary objects — only load
# artifacts produced by the project's own training step.
try:
    # Load the model
    model = joblib.load('../../data-science/models/best_model.pkl')
    print("‚úì Modelo cargado: models/best_model.pkl")

    # Load the metadata saved alongside the model
    metadata = joblib.load('../../data-science/metadata/prediction_metadata.pkl')
    feature_columns = metadata['feature_columns']
    best_model_name = metadata['best_model_name']
    delay_threshold = metadata['delay_threshold']

    print(f"\nüìä Informaci√≥n del modelo:")
    print(f"  ‚Üí Tipo: {best_model_name}")
    print(f"  ‚Üí Features esperadas: {len(feature_columns)}")
    print(f"  ‚Üí Threshold de retraso: {delay_threshold} minutos")

    # Metrics: any name other than 'DecisionTree' falls back to the
    # random-forest entry.
    metrics = metadata['metrics']
    if best_model_name == 'DecisionTree':
        model_metrics = metrics['decision_tree']
    else:
        model_metrics = metrics['random_forest']

    print(f"\nüéØ M√©tricas del modelo:")
    print(f"  ‚Üí Accuracy: {model_metrics['accuracy']:.4f}")
    print(f"  ‚Üí ROC-AUC: {model_metrics['roc_auc']:.4f}")

except FileNotFoundError as e:
    print(f"\n‚ùå Error: No se encontraron los archivos del modelo")
    print(f"   {e}")
    print("\nüí° Soluci√≥n: Primero ejecuta '1_train_model.py'")
    # Re-raise instead of calling exit(1): inside a notebook kernel `exit`
    # is not a plain process exit, and re-raising stops the run with the
    # original traceback so automated executions report the real cause.
    raise

‚úì Modelo cargado: models/best_model.pkl

üìä Informaci√≥n del modelo:
  ‚Üí Tipo: DecisionTree
  ‚Üí Features esperadas: 663
  ‚Üí Threshold de retraso: 15 minutos

üéØ M√©tricas del modelo:
  ‚Üí Accuracy: 0.6537
  ‚Üí ROC-AUC: 0.7119


In [None]:
# ============================================================================
# 2. PREDICTION FUNCTION
# ============================================================================

def predict_flight_delay(flight_json,show_details=True):
    """
    Predict whether a flight will be delayed.

    Uses the notebook-level globals ``model`` and ``feature_columns``.

    Args:
        flight_json: dict with the flight data
        show_details: when True, print the input record and the prediction

    Returns:
        dict with keys 'prevision' ('Retrasado' or 'Puntual'),
        'probabilidad' (delay probability), 'probabilidad_puntual' and
        'probabilidad_retrasado'; probabilities are rounded to 2 decimals.
    """
    # Transform the record into the model's feature layout
    flight_prepared = transform_flight_data(flight_json, feature_columns)

    # Predict
    prediction = model.predict(flight_prepared)[0]
    probabilities = model.predict_proba(flight_prepared)[0]
    # assumes predict_proba columns are ordered [on-time, delayed] — TODO
    # confirm against model.classes_
    prob_on_time = float(probabilities[0])
    prob_delayed = float(probabilities[1])

    # Result (rounded values, as returned to callers)
    result = {
        'prevision': 'Retrasado' if prediction == 1 else 'Puntual',
        'probabilidad': round(prob_delayed, 2),
        'probabilidad_puntual': round(prob_on_time, 2),
        'probabilidad_retrasado': round(prob_delayed, 2)
    }

    if show_details:
        print("\n" + "=" * 70)
        print("üìã DATOS DEL VUELO")
        print("=" * 70)
        for key, value in flight_json.items():
            print(f"  {key:25s}: {value}")

        print("\n" + "=" * 70)
        print("üîÆ PREDICCI√ìN")
        print("=" * 70)
        status_emoji = "üî¥" if prediction == 1 else "üü¢"
        print(f"  {status_emoji} Estado Previsto: {result['prevision']}")
        # Format the unrounded probabilities: the 2-decimal values stored in
        # `result` would always render as "xx.00%" under the .2% format.
        print(f"  üìä Probabilidad Puntual:   {prob_on_time:.2%}")
        print(f"  üìä Probabilidad Retrasado: {prob_delayed:.2%}")
        print("=" * 70)

    return result

In [None]:
# ============================================================================
# 3. PREDICTION EXAMPLES
# ============================================================================

print(f"\n{'=' * 80}")
print("PASO 2: EJEMPLOS DE PREDICCI√ìN")
print(80 * "=")

# Example 1: short-haul morning flight
print("\nüîπ EJEMPLO 1: Vuelo matutino de corta distancia")
print(80 * "-")

flight_1 = dict(
    airline="AA",
    origin_airport="LAX",
    destination_airport="SFO",
    scheduled_departure=800,
    distance=337,
    year=2025,
    month=11,
    day=15,
)

result_1 = predict_flight_delay(flight_1)


PASO 2: EJEMPLOS DE PREDICCI√ìN

üîπ EJEMPLO 1: Vuelo matutino de corta distancia
--------------------------------------------------------------------------------

üìã DATOS DEL VUELO
  airline                  : AA
  origin_airport           : LAX
  destination_airport      : SFO
  scheduled_departure      : 800
  distance                 : 337
  year                     : 2025
  month                    : 11
  day                      : 15

üîÆ PREDICCI√ìN
  üü¢ Estado Previsto: Puntual
  üìä Probabilidad Puntual:   60.47%
  üìä Probabilidad Retrasado: 39.53%


In [None]:
# Example 2: long-haul flight departing at 18:00
print("\nüîπ EJEMPLO 2: Vuelo de tarde en hora pico, larga distancia")
print(80 * "-")

flight_2 = dict(
    airline="DL",
    origin_airport="JFK",
    destination_airport="LAX",
    scheduled_departure=1800,
    distance=2475,
    year=2025,
    month=12,
    day=20,
)

result_2 = predict_flight_delay(flight_2)


üîπ EJEMPLO 2: Vuelo de tarde en hora pico, larga distancia
--------------------------------------------------------------------------------

üìã DATOS DEL VUELO
  airline                  : DL
  origin_airport           : JFK
  destination_airport      : LAX
  scheduled_departure      : 1800
  distance                 : 2475
  year                     : 2025
  month                    : 12
  day                      : 20

üîÆ PREDICCI√ìN
  üî¥ Estado Previsto: Retrasado
  üìä Probabilidad Puntual:   35.30%
  üìä Probabilidad Retrasado: 64.70%


In [None]:
# Example 3: flight departing at 22:30 on 2025-11-16
print("\nüîπ EJEMPLO 3: Vuelo nocturno de fin de semana")
print(80 * "-")

flight_3 = dict(
    airline="UA",
    origin_airport="ORD",
    destination_airport="MIA",
    scheduled_departure=2230,
    distance=1197,
    year=2025,
    month=11,
    day=16,
)

result_3 = predict_flight_delay(flight_3)


üîπ EJEMPLO 3: Vuelo nocturno de fin de semana
--------------------------------------------------------------------------------

üìã DATOS DEL VUELO
  airline                  : UA
  origin_airport           : ORD
  destination_airport      : MIA
  scheduled_departure      : 2230
  distance                 : 1197
  year                     : 2025
  month                    : 11
  day                      : 16

üîÆ PREDICCI√ìN
  üî¥ Estado Previsto: Retrasado
  üìä Probabilidad Puntual:   28.85%
  üìä Probabilidad Retrasado: 71.15%


In [None]:
# ============================================================================
# 4. BATCH PREDICTION
# ============================================================================

print(f"\n{'=' * 80}")
print("PASO 3: PREDICCI√ìN EN BATCH (M√∫ltiples vuelos)")
print(80 * "=")

# Five sample flights, all dated 2025-11-10
flights_batch = [
    dict(airline="AA", origin_airport="DFW", destination_airport="LAX",
         scheduled_departure=900, distance=1235, year=2025, month=11, day=10),
    dict(airline="DL", origin_airport="ATL", destination_airport="SEA",
         scheduled_departure=1430, distance=2182, year=2025, month=11, day=10),
    dict(airline="UA", origin_airport="SFO", destination_airport="BOS",
         scheduled_departure=630, distance=2704, year=2025, month=11, day=10),
    dict(airline="AA", origin_airport="MIA", destination_airport="JFK",
         scheduled_departure=1930, distance=1089, year=2025, month=11, day=10),
    dict(airline="DL", origin_airport="LAX", destination_airport="ORD",
         scheduled_departure=2200, distance=1745, year=2025, month=11, day=10),
]

print("\nProcesando {} vuelos...\n".format(len(flights_batch)))

# One summary row per flight; predictions run silently (show_details=False)
results_batch = []
for i, flight in enumerate(flights_batch, 1):
    result = predict_flight_delay(flight, show_details=False)
    results_batch.append({
        'Vuelo': "{airline} {origin_airport}-{destination_airport}".format(**flight),
        'Hora': format(flight['scheduled_departure'], '04d'),
        'Distancia': flight['distance'],
        'Predicci√≥n': result['prevision'],
        'Prob. Retraso': "{:.2%}".format(result['probabilidad']),
    })

df_results = pd.DataFrame(results_batch)
print(df_results.to_string(index=False))


PASO 3: PREDICCI√ìN EN BATCH (M√∫ltiples vuelos)

Procesando 5 vuelos...

     Vuelo Hora  Distancia Predicci√≥n Prob. Retraso
AA DFW-LAX 0900       1235    Puntual        32.93%
DL ATL-SEA 1430       2182  Retrasado        57.16%
UA SFO-BOS 0630       2704    Puntual        33.80%
AA MIA-JFK 1930       1089    Puntual        42.23%
DL LAX-ORD 2200       1745  Retrasado        64.25%


In [17]:
# ============================================================================
# 5. BATCH STATISTICS
# ============================================================================

print(f"\n{'=' * 80}")
print("PASO 4: AN√ÅLISIS DEL BATCH")
print(80 * "=")

# Count the rows predicted as delayed; the remainder are on time
delayed_count = sum(1 for r in results_batch if r['Predicci√≥n'] == 'Retrasado')
ontime_count = len(results_batch) - delayed_count

print("\nüìä Distribuci√≥n de predicciones:")
print("  ‚Üí Puntuales:  {} ({:.1f}%)".format(
    ontime_count, ontime_count/len(results_batch)*100))
print("  ‚Üí Retrasados: {} ({:.1f}%)".format(
    delayed_count, delayed_count/len(results_batch)*100))

# The delay probability is stored as a formatted percentage string, so it is
# parsed back into a fraction before averaging
avg_delay_prob = np.mean([
    float(pct.strip('%'))/100
    for pct in (r['Prob. Retraso'] for r in results_batch)
])
print("\nüìà Probabilidad promedio de retraso: {:.2%}".format(avg_delay_prob))



PASO 4: AN√ÅLISIS DEL BATCH

üìä Distribuci√≥n de predicciones:
  ‚Üí Puntuales:  3 (60.0%)
  ‚Üí Retrasados: 2 (40.0%)

üìà Probabilidad promedio de retraso: 46.07%


In [None]:
# Run the prediction for the supplied flight payload and attach the result to
# the notebook under the name 'predict_api' via scrapbook.
# Fail early with a clear message when no payload was provided: passing None
# on would otherwise fail inside the pandas transformation with an unrelated
# error.
if json_ticket is None:
    raise ValueError(
        "json_ticket es None: asigna un dict con los datos del vuelo "
        "antes de ejecutar esta celda."
    )

predict_api = predict_flight_delay(json_ticket)

sb.glue('predict_api', predict_api)