# Phantom Airlines IROPS - Delay Prediction Model

This notebook implements a flight delay prediction model using Snowflake ML features:

- **Feature Store**: Centralized feature management with point-in-time correctness
- **Model Registry**: Version control, lineage tracking, and deployment
- **Model Observability**: Performance monitoring and drift detection

## Business Context
Predicting flight delays enables proactive crew repositioning and passenger rebooking,
reducing downstream cascading impacts and operational costs.

## 1. Environment Setup

In [None]:
import os
import warnings
warnings.filterwarnings('ignore')

from snowflake.snowpark import Session
from snowflake.snowpark import functions as F
from snowflake.snowpark.types import *

from snowflake.ml.feature_store import (
    FeatureStore,
    FeatureView,
    Entity,
    CreationMode
)
from snowflake.ml.registry import Registry
from snowflake.ml.modeling.preprocessing import StandardScaler, OneHotEncoder
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.xgboost import XGBClassifier
from snowflake.ml.modeling.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)

import pandas as pd
import numpy as np

In [None]:
# Open a Snowpark session from a named connection. An unset *or empty*
# SNOWFLAKE_CONNECTION_NAME falls back to the "default" connection.
connection_name = os.environ.get("SNOWFLAKE_CONNECTION_NAME") or "default"
session_builder = Session.builder.config("connection_name", connection_name)
session = session_builder.create()

# Target database / warehouse; both can be overridden through the environment.
DATABASE = os.environ.get("IROPS_DATABASE", "PHANTOM_IROPS")
WAREHOUSE = os.environ.get("IROPS_WAREHOUSE", "PHANTOM_IROPS_WH")

# Pin the session context so later unqualified object names resolve here.
session.use_database(DATABASE)
session.use_warehouse(WAREHOUSE)

current_account = session.get_current_account()
print(f"Connected to: {current_account}")
print(f"Database: {DATABASE}")
print(f"Warehouse: {WAREHOUSE}")

## 2. Feature Store Setup

In [None]:
# Make sure the backing schema exists (created in the session's current database).
session.sql("CREATE SCHEMA IF NOT EXISTS FEATURE_STORE").collect()

# Feature Store handle bound to that schema; created on first use, reused afterwards.
feature_store_options = {
    "session": session,
    "database": DATABASE,
    "name": "FEATURE_STORE",
    "default_warehouse": WAREHOUSE,
    "creation_mode": CreationMode.CREATE_IF_NOT_EXIST,
}
fs = FeatureStore(**feature_store_options)

print(f"Feature Store initialized: {DATABASE}.FEATURE_STORE")

### 2.1 Define Entities

In [None]:
# Entities declare the join keys that feature views and the training spine share.
flight_entity = Entity(
    name="FLIGHT",
    join_keys=["FLIGHT_ID"],
    desc="Individual flight identifier for delay prediction",
)
airport_entity = Entity(
    name="AIRPORT",
    join_keys=["AIRPORT_CODE"],
    desc="Airport for weather and operational features",
)
# Composite key: a route is the (ORIGIN, DESTINATION) pair.
route_entity = Entity(
    name="ROUTE",
    join_keys=["ORIGIN", "DESTINATION"],
    desc="Route for historical performance features",
)

# Register in the same order the entities are declared above.
for entity in (flight_entity, airport_entity, route_entity):
    fs.register_entity(entity)

print("Entities registered: FLIGHT, AIRPORT, ROUTE")

### 2.2 Create Flight Features

In [None]:
# Schedule / calendar features keyed by FLIGHT_ID, with FLIGHT_DATE as the feature timestamp.
#
# Notes on two expressions in the query:
#   * OVERNIGHT wraps around midnight. `BETWEEN 22 AND 5` is an empty range (low > high)
#     and never matches, so the window is written as `>= 22 OR <= 5`.
#   * IS_WEEKEND uses DAYOFWEEKISO (always 1 = Monday ... 7 = Sunday). Plain DAYOFWEEK
#     depends on the WEEK_START parameter (0-6 with Sunday = 0 by default), so
#     `DAYOFWEEK(...) IN (1, 7)` does not reliably mean Saturday/Sunday.
# DAY_OF_WEEK itself is left on DAYOFWEEK so the numeric feature keeps its existing encoding.
flight_features_query = f"""
SELECT
    FLIGHT_ID,
    FLIGHT_DATE AS FEATURE_TIMESTAMP,
    ORIGIN,
    DESTINATION,
    AIRCRAFT_TYPE_CODE,
    HOUR(SCHEDULED_DEPARTURE_UTC) AS DEPARTURE_HOUR,
    DAYOFWEEK(FLIGHT_DATE) AS DAY_OF_WEEK,
    DAYOFYEAR(FLIGHT_DATE) AS DAY_OF_YEAR,
    MONTH(FLIGHT_DATE) AS MONTH,
    DISTANCE_NM,
    BLOCK_TIME_SCHEDULED_MIN,
    PASSENGERS_BOOKED,
    CASE
        WHEN HOUR(SCHEDULED_DEPARTURE_UTC) BETWEEN 6 AND 9 THEN 'MORNING_RUSH'
        WHEN HOUR(SCHEDULED_DEPARTURE_UTC) BETWEEN 16 AND 19 THEN 'EVENING_RUSH'
        WHEN HOUR(SCHEDULED_DEPARTURE_UTC) >= 22
          OR HOUR(SCHEDULED_DEPARTURE_UTC) <= 5 THEN 'OVERNIGHT'
        ELSE 'MIDDAY'
    END AS TIME_OF_DAY_BUCKET,
    CASE
        WHEN DAYOFWEEKISO(FLIGHT_DATE) IN (6, 7) THEN TRUE
        ELSE FALSE
    END AS IS_WEEKEND,
    CASE
        WHEN MONTH(FLIGHT_DATE) IN (6, 7, 8) THEN 'SUMMER'
        WHEN MONTH(FLIGHT_DATE) IN (11, 12, 1, 2) THEN 'WINTER'
        ELSE 'SHOULDER'
    END AS SEASON
FROM {DATABASE}.RAW.FLIGHTS
WHERE FLIGHT_DATE IS NOT NULL
"""

flight_features_df = session.sql(flight_features_query)

flight_feature_view = FeatureView(
    name="FLIGHT_SCHEDULE_FEATURES",
    entities=[flight_entity],
    feature_df=flight_features_df,
    timestamp_col="FEATURE_TIMESTAMP",
    refresh_freq="1 day",
    desc="Flight schedule and temporal features for delay prediction"
)

# NOTE(review): if "v1" was already registered with the previous query, the stored
# definition may be reused instead of this one -- confirm, and bump the version or
# re-create the feature view if needed.
flight_fv = fs.register_feature_view(
    feature_view=flight_feature_view,
    version="v1",
    block=True
)

# The version string already starts with "v"; do not prepend another one.
print(f"Registered: {flight_fv.name} {flight_fv.version}")

### 2.3 Create Airport/Weather Features

In [None]:
# Airport-level features: static attributes from RAW.AIRPORTS left-joined to the most
# recent RAW.WEATHER_DATA row per airport (QUALIFY ROW_NUMBER() ... = 1). Airports
# without any weather row get the COALESCE defaults below.
#
# NOTE(review): FEATURE_TIMESTAMP is CURRENT_TIMESTAMP() at refresh time, whereas the
# training spine uses historical FLIGHT_DATE values as its timestamp. If the Feature
# Store join is point-in-time (feature timestamp <= spine timestamp), past flights may
# receive no airport features -- confirm, and consider keeping one row per weather
# observation with OBSERVATION_TIME as the timestamp instead.
airport_features_query = f"""
SELECT 
    a.AIRPORT_CODE,
    CURRENT_TIMESTAMP() AS FEATURE_TIMESTAMP,
    a.IS_HUB,
    a.HUB_TYPE,
    a.GATES_COUNT,
    a.DAILY_OPERATIONS,
    COALESCE(w.WEATHER_IMPACT_SCORE, 20) AS WEATHER_IMPACT_SCORE,
    COALESCE(w.VISIBILITY_CATEGORY, 'VFR') AS VISIBILITY_CATEGORY,
    COALESCE(w.IS_THUNDERSTORM, FALSE) AS IS_THUNDERSTORM,
    COALESCE(w.IS_FREEZING, FALSE) AS IS_FREEZING,
    COALESCE(w.GROUND_STOP_ACTIVE, FALSE) AS GROUND_STOP_ACTIVE,
    COALESCE(w.WIND_SPEED_KNOTS, 10) AS WIND_SPEED_KNOTS,
    COALESCE(w.CEILING_FEET, 10000) AS CEILING_FEET
FROM {DATABASE}.RAW.AIRPORTS a
LEFT JOIN (
    SELECT * FROM {DATABASE}.RAW.WEATHER_DATA
    QUALIFY ROW_NUMBER() OVER (PARTITION BY AIRPORT_CODE ORDER BY OBSERVATION_TIME DESC) = 1
) w ON a.AIRPORT_CODE = w.AIRPORT_CODE
"""

airport_features_df = session.sql(airport_features_query)

airport_feature_view = FeatureView(
    name="AIRPORT_OPERATIONAL_FEATURES",
    entities=[airport_entity],
    feature_df=airport_features_df,
    timestamp_col="FEATURE_TIMESTAMP",
    refresh_freq="15 minutes",
    desc="Airport infrastructure and current weather features"
)

airport_fv = fs.register_feature_view(
    feature_view=airport_feature_view,
    version="v1",
    block=True
)

# The version string already starts with "v"; do not prepend another one.
print(f"Registered: {airport_fv.name} {airport_fv.version}")

### 2.4 Create Route Historical Features

In [None]:
# Route-level delay statistics over the trailing 30 days (yesterday inclusive, today
# excluded), grouped by (ORIGIN, DESTINATION). Only flights with a recorded departure
# delay contribute; routes with no such flights in the window produce no row.
#
# NOTE(review): FEATURE_TIMESTAMP is CURRENT_TIMESTAMP() and the window is anchored on
# CURRENT_DATE(), so each refresh describes "the last 30 days as of now". Joined to a
# historical spine this is either unavailable (point-in-time join) or computed from
# flights that include the ones being labelled -- confirm how training should see it.
route_features_query = f"""
WITH route_stats AS (
    SELECT 
        ORIGIN,
        DESTINATION,
        AVG(DEPARTURE_DELAY_MINUTES) AS AVG_DELAY_30D,
        STDDEV(DEPARTURE_DELAY_MINUTES) AS STDDEV_DELAY_30D,
        COUNT(*) AS FLIGHT_COUNT_30D,
        SUM(CASE WHEN DEPARTURE_DELAY_MINUTES > 15 THEN 1 ELSE 0 END) / NULLIF(COUNT(*), 0) AS DELAY_RATE_30D,
        SUM(CASE WHEN STATUS = 'CANCELLED' THEN 1 ELSE 0 END) / NULLIF(COUNT(*), 0) AS CANCEL_RATE_30D,
        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY DEPARTURE_DELAY_MINUTES) AS P90_DELAY_30D
    FROM {DATABASE}.RAW.FLIGHTS
    WHERE FLIGHT_DATE BETWEEN DATEADD('day', -30, CURRENT_DATE()) AND DATEADD('day', -1, CURRENT_DATE())
      AND DEPARTURE_DELAY_MINUTES IS NOT NULL
    GROUP BY ORIGIN, DESTINATION
)
SELECT 
    ORIGIN,
    DESTINATION,
    CURRENT_TIMESTAMP() AS FEATURE_TIMESTAMP,
    COALESCE(AVG_DELAY_30D, 0) AS ROUTE_AVG_DELAY_30D,
    COALESCE(STDDEV_DELAY_30D, 0) AS ROUTE_STDDEV_DELAY_30D,
    COALESCE(FLIGHT_COUNT_30D, 0) AS ROUTE_FLIGHT_COUNT_30D,
    COALESCE(DELAY_RATE_30D, 0) AS ROUTE_DELAY_RATE_30D,
    COALESCE(CANCEL_RATE_30D, 0) AS ROUTE_CANCEL_RATE_30D,
    COALESCE(P90_DELAY_30D, 0) AS ROUTE_P90_DELAY_30D
FROM route_stats
"""

route_features_df = session.sql(route_features_query)

route_feature_view = FeatureView(
    name="ROUTE_HISTORICAL_FEATURES",
    entities=[route_entity],
    feature_df=route_features_df,
    timestamp_col="FEATURE_TIMESTAMP",
    refresh_freq="1 day",
    desc="Historical route performance metrics (30-day window)"
)

route_fv = fs.register_feature_view(
    feature_view=route_feature_view,
    version="v1",
    block=True
)

# The version string already starts with "v"; do not prepend another one.
print(f"Registered: {route_fv.name} {route_fv.version}")

## 3. Training Data Generation

In [None]:
# Training spine: one row per past flight carrying the entity join keys
# (FLIGHT_ID; ORIGIN + DESTINATION; AIRPORT_CODE, aliased from ORIGIN), the label
# timestamp (FLIGHT_DATE) and the DELAY_CATEGORY label derived from STATUS and
# DEPARTURE_DELAY_MINUTES.
# NOTE(review): the filter requires DEPARTURE_DELAY_MINUTES IS NOT NULL, so the
# 'CANCELLED' class only appears if cancelled flights carry a delay value -- confirm.
# NOTE(review): ORIGIN / DESTINATION are also selected by the flight feature view;
# verify the joined training set does not end up with clashing column names.
spine_query = f"""
SELECT 
    FLIGHT_ID,
    ORIGIN,
    DESTINATION,
    ORIGIN AS AIRPORT_CODE,
    FLIGHT_DATE AS LABEL_TIMESTAMP,
    CASE 
        WHEN STATUS = 'CANCELLED' THEN 'CANCELLED'
        WHEN DEPARTURE_DELAY_MINUTES <= 0 THEN 'ON_TIME'
        WHEN DEPARTURE_DELAY_MINUTES <= 15 THEN 'MINOR_DELAY'
        WHEN DEPARTURE_DELAY_MINUTES <= 60 THEN 'MODERATE_DELAY'
        ELSE 'SEVERE_DELAY'
    END AS DELAY_CATEGORY
FROM {DATABASE}.RAW.FLIGHTS
WHERE FLIGHT_DATE < CURRENT_DATE()
  AND STATUS IN ('ARRIVED', 'CANCELLED', 'DELAYED')
  AND DEPARTURE_DELAY_MINUTES IS NOT NULL
"""

# Lazy Snowpark DataFrame; count() triggers a query just to report the spine size.
spine_df = session.sql(spine_query)
print(f"Training spine rows: {spine_df.count()}")

In [None]:
# Join the spine with the three registered feature views, using LABEL_TIMESTAMP as the
# spine timestamp. Feature-view timestamp columns are left out of the result.
#
# FeatureStore.generate_training_set returns a Snowpark DataFrame directly (only
# generate_dataset returns a Dataset object), so no `.to_snowpark_dataframe()`
# conversion is needed -- calling it on the result raises AttributeError.
training_df = fs.generate_training_set(
    spine_df=spine_df,
    features=[
        flight_fv,
        airport_fv,
        route_fv
    ],
    spine_timestamp_col="LABEL_TIMESTAMP",
    spine_label_cols=["DELAY_CATEGORY"],
    include_feature_view_timestamp_col=False
)

# Kept as an alias so any code that still refers to `training_data` keeps working.
training_data = training_df

print(f"Training data shape: {training_df.count()} rows")
training_df.limit(5).show()

## 4. Model Training with Snowflake ML

In [None]:
# 80/20 random split with a fixed seed so the partition is repeatable.
split_weights = [0.8, 0.2]
train_df, test_df = training_df.random_split(split_weights, seed=42)

train_rows = train_df.count()
print(f"Training set: {train_rows} rows")
test_rows = test_df.count()
print(f"Test set: {test_rows} rows")

In [None]:
# Numeric model inputs, grouped by the feature view that supplies them.
NUMERIC_COLS = (
    # Flight schedule / calendar
    [
        "DEPARTURE_HOUR", "DAY_OF_WEEK", "DAY_OF_YEAR", "MONTH",
        "DISTANCE_NM", "BLOCK_TIME_SCHEDULED_MIN", "PASSENGERS_BOOKED",
    ]
    # Airport infrastructure and weather
    + [
        "GATES_COUNT", "DAILY_OPERATIONS", "WEATHER_IMPACT_SCORE",
        "WIND_SPEED_KNOTS", "CEILING_FEET",
    ]
    # Route history (30-day window)
    + [
        "ROUTE_AVG_DELAY_30D", "ROUTE_STDDEV_DELAY_30D",
        "ROUTE_DELAY_RATE_30D", "ROUTE_P90_DELAY_30D",
    ]
)

# Boolean flags; these are fed to the classifier without scaling.
BOOLEAN_COLS = ["IS_HUB", "IS_WEEKEND", "IS_THUNDERSTORM", "IS_FREEZING", "GROUND_STOP_ACTIVE"]

# Target column produced by the training spine.
LABEL_COL = "DELAY_CATEGORY"

# Full model input: numeric columns first, then the boolean flags.
FEATURE_COLS = [*NUMERIC_COLS, *BOOLEAN_COLS]

In [None]:
# Each numeric column gets a standardised twin named "<col>_SCALED".
scaled_numeric_cols = [f"{name}_SCALED" for name in NUMERIC_COLS]

scaler = StandardScaler(input_cols=NUMERIC_COLS, output_cols=scaled_numeric_cols)

# The classifier consumes the scaled numerics plus the raw boolean flags.
# NOTE(review): DELAY_CATEGORY is a string label; some XGBoost versions only accept
# integer-encoded classes -- confirm fit() accepts it or add a label-encoding step.
xgb_hyperparameters = {
    "n_estimators": 100,
    "max_depth": 6,
    "learning_rate": 0.1,
    "random_state": 42,
}
xgb_classifier = XGBClassifier(
    input_cols=scaled_numeric_cols + BOOLEAN_COLS,
    label_cols=[LABEL_COL],
    output_cols=["PREDICTED_DELAY_CATEGORY"],
    **xgb_hyperparameters,
)

# Scaling runs first so the "_SCALED" columns exist when the classifier is fitted.
pipeline_steps = [("scaler", scaler), ("classifier", xgb_classifier)]
pipeline = Pipeline(steps=pipeline_steps)

print("Pipeline created with StandardScaler + XGBClassifier")

In [None]:
# Fit every pipeline step (scaler, then classifier) on the training split.
print("Training model...")
pipeline.fit(train_df)
print("Model training complete!")

## 5. Model Evaluation

In [None]:
# Score the held-out split; the result keeps the label column and adds
# PREDICTED_DELAY_CATEGORY.
predictions_df = pipeline.predict(test_df)

# The snowflake.ml.modeling.metrics functions are keyword-only and work on a Snowpark
# DataFrame plus column names (df=, y_true_col_names=, y_pred_col_names=). They do not
# accept sklearn-style y_true= / y_pred= arrays, which would raise TypeError.
metric_inputs = {
    "df": predictions_df,
    "y_true_col_names": LABEL_COL,
    "y_pred_col_names": "PREDICTED_DELAY_CATEGORY",
}

accuracy = accuracy_score(**metric_inputs)
# 'weighted' averages the per-class scores by class support (multi-class label).
precision = precision_score(**metric_inputs, average='weighted')
recall = recall_score(**metric_inputs, average='weighted')
f1 = f1_score(**metric_inputs, average='weighted')

# Local pandas copies of actual vs. predicted labels, used by the confusion-matrix cell.
predictions_pd = predictions_df.select(
    LABEL_COL,
    "PREDICTED_DELAY_CATEGORY"
).to_pandas()

y_true = predictions_pd[LABEL_COL]
y_pred = predictions_pd["PREDICTED_DELAY_CATEGORY"]

# Plain floats so the values can be stored as model-version metrics.
metrics = {
    "accuracy": float(accuracy),
    "precision_weighted": float(precision),
    "recall_weighted": float(recall),
    "f1_weighted": float(f1)
}

print("Model Performance Metrics:")
print(f"  Accuracy:  {accuracy:.4f}")
print(f"  Precision: {precision:.4f}")
print(f"  Recall:    {recall:.4f}")
print(f"  F1 Score:  {f1:.4f}")

In [None]:
# Confusion matrix from the pandas label Series (rows = actual, columns = predicted).
#
# Built with pandas rather than snowflake.ml.modeling.metrics.confusion_matrix, which
# expects a Snowpark DataFrame and column names, not y_true= / y_pred= arrays.
# Labels are the union of actual and predicted classes so the matrix stays square
# even when a class occurs on only one side.
labels = sorted(set(y_true.unique()) | set(y_pred.unique()))

cm_df = (
    pd.crosstab(y_true, y_pred)
    .reindex(index=labels, columns=labels, fill_value=0)
)
cm_df.index.name = None
cm_df.columns.name = None

# Raw array form, kept for code that expects `cm` as a matrix.
cm = cm_df.to_numpy()

print("Confusion Matrix:")
print(cm_df)

## 6. Model Registry - Registration & Versioning

In [None]:
# Schema that holds the registered model and the monitoring objects created later.
session.sql("CREATE SCHEMA IF NOT EXISTS ML_MODELS").collect()

# Registry handle scoped to <DATABASE>.ML_MODELS.
registry_location = {"database_name": DATABASE, "schema_name": "ML_MODELS"}
registry = Registry(session=session, **registry_location)

print(f"Model Registry initialized: {DATABASE}.ML_MODELS")

In [None]:
# A small sample of the raw (unscaled) feature columns, passed as sample input data.
sample_input = train_df.select(FEATURE_COLS).limit(100)

# Log the fitted pipeline together with the evaluation metrics.
# NOTE(review): the version name is fixed, so re-running this cell against a registry
# that already holds "V1" presumably fails -- confirm the intended re-run behaviour.
registration = {
    "model": pipeline,
    "model_name": "DELAY_PREDICTION_MODEL",
    "version_name": "V1",
    "sample_input_data": sample_input,
    "metrics": metrics,
    "conda_dependencies": ["scikit-learn", "xgboost"],
    "comment": "XGBoost classifier for flight delay category prediction. Features from Feature Store.",
}
model_version = registry.log_model(**registration)

print(f"Model registered: {model_version.model_name} version {model_version.version_name}")

In [None]:
# Attach dataset sizes and the feature count to the registered version.
# Values are computed lazily, one at a time, right before each is recorded.
extra_metrics = (
    ("training_samples", train_df.count),
    ("test_samples", test_df.count),
    ("num_features", lambda: len(FEATURE_COLS)),
)
for metric_name, compute_value in extra_metrics:
    model_version.set_metric(metric_name, compute_value())

print("Additional metrics recorded")

## 7. Model Observability Setup

In [None]:
# Prediction log table: one row per prediction, with the model name/version, the
# predicted category and probability, the actual category once known, a correctness
# flag, the feature values as VARIANT and the latency in milliseconds.
# PREDICTION_ID and PREDICTION_TIMESTAMP have column defaults.
observability_table_sql = f"""
CREATE TABLE IF NOT EXISTS {DATABASE}.ML_MODELS.DELAY_MODEL_PREDICTIONS_LOG (
    PREDICTION_ID VARCHAR(50) DEFAULT UUID_STRING(),
    PREDICTION_TIMESTAMP TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    MODEL_NAME VARCHAR(100),
    MODEL_VERSION VARCHAR(20),
    FLIGHT_ID VARCHAR(50),
    PREDICTED_CATEGORY VARCHAR(50),
    PREDICTION_PROBABILITY FLOAT,
    ACTUAL_CATEGORY VARCHAR(50),
    IS_CORRECT BOOLEAN,
    FEATURE_VALUES VARIANT,
    LATENCY_MS INTEGER
)
"""
session.sql(observability_table_sql).collect()

# Daily performance view per model version. Only rows whose ACTUAL_CATEGORY is known
# are counted. ACCURACY_DRIFT is the change in daily accuracy versus the previous
# logged day for the same version (LAG over PREDICTION_DATE), so it is NULL on a
# version's first day.
monitoring_view_sql = f"""
CREATE OR REPLACE VIEW {DATABASE}.ML_MODELS.DELAY_MODEL_PERFORMANCE_MONITOR AS
WITH daily_metrics AS (
    SELECT 
        DATE_TRUNC('day', PREDICTION_TIMESTAMP) AS PREDICTION_DATE,
        MODEL_VERSION,
        COUNT(*) AS PREDICTION_COUNT,
        SUM(CASE WHEN IS_CORRECT THEN 1 ELSE 0 END) AS CORRECT_COUNT,
        AVG(LATENCY_MS) AS AVG_LATENCY_MS,
        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY LATENCY_MS) AS P95_LATENCY_MS
    FROM {DATABASE}.ML_MODELS.DELAY_MODEL_PREDICTIONS_LOG
    WHERE ACTUAL_CATEGORY IS NOT NULL
    GROUP BY DATE_TRUNC('day', PREDICTION_TIMESTAMP), MODEL_VERSION
)
SELECT 
    PREDICTION_DATE,
    MODEL_VERSION,
    PREDICTION_COUNT,
    CORRECT_COUNT,
    CORRECT_COUNT / NULLIF(PREDICTION_COUNT, 0) AS DAILY_ACCURACY,
    AVG_LATENCY_MS,
    P95_LATENCY_MS,
    LAG(CORRECT_COUNT / NULLIF(PREDICTION_COUNT, 0)) OVER (
        PARTITION BY MODEL_VERSION ORDER BY PREDICTION_DATE
    ) AS PREV_DAY_ACCURACY,
    (CORRECT_COUNT / NULLIF(PREDICTION_COUNT, 0)) - LAG(CORRECT_COUNT / NULLIF(PREDICTION_COUNT, 0)) OVER (
        PARTITION BY MODEL_VERSION ORDER BY PREDICTION_DATE
    ) AS ACCURACY_DRIFT
FROM daily_metrics
"""
session.sql(monitoring_view_sql).collect()

print("Observability infrastructure created:")
print(f"  - {DATABASE}.ML_MODELS.DELAY_MODEL_PREDICTIONS_LOG (table)")
print(f"  - {DATABASE}.ML_MODELS.DELAY_MODEL_PERFORMANCE_MONITOR (view)")

In [None]:
# Alert view over the last 7 days of the performance monitor. The CASE branches are
# evaluated in order, so an accuracy threshold breach (CRITICAL < 0.70, WARNING < 0.80)
# takes precedence over DRIFT_DETECTED (|day-over-day change| > 0.05). Rows where
# none of the conditions hold -- including a NULL ACCURACY_DRIFT -- are HEALTHY.
drift_alert_sql = f"""
CREATE OR REPLACE VIEW {DATABASE}.ML_MODELS.DELAY_MODEL_DRIFT_ALERTS AS
SELECT 
    PREDICTION_DATE,
    MODEL_VERSION,
    DAILY_ACCURACY,
    ACCURACY_DRIFT,
    CASE 
        WHEN DAILY_ACCURACY < 0.70 THEN 'CRITICAL'
        WHEN DAILY_ACCURACY < 0.80 THEN 'WARNING'
        WHEN ABS(ACCURACY_DRIFT) > 0.05 THEN 'DRIFT_DETECTED'
        ELSE 'HEALTHY'
    END AS ALERT_STATUS,
    CASE 
        WHEN DAILY_ACCURACY < 0.70 THEN 'Accuracy below 70% - consider retraining'
        WHEN DAILY_ACCURACY < 0.80 THEN 'Accuracy below 80% - monitor closely'
        WHEN ABS(ACCURACY_DRIFT) > 0.05 THEN 'Significant accuracy change detected'
        ELSE 'Model performing within expected parameters'
    END AS ALERT_MESSAGE
FROM {DATABASE}.ML_MODELS.DELAY_MODEL_PERFORMANCE_MONITOR
WHERE PREDICTION_DATE >= DATEADD('day', -7, CURRENT_DATE())
"""
session.sql(drift_alert_sql).collect()

print(f"Drift detection alerts created: {DATABASE}.ML_MODELS.DELAY_MODEL_DRIFT_ALERTS")

## 8. Inference View

In [None]:
# View over upcoming flights (FLIGHT_DATE >= today, status SCHEDULED or BOARDING).
# The CTE assembles the model's input features from the raw tables: latest weather row
# per origin airport and trailing-30-day route statistics, with the same COALESCE
# defaults the feature views use.
#
# IS_WEEKEND uses DAYOFWEEKISO (1 = Monday ... 7 = Sunday) because plain DAYOFWEEK
# depends on the WEEK_START parameter and `IN (1, 7)` does not reliably mean Sat/Sun.
#
# NOTE(review): despite its name, the final SELECT exposes flight and context columns
# only -- no model output is computed here. Confirm whether scoring is meant to be
# applied on top of this view.
inference_view_sql = f"""
CREATE OR REPLACE VIEW {DATABASE}.ML_MODELS.DELAY_PREDICTIONS AS
WITH upcoming_flights AS (
    SELECT
        f.FLIGHT_ID,
        f.FLIGHT_NUMBER,
        f.FLIGHT_DATE,
        f.ORIGIN,
        f.DESTINATION,
        f.SCHEDULED_DEPARTURE_UTC,
        f.STATUS,
        f.AIRCRAFT_TYPE_CODE,
        HOUR(f.SCHEDULED_DEPARTURE_UTC) AS DEPARTURE_HOUR,
        DAYOFWEEK(f.FLIGHT_DATE) AS DAY_OF_WEEK,
        DAYOFYEAR(f.FLIGHT_DATE) AS DAY_OF_YEAR,
        MONTH(f.FLIGHT_DATE) AS MONTH,
        f.DISTANCE_NM,
        f.BLOCK_TIME_SCHEDULED_MIN,
        f.PASSENGERS_BOOKED,
        a.IS_HUB,
        a.GATES_COUNT,
        a.DAILY_OPERATIONS,
        COALESCE(w.WEATHER_IMPACT_SCORE, 20) AS WEATHER_IMPACT_SCORE,
        COALESCE(w.IS_THUNDERSTORM, FALSE) AS IS_THUNDERSTORM,
        COALESCE(w.IS_FREEZING, FALSE) AS IS_FREEZING,
        COALESCE(w.GROUND_STOP_ACTIVE, FALSE) AS GROUND_STOP_ACTIVE,
        COALESCE(w.WIND_SPEED_KNOTS, 10) AS WIND_SPEED_KNOTS,
        COALESCE(w.CEILING_FEET, 10000) AS CEILING_FEET,
        CASE
            WHEN DAYOFWEEKISO(f.FLIGHT_DATE) IN (6, 7) THEN TRUE
            ELSE FALSE
        END AS IS_WEEKEND,
        COALESCE(r.ROUTE_AVG_DELAY_30D, 0) AS ROUTE_AVG_DELAY_30D,
        COALESCE(r.ROUTE_STDDEV_DELAY_30D, 0) AS ROUTE_STDDEV_DELAY_30D,
        COALESCE(r.ROUTE_DELAY_RATE_30D, 0) AS ROUTE_DELAY_RATE_30D,
        COALESCE(r.ROUTE_P90_DELAY_30D, 0) AS ROUTE_P90_DELAY_30D
    FROM {DATABASE}.RAW.FLIGHTS f
    JOIN {DATABASE}.RAW.AIRPORTS a ON f.ORIGIN = a.AIRPORT_CODE
    LEFT JOIN (
        SELECT * FROM {DATABASE}.RAW.WEATHER_DATA
        QUALIFY ROW_NUMBER() OVER (PARTITION BY AIRPORT_CODE ORDER BY OBSERVATION_TIME DESC) = 1
    ) w ON f.ORIGIN = w.AIRPORT_CODE
    LEFT JOIN (
        SELECT
            ORIGIN, DESTINATION,
            AVG(DEPARTURE_DELAY_MINUTES) AS ROUTE_AVG_DELAY_30D,
            STDDEV(DEPARTURE_DELAY_MINUTES) AS ROUTE_STDDEV_DELAY_30D,
            SUM(CASE WHEN DEPARTURE_DELAY_MINUTES > 15 THEN 1 ELSE 0 END) / NULLIF(COUNT(*), 0) AS ROUTE_DELAY_RATE_30D,
            PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY DEPARTURE_DELAY_MINUTES) AS ROUTE_P90_DELAY_30D
        FROM {DATABASE}.RAW.FLIGHTS
        WHERE FLIGHT_DATE BETWEEN DATEADD('day', -30, CURRENT_DATE()) AND DATEADD('day', -1, CURRENT_DATE())
          AND DEPARTURE_DELAY_MINUTES IS NOT NULL
        GROUP BY ORIGIN, DESTINATION
    ) r ON f.ORIGIN = r.ORIGIN AND f.DESTINATION = r.DESTINATION
    WHERE f.FLIGHT_DATE >= CURRENT_DATE()
      AND f.STATUS IN ('SCHEDULED', 'BOARDING')
)
SELECT
    FLIGHT_ID,
    FLIGHT_NUMBER,
    FLIGHT_DATE,
    ORIGIN,
    DESTINATION,
    SCHEDULED_DEPARTURE_UTC,
    STATUS,
    WEATHER_IMPACT_SCORE,
    IS_THUNDERSTORM,
    GROUND_STOP_ACTIVE,
    ROUTE_AVG_DELAY_30D
FROM upcoming_flights
"""
session.sql(inference_view_sql).collect()

print(f"Inference view created: {DATABASE}.ML_MODELS.DELAY_PREDICTIONS")

## 9. Summary

In [None]:
# Recap of everything the notebook created, emitted as a single block of text.
banner = "=" * 60
summary_lines = [
    banner,
    "DELAY PREDICTION MODEL - DEPLOYMENT COMPLETE",
    banner,
    # Section titles carry a leading newline to leave a blank line before them.
    "\nFeature Store Artifacts:",
    "  - Entity: FLIGHT, AIRPORT, ROUTE",
    "  - Feature View: FLIGHT_SCHEDULE_FEATURES v1",
    "  - Feature View: AIRPORT_OPERATIONAL_FEATURES v1",
    "  - Feature View: ROUTE_HISTORICAL_FEATURES v1",
    "\nModel Registry:",
    "  - Model: DELAY_PREDICTION_MODEL V1",
    "  - Algorithm: XGBoost Classifier",
    f"  - Features: {len(FEATURE_COLS)}",
    f"  - Accuracy: {accuracy:.4f}",
    "\nObservability:",
    f"  - Predictions Log: {DATABASE}.ML_MODELS.DELAY_MODEL_PREDICTIONS_LOG",
    f"  - Performance Monitor: {DATABASE}.ML_MODELS.DELAY_MODEL_PERFORMANCE_MONITOR",
    f"  - Drift Alerts: {DATABASE}.ML_MODELS.DELAY_MODEL_DRIFT_ALERTS",
    "\nInference:",
    f"  - View: {DATABASE}.ML_MODELS.DELAY_PREDICTIONS",
    banner,
]
print("\n".join(summary_lines))

In [None]:
# Close the Snowpark session opened at the top of the notebook; cells that use
# `session` need a new session before they can be re-run.
session.close()
print("Session closed.")