# Turn Success Prediction Model

Predicts the probability of a successful aircraft turn (an on-time departure following the inbound arrival).

**Target Variable:** `turn_success_flag` (binary)
**Algorithm:** LightGBM Classifier via Snowflake ML
**Output:** `IROP_GNN_RISK.ML_PROCESSING.TURN_PREDICTIONS`

In [None]:
import snowflake.snowpark as snowpark
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, lit, when, avg, current_timestamp, array_construct
from snowflake.snowpark.types import FloatType, IntegerType, ArrayType, StringType
from snowflake.ml.modeling.lightgbm import LGBMClassifier
from snowflake.ml.registry import Registry
import uuid

In [None]:
# Open the notebook's Snowpark session and point it at the ATOMIC schema,
# where the source tables read by the following cells live.
#
# get_active_session is imported here because the import cell at the top of
# the notebook only brings in `Session`; without this line the call below
# raises NameError.
from snowflake.snowpark.context import get_active_session

session = get_active_session()
session.use_database('IROP_GNN_RISK')
session.use_schema('ATOMIC')
print(f"Connected: {session.get_current_database()}.{session.get_current_schema()}")

In [None]:
# Lazy handles to the three source tables; names are resolved against the
# session's current database/schema when a query actually runs.
flights_df, rotations_df, airports_df = (
    session.table('FLIGHT_INSTANCE'),
    session.table('AIRCRAFT_ROTATION'),
    session.table('AIRPORT_CAPABILITY'),
)

# Each count() issues a query, so report them one at a time.
flight_rows = flights_df.count()
print(f"Flights: {flight_rows} rows")
rotation_rows = rotations_df.count()
print(f"Rotations: {rotation_rows} rows")

In [None]:
# Build one row per aircraft turn by joining:
#   r       - rotations that have a previous flight (PREV_FLIGHT_KEY not null)
#   current - the outbound flight of the turn
#   prev    - the inbound flight that preceded it
#   apt     - capability data for the outbound flight's departure station
#             (left join, so turns at unknown stations are kept)
#
# Aliased columns are addressed with the two-argument form col(alias, name).
# A dotted string such as col('r.FLIGHT_KEY') is not split by Snowpark; it is
# treated as a single identifier named "r.FLIGHT_KEY" and fails to resolve.
turn_df = rotations_df.filter(col('PREV_FLIGHT_KEY').isNotNull()).alias('r').join(
    flights_df.alias('current'),
    col('r', 'FLIGHT_KEY') == col('current', 'FLIGHT_KEY')
).join(
    flights_df.alias('prev'),
    col('r', 'PREV_FLIGHT_KEY') == col('prev', 'FLIGHT_KEY')
).join(
    airports_df.alias('apt'),
    col('current', 'DEPARTURE_STATION') == col('apt', 'STATION_CODE'),
    'left'
)

# Project the label and the model features. Every column gets an explicit
# output alias so the names used by later cells (FLIGHT_KEY, PAX_COUNT, ...)
# do not depend on how the join disambiguated duplicate column names.
features_df = turn_df.select(
    col('r', 'FLIGHT_KEY').alias('FLIGHT_KEY'),
    col('r', 'TAIL_NUMBER').alias('TAIL_NUMBER'),
    # Label: 1 when the flight's recorded TURN_SUCCESS_PROB is at least 0.7.
    when(col('current', 'TURN_SUCCESS_PROB') >= 0.7, 1).otherwise(0).alias('TURN_SUCCESS_FLAG'),
    col('current', 'TURN_BUFFER_MINUTES').alias('TURN_BUFFER_MINUTES'),
    col('prev', 'CURRENT_DELAY_ARRIVAL').alias('INBOUND_DELAY'),
    col('current', 'PAX_COUNT').alias('PAX_COUNT'),
    col('current', 'CONNECTING_PAX_PCT').alias('CONNECTING_PAX_PCT'),
    # Boolean flags become 0/1; a NULL flag (e.g. no airport match) maps to 0.
    when(col('apt', 'HUB_FLAG') == True, 1).otherwise(0).alias('IS_HUB'),
    col('apt', 'GATE_COUNT').cast(IntegerType()).alias('GATE_COUNT'),
    when(col('r', 'MEL_APU_FLAG') == True, 1).otherwise(0).alias('HAS_MEL'),
    col('r', 'AOG_RISK_SCORE').cast(FloatType()).alias('AOG_RISK')
).na.fill(0)
# NOTE(review): na.fill(0) turns missing values into 0, so a turn with an
# unknown TURN_BUFFER_MINUTES is indistinguishable from a zero-minute buffer.
# Confirm that this is the intended imputation.

print(f"Turn feature dataset: {features_df.count()} rows")
features_df.show(5)

In [None]:
# 80/20 train/test partition of the feature rows; the fixed seed keeps the
# split parameters reproducible between runs.
split_weights = [0.8, 0.2]
splits = features_df.random_split(split_weights, seed=42)
train_df = splits[0]
test_df = splits[1]

train_rows = train_df.count()
print(f"Training set: {train_rows} rows")
test_rows = test_df.count()
print(f"Test set: {test_rows} rows")

In [None]:
# Label column and the numeric inputs the classifier is trained on.
target_col = 'TURN_SUCCESS_FLAG'
feature_cols = [
    'TURN_BUFFER_MINUTES',
    'INBOUND_DELAY',
    'PAX_COUNT',
    'CONNECTING_PAX_PCT',
    'IS_HUB',
    'GATE_COUNT',
    'HAS_MEL',
    'AOG_RISK',
]

# LightGBM hyperparameters, kept together so they are easy to tune.
lgbm_params = {
    'n_estimators': 100,
    'max_depth': 5,
    'learning_rate': 0.1,
}

# The predicted class is written to PREDICTED_TURN_SUCCESS by predict().
model = LGBMClassifier(
    input_cols=feature_cols,
    label_cols=[target_col],
    output_cols=['PREDICTED_TURN_SUCCESS'],
    **lgbm_params,
)

model.fit(train_df)
print("Model training complete")

In [None]:
# Score the held-out split and preview label vs. prediction side by side.
predictions = model.predict(test_df)
predictions.select('FLIGHT_KEY', 'TURN_SUCCESS_FLAG', 'PREDICTED_TURN_SUCCESS').show(10)

# Accuracy = mean of a per-row 1.0/0.0 "prediction matches label" indicator.
# Computing it as one aggregate scores the test set once (instead of once per
# count()) and avoids a ZeroDivisionError when the test split is empty: AVG
# over zero rows yields NULL, which arrives here as None.
is_correct = when(
    col('TURN_SUCCESS_FLAG') == col('PREDICTED_TURN_SUCCESS'), lit(1.0)
).otherwise(lit(0.0))
accuracy = predictions.select(avg(is_correct).alias('ACCURACY')).collect()[0]['ACCURACY']

if accuracy is None:
    print("Accuracy: n/a (test set is empty)")
else:
    accuracy = float(accuracy)
    print(f"Accuracy: {accuracy:.2%}")

In [None]:
# Score every turn and persist the results to the predictions table.
#
# These Snowpark helpers are not in the notebook's import cell, so they are
# imported here to keep this cell self-contained.
from snowflake.snowpark.functions import array_construct_compact, call_builtin, substring, upper

# Fully qualified target. Writing by qualified name leaves the session's
# current schema untouched, so the lazily evaluated feature query (which
# refers to its source tables by unqualified name) still resolves correctly
# when the write triggers execution.
PREDICTIONS_TABLE = 'IROP_GNN_RISK.ML_PROCESSING.TURN_PREDICTIONS'

all_predictions = model.predict(features_df)

output_df = all_predictions.select(
    # UUID_STRING() is evaluated by Snowflake for each row. A Python-side
    # uuid wrapped in lit() would be computed once and stamped on every row.
    # The 8-character upper-case format of the original ID is kept.
    # NOTE(review): 8 hex characters can collide on large tables; consider
    # storing the full UUID if PREDICTION_ID must be strictly unique.
    upper(substring(call_builtin('UUID_STRING'), lit(1), lit(8))).alias('PREDICTION_ID'),
    col('FLIGHT_KEY'),
    current_timestamp().alias('SNAPSHOT_TS'),
    # PREDICTED_TURN_SUCCESS is the predicted success class, so it is stored
    # as-is; subtracting it from 1 would report the opposite outcome.
    # NOTE(review): this is a hard 0/1 label, not a calibrated probability;
    # model.predict_proba would be needed for a true probability.
    col('PREDICTED_TURN_SUCCESS').cast(FloatType()).alias('TURN_SUCCESS_PROB'),
    # when() without otherwise() yields NULL for rules that do not fire;
    # the compact constructor drops those so the array lists only active flags.
    array_construct_compact(
        when(col('HAS_MEL') == 1, lit('MEL_ACTIVE')),
        when(col('INBOUND_DELAY') > 30, lit('INBOUND_DELAYED')),
        when(col('TURN_BUFFER_MINUTES') < 45, lit('TIGHT_TURN'))
    ).alias('TURN_RISK_FLAGS'),
    lit('v1.0').alias('MODEL_VERSION'),
    lit(None).alias('FEATURE_IMPORTANCE')
)

output_df.write.mode('overwrite').save_as_table(PREDICTIONS_TABLE)

# Count the rows that were written rather than calling output_df.count(),
# which would re-run the whole scoring pipeline a second time.
saved_rows = session.table(PREDICTIONS_TABLE).count()
print(f"Saved {saved_rows} predictions to ML_PROCESSING.TURN_PREDICTIONS")

In [None]:
# Register the trained classifier in the model registry that lives in
# IROP_GNN_RISK.ML_PROCESSING.
registry_location = {
    'database_name': 'IROP_GNN_RISK',
    'schema_name': 'ML_PROCESSING',
}
reg = Registry(session=session, **registry_location)

# A small sample of the training data is passed along with the model.
sample_rows = train_df.limit(10)

model_version = reg.log_model(
    model=model,
    model_name='TURN_SUCCESS_MODEL',
    version_name='v1',
    sample_input_data=sample_rows,
    comment='LightGBM classifier for turn success prediction',
)

print(f"Model registered: {model_version.model_name} v{model_version.version_name}")