# In [2]:
import pandas as pd
import joblib

# Load the pre-trained model, scaler, and encoder
# - model: classifier queried with predict_proba further down.
# - scaler: applied to the prepared feature frame inside dataprep.
# - label_encoders: iterated with .items() in dataprep, i.e. used as a mapping
#   of column name -> fitted encoder.
# NOTE: joblib.load unpickles the file contents, which can execute arbitrary
# code; only load artifacts that come from a trusted source.
# The relative paths are resolved against the current working directory.
model = joblib.load("../trained_modules_R/random_forest_model.pkl")
scaler = joblib.load("../trained_modules_R/standard_scaler.pkl")
label_encoders = joblib.load("../trained_modules_R/label_encoder.pkl")

# Sample record describing a single flight, used to exercise the pipeline.
# Key order is kept exactly as-is: dataprep builds a DataFrame from this dict,
# so the insertion order here becomes the column order handed to the scaler.
# NOTE(review): units are not stated anywhere in this file (e.g. TMAX=85 looks
# like degrees Fahrenheit) — confirm against the training data.
flight_data = {
    "MONTH": 7,
    "DAY_OF_WEEK": 3,
    "TMAX": 85,
    "AWND": 10,
    "AIRPORT_FLIGHTS_MONTH": 12000,
    "AIRLINE_FLIGHTS_MONTH": 2000,
    "AIRLINE_AIRPORT_FLIGHTS_MONTH": 500,
    "PRCP": 0.1,
    "DEP_TIME_BLK": "0800-0859",
    "origin_airport": "San Francisco International (SFO)",
    "carrier_name": "Delta Air Lines Inc.",
}

# Data preparation function
def dataprep(user_data):
    """Turn one flight record into the scaled feature row fed to the model.

    The record is wrapped in a single-row DataFrame whose columns follow the
    key order of ``user_data``. Columns that are not model inputs are
    removed, the remaining categorical columns are converted with the
    module-level ``label_encoders``, and the frame is passed through the
    module-level ``scaler``.

    Args:
        user_data: Mapping of feature name to value for a single flight.
            ``origin_airport`` and ``carrier_name`` may be present but are
            discarded before encoding and scaling.

    Returns:
        Whatever ``scaler.transform`` returns for the one-row frame
        (presumably a 2-D array with a single row — confirm against the
        fitted scaler).

    Raises:
        KeyError: If a column that has a label encoder, and is actually
            used as a feature, is missing from ``user_data``.

    Any exception raised by an encoder's or the scaler's ``transform``
    propagates unchanged (for scikit-learn's ``LabelEncoder`` that is a
    ``ValueError`` on a label it was not fitted on).
    """
    # Descriptive fields that never reach the scaler or the model.
    unused_columns = ['origin_airport', 'carrier_name']

    # Drop them up front; errors='ignore' lets callers omit fields the model
    # does not consume instead of failing with a KeyError.
    features = pd.DataFrame([user_data]).drop(
        columns=unused_columns, errors='ignore'
    )

    for feature, encoder in label_encoders.items():
        # Encoding a column that is discarded anyway would be wasted work and
        # could reject a value the model never sees, so skip it.
        if feature in unused_columns:
            continue
        features[feature] = encoder.transform(features[feature])

    return scaler.transform(features)

# Prepare the data: encode and scale the example flight defined above
prepped_data = dataprep(flight_data)

# Make the prediction
# predict_proba output is multiplied by 100 so the values read as percentages
# (presumably scikit-learn style per-class probabilities in [0, 1] — confirm).
prediction_result = model.predict_proba(prepped_data) * 100
# Row 0 is the single flight passed in; column 1 is treated as the "delay"
# class. NOTE(review): assumes the model's classes are ordered
# [no delay, delay] — confirm via model.classes_.
delay_percentage = prediction_result[0][1]  # Probability of class 1 (delay)

# Report the percentage rounded to two decimal places
print(f"The likelihood of flight delay is {delay_percentage:.2f}%")


# Output: The likelihood of flight delay is 27.75%
