In [None]:
import numpy as np
import pandas as pd
import shap
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, RandomizedSearchCV, ParameterSampler
from sklearn.multioutput import MultiOutputClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Read the preprocessed dataset from disk; the file is ';'-separated.
df = pd.read_csv(
    '../../datasets/atis_dataset_preprocessed.csv',
    sep=';',
)

In [None]:
# Label columns: these are removed from the feature matrix below.
label_columns = ['ILS', 'RNAV', 'RNP', 'VISUAL']

# Columns that get expanded into one indicator column per distinct value.
# The order of this list determines the order of the generated dummy columns.
categorical_columns = [
    'airport_icao',
    'runway_designator_side',
    'weather_phenomenon',
    'runway_designator_number',
    'rvr_tendency',
    'runway_ils_category',
]

# Feature matrix: every non-label column, with the categorical ones one-hot encoded.
X = pd.get_dummies(df.drop(columns=label_columns), columns=categorical_columns)

In [None]:
# Target frame: one column per entry of label_columns, all rows kept.
y = df.loc[:, label_columns]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise features. The scaler learns its statistics from the training
# split only and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# build and train multi-label model
# SVC is sensitive to feature scale, so standardisation is bundled with every
# per-label classifier: each pipeline fits its scaler on the training rows
# during fit() and re-applies it automatically whenever the model is given raw
# (unscaled) features at prediction time.
# probability=True keeps predict_proba available on the fitted estimators.
model = MultiOutputClassifier(make_pipeline(StandardScaler(), SVC(probability=True)))
model.fit(X_train, y_train)

In [None]:
# ILS SHAP
# The per-label estimator is an SVC, which TreeExplainer does not support
# (it only handles tree-based models), so the model-agnostic KernelExplainer
# is used on the positive-class probability instead.
def predict_proba_ILS(data):
    """Return the probability of the second class for the ILS estimator.

    KernelExplainer passes plain arrays; they are re-wrapped with the feature
    names of X_test so the estimator sees the same columns it was fitted on.
    """
    frame = pd.DataFrame(data, columns=X_test.columns)
    return model.estimators_[0].predict_proba(frame)[:, 1]

# KernelExplainer cost grows with background size x explained rows, so both are
# subsampled (seeded for reproducibility); raise the counts for finer estimates.
# Casting to float avoids an object-dtype array when dummy columns are boolean.
background_ILS = shap.sample(X_train, 50, random_state=42).astype(float)
X_explain_ILS = shap.sample(X_test, 100, random_state=42).astype(float)

explainer_ILS = shap.KernelExplainer(predict_proba_ILS, background_ILS)
shap_ILS = explainer_ILS.shap_values(X_explain_ILS)
shap.summary_plot(shap_ILS, X_explain_ILS)

In [None]:
# RNAV SHAP
# The per-label estimator is an SVC, which TreeExplainer does not support
# (it only handles tree-based models), so the model-agnostic KernelExplainer
# is used on the positive-class probability instead.
def predict_proba_RNAV(data):
    """Return the probability of the second class for the RNAV estimator.

    KernelExplainer passes plain arrays; they are re-wrapped with the feature
    names of X_test so the estimator sees the same columns it was fitted on.
    """
    frame = pd.DataFrame(data, columns=X_test.columns)
    return model.estimators_[1].predict_proba(frame)[:, 1]

# KernelExplainer cost grows with background size x explained rows, so both are
# subsampled (seeded for reproducibility); raise the counts for finer estimates.
# Casting to float avoids an object-dtype array when dummy columns are boolean.
background_RNAV = shap.sample(X_train, 50, random_state=42).astype(float)
X_explain_RNAV = shap.sample(X_test, 100, random_state=42).astype(float)

explainer_RNAV = shap.KernelExplainer(predict_proba_RNAV, background_RNAV)
shap_RNAV = explainer_RNAV.shap_values(X_explain_RNAV)
shap.summary_plot(shap_RNAV, X_explain_RNAV)

In [None]:
# RNP SHAP
# The per-label estimator is an SVC, which TreeExplainer does not support
# (it only handles tree-based models), so the model-agnostic KernelExplainer
# is used on the positive-class probability instead.
def predict_proba_RNP(data):
    """Return the probability of the second class for the RNP estimator.

    KernelExplainer passes plain arrays; they are re-wrapped with the feature
    names of X_test so the estimator sees the same columns it was fitted on.
    """
    frame = pd.DataFrame(data, columns=X_test.columns)
    return model.estimators_[2].predict_proba(frame)[:, 1]

# KernelExplainer cost grows with background size x explained rows, so both are
# subsampled (seeded for reproducibility); raise the counts for finer estimates.
# Casting to float avoids an object-dtype array when dummy columns are boolean.
background_RNP = shap.sample(X_train, 50, random_state=42).astype(float)
X_explain_RNP = shap.sample(X_test, 100, random_state=42).astype(float)

explainer_RNP = shap.KernelExplainer(predict_proba_RNP, background_RNP)
shap_RNP = explainer_RNP.shap_values(X_explain_RNP)
shap.summary_plot(shap_RNP, X_explain_RNP)

In [None]:
# VISUAL SHAP
# The per-label estimator is an SVC, which TreeExplainer does not support
# (it only handles tree-based models), so the model-agnostic KernelExplainer
# is used on the positive-class probability instead.
def predict_proba_VISUAL(data):
    """Return the probability of the second class for the VISUAL estimator.

    KernelExplainer passes plain arrays; they are re-wrapped with the feature
    names of X_test so the estimator sees the same columns it was fitted on.
    """
    frame = pd.DataFrame(data, columns=X_test.columns)
    return model.estimators_[3].predict_proba(frame)[:, 1]

# KernelExplainer cost grows with background size x explained rows, so both are
# subsampled (seeded for reproducibility); raise the counts for finer estimates.
# Casting to float avoids an object-dtype array when dummy columns are boolean.
background_VISUAL = shap.sample(X_train, 50, random_state=42).astype(float)
X_explain_VISUAL = shap.sample(X_test, 100, random_state=42).astype(float)

explainer_VISUAL = shap.KernelExplainer(predict_proba_VISUAL, background_VISUAL)
shap_VISUAL = explainer_VISUAL.shap_values(X_explain_VISUAL)
shap.summary_plot(shap_VISUAL, X_explain_VISUAL)

In [None]:
# Predict all labels for the held-out rows.
y_pred = model.predict(X_test)

# Rows whose predicted label vector sums to zero, i.e. no label was predicted.
labels_per_row = y_pred.sum(axis=1)
empty_mask = labels_per_row == 0
no_predictions = np.count_nonzero(empty_mask)
print(f"Samples with no predicted labels: {no_predictions} out of {len(y_pred)}")

# Show the feature rows that received no label, for manual inspection.
print(X_test.loc[empty_mask])

# Per-label precision / recall / F1; zero_division=0 reports 0 instead of
# warning when a metric's denominator is zero.
report = classification_report(
    y_test,
    y_pred,
    target_names=label_columns,
    zero_division=0,
)
print(report)