In [57]:
import pandas as pd
import numpy as np
import shap
from sklearn.model_selection import train_test_split, RandomizedSearchCV, ParameterSampler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [58]:
# load dataset
df = pd.read_csv('../../datasets/atis_dataset_preprocessed.csv', sep=';')

In [59]:
# define label columns
label_columns = ['ILS', 'RNAV', 'RNP', 'VISUAL']

# define features and targets
X = df.drop(columns=label_columns)

# one-hot encode features that are categorical
X = pd.get_dummies(X, columns=['airport_icao', 'runway_designator_side', 'weather_phenomenon', 
                                'runway_designator_number', 'rvr_tendency', 'runway_ils_category'])

In [60]:
y = df[label_columns]

In [61]:
# train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [62]:
# impute
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

In [63]:
# feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [64]:
# build and train multi-label model
model = MultiOutputClassifier(SVC(probability=True, class_weight='balanced', gamma='scale', C=1.0, kernel='rbf'))
model.fit(X_train, y_train)

In [65]:
# ILS SHAP
# explainer_ILS = shap.TreeExplainer(model.estimators_[0])
# shap_ILS = explainer_ILS.shap_values(X_test)
# shap.summary_plot(shap_ILS, X_test)

In [66]:
# RNAV SHAP
# explainer_RNAV = shap.TreeExplainer(model.estimators_[1])
# shap_RNAV = explainer_RNAV.shap_values(X_test)
# shap.summary_plot(shap_RNAV, X_test)

In [67]:
# RNP SHAP
# explainer_RNP = shap.TreeExplainer(model.estimators_[2])
# shap_RNP = explainer_RNP.shap_values(X_test)
# shap.summary_plot(shap_RNP, X_test)

In [68]:
# VISUAL SHAP
# explainer_VISUAL = shap.TreeExplainer(model.estimators_[3])
# shap_VISUAL = explainer_VISUAL.shap_values(X_test)
# shap.summary_plot(shap_VISUAL, X_test)

In [69]:
# predict and evaluate
y_pred = model.predict(X_test)
empty_mask = y_pred.sum(axis=1) == 0
no_predictions = np.sum(empty_mask)
print(f"Samples with no predicted labels: {no_predictions} out of {len(y_pred)}")
print(X_test[empty_mask])
print(classification_report(y_test, y_pred, target_names=label_columns, zero_division=0))

Samples with no predicted labels: 0 out of 1319
[]
              precision    recall  f1-score   support

         ILS       0.92      0.31      0.46       950
        RNAV       0.12      0.72      0.21       159
         RNP       0.09      0.71      0.17       105
      VISUAL       0.75      0.86      0.80       880

   micro avg       0.40      0.59      0.48      2094
   macro avg       0.47      0.65      0.41      2094
weighted avg       0.75      0.59      0.57      2094
 samples avg       0.48      0.59      0.48      2094

