In [25]:
# imports
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score

import shap

import ipywidgets as widgets
from IPython.display import display, clear_output, Image


In [26]:
# Load data
# Read the semicolon-separated training set and encode the target column.
df = pd.read_csv('../data/raw/automotive_data_train_140.csv', sep=';')
df['Ausfall'] = df['Ausfall'].map({'ja': 1, 'nein': 0})

# Series.map turns every label that is not a key of the mapping into NaN.
# Fail here with a clear message instead of letting NaN targets reach the
# stratified split and the classifiers trained further down.
if df['Ausfall'].isna().any():
    raise ValueError("Column 'Ausfall' contains values other than 'ja'/'nein'")

# Drop the two columns that are not used as features; errors='ignore' keeps
# this working when a column is absent from the file.
df = df.drop(columns=['Messungsnr', 'KatTemp'], errors='ignore')

# Feature matrix (everything except the target) and target vector.
X = df.drop(columns='Ausfall')
y = df['Ausfall']

# 80/20 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training part only and apply it to both parts,
# re-wrapping the arrays as DataFrames so column names and row index survive.
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train), columns=X.columns, index=X_train.index
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test), columns=X.columns, index=X_test.index
)


In [None]:
# TAB 1: Feature Selection
# Multi-select list offering every column of X; all columns start selected.
feature_selector = widgets.SelectMultiple(
    options=X.columns.tolist(),
    value=X.columns.tolist(),
    description='Features:',
    layout=widgets.Layout(height='250px', width='400px'),
)

def show_dataset(selected_features):
    """Print the number of selected features and preview their columns.

    The preview shows the first rows of ``df`` restricted to the 'Ausfall'
    target column followed by the columns named in ``selected_features``.
    """
    print(f"Selected: {len(selected_features)} features\n")
    preview_columns = ['Ausfall', *selected_features]
    preview = df[preview_columns].head()
    display(preview)

# Output area driven by the selector: show_dataset receives the selector's
# current value as `selected_features`.
data_output = widgets.interactive_output(
    show_dataset,
    dict(selected_features=feature_selector),
)

# Tab 1 layout: heading, selector, preview.
tab1 = widgets.VBox(children=[
    widgets.HTML(value="<h3>Select Features</h3>"),
    feature_selector,
    data_output,
])

In [28]:
# TAB 2: Tree Parameters
def _param_slider(label, start, low, high):
    """Build an IntSlider with the 120px description width shared by all three."""
    return widgets.IntSlider(
        value=start,
        min=low,
        max=high,
        description=label,
        style={'description_width': '120px'},
    )


# One slider per decision-tree hyperparameter: (label, initial, min, max).
min_split = _param_slider('min_split:', 15, 2, 50)
min_leaf = _param_slider('min_leaf:', 3, 1, 20)
max_depth = _param_slider('max_depth:', 4, 2, 10)

def show_tree(selected_features, min_samples_split, min_samples_leaf, max_depth_val):
    """Train a decision tree on the chosen features, print metrics, draw the tree.

    Parameters
    ----------
    selected_features : sequence of str
        Column names of ``X_train_scaled`` / ``X_test_scaled`` to train on.
    min_samples_split : int
        Forwarded to ``DecisionTreeClassifier(min_samples_split=...)``.
    min_samples_leaf : int
        Forwarded to ``DecisionTreeClassifier(min_samples_leaf=...)``.
    max_depth_val : int
        Forwarded to ``DecisionTreeClassifier(max_depth=...)``.

    Returns
    -------
    None
        Prints accuracy, precision and recall on the test split and renders
        the fitted tree. Prints a hint and returns early when no feature is
        selected.
    """
    features = list(selected_features)

    # A tree can be fitted on a single column, and make_prediction and
    # compute_shap both accept one feature; use the same minimum here so a
    # selection is not refused in this tab but accepted in the others.
    if len(features) < 1:
        print("Please select at least 1 feature")
        return

    X_tr = X_train_scaled[features]
    X_te = X_test_scaled[features]

    model = DecisionTreeClassifier(
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_depth=max_depth_val,
        random_state=42
    )
    model.fit(X_tr, y_train)
    pred = model.predict(X_te)

    # Display metrics (zero_division=0 reports 0 instead of warning when the
    # tree never predicts the positive class).
    print("=== Performance Metrics ===")
    print(f"Accuracy:  {accuracy_score(y_test, pred)*100:.1f}%")
    print(f"Precision: {precision_score(y_test, pred, zero_division=0)*100:.1f}%")
    print(f"Recall:    {recall_score(y_test, pred, zero_division=0)*100:.1f}%")
    print()

    # Display tree on an explicit figure/axes pair.
    fig, ax = plt.subplots(figsize=(18, 10))
    plot_tree(model, feature_names=features,
              class_names=['nein', 'ja'], filled=True, rounded=True, ax=ax)
    fig.tight_layout()
    plt.show()
    # This callback runs on every widget change; release the figure so
    # repeated redraws do not pile up open figures.
    plt.close(fig)

# Output area for the tree view: show_tree receives the current value of the
# feature selector and of each of the three sliders.
tree_output = widgets.interactive_output(
    show_tree,
    dict(
        selected_features=feature_selector,
        min_samples_split=min_split,
        min_samples_leaf=min_leaf,
        max_depth_val=max_depth,
    ),
)

# Tab 2 layout: heading, the three sliders, then the metrics and tree plot.
tab2 = widgets.VBox(children=[
    widgets.HTML(value="<h3>Tree Parameters</h3>"),
    min_split,
    min_leaf,
    max_depth,
    tree_output,
])

In [None]:
# TAB 3: Predictions
# Output area that make_prediction writes into.
pred_output = widgets.Output()

# Button that starts a prediction run.
predict_trigger = widgets.Button(
    description='Predict',
    layout=widgets.Layout(width='150px'),
    button_style='success',
)

def make_prediction(btn):
    """Button callback: fit a tree with the current settings and list test predictions.

    Reads the selected features from ``feature_selector`` and the
    hyperparameters from the ``min_split`` / ``min_leaf`` / ``max_depth``
    sliders, fits a ``DecisionTreeClassifier`` on the scaled training data and
    renders, inside ``pred_output``, the test accuracy plus a per-sample table
    of actual vs. predicted labels.

    Parameters
    ----------
    btn : widgets.Button
        The clicked button, as passed by ``on_click``; not used.

    Returns
    -------
    None
        All results are written to ``pred_output``. Errors raised while
        training or predicting are printed there instead of propagating.
    """
    with pred_output:
        clear_output(wait=True)

        # Get current values from widgets
        selected_features = list(feature_selector.value)

        if not selected_features:
            print("⚠️ Please select at least 1 feature in Tab 1 (Datensatz)")
            return

        try:
            X_tr = X_train_scaled[selected_features]
            X_te = X_test_scaled[selected_features]

            model = DecisionTreeClassifier(
                min_samples_split=min_split.value,
                min_samples_leaf=min_leaf.value,
                max_depth=max_depth.value,
                random_state=42
            )
            model.fit(X_tr, y_train)
            pred = model.predict(X_te)

            # Keep the test-set index on the table so every row can be traced
            # back to the record it came from in the loaded data.
            label_names = {0: 'nein', 1: 'ja'}
            results = pd.DataFrame(
                {
                    'Actual': y_test.map(label_names).values,
                    'Predicted': pd.Series(pred).map(label_names).values,
                    'Correct': np.where(pred == y_test.values, '✓', '✗'),
                },
                index=y_test.index,
            )

            print(f"Accuracy: {accuracy_score(y_test, pred)*100:.1f}%\n")
            display(results)

        except Exception as e:
            # Best-effort UI callback: show the problem in the output area
            # rather than losing it in the kernel log.
            print(f"❌ Error: {str(e)}")

# Run make_prediction on every click of the button.
predict_trigger.on_click(make_prediction)

# Tab 3 layout: heading, button, results area.
tab3 = widgets.VBox(children=[
    widgets.HTML(value="<h3>Predictions</h3>"),
    predict_trigger,
    pred_output,
])

In [50]:
# TAB 4: SHAP analysis (interactive)

# Three buttons of identical width, one per plot type; each gets its own
# Layout instance.
shap_summary_btn, shap_bar_btn, shap_waterfall_btn = [
    widgets.Button(description=caption, layout=widgets.Layout(width='150px'))
    for caption in ("Summary Plot", "Bar Plot", "Waterfall Plot")
]

# Shared output area that all three SHAP callbacks draw into.
shap_output = widgets.Output()

def compute_shap():
    """Fit a tree on the selected features and return its SHAP values.

    The feature list comes from ``feature_selector`` and the hyperparameters
    from the ``min_split`` / ``min_leaf`` / ``max_depth`` sliders. SHAP values
    are computed for the scaled test set.

    Returns
    -------
    tuple
        ``(explainer, shap_values, feats)`` where ``shap_values`` has been
        reduced to the positive class (index 1) when the explainer returns
        one set of values per class.

    Raises
    ------
    ValueError
        If no feature is selected.
    """
    feats = list(feature_selector.value)
    if not feats:
        raise ValueError("Please select at least 1 feature")

    tree = DecisionTreeClassifier(
        random_state=42,
        max_depth=max_depth.value,
        min_samples_leaf=min_leaf.value,
        min_samples_split=min_split.value,
    )
    tree.fit(X_train_scaled[feats], y_train)

    explainer = shap.TreeExplainer(tree)
    values = explainer.shap_values(X_test_scaled[feats])

    # Binary classification: keep only class 1 (positive class).
    if isinstance(values, list):
        # One array per class.
        values = values[1]
    elif values.ndim == 3:
        # Single array shaped (n_samples, n_features, n_classes).
        values = values[:, :, 1]

    return explainer, values, feats


def show_shap_summary(btn):
    """Button callback: draw the SHAP summary plot into ``shap_output``.

    ``btn`` is the clicked button passed by ``on_click`` and is not used.
    Any exception raised while computing or plotting is printed to the
    output area instead of propagating.
    """
    with shap_output:
        clear_output(wait=True)
        try:
            shap_result = compute_shap()
            values, feats = shap_result[1], shap_result[2]
            test_features = X_test_scaled[feats]
            shap.summary_plot(values, test_features, feature_names=feats)
            plt.show()
        except Exception as e:
            print(f"Error: {str(e)}")


def show_shap_bar(btn):
    """Button callback: draw a horizontal bar chart of mean |SHAP| per feature.

    ``btn`` is the clicked button passed by ``on_click`` and is not used.
    Features are ordered by ascending mean absolute SHAP value. On failure
    the error message and the full traceback are printed to ``shap_output``.
    """
    with shap_output:
        clear_output(wait=True)
        try:
            shap_values, feats = compute_shap()[1:]

            # Average magnitude per feature; shap_values is expected to be
            # (n_samples, n_features) at this point.
            importance = np.abs(shap_values).mean(axis=0)

            # Ascending order of importance.
            order = np.argsort(importance)
            ranked_names = [feats[i] for i in order]
            ranked_values = importance[order]

            # Horizontal bars, one per feature.
            fig, ax = plt.subplots(figsize=(10, 6))
            positions = np.arange(len(ranked_names))
            ax.barh(positions, ranked_values, color='steelblue')
            ax.set_yticks(positions)
            ax.set_yticklabels(ranked_names)
            ax.set_xlabel('Mean |SHAP value| (average impact on model output)', fontsize=11)
            ax.set_title('Feature Importance', fontsize=13, fontweight='bold')
            ax.grid(axis='x', alpha=0.3)
            plt.tight_layout()
            plt.show()
        except Exception as e:
            import traceback
            print(f"Error: {str(e)}")
            print(traceback.format_exc())


def show_shap_waterfall(btn):
    """Button callback: draw a SHAP waterfall plot for one test sample.

    Explains the first row of the scaled test set (``sample_idx = 0``) using
    the SHAP values and explainer returned by ``compute_shap`` and renders the
    plot into ``shap_output``.

    Parameters
    ----------
    btn : widgets.Button
        The clicked button, as passed by ``on_click``; not used.

    Returns
    -------
    None
        Any exception raised while computing or plotting is printed to the
        output area instead of propagating.
    """
    with shap_output:
        clear_output(wait=True)
        try:
            explainer, shap_values, feats = compute_shap()

            # Row of the test set to explain.
            sample_idx = 0

            shap_vec = shap_values[sample_idx]          # shape (n_features,)
            sample_data = X_test_scaled[feats].iloc[sample_idx].values

            # Reduce the expected value to one scalar. It may arrive as a
            # plain scalar, or as a list/array with one entry per model
            # output. Take the positive class (index 1) when there are several
            # entries, otherwise the only entry; unconditional indexing with
            # [1] would raise IndexError on a length-1 array.
            base_vals = np.ravel(explainer.expected_value)
            base_val = float(base_vals[1] if base_vals.size > 1 else base_vals[0])

            # Build Explanation for one sample
            expl = shap.Explanation(
                values=shap_vec,
                base_values=base_val,
                data=sample_data,
                feature_names=feats
            )

            shap.plots.waterfall(expl, max_display=15)
            plt.show()
        except Exception as e:
            print(f"Error: {str(e)}")


# Bind each SHAP button to its callback.
shap_summary_btn.on_click(show_shap_summary)
shap_bar_btn.on_click(show_shap_bar)
shap_waterfall_btn.on_click(show_shap_waterfall)

# Tab 4 layout: heading, the button row, then the shared plot area.
shap_button_row = widgets.HBox(children=[shap_summary_btn, shap_bar_btn, shap_waterfall_btn])
tab4 = widgets.VBox(children=[
    widgets.HTML(value="<h3>SHAP-Analyse (Interaktiv)</h3>"),
    shap_button_row,
    shap_output,
])

In [51]:
# Display Dashboard
# Assemble the four tabs in order and label them.
tabs = widgets.Tab(children=[tab1, tab2, tab3, tab4])
tabs.set_title(0, 'Datensatz')
tabs.set_title(1, 'Entscheidungsbaum')
tabs.set_title(2, 'Vorhersage')
tabs.set_title(3, 'SHAP-Analyse')

# Heading first, then the tab container.
display(
    widgets.HTML(value="<h2>Interactive Dashboard</h2>"),
    tabs,
)


HTML(value='<h2>Interactive Dashboard</h2>')

Tab(children=(VBox(children=(HTML(value='<h3>Select Features</h3>'), SelectMultiple(description='Features:', i…