In [4]:
import sqlite3

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

from autogluon.tabular import TabularDataset, TabularPredictor

In [5]:
# 1) Load the dataset
# Each row contains:
#   - igef: unique vehicle identifier
#   - test: name of the performed test (e.g. "test_17", "test_80", etc.)
#   - test_result: categorical outcome ("OK" or "NOK")
# The connection is closed in a `finally` clause so that a failing query
# (missing table, locked file, ...) does not leave the database handle open.
conn = sqlite3.connect("dummy_data.db")
try:
    df = pd.read_sql("SELECT * FROM dummy_data", conn)
finally:
    conn.close()

# 2) Define the test to predict and the test split ratio
# The goal is to predict the result of "test_80" based on all other tests
label = "test_80"
test_size = 0.2

# 3) Convert test results to binary form
# "OK" becomes 0, "NOK" becomes 1
# This makes aggregation and modeling easier
#
# Series.map() turns every value that is missing from the mapping into NaN.
# The max-aggregations further down skip NaN and fillna(0) then reports the
# entry as "OK", so a malformed result would silently count as a passed test.
# Fail loudly instead and list the offending values.
result_codes = {"OK": 0, "NOK": 1}
is_known_result = df["test_result"].isin(list(result_codes))
unknown_results = df.loc[~is_known_result, "test_result"].unique()
if len(unknown_results) > 0:
    raise ValueError(f"Unexpected test_result values: {unknown_results.tolist()}")
df["binary_test_result"] = df["test_result"].map(result_codes)

# 4) Extract target labels
# Select the rows of the target test and collapse them to one value per
# vehicle (igef). With OK=0 and NOK=1, taking the maximum means that a single
# NOK among repeated entries of the target test marks the vehicle as NOK.
target_rows = df.loc[df["test"] == label]
worst_result_per_vehicle = target_rows.groupby("igef")["binary_test_result"].max()

# One-column frame, indexed by igef, whose column carries the label name
y = worst_result_per_vehicle.to_frame(name=label)

# 5) Build the feature matrix
# One row per vehicle (igef), one column per test other than the target,
# each cell holding the maximum binary result of that vehicle/test pair
# (so a repeated test counts as failed if any of its entries is NOK).
feature_rows = df.loc[df["test"] != label]
X = pd.pivot_table(
    feature_rows,
    index="igef",
    columns="test",
    values="binary_test_result",
    aggfunc="max",
)

# Order the test columns by name, then fill vehicle/test pairs without a
# recorded result with 0, i.e. assume the test was not failed.
X = X.sort_index(axis=1).fillna(0)

# 6) Attach the target column to the feature matrix
# Both frames are indexed by igef; the inner join keeps only vehicles that
# appear in both, i.e. vehicles with a known result for the target test.
labelled_features = X.join(y, how="inner")

# Turn the igef index back into a regular column
data = labelled_features.reset_index()

# 7) Vehicle-level train/test split
# The vehicle identifiers are split (not the raw test rows), so every vehicle
# lands in exactly one of the two sets. Stratifying on the label keeps the
# OK/NOK ratio the same in both sets; the fixed random_state makes the split
# repeatable.
igefs = data["igef"].unique()
split_options = {
    "test_size": test_size,
    "stratify": data[label],
    "random_state": 42,
}
igef_train, igef_test = train_test_split(igefs, **split_options)

# Select the rows belonging to each group of vehicles
in_train = data["igef"].isin(igef_train)
in_test = data["igef"].isin(igef_test)
train = data.loc[in_train]
test = data.loc[in_test]


In [6]:
# Train an AutoGluon TabularPredictor on the training split.
#
# Predictor settings:
#   label        column to predict ("test_80")
#   eval_metric  F1 score (suitable for imbalanced OK/NOK data)
#
# Fit settings:
#   presets           predefined training configuration balancing speed and
#                     quality (AutoGluon logs this name as an alias of
#                     'medium_quality')
#   num_bag_folds     enables bagging for better generalization
#                     (cross-validation based)
#   num_stack_levels  enables stacking of models for improved accuracy
#   time_limit        maximum training time in seconds (here: 30 minutes)
fit_options = {
    "presets": "medium_quality_faster_train",
    "num_bag_folds": 5,
    "num_stack_levels": 1,
    "time_limit": 1800,
}

# The vehicle identifier is not a feature, so it is removed before training
training_frame = train.drop(columns=["igef"])

predictor = TabularPredictor(label=label, eval_metric="f1").fit(
    train_data=training_frame,
    **fit_options,
)

No path specified. Models will be saved in: "AutogluonModels/ag-20251030_085810"
Preset alias specified: 'medium_quality_faster_train' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.12.12
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:34 PDT 2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T8103
CPU Count:          8
Memory Avail:       5.35 GB / 16.00 GB (33.4%)
Disk Space Avail:   65.62 GB / 228.27 GB (28.7%)
Presets specified: ['medium_quality_faster_train']
Using hyperparameters preset: hyperparameters='default'
Beginning AutoGluon training ... Time limit = 1800s
AutoGluon will save models to "/Users/lpossner/Projects/databot/AutogluonModels/ag-20251030_085810"
Train Data Rows:    800
Train Data Columns: 80
Label Column:       test_80
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label value

In [7]:
# Evaluate model performance on the held-out test set

# "igef" is only an identifier and not a predictive feature. Drop it once and
# reuse the resulting frame for every call below instead of rebuilding it.
test_data = test.drop(columns=["igef"])

# Binary encoding of the label: 0 = "OK", 1 = "NOK". Passing the codes
# explicitly keeps the report rows and the matrix shape fixed (2 classes) even
# if one class is absent from y_true/y_pred, and lets the report show the
# class names instead of the raw codes.
class_codes = [0, 1]
class_names = ["OK", "NOK"]

# 1) Generate a leaderboard of all trained models
#    Shows model names, validation scores, training times, and performance on the test set
#    display() renders the full table; print() wraps and truncates wide frames
leaderboard = predictor.leaderboard(test_data, silent=True)
display(leaderboard)

# 2) Generate predictions for the test set
y_true = test[label]
y_pred = predictor.predict(test_data)

# 3) Display classification metrics
#    Shows precision, recall, F1-score, and support for each class (OK/NOK)
print(classification_report(y_true, y_pred, labels=class_codes, target_names=class_names))

# 4) Display confusion matrix
#    Rows = true labels, Columns = predicted labels (both in the order OK, NOK)
#    Helps visualize false positives/negatives
print(confusion_matrix(y_true, y_pred, labels=class_codes))

# 5) Compute and display feature importance
#    Quantifies how much each test (feature) contributes to predicting test_80
features_importances = predictor.feature_importance(test_data)
display(features_importances.head(20))

Computing feature importance via permutation shuffling for 80 features using 200 rows with 5 shuffle sets...
	6.54s	= Expected runtime (1.31s per shuffle set)


                      model  score_test  score_val eval_metric  \
0           CatBoost_BAG_L1    1.000000   1.000000          f1   
1       WeightedEnsemble_L2    1.000000   1.000000          f1   
2    NeuralNetFastAI_BAG_L1    1.000000   1.000000          f1   
3           LightGBM_BAG_L2    1.000000   1.000000          f1   
4      LightGBMLarge_BAG_L2    1.000000   1.000000          f1   
5           CatBoost_BAG_L2    1.000000   1.000000          f1   
6            XGBoost_BAG_L2    1.000000   0.987952          f1   
7     NeuralNetTorch_BAG_L2    1.000000   1.000000          f1   
8    NeuralNetFastAI_BAG_L2    1.000000   1.000000          f1   
9   RandomForestGini_BAG_L2    1.000000   1.000000          f1   
10    ExtraTreesEntr_BAG_L2    1.000000   1.000000          f1   
11    ExtraTreesGini_BAG_L2    1.000000   1.000000          f1   
12  RandomForestEntr_BAG_L2    1.000000   1.000000          f1   
13      WeightedEnsemble_L3    1.000000   1.000000          f1   
14     Lig

	0.55s	= Actual runtime (Completed 5 of 5 shuffle sets)


         importance    stddev   p_value  n  p99_high   p99_low
test_2     0.360000  0.036515  0.000013  5  0.435185  0.284815
test_1     0.283072  0.091498  0.001146  5  0.471467  0.094677
test_4     0.214436  0.035653  0.000088  5  0.287846  0.141026
test_3     0.098195  0.045124  0.004122  5  0.191107  0.005284
test_61    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_60    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_6     0.000000  0.000000  0.500000  5  0.000000  0.000000
test_59    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_58    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_57    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_56    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_0     0.000000  0.000000  0.500000  5  0.000000  0.000000
test_55    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_62    0.000000  0.000000  0.500000  5  0.000000  0.000000
test_53    0.000000  0.000000  0.500000  5  0.000000  0