In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier

import wandb

In [8]:
datasets = {}
train_size = 0.9

for i in tqdm(['a', 'b', 'c']):
    data = pd.read_excel(f"./data/trainingdata_{i}.xls")
    x = data[['x_i1', 'x_i2']].to_numpy()
    y = data['l_i'].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=train_size)
    datasets[i] = {
        'train': (X_train, y_train),
        'test': (X_test, y_test),
    }

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




In [3]:
def compute_metrics(labels, preds):
    precision, recall, f1, support = precision_recall_fscore_support(labels, preds, average='weighted', labels=np.unique(preds))
    acc = accuracy_score(labels, preds)

    return {
        'acc': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'support': support
    }

In [17]:
classifiers = {
    'GaussianNB': GaussianNB(),
    'LR': LogisticRegression(),
    'SVM (linear)': SVC(kernel='linear'),
    'SVM (RBF)': SVC(kernel='rbf'),
    'GradBoost': GradientBoostingClassifier()
}

for ds_name, data in tqdm(datasets.items()):
    for clf_name, classifier in tqdm(classifiers.items()):
        wandb.init(project='dependable-classification', entity='implication-elimination', config=None)
        wandb.config.model = clf_name
        wandb.config.dataset = ds_name

        classifier.fit(data['train'][0], data['train'][1])

        train_metrics = compute_metrics(data['train'][1], classifier.predict(data['train'][0]))
        test_metrics = compute_metrics(data['test'][1], classifier.predict(data['test'][0]))

        train_metrics = {f"train_{k}": v for k, v in train_metrics.items()} 
        test_metrics = {f"test_{k}": v for k, v in test_metrics.items()}

        wandb.log(train_metrics | test_metrics)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.90351
train_f1,0.94931
train_precision,0.90351
train_recall,1.0
test_acc,0.9094
test_f1,0.95255
test_precision,0.9094
test_recall,1.0
_runtime,95.0
_timestamp,1618056062.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.77222
train_f1,0.73263
train_precision,0.81031
train_recall,0.77222
test_acc,0.75
test_f1,0.7086
test_precision,0.76989
test_recall,0.75
_runtime,4.0
_timestamp,1618057029.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.67444
train_f1,0.80557
train_precision,0.67444
train_recall,1.0
test_acc,0.67
test_f1,0.8024
test_precision,0.67
test_recall,1.0
_runtime,3.0
_timestamp,1618057036.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.67444
train_f1,0.80557
train_precision,0.67444
train_recall,1.0
test_acc,0.67
test_f1,0.8024
test_precision,0.67
test_recall,1.0
_runtime,2.0
_timestamp,1618057042.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.87333
train_f1,0.86977
train_precision,0.87318
train_recall,0.87333
test_acc,0.84
test_f1,0.8288
test_precision,0.84984
test_recall,0.84
_runtime,2.0
_timestamp,1618057048.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁





HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.96
train_f1,0.95953
train_precision,0.96067
train_recall,0.96
test_acc,0.92
test_f1,0.91771
test_precision,0.92353
test_recall,0.92
_runtime,2.0
_timestamp,1618057054.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.84556
train_f1,0.84556
train_precision,0.84558
train_recall,0.84556
test_acc,0.876
test_f1,0.87576
test_precision,0.87835
test_recall,0.876
_runtime,2.0
_timestamp,1618057060.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.846
train_f1,0.84601
train_precision,0.84601
train_recall,0.846
test_acc,0.878
test_f1,0.87759
test_precision,0.88236
test_recall,0.878
_runtime,3.0
_timestamp,1618057068.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.84533
train_f1,0.84536
train_precision,0.84541
train_recall,0.84533
test_acc,0.878
test_f1,0.8777
test_precision,0.88114
test_recall,0.878
_runtime,4.0
_timestamp,1618057076.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.88689
train_f1,0.88686
train_precision,0.88686
train_recall,0.88689
test_acc,0.904
test_f1,0.90382
test_precision,0.90654
test_recall,0.904
_runtime,3.0
_timestamp,1618057082.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁





HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.99622
train_f1,0.99622
train_precision,0.99622
train_recall,0.99622
test_acc,0.984
test_f1,0.984
test_precision,0.98428
test_recall,0.984
_runtime,3.0
_timestamp,1618057088.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.90378
train_f1,0.94946
train_precision,0.90378
train_recall,1.0
test_acc,0.907
test_f1,0.95123
test_precision,0.907
test_recall,1.0
_runtime,1.0
_timestamp,1618057093.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.90378
train_f1,0.94946
train_precision,0.90378
train_recall,1.0
test_acc,0.907
test_f1,0.95123
test_precision,0.907
test_recall,1.0
_runtime,2.0
_timestamp,1618057100.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.90378
train_f1,0.94946
train_precision,0.90378
train_recall,1.0
test_acc,0.907
test_f1,0.95123
test_precision,0.907
test_recall,1.0
_runtime,19.0
_timestamp,1618057123.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.90378
train_f1,0.94946
train_precision,0.90378
train_recall,1.0
test_acc,0.907
test_f1,0.95123
test_precision,0.907
test_recall,1.0
_runtime,83.0
_timestamp,1618057209.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁




