In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

import wandb

In [5]:
datasets = {}
train_size = 0.9

for i in tqdm(['a', 'b', 'c']):
    data = pd.read_excel(f"./data/trainingdata_{i}.xls")
    x = data[['x_i1', 'x_i2']].to_numpy()
    y = data['l_i'].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=train_size)
    datasets[i] = {
        'train': (X_train, y_train),
        'test': (X_test, y_test),
    }

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




In [6]:
def compute_metrics(labels, preds):
    precision, recall, f1, support = precision_recall_fscore_support(labels, preds, average='weighted', labels=np.unique(preds))
    acc = accuracy_score(labels, preds)

    return {
        'acc': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'support': support
    }

In [8]:
classifiers = {
    'LR': LogisticRegression(),
    'SVM': SVC()
}

for ds_name, data in tqdm(datasets.items()):
    for clf_name, classifier in tqdm(classifiers.items()):
        wandb.init(project='dependable-classification', entity='implication-elimination', config=None)
        wandb.config.model = clf_name
        wandb.config.dataset = ds_name

        classifier.fit(data['train'][0], data['train'][1])

        train_metrics = compute_metrics(data['train'][1], classifier.predict(data['train'][0]))
        test_metrics = compute_metrics(data['test'][1], classifier.predict(data['test'][0]))

        train_metrics = {f"train_{k}": v for k, v in train_metrics.items()} 
        test_metrics = {f"test_{k}": v for k, v in test_metrics.items()}

        wandb.log(train_metrics | test_metrics)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdeutschmann[0m (use `wandb login --relogin` to force relogin)


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.66444
train_f1,0.53227
train_precision,0.44395
train_recall,0.66444
test_acc,0.74
test_f1,0.85057
test_precision,0.74
test_recall,1.0
_runtime,2.0
_timestamp,1618042621.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁





HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.87111
train_f1,0.86751
train_precision,0.87135
train_recall,0.87111
test_acc,0.86
test_f1,0.85601
test_precision,0.85552
test_recall,0.86
_runtime,2.0
_timestamp,1618042627.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.84511
train_f1,0.84514
train_precision,0.84521
train_recall,0.84511
test_acc,0.856
test_f1,0.85619
test_precision,0.85737
test_recall,0.856
_runtime,1.0
_timestamp,1618042632.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁





HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.88711
train_f1,0.88711
train_precision,0.8871
train_recall,0.88711
test_acc,0.902
test_f1,0.90209
test_precision,0.90254
test_recall,0.902
_runtime,3.0
_timestamp,1618042639.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
train_acc,0.90442
train_f1,0.94981
train_precision,0.90442
train_recall,1.0
test_acc,0.9012
test_f1,0.94803
test_precision,0.9012
test_recall,1.0
_runtime,3.0
_timestamp,1618042645.0


0,1
train_acc,▁
train_f1,▁
train_precision,▁
train_recall,▁
test_acc,▁
test_f1,▁
test_precision,▁
test_recall,▁
_runtime,▁
_timestamp,▁




