In [1]:
from jenga.tasks.openml import OpenMLRegressionTask, OpenMLBinaryClassificationTask, OpenMLMultiClassClassificationTask
from jenga.corruptions.generic import MissingValues
from jenga.evaluation.corruption_impact import CorruptionImpactEvaluator

import numpy as np

## Some Helper Functions

In [2]:
# Repetition count handed to every CorruptionImpactEvaluator.evaluate() call in this notebook.
# NOTE(review): no random seed is set anywhere in this notebook; if the corruption
# samples rows randomly, the corrupted scores will vary between runs — TODO confirm.
num_repetitions = 10

def print_result(results, metric):
    """Print the clean-data score next to the mean corrupted-data score.

    Only the first entry of ``results`` is reported.

    Args:
        results: Indexable collection whose first element exposes a
            ``baseline_score`` attribute and a ``corrupted_scores`` attribute
            (the latter is averaged with ``np.mean``).
        metric: Label of the metric, interpolated into the header line.

    Returns:
        None. The report is written to standard output.
    """
    first_result = results[0]
    clean_score = first_result.baseline_score
    mean_corrupted_score = np.mean(first_result.corrupted_scores)

    # The empty first and last entries reproduce the blank lines that frame the report.
    report_lines = [
        "",
        f"Score ({metric}) on ",
        f"  clean data:     {clean_score}",
        f"  corrupted data: {mean_corrupted_score}",
        "",
    ]
    print("\n".join(report_lines))

## Binary Classification

In [3]:
# Create the binary classification task.
# presumably 1471 is the OpenML dataset id — TODO confirm
binary_task = OpenMLBinaryClassificationTask(1471)

The baseline model is fitted internally on the task's training data.

In [4]:
# Fit the task's baseline model and keep the returned model for later evaluation.
binary_task_model = binary_task.fit_baseline_model()

# Show the baseline score reported by the task.
# NOTE(review): the "ROC/AUC" label assumes that is the task's metric — TODO confirm.
print(f"Baseline ROC/AUC score: {binary_task.get_baseline_performance()}")

Baseline ROC/AUC score: 0.5909610347992149


Insert some corruptions and measure their impact.

In [5]:
# Evaluator bound to the binary task.
binary_task_evaluator = CorruptionImpactEvaluator(binary_task)

# Missing-value corruption on column 'V3', using np.nan as the missing marker.
# presumably `fraction` is the share of rows that get corrupted — TODO confirm
binary_task_corruption = MissingValues(column='V3', fraction=0.5, na_value=np.nan)
# Evaluate the baseline model against the corruption with num_repetitions repetitions.
binary_task_results = binary_task_evaluator.evaluate(binary_task_model, num_repetitions, binary_task_corruption)

0/10 (0.072241)


In [6]:
# Summarize the evaluation: print_result reads results[0] and averages its corrupted_scores.
print_result(binary_task_results, "ROC/AUC")


Score (ROC/AUC) on 
  clean data:     0.5909610347992149
  corrupted data: 0.5853965450582252



## Multi-Class Classification

In [7]:
# Create the multi-class classification task.
# presumably 26 is the OpenML dataset id — TODO confirm
multi_class_task = OpenMLMultiClassClassificationTask(26)

The baseline model is fitted internally on the task's training data.

In [8]:
# Fit the task's baseline model and keep the returned model for later evaluation.
multi_class_task_model = multi_class_task.fit_baseline_model()

# Show the baseline score reported by the task.
# NOTE(review): the "F1" label assumes that is the task's metric — TODO confirm.
print(f"Baseline F1 score: {multi_class_task.get_baseline_performance()}")



Baseline F1 score: 0.7461810988525959


Insert some corruptions and measure their impact.

In [9]:
# Evaluator bound to the multi-class task.
multi_class_task_evaluator = CorruptionImpactEvaluator(multi_class_task)

# Missing-value corruption on column 'parents', using np.nan as the missing marker.
# presumably `fraction` is the share of rows that get corrupted — TODO confirm
multi_class_task_corruption = MissingValues(column='parents', fraction=0.4, na_value=np.nan)
# Evaluate the baseline model against the corruption with num_repetitions repetitions.
multi_class_task_results = multi_class_task_evaluator.evaluate(multi_class_task_model, num_repetitions, multi_class_task_corruption)

0/10 (0.019579999999999487)


In [10]:
# Summarize the evaluation: print_result reads results[0] and averages its corrupted_scores.
print_result(multi_class_task_results, "F1")


Score (F1) on 
  clean data:     0.7461810988525959
  corrupted data: 0.6857573065971131



## Regression

In [11]:
# Create the regression task.
# presumably 42545 is the OpenML dataset id — TODO confirm
regression = OpenMLRegressionTask(42545)

The baseline model is fitted internally on the task's training data.

In [12]:
# Fit the task's baseline model and keep the returned model for later evaluation.
regression_model = regression.fit_baseline_model()

# Show the baseline score reported by the task.
# NOTE(review): the "MSE" label assumes that is the task's metric — TODO confirm.
print(f"Baseline MSE score: {regression.get_baseline_performance()}")

Baseline MSE score: 994.9218977535381


Insert some corruptions and measure their impact.

In [13]:
# Evaluator bound to the regression task.
regression_evaluator = CorruptionImpactEvaluator(regression)

# Missing-value corruption on column 'Material', using np.nan as the missing marker.
# presumably `fraction` is the share of rows that get corrupted — TODO confirm
regression_corruption = MissingValues(column='Material', fraction=0.3, na_value=np.nan)
# Evaluate the baseline model against the corruption with num_repetitions repetitions.
regression_results = regression_evaluator.evaluate(regression_model, num_repetitions, regression_corruption)

0/10 (0.008453999999999517)


In [14]:
# Summarize the evaluation: print_result reads results[0] and averages its corrupted_scores.
print_result(regression_results, "MSE")


Score (MSE) on 
  clean data:     994.9218977535381
  corrupted data: 1106.604385314484

