In [1]:
from jenga.tasks.openml import OpenMLBinaryClassificationTask, OpenMLMultiClassClassificationTask, OpenMLRegressionTask
from jenga.corruptions.generic import MissingValues
    
import pandas as pd
import numpy as np

from data_imputation_paper.imputation.simple import ModeImputer
from data_imputation_paper.imputation.ml import KNNImputer, ForestImputer
from data_imputation_paper.imputation.dl import AutoKerasImputer
from data_imputation_paper.imputation.generative import GAINImputer, VAEImputer
from data_imputation_paper.evaluation import Evaluator

from tensorflow.compat.v1 import logging as tf_logging

In [2]:
# Raise TensorFlow's log threshold to ERROR so lower-severity messages
# are not emitted into the cell outputs of the imputer runs below.
tf_logging.set_verbosity(tf_logging.ERROR)

## Make deterministic

In [3]:
# Single seed value handed to the task and to every imputer's `arguments` below.
# NOTE(review): this cell only binds the value; no global RNG (numpy / TensorFlow)
# is seeded here, so determinism relies on each consumer honoring `seed` -- confirm.
seed = 42

## Create example task

In [4]:
# Multi-class classification task built from OpenML id 4552 with the shared seed.
# Every imputer evaluation in this notebook is run against this one task object.
task = OpenMLMultiClassClassificationTask(openml_id=4552, seed=seed)

## Insert missing values using jenga

In [5]:
# One MissingValues corruption per target column, all with the same settings:
# fraction 0.5 of the values replaced by NaN, using the 'MCAR' missingness pattern.
missing_values = [
    MissingValues(column=target_column, fraction=0.5, na_value=np.nan, missingness='MCAR')
    for target_column in ('V2', 'V4', 'V15')
]

## Mode imputation

In [6]:
arguments = {
    "seed": seed
}

%time Evaluator(task, missing_values, ModeImputer, arguments).evaluate(3).report()

Evaluation result contains 3 target columns: V2, V4, V15
All are in a round-robin fashion imputed and performances are as follows:

Target Column: V2
            train         test
MAE     29.624812    29.482594
MSE   1363.309159  1368.946568
RMSE    36.922279    36.994064

             score metric
baseline  0.375149     F1
imputed   0.363458     F1


Target Column: V4
                train      test
F1_micro     0.833039  0.851590
F1_macro     0.454457  0.459903
F1_weighted  0.757165  0.783374

             score metric
baseline  0.375149     F1
imputed   0.351790     F1


Target Column: V15
                train      test
F1_micro     0.110474  0.125442
F1_macro     0.012435  0.013919
F1_weighted  0.021986  0.028187

             score metric
baseline  0.375149     F1
imputed   0.379695     F1


CPU times: user 21.2 s, sys: 360 ms, total: 21.6 s
Wall time: 22.7 s


## KNN imputation

In [7]:
arguments = {
    "seed": seed,
    "hyperparameter_grid_categorical_imputer": {
        "n_neighbors": [3, 5]
    },
    "hyperparameter_grid_numerical_imputer": {
        "n_neighbors": [3, 5]
    }
}

%time Evaluator(task, missing_values, KNNImputer, arguments).evaluate(3).report()

Evaluation result contains 3 target columns: V2, V4, V15
All are in a round-robin fashion imputed and performances are as follows:

Target Column: V2
            train         test
MAE     30.291350    29.711896
MSE   1442.802407  1407.607303
RMSE    37.983067    37.506083

             score metric
baseline  0.375149     F1
imputed   0.353009     F1


Target Column: V4
                train      test
F1_micro     0.934392  0.935807
F1_macro     0.877264  0.869435
F1_weighted  0.933002  0.934965

             score metric
baseline  0.375149     F1
imputed   0.336378     F1


Target Column: V15
                train      test
F1_micro     0.518682  0.518257
F1_macro     0.424199  0.429181
F1_weighted  0.513344  0.512861

             score metric
baseline  0.375149     F1
imputed   0.378072     F1


CPU times: user 23.1 s, sys: 1.86 s, total: 24.9 s
Wall time: 24.3 s


## Forest imputation

In [8]:
arguments = {
    "seed": seed,
    "hyperparameter_grid_categorical_imputer": {
        "n_estimators": [50, 100]
    },
    "hyperparameter_grid_numerical_imputer": {
        "n_estimators": [50, 100]
    }
}

%time Evaluator(task, missing_values, ForestImputer, arguments).evaluate(3).report()

Evaluation result contains 3 target columns: V2, V4, V15
All are in a round-robin fashion imputed and performances are as follows:

Target Column: V2
            train         test
MAE     28.035203    28.014954
MSE   1246.471307  1288.985086
RMSE    35.304270    35.853690

             score metric
baseline  0.375149     F1
imputed   0.368868     F1


Target Column: V4
                train      test
F1_micro     0.949838  0.957008
F1_macro     0.906107  0.918848
F1_weighted  0.948601  0.955856

             score metric
baseline  0.375149     F1
imputed   0.370004     F1


Target Column: V15
                train      test
F1_micro     0.563989  0.557715
F1_macro     0.496052  0.498791
F1_weighted  0.560301  0.554790

             score metric
baseline  0.375149     F1
imputed   0.354385     F1


CPU times: user 41.6 s, sys: 879 ms, total: 42.5 s
Wall time: 39.6 s


## AutoKeras imputation

In [9]:
arguments = {
    "seed": seed,
    'max_trials': 2,
    'tuner': 'greedy',
    'validation_split': 0.1,
    'epochs': 2
}

%time Evaluator(task, missing_values, AutoKerasImputer, arguments).evaluate(3).report()

Trial 2 Complete [00h 00m 01s]
val_accuracy: 0.018099548295140266

Best val_accuracy So Far: 0.06334841996431351
Total elapsed time: 00h 00m 04s
Epoch 1/2
Epoch 2/2
Evaluation result contains 3 target columns: V2, V4, V15
All are in a round-robin fashion imputed and performances are as follows:

Target Column: V2
            train         test
MAE     28.337478    28.169789
MSE   1235.516494  1239.920704
RMSE    35.139482    35.199894

             score metric
baseline  0.375149     F1
imputed   0.334275     F1


Target Column: V4
                train      test
F1_micro     0.593998  0.617197
F1_macro     0.372923  0.392269
F1_weighted  0.520305  0.550621

             score metric
baseline  0.375149     F1
imputed   0.347820     F1


Target Column: V15
                train      test
F1_micro     0.123419  0.136042
F1_macro     0.047064  0.047856
F1_weighted  0.080608  0.086918

             score metric
baseline  0.375149     F1
imputed   0.350456     F1


CPU times: user 1min 47s,

## GAIN imputation

In [10]:
arguments = {
    "seed": seed,
    "hyperparameter_grid": {
        "gain": {
            "alpha": [80, 120],
            "hint_rate": [0.5, 0.9],
            "noise": [0.001, 0.1]
        },
        "training": {
            "batch_size": [64, 256],
            "epochs": [5, 15]
        }
    }
}

%time Evaluator(task, missing_values, GAINImputer, arguments).evaluate(3).report()

[33m[W 2021-03-10 14:56:01,320][0m Trial 0 failed, because the objective function returned nan.[0m
[33m[W 2021-03-10 14:59:25,885][0m Trial 8 failed, because the objective function returned nan.[0m
[33m[W 2021-03-10 14:59:55,813][0m Trial 23 failed, because the objective function returned nan.[0m
[33m[W 2021-03-10 15:02:06,940][0m Trial 25 failed, because the objective function returned nan.[0m


Evaluation result contains 3 target columns: V2, V4, V15
All are in a round-robin fashion imputed and performances are as follows:

Target Column: V2
            train         test
MAE     30.993606    31.073061
MSE   1567.326846  1553.833452
RMSE    39.585823    39.411223

             score metric
baseline  0.375149     F1
imputed   0.356433     F1


Target Column: V4
                train      test
F1_micro     0.875993  0.887515
F1_macro     0.693090  0.688986
F1_weighted  0.847361  0.858961

             score metric
baseline  0.375149     F1
imputed   0.354751     F1


Target Column: V15
                train      test
F1_micro     0.063254  0.059482
F1_macro     0.035009  0.030950
F1_weighted  0.045970  0.043272

             score metric
baseline  0.375149     F1
imputed   0.356033     F1


CPU times: user 12min 40s, sys: 2min 20s, total: 15min
Wall time: 9min 33s


## VAE imputation

In [11]:
arguments = {
    "seed": seed,
    "hyperparameter_grid": {
        "training": {
            "batch_size": [64, 256],
            "epochs": [5, 15]
        },
        "optimizer": {
            "learning_rate": [0.0005],
            "beta_1": [0.9],
            "beta_2": [0.999],
            "epsilon": [1e-7],
            "amsgrad": [False]
        },
        "neural_architecture": {
            "latent_dim_rel_size": [0.1],
            "n_layers": [1, 2],
            "layer_1_rel_size": [0.5],
            "layer_2_rel_size": [0.25],
        },
    }
}

%time Evaluator(task, missing_values, VAEImputer, arguments).evaluate(3).report()

Evaluation result contains 3 target columns: V2, V4, V15
All are in a round-robin fashion imputed and performances are as follows:

Target Column: V2
            train         test
MAE     35.230799    36.170749
MSE   2238.914073  2323.350147
RMSE    47.302482    48.165597

             score metric
baseline  0.375149     F1
imputed   0.359628     F1


Target Column: V4
                train      test
F1_micro     0.829214  0.830389
F1_macro     0.453314  0.453633
F1_weighted  0.751802  0.753511

             score metric
baseline  0.375149     F1
imputed   0.363702     F1


Target Column: V15
                train      test
F1_micro     0.044278  0.045347
F1_macro     0.012854  0.012906
F1_weighted  0.013700  0.015279

             score metric
baseline  0.375149     F1
imputed   0.358817     F1


CPU times: user 2min 27s, sys: 8.77 s, total: 2min 36s
Wall time: 2min 7s
