In [61]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [62]:
from models.mida import Mida
from models.mice import Mice

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from fancyimpute import KNN, SoftImpute

from preprocessing.data_loading import DataSet
from preprocessing.missing_value_generation import mcar_generator, mar_generator, mnar_generator
from models.imputation_wrapper import SingleImputationWrapper, MultiImputationWrapper
from analysis.evaluation import ClassificationEvaluation, LnormEvaluation, TimeEvaluation

In [63]:
# Data under study; the second argument is presumably the name of the target
# column -- confirm against DataSet.
dataset = DataSet('ionosphere', 'class')

# One generator per missingness mechanism, in the order the experiment loop
# will visit them.
amputations = (
    mcar_generator(),
    mar_generator(2),
    mnar_generator(2),
)

In [64]:
# Result collectors, all bound to the same dataset.
# The classification collector is given a 5-nearest-neighbour classifier.
clf_evaluation = ClassificationEvaluation(
    KNeighborsClassifier(n_neighbors=5),
    dataset,
)
lnorm_evaluation = LnormEvaluation(dataset)
time_evaluation = TimeEvaluation(dataset)

In [65]:
# Baseline on the complete dataset: rate 0, with the literal 'None' recorded
# for both the mechanism and the imputation. The time collector is not
# involved here because no imputation is run.
for evaluation in (clf_evaluation, lnorm_evaluation):
    evaluation.evaluate_result([dataset.complete_data()], 0, 'None', 'None')

In [66]:
# Imputation methods under comparison, each wrapped so the experiment loop can
# drive them through one interface (complete / name / number).
imputations = [
    # wrapped as single imputations
    SingleImputationWrapper(KNN(3, verbose=False)),
    SingleImputationWrapper(SoftImpute(verbose=False)),
    # wrapped as multiple imputations
    MultiImputationWrapper(Mice(40, verbose=False)),
    MultiImputationWrapper(Mida(40, verbose=False)),
]

In [67]:
# Experiment grid: every missingness mechanism x every missing rate x every
# imputation method. Each combination is scored by all three collectors.
for amputation in amputations:
    dataset.init_missing_data(amputation)
    # The mechanism label is constant for the whole inner grid: resolve it once.
    mechanism = amputation.name()

    # increasing rate p
    for p in [0.2, 0.4, 0.6, 0.8]:
        dataset.ampute_values(p)

        # apply each imputation
        for imputation in imputations:
            results, exec_time = imputation.complete(dataset.missing_data())
            method = imputation.name()

            clf_evaluation.evaluate_result(results, p, mechanism, method)
            lnorm_evaluation.evaluate_result(results, p, mechanism, method)
            # The time collector receives the elapsed time instead of the
            # imputed data, plus the value of imputation.number().
            time_evaluation.evaluate_result(exec_time, p, mechanism, method, imputation.number())
            # Progress line, e.g. "mcar: 0.2 KNN done".
            print('{}: {} {} done'.format(mechanism, p, method))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  result[c][rows] = np.nan


mcar: 0.2 KNN done




mcar: 0.2 SoftImpute done
mcar: 0.2 Mice done
mcar: 0.2 Mida done
mcar: 0.4 KNN done
mcar: 0.4 SoftImpute done
mcar: 0.4 Mice done
mcar: 0.4 Mida done
mcar: 0.6 KNN done
mcar: 0.6 SoftImpute done
mcar: 0.6 Mice done
mcar: 0.6 Mida done
mcar: 0.8 KNN done
mcar: 0.8 SoftImpute done
mcar: 0.8 Mice done
mcar: 0.8 Mida done


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  result[c][rows] = np.nan


mar: 0.2 KNN done
mar: 0.2 SoftImpute done
mar: 0.2 Mice done
mar: 0.2 Mida done
mar: 0.4 KNN done
mar: 0.4 SoftImpute done
mar: 0.4 Mice done
mar: 0.4 Mida done
mar: 0.6 KNN done
mar: 0.6 SoftImpute done
mar: 0.6 Mice done
mar: 0.6 Mida done
mar: 0.8 KNN done
mar: 0.8 SoftImpute done
mar: 0.8 Mice done
mar: 0.8 Mida done


  return (a - mns) / sstd
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  result[c][rows] = np.nan


mnar: 0.2 KNN done
mnar: 0.2 SoftImpute done
mnar: 0.2 Mice done
mnar: 0.2 Mida done
mnar: 0.4 KNN done
mnar: 0.4 SoftImpute done
mnar: 0.4 Mice done
mnar: 0.4 Mida done
mnar: 0.6 KNN done
mnar: 0.6 SoftImpute done
mnar: 0.6 Mice done
mnar: 0.6 Mida done
mnar: 0.8 KNN done
mnar: 0.8 SoftImpute done
mnar: 0.8 Mice done
mnar: 0.8 Mida done


In [68]:
# Persist the classification results, then leave them as the cell's final
# expression: Jupyter renders only the last expression of a cell, so the
# original mid-cell reference produced no output.
# Assumes evaluation_results is a plain attribute with no side effects on access.
clf_evaluation.dump_results()
clf_evaluation.evaluation_results

In [69]:
# Persist the L-norm results, then leave them as the cell's final expression:
# Jupyter renders only the last expression of a cell, so the original
# mid-cell reference produced no output.
# Assumes evaluation_results is a plain attribute with no side effects on access.
lnorm_evaluation.dump_results()
lnorm_evaluation.evaluation_results

In [70]:
# Persist the timing results, then leave them as the cell's final expression:
# Jupyter renders only the last expression of a cell, so the original
# mid-cell reference produced no output.
# Assumes evaluation_results is a plain attribute with no side effects on access.
time_evaluation.dump_results()
time_evaluation.evaluation_results

In [71]:
# Show the collected classification results. As the final (and only)
# expression of the cell, its value is rendered as the cell output.
clf_evaluation.evaluation_results

Unnamed: 0,"(settings, p)","(settings, mechanism)","(settings, imputation)","(settings, number)","(accuracy, post)","(accuracy, prev)","(f1-score, post)","(f1-score, prev)","(matthews, post)","(matthews, prev)"
0,0,,,1.0,0.804472,0.804472,0.865077,0.865077,0.586464,0.586464
1,0.2,mcar,KNN,1.0,0.806098,0.806098,0.865291,0.865291,0.587230,0.587230
2,0.2,mcar,SoftImpute,1.0,0.795528,0.795528,0.859076,0.859076,0.563824,0.563824
3,0.2,mcar,Mice,1.0,0.787805,0.787805,0.854779,0.854779,0.547339,0.547339
4,0.2,mcar,Mice,5.0,0.791463,0.796341,0.856883,0.859616,0.555155,0.566559
5,0.2,mcar,Mice,10.0,0.797967,0.797561,0.860667,0.860525,0.570424,0.569956
6,0.2,mcar,Mice,15.0,0.792683,0.797967,0.857708,0.860817,0.558778,0.570832
7,0.2,mcar,Mice,20.0,0.797561,0.798374,0.860350,0.860885,0.569928,0.571353
8,0.2,mcar,Mice,25.0,0.795122,0.798374,0.859008,0.860818,0.565210,0.571203
9,0.2,mcar,Mice,30.0,0.799187,0.797967,0.861234,0.860464,0.572911,0.570249
