In [94]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [95]:
from models.mida import Mida
from models.mice import Mice

import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from fancyimpute import KNN, SoftImpute

from preprocessing.data_loading import DataSet
from preprocessing.missing_value_generation import mcar_generator, mar_generator, mnar_generator
from models.imputation_wrapper import SingleImputationWrapper, MultiImputationWrapper
from analysis.evaluation import OutlierEvaluation, LnormEvaluation, TimeEvaluation
from analysis.method_wrapper import OutlierMethodWrapper

In [96]:
# Seed NumPy's global RNG so that a Restart & Run All of this notebook is
# repeatable.
# NOTE(review): this only helps for code that draws from the global NumPy RNG;
# confirm that the amputation and imputation code does.
SEED = 42
np.random.seed(SEED)

# Load the 'ctg' data set (second argument 'outlier' presumably selects the
# outlier-detection task; confirm in preprocessing.data_loading).
dataset = DataSet('ctg', 'outlier')

# Amputation mechanism used to create the missing values; its name() is what
# the evaluators receive as the mechanism label.
# NOTE(review): the meaning of the argument 2 is not visible here; confirm.
amputation = mnar_generator(2)
dataset.init_missing_data(amputation)

In [97]:
# Evaluators that collect their results in `.evaluation_results`:
#   out_evaluation   - outlier detection, using a Local Outlier Factor detector
#   lnorm_evaluation - L-norm comparison, constructed from `dataset`
#   time_evaluation  - runtimes of the imputations
outlier_method = OutlierMethodWrapper(LocalOutlierFactor())
out_evaluation = OutlierEvaluation(outlier_method, dataset)
lnorm_evaluation = LnormEvaluation(dataset)
time_evaluation = TimeEvaluation()

In [98]:
# Baseline: evaluate the complete dataset, recorded with missing rate 0 and
# the placeholder 'None' for both the mechanism and the imputation label.
for evaluation in (out_evaluation, lnorm_evaluation):
    evaluation.evaluate_result([dataset.complete_data()], 0, 'None', 'None')

In [99]:
# Imputation methods under comparison, wrapped so the sweep below can call
# complete(), name() and number() on each of them uniformly.
# NOTE(review): 40 is presumably the number of Mice imputations (the runtime
# table reports number=40 for Mice); confirm in models.mice.
imputations = [
    SingleImputationWrapper(KNN(3, verbose=False)),
    SingleImputationWrapper(SoftImpute(verbose=False)),
    MultiImputationWrapper(Mice(40, verbose=False)),
]

In [100]:
# Missing-data rates to sweep over, in increasing order.
MISSING_RATES = (0.2, 0.4, 0.6)

for p in MISSING_RATES:
    dataset.ampute_values(p)

    # Run every imputation on the current missing data and record its results.
    for imputation in imputations:
        results, exec_time = imputation.complete(dataset.missing_data())

        # Both evaluators take the same arguments; call order is preserved.
        for evaluation in (out_evaluation, lnorm_evaluation):
            evaluation.evaluate_result(results, p, amputation.name(), imputation.name())

        # The runtime record additionally carries imputation.number().
        time_evaluation.evaluate_result(
            exec_time, p, amputation.name(), imputation.name(), imputation.number()
        )

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  result[c][rows] = np.nan


In [101]:
# Display the results collected by the outlier evaluation.
out_evaluation.evaluation_results

Unnamed: 0,"(settings, p)","(settings, mechanism)","(settings, imputation)","(settings, number)","(auc, post)","(auc, prev)"
0,0.0,,,1,0.36731,0.36731
1,0.2,mnar,KNN,1,0.37936,0.37936
2,0.2,mnar,SoftImpute,1,0.378787,0.378787
3,0.2,mnar,Mice,1,0.367698,0.367698
4,0.2,mnar,Mice,5,0.386607,0.371481
5,0.2,mnar,Mice,10,0.395101,0.372175
6,0.2,mnar,Mice,15,0.399976,0.371127
7,0.2,mnar,Mice,20,0.401771,0.371265
8,0.2,mnar,Mice,25,0.403986,0.370568
9,0.2,mnar,Mice,30,0.406135,0.371182


In [102]:
# Display the results collected by the L-norm evaluation.
lnorm_evaluation.evaluation_results

Unnamed: 0,"(settings, p)","(settings, mechanism)","(settings, imputation)","(settings, number)","(L1, post)","(L1, prev)","(L2, post)","(L2, prev)"
0,0.0,,,1,0.0,0.0,0.0,0.0
1,0.2,mnar,KNN,1,858.56703,858.56703,14.47802,14.47802
2,0.2,mnar,SoftImpute,1,777.316982,777.316982,16.199553,16.199553
3,0.2,mnar,Mice,1,1705.299077,1705.299077,24.12868,24.12868
4,0.2,mnar,Mice,5,1736.44867,1392.708673,24.061484,22.363233
5,0.2,mnar,Mice,10,1725.485014,2217.4317,24.088839,31.428351
6,0.2,mnar,Mice,15,1713.539592,2202.095312,23.916414,30.942531
7,0.2,mnar,Mice,20,1718.037682,2429.735689,24.042368,34.374232
8,0.2,mnar,Mice,25,1723.432975,2432.436177,24.120164,34.531561
9,0.2,mnar,Mice,30,1705.02024,2530.395027,23.821233,36.415818


In [104]:
# Display the runtimes recorded for each (missing rate, imputation) pair.
time_evaluation.evaluation_results

Unnamed: 0,p,mechanism,imputation,number,runtime
0,0.2,mnar,KNN,1,1.010353
1,0.2,mnar,SoftImpute,1,0.162035
2,0.2,mnar,Mice,40,2.489279
3,0.4,mnar,KNN,1,1.142491
4,0.4,mnar,SoftImpute,1,0.291798
5,0.4,mnar,Mice,40,2.932199
6,0.6,mnar,KNN,1,1.16161
7,0.6,mnar,SoftImpute,1,0.246941
8,0.6,mnar,Mice,40,2.71885
