# Example Book - ALL AML

In [1]:
# Notebook setup: silence warnings, make the project packages importable, and
# pull in the feature-selection methods plus the Weka bridge used further down.

# Suppress every Python warning for the rest of the session.
import warnings
warnings.filterwarnings('ignore')
    
import os
import traceback
import sys

# Append the parent of the current working directory to sys.path. This has to
# run before the project-local imports below (filter_methods, wrapper_methods,
# embedded_methods) -- presumably those packages live in that parent directory.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
# Allow pandas to display up to 1000 rows before truncating.
pd.set_option('display.max_rows', 1000)
import numpy as np 
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
import matplotlib.pyplot as plt

# Filter-style feature selection.
from filter_methods.methods.cfs import cfs

# Wrapper-style feature selection.
from wrapper_methods.jackstraw.jack import jack_sel
from wrapper_methods.boruta.boruta import boruta_sel

# Embedded feature selection.
# NOTE(review): hsic_sel is imported but not called in the cells visible here.
from embedded_methods.methods.lasso import lasso_sel
from embedded_methods.methods.elastic_net import elastic_net_sel
from embedded_methods.methods.stability_selection import stability_selection
from embedded_methods.methods.hsic_lasso import hsic_sel

# python-weka-wrapper: JVM lifecycle control and dataset loading.
import weka.core.jvm as jvm
from weka.core.converters import Loader, Instances



In [2]:
# Full list of benchmark datasets, kept here for reference. Uncomment it (and
# drop the single-dataset assignment below) to process all of them.
# names = ['breast_cancer', 'Carcinom', 'chin', 'quality_control', 'CLL_SUB_111', 'SRBCT', 'Lymphoma', 'GLIOMA', 'CNS', 
# 'colon', 'Data_Cortex_Nuclear','Leukemia', 'LSVT', 'Prostate_GE', 'lung', 'MLL', 'prostate_cancer', 'ovarian',
# 'SMK_CAN_187', 'TOX_171', 'GLI_85']

# Datasets to process; each entry is read from '../data/<name>_clean.csv'.
# NOTE(review): the saved output of the next cell mentions other datasets
# (e.g. Carcinom, chin, GLIOMA, MLL), so it appears to come from a run with the
# longer list above -- re-run to refresh it.
names = ['breast_cancer']

In [3]:
# For every dataset in `names`, run each feature-selection method and record
# how many features it keeps. Afterwards print the per-method counts and, per
# dataset, the smallest count across methods.
#
# Any exception is reported as a traceback instead of aborting the notebook,
# and the JVM is always stopped on the way out.
# NOTE(review): python-weka-wrapper documents that the JVM cannot be started
# again in the same process once stopped, so re-running this cell most likely
# needs a kernel restart -- confirm against the installed version.
try:
    jvm.start()
    results = []   # (dataset name, [(method label, number of selected features), ...])
    results2 = []  # (dataset name, smallest number of selected features)

    for name in names:
        train_csv = '../data/' + name + '_clean.csv'

        length = []
        # load csv
        df = pd.read_csv(train_csv)

        # X,y -- the label is taken to be the LAST column, which is the same
        # convention data.class_is_last() applies on the Weka side below.
        class_ind = df.columns[-1]
        # Encode labels as integers starting at 1. Index by class_ind rather
        # than a hard-coded 'class' so the encoded column is always the one
        # that is dropped from the features and used as the target.
        df[class_ind] = pd.factorize(df[class_ind])[0] + 1

        features = df.drop(class_ind, axis=1)
        feature_names = np.array(features.columns.values)

        target = df[class_ind]

        # START FOR WEKA
        loader = Loader("weka.core.converters.CSVLoader")
        data = loader.load_file(train_csv)

        # define class attribute
        data.class_is_last()

        # attribute selection works on a copy of the loaded instances
        filter_data = Instances.copy_instances(data)

        # END FOR WEKA

        print('Start %s' % name)
        # Each selector's result is only measured with len() here.
        length.append(('cfs', len(cfs(filter_data))))
        length.append(('jack', len(jack_sel(features))))
        length.append(('boruta', len(boruta_sel(features, target))))
        length.append(('lasso', len(lasso_sel(features, target))))
        length.append(('EN', len(elastic_net_sel(features, target))))
        length.append(('SS', len(stability_selection(features, target))))
        print('End %s' % name)

        results.append((name, length))
        results2.append((name, min(count for _, count in length)))

    print(results)
    print(results2)

except Exception:
    # Best effort: show what went wrong but still fall through to jvm.stop().
    print(traceback.format_exc())
finally:
    jvm.stop()

DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['c:\\users\\linker\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\javabridge\\jars\\rhino-1.7R4.jar', 'c:\\users\\linker\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\javabridge\\jars\\runnablequeue.jar', 'c:\\users\\linker\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\javabridge\\jars\\cpython.jar', 'c:\\users\\linker\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\weka\\lib\\python-weka-wrapper.jar', 'c:\\users\\linker\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\weka\\lib\\weka.jar']
DEBUG:weka.core.jvm:MaxHeapSize=default
DEBUG:weka.core.jvm:Package support disabled


Start breast_cancer
Iteration: 	1 / 50
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	2 / 50
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	3 / 50
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	4 / 50
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	5 / 50
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	6 / 50
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	7 / 50
Confirmed: 	0
Tentative: 	30
Rejected: 	0
Iteration: 	8 / 50
Confirmed: 	21
Tentative: 	9
Rejected: 	0
Iteration: 	9 / 50
Confirmed: 	21
Tentative: 	7
Rejected: 	2
Iteration: 	10 / 50
Confirmed: 	21
Tentative: 	7
Rejected: 	2
Iteration: 	11 / 50
Confirmed: 	21
Tentative: 	7
Rejected: 	2
Iteration: 	12 / 50
Confirmed: 	22
Tentative: 	6
Rejected: 	2
Iteration: 	13 / 50
Confirmed: 	22
Tentative: 	6
Rejected: 	2
Iteration: 	14 / 50
Confirmed: 	22
Tentative: 	6
Rejected: 	2
Iteration: 	15 / 50
Confirmed: 	22
Tentative: 	6
Rejected: 	2
Iteration: 	16 / 50
Confirmed: 	22
Tentative: 	6
Rejected: 

LAS_Alpha= 0.007471821366404614
EN_Alpha= 0.0006566861506961144
EN_L1 ratio= 0.3
End Carcinom
Start chin
Iteration: 	1 / 50
Confirmed: 	0
Tentative: 	22215
Rejected: 	0
Iteration: 	2 / 50
Confirmed: 	0
Tentative: 	22215
Rejected: 	0
Iteration: 	3 / 50
Confirmed: 	0
Tentative: 	22215
Rejected: 	0
Iteration: 	4 / 50
Confirmed: 	0
Tentative: 	22215
Rejected: 	0
Iteration: 	5 / 50
Confirmed: 	0
Tentative: 	22215
Rejected: 	0
Iteration: 	6 / 50
Confirmed: 	0
Tentative: 	22215
Rejected: 	0
Iteration: 	7 / 50
Confirmed: 	0
Tentative: 	22215
Rejected: 	0
Iteration: 	8 / 50
Confirmed: 	0
Tentative: 	965
Rejected: 	21250
Iteration: 	9 / 50
Confirmed: 	317
Tentative: 	648
Rejected: 	21250
Iteration: 	10 / 50
Confirmed: 	317
Tentative: 	648
Rejected: 	21250
Iteration: 	11 / 50
Confirmed: 	317
Tentative: 	648
Rejected: 	21250
Iteration: 	12 / 50
Confirmed: 	351
Tentative: 	540
Rejected: 	21324
Iteration: 	13 / 50
Confirmed: 	351
Tentative: 	540
Rejected: 	21324
Iteration: 	14 / 50
Confirmed: 	351
T

Iteration: 	24 / 50
Confirmed: 	23
Tentative: 	13
Rejected: 	26
Iteration: 	25 / 50
Confirmed: 	23
Tentative: 	13
Rejected: 	26
Iteration: 	26 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	27 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	28 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	29 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	30 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	31 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	32 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	33 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	34 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	35 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	36 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	37 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	38 / 50
Confirmed: 	23
Tentative: 	12
Rejected: 	27
Iteration: 	39 / 50
Confirmed: 	23
Tenta

Iteration: 	32 / 50
Confirmed: 	142
Tentative: 	114
Rejected: 	2052
Iteration: 	33 / 50
Confirmed: 	142
Tentative: 	113
Rejected: 	2053
Iteration: 	34 / 50
Confirmed: 	144
Tentative: 	111
Rejected: 	2053
Iteration: 	35 / 50
Confirmed: 	144
Tentative: 	111
Rejected: 	2053
Iteration: 	36 / 50
Confirmed: 	144
Tentative: 	110
Rejected: 	2054
Iteration: 	37 / 50
Confirmed: 	148
Tentative: 	104
Rejected: 	2056
Iteration: 	38 / 50
Confirmed: 	148
Tentative: 	104
Rejected: 	2056
Iteration: 	39 / 50
Confirmed: 	148
Tentative: 	104
Rejected: 	2056
Iteration: 	40 / 50
Confirmed: 	150
Tentative: 	102
Rejected: 	2056
Iteration: 	41 / 50
Confirmed: 	150
Tentative: 	102
Rejected: 	2056
Iteration: 	42 / 50
Confirmed: 	150
Tentative: 	102
Rejected: 	2056
Iteration: 	43 / 50
Confirmed: 	152
Tentative: 	100
Rejected: 	2056
Iteration: 	44 / 50
Confirmed: 	152
Tentative: 	100
Rejected: 	2056
Iteration: 	45 / 50
Confirmed: 	152
Tentative: 	98
Rejected: 	2058
Iteration: 	46 / 50
Confirmed: 	152
Tentative: 	9

LAS_Alpha= 0.0006711655600916824
EN_Alpha= 0.0005267038650384707
EN_L1 ratio= 0.1
End Lymphoma
Start GLIOMA
Iteration: 	1 / 50
Confirmed: 	0
Tentative: 	4434
Rejected: 	0
Iteration: 	2 / 50
Confirmed: 	0
Tentative: 	4434
Rejected: 	0
Iteration: 	3 / 50
Confirmed: 	0
Tentative: 	4434
Rejected: 	0
Iteration: 	4 / 50
Confirmed: 	0
Tentative: 	4434
Rejected: 	0
Iteration: 	5 / 50
Confirmed: 	0
Tentative: 	4434
Rejected: 	0
Iteration: 	6 / 50
Confirmed: 	0
Tentative: 	4434
Rejected: 	0
Iteration: 	7 / 50
Confirmed: 	0
Tentative: 	4434
Rejected: 	0
Iteration: 	8 / 50
Confirmed: 	0
Tentative: 	694
Rejected: 	3740
Iteration: 	9 / 50
Confirmed: 	210
Tentative: 	484
Rejected: 	3740
Iteration: 	10 / 50
Confirmed: 	210
Tentative: 	484
Rejected: 	3740
Iteration: 	11 / 50
Confirmed: 	210
Tentative: 	484
Rejected: 	3740
Iteration: 	12 / 50
Confirmed: 	227
Tentative: 	368
Rejected: 	3839
Iteration: 	13 / 50
Confirmed: 	227
Tentative: 	368
Rejected: 	3839
Iteration: 	14 / 50
Confirmed: 	227
Tentative: 

Iteration: 	18 / 50
Confirmed: 	11
Tentative: 	11
Rejected: 	1978
Iteration: 	19 / 50
Confirmed: 	13
Tentative: 	9
Rejected: 	1978
Iteration: 	20 / 50
Confirmed: 	13
Tentative: 	9
Rejected: 	1978
Iteration: 	21 / 50
Confirmed: 	13
Tentative: 	9
Rejected: 	1978
Iteration: 	22 / 50
Confirmed: 	14
Tentative: 	8
Rejected: 	1978
Iteration: 	23 / 50
Confirmed: 	14
Tentative: 	8
Rejected: 	1978
Iteration: 	24 / 50
Confirmed: 	14
Tentative: 	8
Rejected: 	1978
Iteration: 	25 / 50
Confirmed: 	14
Tentative: 	8
Rejected: 	1978
Iteration: 	26 / 50
Confirmed: 	15
Tentative: 	7
Rejected: 	1978
Iteration: 	27 / 50
Confirmed: 	15
Tentative: 	7
Rejected: 	1978
Iteration: 	28 / 50
Confirmed: 	15
Tentative: 	7
Rejected: 	1978
Iteration: 	29 / 50
Confirmed: 	16
Tentative: 	6
Rejected: 	1978
Iteration: 	30 / 50
Confirmed: 	16
Tentative: 	6
Rejected: 	1978
Iteration: 	31 / 50
Confirmed: 	16
Tentative: 	6
Rejected: 	1978
Iteration: 	32 / 50
Confirmed: 	17
Tentative: 	5
Rejected: 	1978
Iteration: 	33 / 50
Conf

Iteration: 	43 / 50
Confirmed: 	150
Tentative: 	73
Rejected: 	6906
Iteration: 	44 / 50
Confirmed: 	150
Tentative: 	72
Rejected: 	6907
Iteration: 	45 / 50
Confirmed: 	150
Tentative: 	72
Rejected: 	6907
Iteration: 	46 / 50
Confirmed: 	151
Tentative: 	71
Rejected: 	6907
Iteration: 	47 / 50
Confirmed: 	151
Tentative: 	71
Rejected: 	6907
Iteration: 	48 / 50
Confirmed: 	151
Tentative: 	71
Rejected: 	6907
Iteration: 	49 / 50
Confirmed: 	153
Tentative: 	69
Rejected: 	6907


BorutaPy finished running.

Iteration: 	50 / 50
Confirmed: 	153
Tentative: 	50
Rejected: 	6907
Boruta ind
(array([ 155,  172,  234,  283,  311,  371,  460,  537,  667,  757,  759,
        803,  873,  875,  921,  996, 1080, 1119, 1132, 1143, 1158, 1206,
       1238, 1259, 1399, 1550, 1597, 1603, 1629, 1673, 1684, 1703, 1724,
       1744, 1778, 1816, 1828, 1833, 1881, 1901, 1908, 1925, 1927, 1952,
       1961, 2014, 2019, 2110, 2120, 2127, 2287, 2334, 2347, 2353, 2362,
       2393, 2401, 2440, 2496, 2516, 2545, 2591, 2625, 26

Iteration: 	43 / 50
Confirmed: 	142
Tentative: 	89
Rejected: 	5735
Iteration: 	44 / 50
Confirmed: 	142
Tentative: 	89
Rejected: 	5735
Iteration: 	45 / 50
Confirmed: 	142
Tentative: 	89
Rejected: 	5735
Iteration: 	46 / 50
Confirmed: 	142
Tentative: 	89
Rejected: 	5735
Iteration: 	47 / 50
Confirmed: 	142
Tentative: 	86
Rejected: 	5738
Iteration: 	48 / 50
Confirmed: 	142
Tentative: 	86
Rejected: 	5738
Iteration: 	49 / 50
Confirmed: 	142
Tentative: 	86
Rejected: 	5738


BorutaPy finished running.

Iteration: 	50 / 50
Confirmed: 	142
Tentative: 	64
Rejected: 	5738
Boruta ind
(array([  80,  116,  124,  125,  139,  142,  219,  301,  395,  427,  430,
        520,  529,  548,  718, 1176, 1228, 1479, 1522, 1557, 1621, 1730,
       1815, 1948, 1973, 1996, 2011, 2089, 2188, 2230, 2323, 2358, 2397,
       2400, 2428, 2430, 2488, 2551, 2565, 2585, 2659, 2679, 2682, 2710,
       2716, 2739, 2816, 2832, 2882, 2944, 2964, 2965, 2978, 2992, 2996,
       3009, 3029, 3076, 3077, 3114, 3170, 3241, 3275, 33

LAS_Alpha= 0.0017482736534798556
EN_Alpha= 0.0017482736534798556
EN_L1 ratio= 1.0
End lung
Start MLL
Iteration: 	1 / 50
Confirmed: 	0
Tentative: 	12582
Rejected: 	0
Iteration: 	2 / 50
Confirmed: 	0
Tentative: 	12582
Rejected: 	0
Iteration: 	3 / 50
Confirmed: 	0
Tentative: 	12582
Rejected: 	0
Iteration: 	4 / 50
Confirmed: 	0
Tentative: 	12582
Rejected: 	0
Iteration: 	5 / 50
Confirmed: 	0
Tentative: 	12582
Rejected: 	0
Iteration: 	6 / 50
Confirmed: 	0
Tentative: 	12582
Rejected: 	0
Iteration: 	7 / 50
Confirmed: 	0
Tentative: 	12582
Rejected: 	0
Iteration: 	8 / 50
Confirmed: 	0
Tentative: 	2069
Rejected: 	10513
Iteration: 	9 / 50
Confirmed: 	619
Tentative: 	1450
Rejected: 	10513
Iteration: 	10 / 50
Confirmed: 	619
Tentative: 	1450
Rejected: 	10513
Iteration: 	11 / 50
Confirmed: 	619
Tentative: 	1450
Rejected: 	10513
Iteration: 	12 / 50
Confirmed: 	641
Tentative: 	1063
Rejected: 	10878
Iteration: 	13 / 50
Confirmed: 	641
Tentative: 	1063
Rejected: 	10878
Iteration: 	14 / 50
Confirmed: 	641

LAS_Alpha= 0.0004666915505496598
EN_Alpha= 0.0003415164252411149
EN_L1 ratio= 0.95
End MLL
Start prostate_cancer
Iteration: 	1 / 50
Confirmed: 	0
Tentative: 	12600
Rejected: 	0
Iteration: 	2 / 50
Confirmed: 	0
Tentative: 	12600
Rejected: 	0
Iteration: 	3 / 50
Confirmed: 	0
Tentative: 	12600
Rejected: 	0
Iteration: 	4 / 50
Confirmed: 	0
Tentative: 	12600
Rejected: 	0
Iteration: 	5 / 50
Confirmed: 	0
Tentative: 	12600
Rejected: 	0
Iteration: 	6 / 50
Confirmed: 	0
Tentative: 	12600
Rejected: 	0
Iteration: 	7 / 50
Confirmed: 	0
Tentative: 	12600
Rejected: 	0
Iteration: 	8 / 50
Confirmed: 	0
Tentative: 	294
Rejected: 	12306
Iteration: 	9 / 50
Confirmed: 	89
Tentative: 	205
Rejected: 	12306
Iteration: 	10 / 50
Confirmed: 	89
Tentative: 	205
Rejected: 	12306
Iteration: 	11 / 50
Confirmed: 	89
Tentative: 	205
Rejected: 	12306
Iteration: 	12 / 50
Confirmed: 	104
Tentative: 	190
Rejected: 	12306
Iteration: 	13 / 50
Confirmed: 	104
Tentative: 	175
Rejected: 	12321
Iteration: 	14 / 50
Confirmed: 	



BorutaPy finished running.

Iteration: 	50 / 50
Confirmed: 	584
Tentative: 	3
Rejected: 	14457
Boruta ind
(array([  178,   179,   180,   181,   182,   183,   184,   185,   539,
         540,   541,   542,   543,   544,   545,   546,   547,   548,
         549,   550,   551,   552,   553,   554,   555,   556,   557,
         558,   559,   560,   561,   562,   563,   564,   565,   566,
         567,   568,   569,   570,   571,   572,   573,   574,   575,
         576,   577,   578,   579,   580,   581,   582,   583,   584,
         585,   586,   587,   695,   696,   697,   698,   699,   700,
         701,   702,  1495,  1591,  1592,  1593,  1594,  1595,  1596,
        1597,  1598,  1599,  1600,  1601,  1602,  1603,  1604,  1605,
        1606,  1607,  1608,  1609,  1610,  1656,  1657,  1658,  1659,
        1660,  1663,  1664,  1665,  1666,  1672,  1673,  1674,  1675,
        1676,  1677,  1678,  1679,  1680,  1681,  1682,  1683,  1684,
        1685,  1686,  1687,  1688,  1689,  1690,  1

LAS_Alpha= 0.0060350947268416125
EN_Alpha= 0.0139851812611649
EN_L1 ratio= 0.3
End SMK_CAN_187
Start TOX_171
Iteration: 	1 / 50
Confirmed: 	0
Tentative: 	5748
Rejected: 	0
Iteration: 	2 / 50
Confirmed: 	0
Tentative: 	5748
Rejected: 	0
Iteration: 	3 / 50
Confirmed: 	0
Tentative: 	5748
Rejected: 	0
Iteration: 	4 / 50
Confirmed: 	0
Tentative: 	5748
Rejected: 	0
Iteration: 	5 / 50
Confirmed: 	0
Tentative: 	5748
Rejected: 	0
Iteration: 	6 / 50
Confirmed: 	0
Tentative: 	5748
Rejected: 	0
Iteration: 	7 / 50
Confirmed: 	0
Tentative: 	5748
Rejected: 	0
Iteration: 	8 / 50
Confirmed: 	0
Tentative: 	903
Rejected: 	4845
Iteration: 	9 / 50
Confirmed: 	294
Tentative: 	609
Rejected: 	4845
Iteration: 	10 / 50
Confirmed: 	294
Tentative: 	609
Rejected: 	4845
Iteration: 	11 / 50
Confirmed: 	294
Tentative: 	609
Rejected: 	4845
Iteration: 	12 / 50
Confirmed: 	309
Tentative: 	446
Rejected: 	4993
Iteration: 	13 / 50
Confirmed: 	309
Tentative: 	446
Rejected: 	4993
Iteration: 	14 / 50
Confirmed: 	309
Tentative:

Iteration: 	37 / 50
Confirmed: 	293
Tentative: 	219
Rejected: 	21771
Iteration: 	38 / 50
Confirmed: 	293
Tentative: 	216
Rejected: 	21774
Iteration: 	39 / 50
Confirmed: 	293
Tentative: 	216
Rejected: 	21774
Iteration: 	40 / 50
Confirmed: 	303
Tentative: 	206
Rejected: 	21774
Iteration: 	41 / 50
Confirmed: 	303
Tentative: 	206
Rejected: 	21774
Iteration: 	42 / 50
Confirmed: 	303
Tentative: 	206
Rejected: 	21774
Iteration: 	43 / 50
Confirmed: 	306
Tentative: 	202
Rejected: 	21775
Iteration: 	44 / 50
Confirmed: 	306
Tentative: 	202
Rejected: 	21775
Iteration: 	45 / 50
Confirmed: 	306
Tentative: 	202
Rejected: 	21775
Iteration: 	46 / 50
Confirmed: 	311
Tentative: 	197
Rejected: 	21775
Iteration: 	47 / 50
Confirmed: 	311
Tentative: 	195
Rejected: 	21777
Iteration: 	48 / 50
Confirmed: 	311
Tentative: 	195
Rejected: 	21777
Iteration: 	49 / 50
Confirmed: 	314
Tentative: 	192
Rejected: 	21777


BorutaPy finished running.

Iteration: 	50 / 50
Confirmed: 	314
Tentative: 	180
Rejected: 	21777
Boru