# BIO-SELECT - Marigliano

In [None]:
from sklearn import neighbors, datasets
import pandas as pd
import os
from matplotlib import pyplot as plt
import numpy as np
from sklearn import preprocessing
%matplotlib inline

# Print NumPy floating-point values with 2 digits of precision
np.set_printoptions(precision=2)

## Load data samples (Golub dataset by default; MILE and EGEOD22619 are the alternatives)

In [None]:
from sklearn.model_selection import train_test_split

from datasets.EGEOD22619.EGEOD22619Dataset import EGEOD22619Dataset
from datasets.MILE.MileDataset import MileDataset
from datasets.Golub99.GolubDataset import GolubDataset

# Pick the dataset to analyse; the alternatives stay commented out for quick switching
# ds = MileDataset(full_dataset=True)
# ds = EGEOD22619Dataset()
ds = GolubDataset()

X = ds.get_X()
y = ds.get_y()
#print(X)
# number of labels returned by the dataset
print(len(y))

# Hold out 30% of the samples for testing. The seed is fixed so that the split,
# and therefore every score computed from it, is the same after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# length of the first training row
print(len(X_train[0]))

## Classification using dumb KNN (all features)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Baseline: 4-nearest-neighbours trained on every feature, scored on the test split
classifier = KNeighborsClassifier(n_neighbors=4, algorithm="auto").fit(X_train, y_train)
score = classifier.score(X_test, y_test)

print("score :", score)

## Classification using ExtraTrees, then univariate feature selection (SelectKBest)

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
classifier = ExtraTreesClassifier(n_jobs=-1, n_estimators=25)
classifier = classifier.fit(X_train, y_train)

# Evaluate the forest on the test split while `classifier` still refers to it
print("score :", classifier.score(X_test, y_test))

import sklearn.feature_selection as fs

# Univariate filter keeping the k best-scoring features.
# f_classif (ANOVA F-value) is the scoring function meant for a class-label target;
# f_regression would treat the labels as a continuous quantity.
# The selector gets its own name so that it does not replace the fitted forest.
selector = fs.SelectKBest(fs.f_classif, k=300) #k is number of features.
print(len(X_train[1]))
X_train_selected = selector.fit_transform(X_train, y_train)
print(len(X_train_selected[1]))

#plt.plot(selector.scores_)
# distribution of the per-feature p-values computed by the selector
plt.hist(selector.pvalues_, color="red")

#plt.plot(classifier.feature_importances_)

## Feature selection

In [None]:
from skfeature.function.similarity_based import reliefF

print(X_train)
print(y_train)
score = reliefF.reliefF(X_train, np.array(y_train))

# pair every score with its feature index, order by decreasing score, show the top 5
n_best_features = list(enumerate(score))
n_best_features.sort(key=lambda pair: pair[1], reverse=True)
print(n_best_features[:5])

# feature indices in the order returned by reliefF.feature_ranking
idx = reliefF.feature_ranking(score)
print(idx)

In [None]:
from skfeature.function.similarity_based import fisher_score

score = fisher_score.fisher_score(X_train, y_train)

# pair every score with its feature index, order by decreasing score, show the top 5
n_best_features = list(enumerate(score))
n_best_features.sort(key=lambda pair: pair[1], reverse=True)
print(n_best_features[:5])

# feature indices in the order returned by fisher_score.feature_ranking
idx = fisher_score.feature_ranking(score)
print(idx)

In [None]:
# Split the ranked (feature index, score) pairs into two parallel lists and plot the scores
f = [index for index, _ in n_best_features]
s = [value for _, value in n_best_features]
plt.plot(s)

### Test with http://featureselection.asu.edu/tutorial.php

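For each of P rounds, draw a random subset of M features and keep the first N of them that give the best accuracy,

with N in [1, M]. Intended settings: M = 1000, P = 10 (the cell below currently uses `max_features = 20` for M and `P = 3`).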

In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score
import random

# Accuracy of every subset evaluated so far; get_best_features_subset() appends to this list
acc = []

# Size M of each random feature subset drawn in the loop below
max_features = 20

def get_best_features_subset(features_indices):
    """Find the prefix of ``features_indices`` with the best LinearSVC test accuracy.

    For every N from 1 to ``len(features_indices)`` inclusive, a ``svm.LinearSVC``
    is trained on the first N listed columns of the global ``X_train`` and
    evaluated on the same columns of the global ``X_test``. Each accuracy is
    also appended to the global list ``acc``.

    Parameters
    ----------
    features_indices : sequence of int
        Column indices into ``X_train`` / ``X_test``, in the order to try them.

    Returns
    -------
    (best_features, max_acc)
        ``best_features`` is the shortest prefix that reaches the highest
        accuracy and ``max_acc`` is that accuracy. For an empty input the
        result is an empty prefix and ``-1.0``.
    """
    max_acc = -1.0
    # Bound up front so that an empty input returns cleanly instead of raising UnboundLocalError
    best_features = features_indices[:0]

    # "+ 1" so that the complete subset (N == len(features_indices)) is evaluated as well
    for N in range(1, len(features_indices) + 1):
        selected_features_train = X_train[:, features_indices[:N]]
        selected_features_test = X_test[:, features_indices[:N]]

        clf = svm.LinearSVC()

        clf.fit(selected_features_train, y_train)
        y_predict = clf.predict(selected_features_test)

        last_acc = accuracy_score(y_test, y_predict)
        acc.append(last_acc)

        # strict ">" keeps the smallest prefix when several reach the same accuracy
        if last_acc > max_acc:
            max_acc = last_acc
            best_features = features_indices[:N]

    return best_features, max_acc


# Union of the winning features over all rounds, and the sum of the winning subset sizes
best_of_best_features = set()
total_of_best_features = 0
# number of random subsets to try
P = 3
# Dedicated seeded generator: the sampled subsets are reproducible and the
# global `random` state is left untouched
rng = random.Random(42)
for _ in range(P):
    # Sample among all column indices, index 0 included
    random_features_indices = rng.sample(range(len(X[0])), max_features)
    best_features, max_acc = get_best_features_subset(random_features_indices)

    print("max score %s with %s features" % (max_acc, len(best_features)))
    #print("Best features are %s" % best_features)
    print("")

    best_of_best_features.update(best_features)
    total_of_best_features += len(best_features)

print("%s uniques features over a total of %s" % (len(best_of_best_features), total_of_best_features))
print("best of best: %s" % best_of_best_features)

In [None]:
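# Golub dataset: feature indices collected with 2000 features, 10 times (presumably the output of the search cell above with max_features = 2000 and P = 10)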
best_features_golub = [1, 5, 6, 9, 18, 20, 21, 22, 23, 24, 25, 26, 28, 30, 31, 36, 39, 40, 42, 43, 46, 47, 56, 60, 61, 63, 65, 67, 68, 69, 71, 72, 74, 75, 76, 77, 82, 84, 87, 89, 90, 95, 97, 99, 101, 103, 107, 110, 111, 112, 113, 114, 115, 116, 121, 125, 128, 130, 131, 132, 133, 137, 138, 139, 140, 141, 142, 144, 145, 146, 148, 150, 151, 158, 162, 163, 165, 168, 170, 171, 176, 177, 183, 184, 185, 186, 187, 188, 189, 191, 192, 198, 199, 201, 202, 203, 204, 205, 208, 210, 212, 213, 214, 217, 219, 220, 221, 224, 227, 228, 229, 232, 237, 245, 252, 256, 257, 260, 262, 263, 267, 269, 271, 275, 276, 278, 281, 283, 288, 289, 291, 296, 297, 298, 299, 301, 302, 303, 305, 306, 307, 309, 310, 311, 313, 315, 316, 317, 319, 321, 323, 325, 328, 329, 330, 331, 333, 336, 338, 339, 340, 342, 343, 344, 350, 352, 354, 359, 361, 363, 364, 365, 366, 367, 368, 370, 371, 374, 375, 377, 378, 382, 385, 387, 390, 391, 393, 395, 396, 398, 401, 407, 409, 414, 416, 417, 419, 420, 422, 423, 424, 425, 426, 427, 428, 429, 431, 436, 440, 441, 443, 444, 445, 446, 449, 451, 452, 453, 455, 456, 459, 461, 464, 465, 466, 468, 469, 470, 471, 473, 476, 477, 478, 479, 480, 482, 485, 486, 489, 490, 491, 492, 494, 496, 498, 499, 500, 502, 504, 505, 510, 512, 514, 515, 516, 517, 520, 521, 525, 527, 530, 532, 534, 536, 539, 542, 545, 547, 548, 549, 550, 552, 554, 557, 560, 561, 563, 565, 569, 572, 574, 576, 577, 582, 587, 589, 590, 592, 593, 595, 596, 597, 604, 605, 606, 608, 610, 612, 613, 615, 616, 617, 621, 622, 623, 624, 625, 626, 628, 629, 630, 631, 635, 636, 637, 638, 643, 644, 646, 647, 648, 650, 652, 654, 655, 656, 659, 661, 663, 666, 667, 670, 672, 674, 675, 676, 678, 681, 682, 683, 689, 692, 693, 694, 698, 700, 702, 705, 709, 710, 711, 713, 714, 715, 717, 718, 721, 724, 727, 730, 731, 732, 733, 738, 740, 741, 742, 745, 746, 748, 749, 751, 752, 753, 756, 759, 760, 762, 763, 765, 766, 770, 773, 774, 775, 777, 779, 784, 787, 790, 791, 793, 796, 799, 800, 802, 804, 809, 810, 811, 812, 813, 814, 818, 823, 
825, 831, 832, 839, 840, 842, 843, 846, 850, 851, 854, 856, 857, 865, 866, 871, 874, 875, 876, 879, 882, 884, 887, 888, 892, 893, 896, 897, 899, 902, 905, 906, 908, 909, 910, 914, 915, 918, 920, 925, 928, 929, 930, 934, 936, 938, 939, 942, 943, 946, 950, 951, 952, 953, 955, 956, 958, 959, 961, 963, 964, 965, 966, 967, 968, 969, 971, 973, 974, 975, 978, 981, 982, 985, 986, 987, 990, 991, 992, 993, 995, 998, 999, 1000, 1001, 1002, 1005, 1008, 1011, 1012, 1014, 1017, 1018, 1021, 1026, 1027, 1033, 1034, 1035, 1038, 1040, 1041, 1044, 1046, 1047, 1049, 1051, 1053, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1067, 1068, 1069, 1075, 1076, 1079, 1080, 1081, 1082, 1083, 1084, 1086, 1089, 1091, 1092, 1095, 1098, 1099, 1100, 1103, 1105, 1107, 1109, 1113, 1115, 1118, 1119, 1120, 1125, 1128, 1130, 1136, 1140, 1144, 1147, 1149, 1151, 1152, 1154, 1155, 1156, 1159, 1160, 1161, 1162, 1163, 1164, 1168, 1173, 1175, 1177, 1178, 1180, 1182, 1184, 1185, 1189, 1190, 1191, 1195, 1197, 1199, 1205, 1208, 1210, 1211, 1214, 1216, 1218, 1220, 1224, 1233, 1234, 1235, 1239, 1240, 1241, 1242, 1245, 1247, 1248, 1249, 1251, 1258, 1259, 1268, 1269, 1272, 1273, 1274, 1275, 1276, 1278, 1279, 1280, 1281, 1283, 1286, 1287, 1290, 1293, 1298, 1301, 1302, 1303, 1304, 1305, 1308, 1309, 1310, 1313, 1314, 1318, 1325, 1327, 1332, 1334, 1336, 1337, 1338, 1340, 1342, 1343, 1346, 1347, 1348, 1349, 1351, 1352, 1355, 1357, 1359, 1366, 1370, 1374, 1376, 1382, 1383, 1388, 1389, 1392, 1397, 1398, 1399, 1400, 1401, 1403, 1405, 1407, 1411, 1412, 1414, 1415, 1419, 1423, 1425, 1429, 1432, 1434, 1435, 1436, 1437, 1438, 1440, 1442, 1443, 1444, 1446, 1447, 1449, 1450, 1452, 1454, 1460, 1463, 1466, 1467, 1470, 1471, 1472, 1473, 1475, 1478, 1479, 1484, 1486, 1487, 1491, 1492, 1494, 1495, 1497, 1502, 1503, 1504, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1517, 1518, 1520, 1522, 1524, 1525, 1526, 1527, 1529, 1531, 1533, 1537, 1538, 1539, 1542, 1544, 1545, 1547, 1548, 1549, 1552, 1555, 1556, 1557, 1558, 1562, 1565, 1573, 
1574, 1577, 1578, 1579, 1581, 1582, 1583, 1584, 1586, 1588, 1589, 1591, 1592, 1593, 1594, 1600, 1601, 1602, 1603, 1605, 1607, 1608, 1610, 1611, 1614, 1616, 1617, 1618, 1620, 1622, 1624, 1625, 1626, 1627, 1629, 1630, 1633, 1636, 1639, 1640, 1643, 1645, 1648, 1649, 1656, 1658, 1659, 1664, 1669, 1672, 1678, 1682, 1686, 1691, 1694, 1695, 1700, 1708, 1709, 1710, 1711, 1716, 1719, 1721, 1722, 1724, 1730, 1732, 1733, 1736, 1740, 1741, 1742, 1743, 1751, 1753, 1754, 1755, 1758, 1761, 1763, 1764, 1765, 1769, 1770, 1778, 1779, 1781, 1784, 1786, 1791, 1792, 1793, 1795, 1796, 1797, 1799, 1801, 1802, 1804, 1805, 1806, 1807, 1808, 1809, 1811, 1812, 1816, 1817, 1818, 1819, 1820, 1822, 1825, 1828, 1830, 1831, 1832, 1834, 1835, 1841, 1843, 1846, 1848, 1849, 1850, 1852, 1856, 1857, 1858, 1859, 1860, 1862, 1863, 1864, 1866, 1868, 1871, 1874, 1875, 1876, 1880, 1881, 1882, 1883, 1885, 1886, 1887, 1890, 1891, 1893, 1894, 1895, 1897, 1898, 1901, 1902, 1903, 1904, 1905, 1907, 1908, 1909, 1913, 1914, 1915, 1916, 1922, 1927, 1929, 1937, 1938, 1943, 1945, 1947, 1949, 1950, 1951, 1952, 1955, 1956, 1957, 1960, 1962, 1963, 1964, 1966, 1969, 1972, 1973, 1979, 1984, 1991, 1992, 1994, 1995, 1996, 1998, 2002, 2003, 2004, 2005, 2008, 2010, 2011, 2012, 2013, 2017, 2019, 2020, 2021, 2028, 2030, 2031, 2032, 2033, 2036, 2039, 2041, 2047, 2048, 2049, 2050, 2054, 2055, 2058, 2059, 2063, 2065, 2066, 2067, 2071, 2072, 2074, 2075, 2079, 2080, 2081, 2083, 2084, 2085, 2087, 2089, 2091, 2093, 2095, 2096, 2097, 2099, 2101, 2105, 2106, 2109, 2113, 2116, 2118, 2123, 2125, 2126, 2128, 2129, 2130, 2131, 2132, 2135, 2136, 2139, 2143, 2147, 2149, 2153, 2156, 2157, 2160, 2161, 2162, 2163, 2164, 2165, 2167, 2168, 2170, 2171, 2176, 2178, 2180, 2181, 2182, 2183, 2187, 2189, 2190, 2191, 2192, 2194, 2195, 2196, 2197, 2198, 2199, 2202, 2204, 2205, 2206, 2208, 2210, 2212, 2214, 2215, 2222, 2223, 2227, 2229, 2230, 2237, 2238, 2248, 2250, 2253, 2257, 2261, 2262, 2265, 2269, 2271, 2272, 2275, 2276, 2277, 2279, 2280, 2281, 2284, 
2285, 2287, 2288, 2292, 2293, 2297, 2301, 2303, 2304, 2311, 2315, 2317, 2318, 2319, 2320, 2321, 2323, 2324, 2325, 2327, 2328, 2329, 2333, 2336, 2337, 2338, 2339, 2340, 2341, 2343, 2345, 2346, 2347, 2352, 2353, 2355, 2356, 2360, 2361, 2364, 2368, 2369, 2370, 2376, 2377, 2378, 2380, 2382, 2383, 2386, 2390, 2391, 2394, 2395, 2396, 2397, 2404, 2405, 2406, 2407, 2408, 2411, 2412, 2420, 2423, 2425, 2427, 2428, 2429, 2435, 2437, 2439, 2441, 2442, 2445, 2446, 2448, 2449, 2451, 2452, 2455, 2457, 2458, 2459, 2464, 2465, 2466, 2467, 2469, 2470, 2471, 2472, 2473, 2477, 2478, 2482, 2485, 2487, 2488, 2489, 2492, 2493, 2494, 2496, 2497, 2499, 2502, 2503, 2505, 2507, 2509, 2510, 2512, 2514, 2516, 2517, 2518, 2521, 2523, 2525, 2529, 2534, 2537, 2541, 2542, 2543, 2544, 2545, 2547, 2551, 2554, 2556, 2557, 2558, 2559, 2563, 2572, 2577, 2578, 2579, 2582, 2584, 2587, 2588, 2592, 2595, 2598, 2599, 2600, 2601, 2602, 2603, 2608, 2609, 2614, 2616, 2617, 2618, 2619, 2620, 2623, 2631, 2632, 2633, 2638, 2639, 2640, 2642, 2643, 2645, 2646, 2647, 2648, 2651, 2653, 2655, 2658, 2659, 2660, 2661, 2662, 2664, 2665, 2666, 2667, 2668, 2670, 2672, 2673, 2674, 2677, 2679, 2680, 2681, 2682, 2684, 2689, 2690, 2694, 2695, 2696, 2697, 2699, 2700, 2702, 2703, 2704, 2709, 2711, 2713, 2714, 2718, 2719, 2720, 2723, 2725, 2726, 2728, 2731, 2732, 2742, 2743, 2744, 2745, 2747, 2750, 2754, 2755, 2757, 2758, 2762, 2764, 2765, 2767, 2769, 2771, 2772, 2773, 2774, 2778, 2780, 2781, 2782, 2784, 2787, 2790, 2791, 2792, 2793, 2794, 2795, 2799, 2801, 2802, 2804, 2805, 2806, 2808, 2809, 2810, 2811, 2818, 2819, 2820, 2826, 2830, 2831, 2832, 2834, 2836, 2839, 2842, 2844, 2845, 2848, 2849, 2850, 2852, 2856, 2858, 2860, 2861, 2862, 2863, 2864, 2865, 2866, 2867, 2868, 2872, 2875, 2879, 2880, 2881, 2883, 2884, 2885, 2897, 2900, 2902, 2906, 2907, 2912, 2913, 2915, 2918, 2920, 2922, 2923, 2925, 2927, 2928, 2929, 2932, 2933, 2936, 2937, 2938, 2939, 2942, 2944, 2949, 2950, 2952, 2955, 2956, 2957, 2958, 2959, 2960, 2962, 2963, 2964, 
2966, 2967, 2968, 2969, 2971, 2973, 2981, 2985, 2986, 2990, 2998, 2999, 3000, 3003, 3004, 3007, 3008, 3009, 3010, 3013, 3015, 3016, 3019, 3021, 3022, 3030, 3032, 3038, 3039, 3040, 3042, 3045, 3047, 3048, 3050, 3054, 3058, 3063, 3064, 3068, 3070, 3071, 3072, 3073, 3074, 3076, 3078, 3080, 3081, 3082, 3084, 3088, 3090, 3091, 3093, 3094, 3098, 3099, 3101, 3105, 3106, 3111, 3112, 3113, 3115, 3116, 3117, 3118, 3125, 3126, 3127, 3128, 3129, 3131, 3135, 3137, 3139, 3140, 3145, 3149, 3153, 3154, 3155, 3157, 3158, 3160, 3162, 3165, 3167, 3168, 3169, 3171, 3172, 3174, 3176, 3181, 3182, 3183, 3184, 3186, 3189, 3196, 3198, 3201, 3203, 3205, 3207, 3210, 3212, 3213, 3218, 3220, 3221, 3222, 3226, 3227, 3229, 3232, 3234, 3237, 3238, 3240, 3247, 3248, 3251, 3255, 3257, 3258, 3260, 3262, 3263, 3266, 3267, 3268, 3269, 3272, 3273, 3274, 3276, 3277, 3279, 3284, 3285, 3286, 3287, 3290, 3291, 3293, 3299, 3300, 3301, 3302, 3305, 3306, 3308, 3310, 3311, 3313, 3316, 3317, 3318, 3320, 3321, 3322, 3323, 3326, 3327, 3329, 3331, 3332, 3335, 3337, 3340, 3341, 3342, 3343, 3344, 3347, 3350, 3355, 3358, 3359, 3360, 3361, 3362, 3363, 3364, 3366, 3367, 3368, 3371, 3372, 3373, 3376, 3377, 3378, 3379, 3380, 3383, 3385, 3386, 3389, 3391, 3397, 3399, 3400, 3402, 3403, 3404, 3407, 3409, 3410, 3411, 3412, 3413, 3415, 3417, 3418, 3421, 3422, 3423, 3425, 3426, 3427, 3428, 3431, 3433, 3435, 3437, 3438, 3443, 3444, 3446, 3447, 3451, 3455, 3459, 3462, 3466, 3467, 3470, 3474, 3476, 3477, 3480, 3487, 3489, 3490, 3491, 3493, 3494, 3498, 3499, 3502, 3503, 3504, 3505, 3507, 3508, 3512, 3516, 3518, 3519, 3520, 3522, 3524, 3531, 3538, 3540, 3541, 3542, 3543, 3544, 3545, 3547, 3548, 3551, 3557, 3561, 3562, 3564, 3565, 3567, 3568, 3570, 3575, 3577, 3578, 3580, 3582, 3585, 3587, 3588, 3592, 3593, 3595, 3599, 3601, 3602, 3603, 3604, 3605, 3607, 3619, 3620, 3621, 3623, 3625, 3626, 3629, 3630, 3631, 3633, 3634, 3636, 3637, 3638, 3639, 3642, 3643, 3644, 3646, 3647, 3649, 3650, 3651, 3652, 3654, 3659, 3660, 3661, 3665, 3667, 
3669, 3670, 3671, 3672, 3675, 3676, 3678, 3681, 3683, 3684, 3685, 3691, 3695, 3697, 3699, 3703, 3704, 3706, 3708, 3719, 3721, 3724, 3725, 3727, 3729, 3731, 3734, 3736, 3738, 3739, 3742, 3747, 3748, 3752, 3753, 3755, 3756, 3757, 3760, 3761, 3762, 3763, 3767, 3768, 3771, 3772, 3774, 3775, 3776, 3777, 3778, 3780, 3789, 3791, 3792, 3793, 3794, 3795, 3797, 3800, 3802, 3806, 3810, 3813, 3815, 3818, 3820, 3821, 3822, 3826, 3827, 3830, 3831, 3836, 3843, 3845, 3846, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, 3860, 3861, 3862, 3863, 3865, 3866, 3867, 3869, 3871, 3872, 3874, 3877, 3879, 3883, 3886, 3887, 3894, 3895, 3896, 3897, 3898, 3900, 3902, 3909, 3911, 3912, 3916, 3918, 3919, 3921, 3924, 3925, 3926, 3927, 3929, 3931, 3932, 3935, 3937, 3938, 3939, 3940, 3943, 3944, 3945, 3946, 3947, 3948, 3949, 3950, 3951, 3953, 3955, 3956, 3960, 3961, 3964, 3965, 3967, 3968, 3969, 3973, 3978, 3982, 3985, 3986, 3987, 3989, 3993, 3994, 3995, 3998, 3999, 4000, 4004, 4009, 4010, 4013, 4016, 4017, 4018, 4024, 4029, 4030, 4031, 4033, 4036, 4038, 4039, 4040, 4042, 4043, 4044, 4045, 4046, 4048, 4049, 4051, 4052, 4055, 4067, 4068, 4070, 4071, 4072, 4073, 4077, 4082, 4083, 4086, 4089, 4091, 4092, 4097, 4099, 4104, 4105, 4109, 4115, 4116, 4118, 4119, 4121, 4122, 4124, 4127, 4129, 4131, 4132, 4133, 4136, 4138, 4139, 4140, 4142, 4145, 4146, 4149, 4150, 4152, 4155, 4157, 4159, 4162, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4172, 4173, 4175, 4176, 4177, 4178, 4181, 4183, 4184, 4189, 4190, 4192, 4193, 4200, 4202, 4204, 4205, 4206, 4207, 4208, 4211, 4213, 4217, 4218, 4219, 4221, 4226, 4228, 4229, 4230, 4231, 4234, 4235, 4236, 4239, 4246, 4249, 4252, 4256, 4257, 4258, 4259, 4260, 4265, 4266, 4267, 4268, 4269, 4270, 4271, 4274, 4278, 4279, 4280, 4282, 4283, 4284, 4289, 4290, 4292, 4293, 4295, 4296, 4297, 4299, 4300, 4301, 4304, 4306, 4307, 4308, 4311, 4313, 4314, 4318, 4320, 4321, 4322, 4323, 4329, 4331, 4332, 4336, 4337, 4338, 4339, 4340, 4341, 4345, 4347, 4348, 4350, 4351, 4352, 4353, 4354, 
4356, 4358, 4360, 4362, 4366, 4371, 4372, 4374, 4375, 4376, 4379, 4380, 4381, 4382, 4383, 4389, 4390, 4391, 4394, 4396, 4397, 4400, 4401, 4403, 4406, 4409, 4412, 4414, 4415, 4416, 4417, 4418, 4420, 4422, 4423, 4424, 4425, 4427, 4428, 4435, 4437, 4438, 4440, 4443, 4444, 4445, 4450, 4451, 4456, 4457, 4458, 4459, 4461, 4462, 4464, 4466, 4471, 4472, 4473, 4474, 4475, 4476, 4477, 4478, 4479, 4481, 4482, 4486, 4487, 4491, 4494, 4495, 4500, 4501, 4502, 4505, 4506, 4511, 4512, 4514, 4516, 4520, 4521, 4522, 4523, 4525, 4527, 4528, 4530, 4532, 4534, 4535, 4536, 4543, 4544, 4545, 4548, 4549, 4550, 4552, 4555, 4556, 4557, 4558, 4559, 4561, 4562, 4563, 4564, 4566, 4568, 4569, 4575, 4578, 4579, 4581, 4582, 4584, 4585, 4586, 4587, 4588, 4591, 4592, 4593, 4595, 4596, 4599, 4601, 4602, 4603, 4604, 4607, 4608, 4609, 4613, 4615, 4616, 4620, 4622, 4623, 4625, 4626, 4629, 4630, 4634, 4635, 4637, 4638, 4639, 4640, 4643, 4648, 4649, 4653, 4659, 4660, 4661, 4663, 4667, 4671, 4672, 4674, 4675, 4676, 4677, 4682, 4683, 4685, 4689, 4692, 4693, 4699, 4702, 4703, 4706, 4707, 4708, 4709, 4711, 4714, 4716, 4718, 4720, 4722, 4724, 4728, 4729, 4730, 4734, 4736, 4738, 4740, 4742, 4743, 4745, 4750, 4751, 4753, 4758, 4760, 4761, 4763, 4764, 4766, 4767, 4771, 4772, 4774, 4775, 4776, 4778, 4779, 4780, 4782, 4784, 4787, 4788, 4789, 4793, 4795, 4798, 4800, 4801, 4805, 4806, 4807, 4808, 4809, 4812, 4817, 4818, 4820, 4822, 4824, 4825, 4826, 4827, 4829, 4830, 4831, 4833, 4837, 4838, 4839, 4840, 4841, 4842, 4843, 4846, 4854, 4857, 4858, 4859, 4860, 4862, 4864, 4866, 4867, 4868, 4871, 4872, 4873, 4874, 4877, 4878, 4879, 4881, 4882, 4883, 4884, 4889, 4890, 4891, 4892, 4894, 4895, 4896, 4897, 4898, 4899, 4900, 4901, 4902, 4905, 4906, 4908, 4909, 4910, 4911, 4915, 4918, 4919, 4923, 4924, 4925, 4926, 4929, 4931, 4932, 4935, 4937, 4939, 4940, 4943, 4944, 4946, 4949, 4951, 4954, 4955, 4956, 4957, 4959, 4964, 4966, 4970, 4973, 4980, 4982, 4986, 4987, 4988, 4989, 4993, 4994, 4996, 4997, 4998, 5001, 5002, 5003, 5004, 
5005, 5006, 5008, 5009, 5011, 5015, 5016, 5020, 5022, 5023, 5026, 5028, 5029, 5030, 5031, 5033, 5035, 5036, 5037, 5038, 5039, 5040, 5047, 5048, 5049, 5050, 5053, 5055, 5059, 5061, 5063, 5064, 5065, 5067, 5068, 5071, 5074, 5075, 5079, 5080, 5081, 5082, 5086, 5087, 5091, 5092, 5093, 5094, 5097, 5098, 5100, 5101, 5102, 5104, 5106, 5107, 5108, 5110, 5111, 5112, 5113, 5117, 5118, 5119, 5120, 5122, 5124, 5127, 5128, 5131, 5134, 5139, 5140, 5141, 5144, 5145, 5147, 5148, 5151, 5154, 5155, 5156, 5159, 5160, 5161, 5163, 5164, 5165, 5166, 5167, 5168, 5169, 5171, 5177, 5181, 5182, 5184, 5185, 5186, 5187, 5188, 5189, 5192, 5199, 5203, 5204, 5205, 5210, 5211, 5215, 5218, 5219, 5224, 5225, 5228, 5229, 5231, 5233, 5234, 5236, 5237, 5238, 5241, 5242, 5243, 5249, 5250, 5253, 5255, 5258, 5259, 5262, 5263, 5264, 5265, 5267, 5268, 5269, 5270, 5271, 5273, 5274, 5276, 5279, 5286, 5288, 5290, 5291, 5294, 5296, 5297, 5300, 5301, 5302, 5303, 5305, 5307, 5309, 5311, 5315, 5317, 5319, 5321, 5324, 5325, 5326, 5330, 5331, 5333, 5337, 5338, 5339, 5343, 5346, 5348, 5349, 5350, 5351, 5352, 5353, 5359, 5360, 5361, 5363, 5364, 5365, 5368, 5373, 5374, 5375, 5381, 5383, 5384, 5385, 5388, 5389, 5391, 5392, 5397, 5399, 5401, 5403, 5404, 5405, 5407, 5408, 5411, 5412, 5413, 5417, 5418, 5423, 5425, 5426, 5430, 5431, 5432, 5433, 5434, 5435, 5436, 5438, 5439, 5440, 5441, 5442, 5443, 5445, 5446, 5449, 5450, 5452, 5454, 5456, 5465, 5466, 5468, 5471, 5473, 5474, 5475, 5477, 5480, 5482, 5483, 5484, 5488, 5495, 5496, 5497, 5498, 5499, 5500, 5502, 5506, 5510, 5511, 5512, 5515, 5519, 5520, 5523, 5524, 5526, 5533, 5534, 5536, 5537, 5539, 5540, 5542, 5543, 5545, 5546, 5551, 5554, 5558, 5559, 5561, 5563, 5564, 5566, 5567, 5571, 5572, 5578, 5580, 5581, 5584, 5585, 5588, 5589, 5590, 5592, 5594, 5595, 5598, 5599, 5601, 5606, 5608, 5609, 5613, 5615, 5617, 5619, 5620, 5622, 5623, 5624, 5625, 5626, 5631, 5633, 5634, 5635, 5638, 5639, 5642, 5648, 5650, 5651, 5657, 5659, 5663, 5665, 5667, 5668, 5669, 5673, 5676, 5679, 5680, 
5681, 5683, 5687, 5691, 5692, 5693, 5695, 5696, 5698, 5703, 5704, 5706, 5707, 5708, 5710, 5711, 5713, 5714, 5715, 5721, 5723, 5724, 5728, 5730, 5731, 5732, 5737, 5738, 5739, 5741, 5742, 5744, 5746, 5752, 5755, 5756, 5757, 5759, 5763, 5764, 5767, 5770, 5772, 5773, 5774, 5775, 5776, 5777, 5778, 5779, 5781, 5783, 5786, 5787, 5789, 5790, 5791, 5792, 5795, 5797, 5798, 5799, 5800, 5802, 5810, 5812, 5814, 5816, 5818, 5819, 5820, 5821, 5822, 5827, 5829, 5830, 5831, 5832, 5834, 5838, 5839, 5841, 5842, 5843, 5844, 5846, 5847, 5848, 5849, 5852, 5854, 5855, 5857, 5858, 5862, 5863, 5867, 5869, 5870, 5872, 5873, 5875, 5876, 5877, 5880, 5884, 5885, 5886, 5888, 5897, 5898, 5899, 5900, 5902, 5906, 5907, 5910, 5913, 5916, 5918, 5921, 5922, 5924, 5926, 5927, 5928, 5929, 5930, 5932, 5934, 5936, 5938, 5942, 5944, 5946, 5948, 5950, 5951, 5952, 5953, 5957, 5959, 5961, 5962, 5966, 5967, 5968, 5969, 5970, 5971, 5972, 5973, 5976, 5978, 5980, 5983, 5984, 5986, 5990, 5992, 5993, 5995, 5997, 5999, 6001, 6004, 6010, 6014, 6017, 6019, 6021, 6023, 6024, 6025, 6030, 6031, 6034, 6036, 6038, 6039, 6041, 6042, 6043, 6045, 6046, 6047, 6051, 6052, 6055, 6057, 6058, 6060, 6062, 6066, 6068, 6069, 6072, 6075, 6078, 6079, 6080, 6082, 6083, 6086, 6088, 6089, 6090, 6091, 6092, 6093, 6096, 6097, 6099, 6100, 6101, 6102, 6105, 6107, 6108, 6109, 6110, 6113, 6114, 6117, 6122, 6126, 6127, 6128, 6129, 6132, 6133, 6136, 6141, 6146, 6147, 6148, 6149, 6151, 6152, 6153, 6160, 6162, 6170, 6172, 6175, 6176, 6178, 6180, 6185, 6186, 6187, 6188, 6190, 6193, 6194, 6195, 6196, 6198, 6199, 6200, 6201, 6204, 6205, 6208, 6209, 6213, 6214, 6215, 6217, 6219, 6220, 6221, 6222, 6223, 6224, 6226, 6231, 6232, 6233, 6234, 6235, 6238, 6240, 6241, 6242, 6243, 6247, 6249, 6251, 6252, 6253, 6254, 6255, 6257, 6259, 6260, 6262, 6266, 6267, 6268, 6269, 6271, 6272, 6274, 6275, 6276, 6277, 6279, 6280, 6282, 6283, 6285, 6287, 6288, 6296, 6299, 6301, 6303, 6304, 6307, 6308, 6312, 6315, 6317, 6318, 6319, 6320, 6321, 6322, 6327, 6328, 6335, 6342, 
6343, 6345, 6351, 6352, 6357, 6362, 6364, 6365, 6366, 6370, 6372, 6373, 6374, 6376, 6378, 6383, 6384, 6385, 6387, 6390, 6391, 6392, 6394, 6395, 6396, 6397, 6398, 6399, 6400, 6401, 6402, 6403, 6404, 6409, 6410, 6411, 6412, 6414, 6415, 6417, 6418, 6419, 6420, 6421, 6422, 6429, 6432, 6437, 6438, 6439, 6441, 6442, 6445, 6447, 6449, 6451, 6452, 6454, 6455, 6456, 6457, 6458, 6460, 6466, 6468, 6471, 6472, 6475, 6476, 6479, 6481, 6482, 6483, 6485, 6486, 6490, 6491, 6492, 6494, 6497, 6498, 6501, 6504, 6505, 6507, 6508, 6509, 6511, 6513, 6515, 6516, 6517, 6518, 6519, 6521, 6523, 6524, 6525, 6526, 6527, 6528, 6529, 6530, 6531, 6539, 6540, 6541, 6543, 6547, 6548, 6550, 6551, 6552, 6553, 6556, 6557, 6558, 6559, 6561, 6562, 6563, 6564, 6567, 6570, 6573, 6580, 6581, 6583, 6584, 6586, 6590, 6591, 6595, 6596, 6601, 6606, 6608, 6609, 6610, 6613, 6614, 6615, 6617, 6621, 6622, 6623, 6626, 6627, 6628, 6630, 6634, 6635, 6636, 6638, 6641, 6645, 6646, 6649, 6650, 6652, 6654, 6657, 6658, 6659, 6660, 6663, 6664, 6665, 6666, 6667, 6668, 6672, 6673, 6674, 6675, 6679, 6682, 6686, 6687, 6690, 6692, 6695, 6697, 6698, 6700, 6701, 6702, 6703, 6707, 6708, 6709, 6712, 6713, 6720, 6723, 6724, 6729, 6730, 6731, 6733, 6734, 6736, 6737, 6738, 6741, 6743, 6746, 6747, 6748, 6749, 6750, 6751, 6752, 6755, 6756, 6757, 6758, 6759, 6762, 6763, 6764, 6768, 6769, 6772, 6775, 6776, 6777, 6778, 6781, 6782, 6784, 6787, 6788, 6789, 6790, 6791, 6793, 6794, 6796, 6798, 6799, 6801, 6803, 6806, 6807, 6808, 6809, 6810, 6811, 6814, 6816, 6818, 6819, 6820, 6822, 6825, 6831, 6833, 6834, 6835, 6836, 6838, 6839, 6845, 6846, 6848, 6849, 6850, 6851, 6852, 6854, 6861, 6862, 6863, 6873, 6875, 6878, 6883, 6885, 6886, 6887, 6890, 6892, 6894, 6896, 6902, 6903, 6906, 6908, 6909, 6913, 6914, 6916, 6917, 6919, 6920, 6921, 6923, 6924, 6925, 6927, 6929, 6935, 6940, 6941, 6943, 6946, 6947, 6949, 6952, 6954, 6956, 6963, 6965, 6967, 6971, 6974, 6979, 6982, 6984, 6987, 6994, 6996, 6997, 7000, 7001, 7002, 7003, 7006, 7009, 7010, 7011, 7013, 
7014, 7016, 7017, 7020, 7024, 7025, 7026, 7027, 7030, 7031, 7032, 7033, 7035, 7038, 7041, 7043, 7044, 7045, 7046, 7047, 7048, 7051, 7052, 7056, 7057, 7063, 7065, 7066, 7067, 7069, 7070, 7072, 7073, 7077, 7079, 7080, 7082, 7084, 7086, 7088, 7089, 7093, 7094, 7096, 7097, 7099, 7100, 7101, 7102, 7105, 7106, 7110, 7114, 7117, 7118, 7119]

In [None]:
# from skfeature.function.similarity_based import reliefF
# 
# print(X_train)
# print(y_train)
# score = reliefF.reliefF(X_train, y_train)
# 
# # print the best 5 features with their score
# n_best_features = sorted(enumerate(score), key=lambda p:p[1], reverse=True)
# print(n_best_features[:5])
# 
# # get best features indice
# idx = reliefF.feature_ranking(score)
# print(idx)

In [None]:
from skfeature.function.statistical_based import f_score

print(X_train)
print(y_train)
score = f_score.f_score(X_train, y_train)

# pair every score with its feature index, order by decreasing score, show the top 3
n_best_features = list(enumerate(score))
n_best_features.sort(key=lambda pair: pair[1], reverse=True)
print(n_best_features[:3])

# feature indices in the order returned by f_score.feature_ranking
idx = f_score.feature_ranking(score)
print(idx)

In [None]:
#from skfeature.function.wrapper import svm_backward
#
#print(X_train)
#print(y_train)
#score = svm_backward.svm_backward(X_train, np.array(y_train), n_selected_features=3)
#
## print the best 3 features with their score
#n_best_features = sorted(enumerate(score), key=lambda p:p[1], reverse=True)
#print(n_best_features[:3])
#
## get best features indice
#idx = svm_backward.feature_ranking(score)
#print(idx)

In [None]:
#from skfeature.function.statistical_based import CFS
#
#F = CFS.cfs(X_train, y_train)
#print(F)

In [None]:
from sklearn.feature_selection import f_classif
import math

# Toy example where only the 1st feature is informative:
# the 1st column of totoX is identical to the labels in totoY
totoX = [[1,2], [-1,3], [-1,-2], [-1,23], [1,-2], [1,2]]
totoY = [1, -1, -1, -1, 1, 1]
F, pvalues = f_classif(totoX, totoY)

print(F)
print(pvalues)
# Expected: the 1st feature gets a much larger F score and a much smaller p-value than the 2nd

In [None]:
from sklearn.feature_selection import f_classif

F, pvalues = f_classif(X, y)
# (feature index, F score) pairs, highest F score first
F_sorted = sorted(enumerate(F), key=lambda pair: pair[1], reverse=True)

N = 1000

print("Best features according to F score: ")
for feat_idx, feat_score in F_sorted[:4]:
    print("%d : %0.3f" % (feat_idx, feat_score))

# keep the N best pairs and plot their scores in ranked order
best_X_F = F_sorted[:N]
F_scores = [feat_score for _, feat_score in best_X_F]

plt.plot(F_scores)
plt.title('Best %s features according to F score' % N)
plt.ylabel('F score')

plt.show()

In [None]:
from sklearn.feature_selection import mutual_info_classif
import math

# The mutual-information estimator is randomised; a fixed seed makes the
# resulting feature ranking reproducible from one run to the next
X_mi = mutual_info_classif(X, y, n_neighbors=10, random_state=42)

In [None]:
# (feature index, estimated mutual information) pairs, highest value first
X_mi_sorted = sorted(enumerate(X_mi), key=lambda pair: pair[1], reverse=True)

N = 1000

print("Best features according to MI score: ")
for feat_idx, feat_mi in X_mi_sorted[:4]:
    print("%d : %0.3f" % (feat_idx, feat_mi))

# keep the N best pairs and plot their values in ranked order
best_X_mi = X_mi_sorted[:N]
mi_scores = [feat_mi for _, feat_mi in best_X_mi]

plt.plot(mi_scores)
plt.title('Best %s features according to mutual info' % N)
plt.ylabel('Estimated Mutual Info')

plt.show()