In [64]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFECV
import tscv

import pandas as pd
import numpy as np

X, y = make_classification(
    n_samples=5000,
    n_features=100,
    n_informative=10,
    n_redundant=20,
    n_repeated=10,
    n_classes=2,
    n_clusters_per_class=1,
    class_sep=0.8,
    random_state=0,
)

In [65]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [66]:
clf = RandomForestClassifier(
    n_estimators=1000,
    criterion="entropy",
    class_weight="balanced_subsample", 
    max_features="log2", 
    min_weight_fraction_leaf=0.05
)

In [67]:
cv = tscv.GapKFold(n_splits=5, gap_before=200, gap_after=200)

rfecv = RFECV(
    estimator=clf,
    step=0.1,
    cv=cv,
    scoring="neg_log_loss",
    verbose=1,
    min_features_to_select=1,
    n_jobs=1
)

In [68]:
rfecv.fit(X_train, y_train)

Fitting estimator with 100 features.
Fitting estimator with 90 features.
Fitting estimator with 80 features.
Fitting estimator with 70 features.
Fitting estimator with 60 features.
Fitting estimator with 50 features.
Fitting estimator with 40 features.
Fitting estimator with 30 features.
Fitting estimator with 20 features.
Fitting estimator with 10 features.
Fitting estimator with 100 features.
Fitting estimator with 90 features.
Fitting estimator with 80 features.
Fitting estimator with 70 features.
Fitting estimator with 60 features.
Fitting estimator with 50 features.
Fitting estimator with 40 features.
Fitting estimator with 30 features.
Fitting estimator with 20 features.
Fitting estimator with 10 features.
Fitting estimator with 100 features.
Fitting estimator with 90 features.
Fitting estimator with 80 features.
Fitting estimator with 70 features.
Fitting estimator with 60 features.
Fitting estimator with 50 features.
Fitting estimator with 40 features.
Fitting estimator with 30

In [69]:
rfecv.cv_results_

{'mean_test_score': array([-0.59654883, -0.36690987, -0.35348442, -0.3693664 , -0.37262192,
        -0.38591261, -0.39842212, -0.39791331, -0.4089772 , -0.41914983,
        -0.42775937]),
 'std_test_score': array([0.01707339, 0.00939141, 0.00970899, 0.0090929 , 0.00862121,
        0.0092071 , 0.00744317, 0.00916561, 0.00854637, 0.00801019,
        0.00804861]),
 'split0_test_score': array([-0.61751541, -0.37543883, -0.35989656, -0.37474754, -0.37750722,
        -0.38907336, -0.40040191, -0.39873047, -0.40994832, -0.41798875,
        -0.42668581]),
 'split1_test_score': array([-0.60059807, -0.37321326, -0.35223236, -0.36600974, -0.37146849,
        -0.38359274, -0.39608758, -0.39542201, -0.40752438, -0.41890185,
        -0.42654238]),
 'split2_test_score': array([-0.58259701, -0.36687471, -0.36452316, -0.37992203, -0.38240909,
        -0.39857799, -0.40915953, -0.41051886, -0.42134025, -0.43096118,
        -0.43967585]),
 'split3_test_score': array([-0.61032552, -0.36997641, -0.35475478

In [72]:
rfecv.get_support()

array([ True,  True, False, False, False, False, False, False, False,
       False,  True, False, False,  True, False, False, False, False,
       False, False, False, False, False,  True,  True, False, False,
       False, False, False,  True, False,  True,  True, False, False,
       False, False,  True,  True, False, False, False, False, False,
        True, False,  True, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
        True, False,  True, False, False,  True,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [56]:
rfecv.ranking_

array([1, 2, 3, 3, 1, 7, 4, 1, 1, 1, 5, 1, 2, 7, 7, 5, 4, 1, 6, 1, 6, 2,
       6, 1, 1, 1, 4, 1, 5, 3])

In [58]:
predict_proba = rfecv.predict_proba(X_test)
predict_proba

array([[0.007, 0.993],
       [0.97 , 0.03 ],
       [0.096, 0.904],
       ...,
       [0.988, 0.012],
       [0.014, 0.986],
       [0.991, 0.009]])

In [76]:
rfecv.predict_proba(X_test)

array([[0.3252218 , 0.6747782 ],
       [0.09803035, 0.90196965],
       [0.61421855, 0.38578145],
       ...,
       [0.71221129, 0.28778871],
       [0.16073768, 0.83926232],
       [0.38060061, 0.61939939]])

In [74]:
rfecv.classes_

array([0, 1])

In [77]:
predict = rfecv.predict(X_test)
predict

array([1, 1, 0, ..., 0, 1, 1])

In [82]:
predict2 = predict.reshape(-1, 1)
predict2

array([[1],
       [1],
       [0],
       ...,
       [0],
       [1],
       [1]])

In [83]:
np.hstack((predict2, predict2))

array([[1, 1],
       [1, 1],
       [0, 0],
       ...,
       [0, 0],
       [1, 1],
       [1, 1]])

In [95]:
df = pd.DataFrame([[0,1,2], [0,1,2], [0,1,2]], columns=["a", "b", "c"])
cols = df.columns

In [96]:
f = cols.to_list()

In [100]:
df

Unnamed: 0,a,b,c
0,0,1,2
1,0,1,2
2,0,1,2


In [108]:
df.loc[:, ['a', 'b', 'f']]

KeyError: "['f'] not in index"