# TPS12 - Testing Features with Keras Model

In [1]:
# Global variables for testing changes to this notebook quickly
RANDOM_SEED = 0
NUM_FOLDS = 6
TRAIN_SIZE = 500000

In [2]:
import numpy as np
import pandas as pd
import time
import os
import pyarrow
import gc

# Hide warnings
import warnings
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# Model/Evaluation
from functools import partial
from sklearn.base import clone
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, RobustScaler
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, recall_score
from sklearn.inspection import permutation_importance

# Tensorflow/Keras
import tensorflow as tf
from tensorflow import keras

# Keras imports
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Load and Prepare Data

In [3]:
# Load full training data
train = pd.read_feather('../data/train.feather')

# Drop low/no variance 
train.drop(["Soil_Type7", "Id", "Soil_Type15"], axis=1, inplace=True)
train = train[train.Cover_Type != 5]

# Label Encoding
new_encoder = LabelEncoder()
train["Cover_Type"] = new_encoder.fit_transform(train["Cover_Type"])

# Split synthetic data
train, test = train_test_split(
    train, 
    train_size = TRAIN_SIZE, 
    random_state = RANDOM_SEED,
    stratify = train['Cover_Type'],
)
y_train = train['Cover_Type']


# features, data structure for summary scores
features = [x for x in train.columns if x not in ['Id','Cover_Type']]
nonsoil = [x for x in features if not x.startswith('Soil_Type')]
new_rows = list()
gc.collect()

print(f'Training Size: {train.shape[0]} rows, {train.shape[1]} cols')
print(f'Holdout Size: {test.shape[0]} rows, {test.shape[1]} cols\n')

Training Size: 500000 rows, 53 cols
Holdout Size: 3499999 rows, 53 cols



# Model 

In [4]:
# Keras Parameters
BATCH_SIZE = 512
EPOCHS = 100
EARLY_STOP = 10
VERBOSE = 0
NUM_CLASSES = len(train.Cover_Type.value_counts())

In [5]:
def build_model(input_size):
    model = keras.Sequential(
        [
            layers.InputLayer(input_shape = (input_size,)),
            layers.Dense(input_size, kernel_initializer="lecun_normal", activation="selu"),
            layers.Dense(3*input_size//4, kernel_initializer="lecun_normal", activation="selu"),
            layers.Dense(input_size//2, kernel_initializer="lecun_normal", activation="selu"),
            layers.Dense(NUM_CLASSES, activation="softmax"),
        ]
    )

    model.compile(
        loss = tf.keras.losses.SparseCategoricalCrossentropy(), 
        optimizer = tf.keras.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return model 

In [6]:
# Need to adjust input size depending on # of features
def get_pipeline(input_size = len(features)):
    return make_pipeline(
        RobustScaler(),
        KerasClassifier(
            partial(build_model, input_size = input_size),
            batch_size = BATCH_SIZE,
            epochs = EPOCHS,
            verbose = VERBOSE,
            shuffle = True,
            validation_split = 0.1,
            callbacks = [
                EarlyStopping(
                    patience=EARLY_STOP,
                    monitor='val_loss',
                    restore_best_weights=True,
                ),
            ],
        )
    )

# Scoring Function

In [7]:
def score_features(sklearn_model, processing = None):
    
    # Original Training/Test Split
    features = [x for x in train.columns if x not in ['Id','Cover_Type']]
    X_temp, X_test = train[features], test[features]
    y_temp, y_test = train['Cover_Type'], test['Cover_Type']
    
    # Feature Engineering
    if processing:
        X_temp = processing(X_temp)
        X_test = processing(X_test)
    
    # Store the out-of-fold predictions
    test_preds = np.zeros((X_test.shape[0],6))
    oof_preds = np.zeros((X_temp.shape[0],))
    fi_scores = np.zeros((X_temp.shape[1],))
    scores, times = np.zeros(NUM_FOLDS), np.zeros(NUM_FOLDS)
    
    # Stratified k-fold cross-validation
    skf = StratifiedKFold(n_splits = NUM_FOLDS, shuffle = True, random_state = RANDOM_SEED)
    for fold, (train_idx, valid_idx) in enumerate(skf.split(train[features],train['Cover_Type'])):
       
        # Training and Validation Sets
        X_train, X_valid = X_temp.iloc[train_idx], X_temp.iloc[valid_idx]
        y_train, y_valid = y_temp.iloc[train_idx], y_temp.iloc[valid_idx]
        
        # Create model
        start = time.time()
        model = clone(sklearn_model)
        model.fit(X_train, y_train)

        # Permutation Importance
        result = permutation_importance(
            model, X_valid, y_valid, 
            random_state=RANDOM_SEED
        )
        fi_scores += result.importances_mean / NUM_FOLDS

        # validation/holdout predictions
        valid_preds = np.ravel(model.predict(X_valid))
        oof_preds[valid_idx] = valid_preds
        test_preds += model.predict_proba(X_test)

        # Save scores and times
        scores[fold] = accuracy_score(y_valid, valid_preds)
        end = time.time()
        times[fold] = end-start
        print(f'Fold {fold} Accuracy:  {round(scores[fold], 5)} in {round(end-start,2)}s.')
        time.sleep(0.5)
    
    features = [x for x in X_temp.columns]
    nonsoil = [x for x in X_test.columns if not x.startswith('Soil_Type')]
    test_preds = np.argmax(test_preds, axis = 1)
    test_score = accuracy_score(y_test, test_preds)
    #print('\n'+model.__class__.__name__)
    print("Train Accuracy:", round(scores.mean(), 5))
    print('Test Accuracy:', round(test_score, 5))
    print(f'Training Time: {round(times.sum(), 2)}s')
    
    fi_scores = pd.Series(
        data = fi_scores, 
        index = features
    ).loc[nonsoil].sort_values()
    
    return scores.mean(), oof_preds, test_score, fi_scores

# Baseline

In [8]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline()
)

new_rows.append((
    'Baseline', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95862 in 128.76s.
Fold 1 Accuracy:  0.9574 in 110.21s.
Fold 2 Accuracy:  0.9573 in 125.71s.
Fold 3 Accuracy:  0.95756 in 115.84s.
Fold 4 Accuracy:  0.95732 in 102.86s.
Fold 5 Accuracy:  0.95795 in 112.68s.
Train Accuracy: 0.95769
Test Accuracy: 0.95951
Training Time: 696.05s


Hillshade_9am                         0.000010
Hillshade_3pm                         0.000034
Aspect                                0.000110
Slope                                 0.000131
Wilderness_Area2                      0.000142
Hillshade_Noon                        0.000512
Wilderness_Area1                      0.003987
Wilderness_Area4                      0.010704
Horizontal_Distance_To_Hydrology      0.011343
Vertical_Distance_To_Hydrology        0.019772
Wilderness_Area3                      0.025214
Horizontal_Distance_To_Fire_Points    0.033018
Horizontal_Distance_To_Roadways       0.049651
Elevation                             0.463072
dtype: float64

# Feature Engineering

1. Aspect Features
2. Hillshade Features
3. Water Features
4. Count Features
5. Water/Fire Interactions
6. Roadway Interactions
7. Elevation Interactions

## 1. Aspect Features

Feature involving transformations of the `Aspect` column.

In [9]:
def aspect_features(data):
    df = data.copy()
    df['Aspect_360'] = df['Aspect'] % 360
    df['Aspect_Sine'] = (df['Aspect']* np.pi / 180).apply(np.sin)
    df['Aspect_Alt'] = (df['Aspect']-180).where(
        df['Aspect']+180 > 360, df['Aspect'] + 180
    )
    return df

In [10]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline(len(features) + 3), 
    aspect_features
)

new_rows.append((
    'Aspect_Features', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95864 in 110.72s.
Fold 1 Accuracy:  0.957 in 116.62s.
Fold 2 Accuracy:  0.95681 in 111.69s.
Fold 3 Accuracy:  0.95828 in 115.45s.
Fold 4 Accuracy:  0.9574 in 146.81s.
Fold 5 Accuracy:  0.95639 in 106.18s.
Train Accuracy: 0.95742
Test Accuracy: 0.95934
Training Time: 707.47s


Slope                                -0.000031
Hillshade_3pm                        -0.000014
Hillshade_9am                         0.000084
Wilderness_Area2                      0.000125
Aspect                                0.000130
Aspect_Sine                           0.000140
Aspect_Alt                            0.000159
Aspect_360                            0.000181
Hillshade_Noon                        0.000497
Wilderness_Area1                      0.003639
Wilderness_Area4                      0.011133
Horizontal_Distance_To_Hydrology      0.011349
Vertical_Distance_To_Hydrology        0.019482
Wilderness_Area3                      0.026104
Horizontal_Distance_To_Fire_Points    0.032959
Horizontal_Distance_To_Roadways       0.049916
Elevation                             0.462196
dtype: float64

## 2. Hillshade Features

Features invovling transformations of the 3 hillshade columns.

In [11]:
def hillshade_features(data):
    df = data.copy()
    shade_features = ['Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm']

    # Clip Range
    df["Hillshade_9am_Clipped"] = df["Hillshade_9am"].clip(lower=0, upper=255)
    df["Hillshade_Noon_Clipped"] = df["Hillshade_9am"].clip(lower=0, upper=255)
    df["Hillshade_3pm_Clipped"] = df["Hillshade_9am"].clip(lower=0, upper=255)
    
    # Hillshade
    #df["Hillshade_Avg"] = df[shade_features].mean(axis=1)
    df["Hillshade_Sum"] = df[shade_features].sum(axis=1)
    df['Hillshade_Range'] = df[shade_features].max(axis=1) - df[shade_features].min(axis=1)
    
    return df

In [12]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline(len(features) + 5), 
    hillshade_features
)

new_rows.append((
    'Hillshade_Features', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95786 in 114.8s.
Fold 1 Accuracy:  0.95638 in 123.79s.
Fold 2 Accuracy:  0.95728 in 139.89s.
Fold 3 Accuracy:  0.95782 in 132.55s.
Fold 4 Accuracy:  0.95807 in 143.77s.
Fold 5 Accuracy:  0.95715 in 123.25s.
Train Accuracy: 0.95743
Test Accuracy: 0.95963
Training Time: 778.05s


Slope                                -0.000028
Aspect                                0.000006
Wilderness_Area2                      0.000063
Hillshade_Range                       0.000460
Hillshade_Noon                        0.001456
Wilderness_Area1                      0.004113
Hillshade_3pm_Clipped                 0.005404
Hillshade_3pm                         0.006920
Hillshade_9am                         0.007283
Hillshade_Noon_Clipped                0.008001
Wilderness_Area4                      0.008501
Horizontal_Distance_To_Hydrology      0.010832
Hillshade_Sum                         0.011504
Hillshade_9am_Clipped                 0.012442
Vertical_Distance_To_Hydrology        0.019415
Wilderness_Area3                      0.025158
Horizontal_Distance_To_Fire_Points    0.033113
Horizontal_Distance_To_Roadways       0.049966
Elevation                             0.462038
dtype: float64

## 3. Water Features

Features relating to hydrology.

In [13]:
# Helper function
def start_at_eps(series, eps=1e-10): 
    return series - series.min() + eps

def water_features(data):
    df = data.copy()
    
    # use float64 for squaring
    df["Horizontal_Distance_To_Hydrology"] = df["Horizontal_Distance_To_Hydrology"].astype('float64')
    df["Vertical_Distance_To_Hydrology"] = df["Vertical_Distance_To_Hydrology"].astype('float64')
    pos_h_hydrology = start_at_eps(df["Horizontal_Distance_To_Hydrology"])
    pos_v_hydrology = start_at_eps(df['Vertical_Distance_To_Hydrology'])
    
    # Manhatten Distances
    df["Hydro_Taxicab"] = np.abs(df["Horizontal_Distance_To_Hydrology"]) + np.abs(df["Vertical_Distance_To_Hydrology"])
    df['Hydro_Taxicab_Pos'] = (pos_h_hydrology ** 2 + pos_v_hydrology ** 2).apply(np.sqrt).rename('Euclidean_positive_hydrology').astype(np.float32)
    
    # Euclidean Distance
    df["Hydro_Euclid"] = (df["Horizontal_Distance_To_Hydrology"]**2 + np.abs(df["Vertical_Distance_To_Hydrology"])**2)**0.5
    df['Hydro_Euclid_Pos'] = (pos_h_hydrology ** 2 + pos_v_hydrology ** 2).apply(np.sqrt)
    
    # Misc Features
    df['Water_Direction'] = df['Vertical_Distance_To_Hydrology'].apply(np.sign)
    df['Water Elevation'] = df['Elevation'] - df['Vertical_Distance_To_Hydrology']
    
    # Store each as float32
    df["Horizontal_Distance_To_Hydrology"] = df["Horizontal_Distance_To_Hydrology"].astype('float32')
    df["Vertical_Distance_To_Hydrology"] = df["Vertical_Distance_To_Hydrology"].astype('float32')
    df["Hydro_Taxicab"] = df["Hydro_Taxicab"].astype('float32')
    df['Hydro_Taxicab_Pos'] = df['Hydro_Taxicab_Pos'].astype('float32')
    df["Hydro_Euclid"] = df["Hydro_Euclid"].astype('float32')
    df['Hydro_Euclid_Pos'] = df['Hydro_Euclid_Pos'].astype('float32')
    df['Water_Direction'] = df['Water_Direction'].astype('float32')
    df['Water Elevation'] = df['Water Elevation'].astype('float32')
    
    return df

In [14]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline(len(features) + 6), 
    water_features
)

new_rows.append((
    'Water_Features', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95801 in 126.54s.
Fold 1 Accuracy:  0.95648 in 96.0s.
Fold 2 Accuracy:  0.95716 in 99.18s.
Fold 3 Accuracy:  0.95787 in 108.88s.
Fold 4 Accuracy:  0.9576 in 109.88s.
Fold 5 Accuracy:  0.95673 in 117.6s.
Train Accuracy: 0.95731
Test Accuracy: 0.95946
Training Time: 658.08s


Hillshade_9am                         0.000029
Hillshade_3pm                         0.000050
Slope                                 0.000072
Aspect                                0.000092
Water_Direction                       0.000094
Wilderness_Area2                      0.000124
Hillshade_Noon                        0.000402
Wilderness_Area1                      0.004029
Hydro_Euclid_Pos                      0.006189
Hydro_Taxicab_Pos                     0.007086
Wilderness_Area4                      0.007968
Vertical_Distance_To_Hydrology        0.009778
Hydro_Taxicab                         0.010408
Horizontal_Distance_To_Hydrology      0.014314
Hydro_Euclid                          0.017284
Wilderness_Area3                      0.024886
Horizontal_Distance_To_Fire_Points    0.033874
Horizontal_Distance_To_Roadways       0.050310
Water Elevation                       0.232174
Elevation                             0.246495
dtype: float64

## 4. Count Features

Features involving the sums of the `Soil_Type` and `Wilderness_Area` columns.

In [15]:
def count_features(data):
    
    df = data.copy()
    soil_features = [x for x in df.columns if x.startswith("Soil_Type")]
    wilderness_features = [x for x in df.columns if x.startswith("Wilderness_Area")]

    # Count features
    df["Soil_Count"] = df[soil_features].apply(sum, axis=1)
    df["Wilderness_Count"] = df[wilderness_features].apply(sum, axis=1)
    
    return df

In [16]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline(len(features) + 2), 
    count_features
)

new_rows.append((
    'Count_Features', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95804 in 112.93s.
Fold 1 Accuracy:  0.95692 in 129.43s.
Fold 2 Accuracy:  0.95793 in 124.67s.
Fold 3 Accuracy:  0.95638 in 102.54s.
Fold 4 Accuracy:  0.95736 in 111.58s.
Fold 5 Accuracy:  0.95747 in 125.59s.
Train Accuracy: 0.95735
Test Accuracy: 0.95963
Training Time: 706.73s


Hillshade_3pm                         0.000017
Slope                                 0.000146
Aspect                                0.000147
Hillshade_9am                         0.000267
Hillshade_Noon                        0.000437
Wilderness_Area2                      0.001275
Wilderness_Count                      0.002598
Wilderness_Area4                      0.009227
Horizontal_Distance_To_Hydrology      0.011020
Wilderness_Area1                      0.011266
Wilderness_Area3                      0.018819
Vertical_Distance_To_Hydrology        0.019433
Horizontal_Distance_To_Fire_Points    0.032612
Soil_Count                            0.044043
Horizontal_Distance_To_Roadways       0.050610
Elevation                             0.462685
dtype: float64

## 5. Water/Fire Interactions

Features created from interactions of the water and fire columns.

In [17]:
def hydrofire_interactions(data):
    
    df = data.copy()
    df['Hydro_Fire_Sum'] = df['Horizontal_Distance_To_Hydrology'] + df['Horizontal_Distance_To_Fire_Points']
    df['Hydro_Fire_AbsDiff'] = abs(df['Horizontal_Distance_To_Hydrology'] - df['Horizontal_Distance_To_Fire_Points'])
    df['Hydro_Fire_EpsSum'] = start_at_eps(df['Horizontal_Distance_To_Hydrology']) + start_at_eps(df['Horizontal_Distance_To_Fire_Points'])
    df['Hydro_Fire_Diff'] = df['Horizontal_Distance_To_Hydrology'] - df['Horizontal_Distance_To_Fire_Points']
    return df

In [18]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline(len(features) + 4), 
    hydrofire_interactions
)

new_rows.append((
    'Water_Fire', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95854 in 117.22s.
Fold 1 Accuracy:  0.95733 in 123.23s.
Fold 2 Accuracy:  0.95747 in 114.27s.
Fold 3 Accuracy:  0.95807 in 115.43s.
Fold 4 Accuracy:  0.95781 in 112.91s.
Fold 5 Accuracy:  0.95691 in 107.44s.
Train Accuracy: 0.95769
Test Accuracy: 0.95967
Training Time: 690.5s


Slope                                -0.000025
Aspect                                0.000034
Hillshade_3pm                         0.000072
Wilderness_Area2                      0.000077
Hillshade_9am                         0.000135
Hillshade_Noon                        0.000301
Wilderness_Area1                      0.003829
Hydro_Fire_AbsDiff                    0.009249
Horizontal_Distance_To_Fire_Points    0.009409
Horizontal_Distance_To_Hydrology      0.009757
Hydro_Fire_Sum                        0.011174
Hydro_Fire_EpsSum                     0.011430
Wilderness_Area4                      0.011562
Hydro_Fire_Diff                       0.015398
Vertical_Distance_To_Hydrology        0.019572
Wilderness_Area3                      0.025566
Horizontal_Distance_To_Roadways       0.048625
Elevation                             0.462986
dtype: float64

## 6. Roadway Interactions

In [19]:
def roadway_interactions(data):
    df = data.copy()
    df['Hydro_Road_1'] = abs(df['Horizontal_Distance_To_Hydrology'] + df['Horizontal_Distance_To_Roadways'])
    df['Hydro_Road_2'] = abs(df['Horizontal_Distance_To_Hydrology'] - df['Horizontal_Distance_To_Roadways'])
    df['Fire_Road_1'] = abs(df['Horizontal_Distance_To_Fire_Points'] + df['Horizontal_Distance_To_Roadways'])
    df['Fire_Road_2'] = abs(df['Horizontal_Distance_To_Fire_Points'] - df['Horizontal_Distance_To_Roadways'])
    return df

In [20]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline(len(features) + 4), 
    roadway_interactions
)

new_rows.append((
    'Road_Interactions', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95848 in 128.85s.
Fold 1 Accuracy:  0.95699 in 103.79s.
Fold 2 Accuracy:  0.95718 in 131.44s.
Fold 3 Accuracy:  0.95726 in 95.27s.
Fold 4 Accuracy:  0.95735 in 108.47s.
Fold 5 Accuracy:  0.95697 in 117.42s.
Train Accuracy: 0.95737
Test Accuracy: 0.95945
Training Time: 685.24s


Hillshade_3pm                         0.000043
Wilderness_Area2                      0.000119
Slope                                 0.000123
Aspect                                0.000138
Hillshade_9am                         0.000143
Hillshade_Noon                        0.000466
Wilderness_Area1                      0.004720
Wilderness_Area4                      0.007620
Fire_Road_2                           0.009999
Hydro_Road_2                          0.011183
Horizontal_Distance_To_Hydrology      0.012898
Vertical_Distance_To_Hydrology        0.019012
Hydro_Road_1                          0.022080
Horizontal_Distance_To_Roadways       0.023909
Wilderness_Area3                      0.024433
Fire_Road_1                           0.025680
Horizontal_Distance_To_Fire_Points    0.027801
Elevation                             0.463444
dtype: float64

## 7. Elevation Interactions

Features created from interactions with the `Elevation` column.

In [27]:
def elevation_interactions(data):
    df = data.copy()
    df['Road_Elev_Int'] = df['Horizontal_Distance_To_Roadways'] * df['Elevation']
    df['VHydro_Elev_Int'] = df['Vertical_Distance_To_Hydrology'] * df['Elevation']
    df['Elev_VHydro_Diff'] = df.Elevation - df.Vertical_Distance_To_Hydrology
    df['Elev_HHydro_Diff'] = df.Elevation - df.Horizontal_Distance_To_Hydrology * 0.2
    
    return df

In [28]:
cv_score, oof_preds, test_score, fi_scores = score_features(
    get_pipeline(len(features) + 4), 
    elevation_interactions
)

new_rows.append((
    'Elev_Interactions', cv_score, test_score,
     *recall_score(y_train, oof_preds, average = None)
))

fi_scores

Fold 0 Accuracy:  0.95866 in 118.97s.
Fold 1 Accuracy:  0.95718 in 119.13s.
Fold 2 Accuracy:  0.957 in 132.19s.
Fold 3 Accuracy:  0.95795 in 123.84s.
Fold 4 Accuracy:  0.95776 in 124.47s.
Fold 5 Accuracy:  0.9568 in 123.66s.
Train Accuracy: 0.95756
Test Accuracy: 0.95961
Training Time: 742.25s


Slope                                -0.000068
Road_Elev_Int                        -0.000065
VHydro_Elev_Int                      -0.000024
Hillshade_3pm                        -0.000017
Aspect                                0.000018
Hillshade_9am                         0.000062
Wilderness_Area2                      0.000078
Hillshade_Noon                        0.000268
Wilderness_Area1                      0.003840
Horizontal_Distance_To_Hydrology      0.007738
Vertical_Distance_To_Hydrology        0.008389
Wilderness_Area4                      0.010784
Wilderness_Area3                      0.024897
Horizontal_Distance_To_Fire_Points    0.032835
Horizontal_Distance_To_Roadways       0.050323
Elev_VHydo_Diff                       0.126924
Elevation                             0.133627
Elev_HHydro_Diff                      0.134380
dtype: float64

## Summary

In [29]:
pd.DataFrame.from_records(
    data = new_rows,
    columns = ['features','cv_scores','holdout','recall_0', 'recall_1','recall_2','recall_3','recall_4','recall_5']
).sort_values('holdout')

Unnamed: 0,features,cv_scores,holdout,recall_0,recall_1,recall_2,recall_3,recall_4,recall_5
1,Aspect_Features,0.95742,0.959345,0.960211,0.971952,0.878965,0.085106,0.338936,0.729025
6,Road_Interactions,0.957372,0.959451,0.962532,0.969801,0.881581,0.021277,0.345238,0.740331
3,Water_Features,0.957308,0.959459,0.960265,0.971895,0.875736,0.06383,0.344538,0.731851
0,Baseline,0.957692,0.959511,0.962952,0.969359,0.893476,0.06383,0.423669,0.71502
7,Elev_Interactions,0.95756,0.959612,0.961508,0.970912,0.88011,0.021277,0.401261,0.730567
4,Count_Features,0.95735,0.959629,0.960505,0.970045,0.887794,0.021277,0.366246,0.754465
2,Hillshade_Features,0.957426,0.959634,0.960859,0.970908,0.881418,0.042553,0.371849,0.738533
5,Water_Fire,0.957688,0.959667,0.962794,0.96977,0.886118,0.021277,0.411765,0.729153
