# Notebook 3b - Breakdown Soil Type

In this notebook, we test whether breaking down the one-hot soil-type features into new features, using domain knowledge and the soil type descriptions, improves our models.

In [1]:
# Notebook-wide knobs, gathered here so experiments can be re-run quickly.
# RANDOM_SEED is handed to every estimator and to the CV splitter;
# NUM_FOLDS is the number of stratified folds used in cross-validation.
RANDOM_SEED, NUM_FOLDS = 0, 12

In [2]:
import numpy as np
import pandas as pd
import time
import pyarrow
import gc

# Model evaluation
from functools import partial
from sklearn.base import clone
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, recall_score
from sklearn.inspection import partial_dependence, permutation_importance
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier

# Plotting
import matplotlib
import seaborn as sns
from matplotlib import pyplot as plt

# Hide warnings
import warnings
warnings.filterwarnings('ignore')

# Load Data

In [3]:
%%time

# Read the dataset from its feather file
original = pd.read_feather('../data/original.feather')

# Replace the raw target with integer codes; the fitted encoder stays
# available as `old_encoder`
old_encoder = LabelEncoder().fit(original["Cover_Type"])
original["Cover_Type"] = old_encoder.transform(original["Cover_Type"])

# Positional split of the encoded target: rows before 15119 / rows from 15119 on
# NOTE(review): the same 15119 cut-off is repeated inside train_original;
# keep the two in sync (and confirm the row count is intentional)
y_train, y_test = (
    original['Cover_Type'].iloc[:15119],
    original['Cover_Type'].iloc[15119:],
)

# Every column except the identifier and the target is a feature
features = [col for col in original.columns if col not in ('Id', 'Cover_Type')]

# One result log per model; each experiment appends a summary tuple
bagging_scores, extratrees_scores, adaboost_scores, random_scores = [], [], [], []

Wall time: 52 ms


# Scoring Function

In [4]:
def train_original(sklearn_model, processing = None):
    """Cross-validate a classifier on the training rows and score it on the holdout rows.

    The function reads the module-level ``original`` frame, ``features`` list,
    ``NUM_FOLDS`` and ``RANDOM_SEED``. The first 15119 rows of ``original`` are
    used for stratified k-fold cross-validation; the remaining rows form the
    holdout set. Each fold's model adds its class probabilities for the holdout
    rows to a running total, and the final holdout prediction is the argmax of
    that total.

    Parameters
    ----------
    sklearn_model : estimator
        Unfitted estimator; a fresh ``clone`` is fitted for every fold. It must
        provide ``fit``, ``predict`` and ``predict_proba``.
    processing : callable, optional
        Feature-engineering function applied separately to the training and
        holdout feature frames. It must return the transformed frame.

    Returns
    -------
    tuple
        ``(mean fold accuracy, out-of-fold predictions, holdout accuracy)``.
    """

    # Original Training/Test Split
    # NOTE(review): the 15119 cut-off is also used by the cell that builds the
    # global y_train/y_test; keep both in sync.
    X_temp = original[features].iloc[:15119]
    X_test = original[features].iloc[15119:]
    y_temp = original['Cover_Type'].iloc[:15119]
    y_test = original['Cover_Type'].iloc[15119:]

    # Feature Engineering
    if processing:
        X_temp = processing(X_temp)
        X_test = processing(X_test)

    # Size the probability buffer from the labels instead of hard-coding 7 classes
    n_classes = np.unique(y_temp).size

    # Store the out-of-fold predictions
    test_preds = np.zeros((X_test.shape[0], n_classes))
    oof_preds = np.zeros((X_temp.shape[0],))
    scores, times = np.zeros(NUM_FOLDS), np.zeros(NUM_FOLDS)

    # Stratified k-fold cross-validation
    skf = StratifiedKFold(n_splits = NUM_FOLDS, shuffle = True, random_state = RANDOM_SEED)
    for fold, (train_idx, valid_idx) in enumerate(skf.split(X_temp,y_temp)):

        # Training and Validation Sets
        X_train, X_valid = X_temp.iloc[train_idx], X_temp.iloc[valid_idx]
        y_train, y_valid = y_temp.iloc[train_idx], y_temp.iloc[valid_idx]

        # Create model (the timer covers fitting plus both prediction steps below)
        start = time.time()
        model = clone(sklearn_model)
        model.fit(X_train, y_train)

        # validation and test predictions
        valid_preds = np.ravel(model.predict(X_valid))
        oof_preds[valid_idx] = valid_preds
        test_preds += model.predict_proba(X_test)

        # Save scores and times
        scores[fold] = accuracy_score(y_valid, valid_preds)
        end = time.time()
        times[fold] = end-start
        # Pause between folds; taken after `end`, so it is not part of the reported time
        time.sleep(0.5)

    # Cover_Type was label-encoded, so the argmax column index is the class label
    test_preds = np.argmax(test_preds, axis = 1)
    test_score = accuracy_score(y_test, test_preds)
    # clone() keeps the estimator class, so name it from the argument rather
    # than from the loop variable left over after the last fold
    print('\n'+sklearn_model.__class__.__name__)
    print("Train Accuracy:", round(scores.mean(), 5))
    print('Test Accuracy:', round(test_score, 5))
    print(f'Training Time: {round(times.sum(), 2)}s')

    return scores.mean(), oof_preds, test_score

# Models

We use the following 4 models from the scikit-learn library:

1. AdaBoost 
2. ExtraTrees
3. Bagging
4. Random Forest

In [5]:
# The base tree is passed positionally to AdaBoost and Bagging below: the
# keyword for it was renamed from `base_estimator` to `estimator` in newer
# scikit-learn releases, while the first positional slot is the base learner
# under both names, so this form works across versions.

# AdaBoost Classifier (boosted random-split decision trees)
adaboost = AdaBoostClassifier(
    DecisionTreeClassifier(
        splitter = 'random',
        random_state = RANDOM_SEED,
    ),
    random_state = RANDOM_SEED,
)

# ExtraTrees Classifier (every feature considered at each split)
extratrees = ExtraTreesClassifier(
    n_jobs = -1,
    random_state = RANDOM_SEED,
    max_features = None,
)

# Bagging Classifier (bagged random-split decision trees)
bagging = BaggingClassifier(
    DecisionTreeClassifier(
        splitter = 'random',
        random_state = RANDOM_SEED,
    ),
    n_jobs = -1,
    random_state = RANDOM_SEED
)

# Random Forest Classifier (library defaults apart from seed and parallelism)
randomforest = RandomForestClassifier(
    n_jobs = -1,
    random_state = RANDOM_SEED,
)

# Baselines

In [6]:
# Score each model on the untouched features, in the order
# AdaBoost -> ExtraTrees -> Bagging -> Random Forest, and log one row per model
for estimator, score_log in [
    (adaboost, adaboost_scores),
    (extratrees, extratrees_scores),
    (bagging, bagging_scores),
    (randomforest, random_scores),
]:
    cv_score, oof_preds, test_score = train_original(estimator)

    # Per-class recall of the out-of-fold predictions against the training labels
    class_recalls = recall_score(y_train, oof_preds, average = None)
    score_log.append(('Baseline', cv_score, test_score, *class_recalls))


AdaBoostClassifier
Train Accuracy: 0.80356
Test Accuracy: 0.75373
Training Time: 4.0s

ExtraTreesClassifier
Train Accuracy: 0.88491
Test Accuracy: 0.77808
Training Time: 37.13s

BaggingClassifier
Train Accuracy: 0.85581
Test Accuracy: 0.75372
Training Time: 22.01s

RandomForestClassifier
Train Accuracy: 0.86395
Test Accuracy: 0.74895
Training Time: 34.66s


# Soil Type Features

## Ordinal Soil Type (Drop)

We undo the one-hot encoding and drop the dummy variables:

In [7]:
def consolidate_soil_types(input_df, drop = True):
    """Collapse the Soil_Type1..Soil_Type40 indicator columns into one column.

    A copy of ``input_df`` gains a ``Soil_Type`` column equal to the sum of
    ``i * Soil_Type{i}`` over i = 1..40. With ``drop`` true (the default) the
    40 indicator columns are removed from the result; otherwise they are kept.
    The input frame itself is not modified.
    """
    soil_columns = [f'Soil_Type{number}' for number in range(1, 41)]

    result = input_df.copy()
    result['Soil_Type'] = 0
    for number, column in enumerate(soil_columns, start = 1):
        result['Soil_Type'] += number * result[column]

    if not drop:
        return result

    remaining = [column for column in result.columns if column not in soil_columns]
    return result[remaining]

In [8]:
# Re-run every model with the one-hot soil columns replaced by a single
# ordinal Soil_Type column (AdaBoost -> ExtraTrees -> Bagging -> Random Forest)
for estimator, score_log in [
    (adaboost, adaboost_scores),
    (extratrees, extratrees_scores),
    (bagging, bagging_scores),
    (randomforest, random_scores),
]:
    cv_score, oof_preds, test_score = train_original(estimator, consolidate_soil_types)

    # Per-class recall of the out-of-fold predictions against the training labels
    class_recalls = recall_score(y_train, oof_preds, average = None)
    score_log.append(('Ordinal_Drop', cv_score, test_score, *class_recalls))


AdaBoostClassifier
Train Accuracy: 0.79039
Test Accuracy: 0.76229
Training Time: 3.18s

ExtraTreesClassifier
Train Accuracy: 0.8867
Test Accuracy: 0.78229
Training Time: 31.88s

BaggingClassifier
Train Accuracy: 0.85416
Test Accuracy: 0.76073
Training Time: 17.79s

RandomForestClassifier
Train Accuracy: 0.86481
Test Accuracy: 0.74847
Training Time: 31.77s


## Ordinal Soil Type (Keep)

We keep both one-hot and ordinally encoded variables:

In [9]:
# Same consolidation as before, but the one-hot soil columns stay in the frame
keep_soil_columns = partial(consolidate_soil_types, drop = False)

# AdaBoost -> ExtraTrees -> Bagging -> Random Forest
for estimator, score_log in [
    (adaboost, adaboost_scores),
    (extratrees, extratrees_scores),
    (bagging, bagging_scores),
    (randomforest, random_scores),
]:
    cv_score, oof_preds, test_score = train_original(estimator, keep_soil_columns)

    # Per-class recall of the out-of-fold predictions against the training labels
    class_recalls = recall_score(y_train, oof_preds, average = None)
    score_log.append(('Ordinal_Keep', cv_score, test_score, *class_recalls))


AdaBoostClassifier
Train Accuracy: 0.79324
Test Accuracy: 0.75612
Training Time: 4.57s

ExtraTreesClassifier
Train Accuracy: 0.88591
Test Accuracy: 0.77965
Training Time: 36.29s

BaggingClassifier
Train Accuracy: 0.85641
Test Accuracy: 0.75935
Training Time: 26.28s

RandomForestClassifier
Train Accuracy: 0.86421
Test Accuracy: 0.7502
Training Time: 35.13s


## Climatic Zone (Ordinal)

We create a feature based on the climatic zone of the soil, which has a natural ordering:

1. lower montane dry
2. lower montane
3. montane dry
4. montane
5. montane dry and montane
6. montane and subalpine
7. subalpine
8. alpine

However, the ordering of the soil type labels roughly follows the ordering of their respective climatic zones, so there's a chance this feature won't be particularly informative.

In [10]:
def climatic_zone_original(input_df):
    """Return a copy of ``input_df`` with an added ``Climatic_Zone`` column.

    Each row's soil type number (1-40, recovered via ``consolidate_soil_types``)
    is looked up in a table of four-digit codes; the first digit of that code
    is stored as the climatic zone. The input frame is not modified.
    """
    # Four-digit code for soil types 1..40, listed in soil-type order
    codes_in_order = [
        2702, 2703, 2704, 2705, 2706, 2717, 3501, 3502, 4201, 4703,   # 1-10
        4704, 4744, 4758, 5101, 5151, 6101, 6102, 6731, 7101, 7102,   # 11-20
        7103, 7201, 7202, 7700, 7701, 7702, 7709, 7710, 7745, 7746,   # 21-30
        7755, 7756, 7757, 7790, 8703, 8707, 8708, 8771, 8772, 8776,   # 31-40
    ]

    # Leading digit of every (four-digit) code, keyed by soil type number
    zone_by_soil = {
        soil: four_digit // 1000
        for soil, four_digit in enumerate(codes_in_order, start = 1)
    }

    soil_type = consolidate_soil_types(input_df, drop = False)['Soil_Type']
    result = input_df.copy()
    result['Climatic_Zone'] = soil_type.apply(lambda soil: zone_by_soil[soil])
    return result

In [11]:
# Evaluate the Climatic_Zone feature with every model
# (AdaBoost -> ExtraTrees -> Bagging -> Random Forest)
for estimator, score_log in [
    (adaboost, adaboost_scores),
    (extratrees, extratrees_scores),
    (bagging, bagging_scores),
    (randomforest, random_scores),
]:
    cv_score, oof_preds, test_score = train_original(estimator, climatic_zone_original)

    # Per-class recall of the out-of-fold predictions against the training labels
    class_recalls = recall_score(y_train, oof_preds, average = None)
    score_log.append(('Climatic_Zone', cv_score, test_score, *class_recalls))


AdaBoostClassifier
Train Accuracy: 0.8021
Test Accuracy: 0.75724
Training Time: 4.61s

ExtraTreesClassifier
Train Accuracy: 0.88558
Test Accuracy: 0.77836
Training Time: 37.96s

BaggingClassifier
Train Accuracy: 0.85363
Test Accuracy: 0.7577
Training Time: 26.04s

RandomForestClassifier
Train Accuracy: 0.86408
Test Accuracy: 0.74982
Training Time: 35.29s


## Geologic Zones (Nominal)

This is another feature which is based on the soil type codes, but is not ordered like climatic zone.

1. alluvium
2. glacial
3. shale
4. sandstone
5. mixed sedimentary
6. unspecified in the USFS ELU Survey
7. igneous and metamorphic
8. volcanic

In [12]:
def geologic_zone_original(input_df):
    """Return a copy of ``input_df`` with an added ``Geologic_Zone`` column.

    Each row's soil type number (1-40, recovered via ``consolidate_soil_types``)
    is looked up in a table of four-digit codes; the second digit of that code
    is stored as the geologic zone. The input frame is not modified.
    """
    # Four-digit code for soil types 1..40, listed in soil-type order
    codes_in_order = [
        2702, 2703, 2704, 2705, 2706, 2717, 3501, 3502, 4201, 4703,   # 1-10
        4704, 4744, 4758, 5101, 5151, 6101, 6102, 6731, 7101, 7102,   # 11-20
        7103, 7201, 7202, 7700, 7701, 7702, 7709, 7710, 7745, 7746,   # 21-30
        7755, 7756, 7757, 7790, 8703, 8707, 8708, 8771, 8772, 8776,   # 31-40
    ]

    # Hundreds digit (second character) of every code, keyed by soil type number
    zone_by_soil = {
        soil: (four_digit // 100) % 10
        for soil, four_digit in enumerate(codes_in_order, start = 1)
    }

    soil_type = consolidate_soil_types(input_df, drop = False)['Soil_Type']
    result = input_df.copy()
    result['Geologic_Zone'] = soil_type.apply(lambda soil: zone_by_soil[soil])
    return result

In [13]:
# Evaluate the Geologic_Zone feature with every model
# (AdaBoost -> ExtraTrees -> Bagging -> Random Forest)
for estimator, score_log in [
    (adaboost, adaboost_scores),
    (extratrees, extratrees_scores),
    (bagging, bagging_scores),
    (randomforest, random_scores),
]:
    cv_score, oof_preds, test_score = train_original(estimator, geologic_zone_original)

    # Per-class recall of the out-of-fold predictions against the training labels
    class_recalls = recall_score(y_train, oof_preds, average = None)
    score_log.append(('Geologic_Zone', cv_score, test_score, *class_recalls))


AdaBoostClassifier
Train Accuracy: 0.80217
Test Accuracy: 0.76068
Training Time: 4.61s

ExtraTreesClassifier
Train Accuracy: 0.8869
Test Accuracy: 0.77964
Training Time: 36.2s

BaggingClassifier
Train Accuracy: 0.85455
Test Accuracy: 0.75816
Training Time: 26.24s

RandomForestClassifier
Train Accuracy: 0.86395
Test Accuracy: 0.75029
Training Time: 36.95s


## Surface Cover (Ordinal)

According to the [USDA reference](https://www.nrcs.usda.gov/wps/portal/nrcs/detail/soils/ref/?cid=nrcs142p2_054253#surface_fragments) on soil profiling:

1. **(Stony/Bouldery)** — Stones or boulders cover 0.01 to less than 0.1 percent of the surface. The smallest stones are at least 8 meters apart; the smallest boulders are at least 20 meters apart (fig. 3-9).

2. **(Very Stony/Very Bouldery)** — Stones or boulders cover 0.1 to less than 3 percent of the surface. The smallest stones are not less than 1 meter apart; the smallest boulders are not less than 3 meters apart (fig. 3-10).

3. **(Extremely Stony/Extremely Bouldery)** — Stones or boulders cover 3 to less than 15 percent of the surface. The smallest stones are as little as 0.5 meter apart; the smallest boulders are as little as 1 meter apart (fig. 3-11).

4. **(Rubbly)** — Stones or boulders cover 15 to less than 50 percent of the surface. The smallest stones are as little as 0.3 meter apart; the smallest boulders are as little as 0.5 meter apart. In most places it is possible to step from stone to stone or jump from boulder to boulder without touching the soil (fig. 3-12).

5. **(Very Rubbly)** — Stones or boulders appear to be nearly continuous and cover 50 percent or more of the surface. The smallest stones are less than 0.03 meter apart; the smallest boulders are less than 0.05 meter apart. Classifiable soil is among the rock fragments, and plant growth is possible (fig. 3-13).

In [14]:
def surface_cover_original(input_df):
    """Return a copy of ``input_df`` with an added ordinal ``Surface_Cover`` column.

    Soil type numbers are grouped by their surface-cover descriptor and mapped
    to 0 (no description), 1 (stony), 2 (very stony), 3 (extremely stony) or
    4 (rubbly). The input frame is not modified.
    """
    # Position in this tuple is the ordinal value assigned to the group
    groups_by_level = (
        [7,8,14,15,16,17,19,20,21,23,35],                           # 0: no description
        [6,12],                                                     # 1: stony
        [2,9,18,26],                                                # 2: very stony
        [1,22,24,25,27,28,29,30,31,32,33,34,36,37,38,39,40],        # 3: extremely stony
        [3,4,5,10,11,13],                                           # 4: rubbly
    )

    # Flatten into a soil type -> level lookup
    level_by_soil = {
        soil: level
        for level, members in enumerate(groups_by_level)
        for soil in members
    }

    # Create Feature
    soil_type = consolidate_soil_types(input_df, drop = False)['Soil_Type']
    result = input_df.copy()
    result['Surface_Cover'] = soil_type.apply(lambda soil: level_by_soil[soil])
    return result

In [15]:
# Evaluate the Surface_Cover feature with every model
# (AdaBoost -> ExtraTrees -> Bagging -> Random Forest)
for estimator, score_log in [
    (adaboost, adaboost_scores),
    (extratrees, extratrees_scores),
    (bagging, bagging_scores),
    (randomforest, random_scores),
]:
    cv_score, oof_preds, test_score = train_original(estimator, surface_cover_original)

    # Per-class recall of the out-of-fold predictions against the training labels
    class_recalls = recall_score(y_train, oof_preds, average = None)
    score_log.append(('Surface_Cover', cv_score, test_score, *class_recalls))


AdaBoostClassifier
Train Accuracy: 0.79655
Test Accuracy: 0.75136
Training Time: 4.72s

ExtraTreesClassifier
Train Accuracy: 0.88511
Test Accuracy: 0.77872
Training Time: 36.99s

BaggingClassifier
Train Accuracy: 0.85568
Test Accuracy: 0.75712
Training Time: 26.82s

RandomForestClassifier
Train Accuracy: 0.86474
Test Accuracy: 0.75103
Training Time: 35.84s


## Rock Size (Ordinal)

In [16]:
def rock_size_original(input_df):
    """Return a copy of ``input_df`` with an added ordinal ``Rock_Size`` column.

    Soil type numbers are grouped by the rock size named in their description
    and mapped to 0 (no description), 1 (stones), 2 (boulders) or 3 (rubble).
    The input frame is not modified.
    """
    # Position in this tuple is the ordinal value assigned to the group
    groups_by_size = (
        [7,8,14,15,16,17,19,20,21,23,35],                                       # 0: no description
        [1,2,6,9,12,18,24,25,26,27,28,29,30,31,32,33,34,36,37,38,39,40],        # 1: stones
        [22],                                                                   # 2: boulders
        [3,4,5,10,11,13],                                                       # 3: rubble
    )

    # Flatten into a soil type -> size lookup
    size_by_soil = {
        soil: size
        for size, members in enumerate(groups_by_size)
        for soil in members
    }

    soil_type = consolidate_soil_types(input_df, drop = False)['Soil_Type']
    result = input_df.copy()
    result['Rock_Size'] = soil_type.apply(lambda soil: size_by_soil[soil])
    return result

In [17]:
# Evaluate the Rock_Size feature with every model
# (AdaBoost -> ExtraTrees -> Bagging -> Random Forest)
for estimator, score_log in [
    (adaboost, adaboost_scores),
    (extratrees, extratrees_scores),
    (bagging, bagging_scores),
    (randomforest, random_scores),
]:
    cv_score, oof_preds, test_score = train_original(estimator, rock_size_original)

    # Per-class recall of the out-of-fold predictions against the training labels
    class_recalls = recall_score(y_train, oof_preds, average = None)
    score_log.append(('Rock_Size', cv_score, test_score, *class_recalls))


AdaBoostClassifier
Train Accuracy: 0.80025
Test Accuracy: 0.75429
Training Time: 4.64s

ExtraTreesClassifier
Train Accuracy: 0.88432
Test Accuracy: 0.7786
Training Time: 37.02s

BaggingClassifier
Train Accuracy: 0.85469
Test Accuracy: 0.75678
Training Time: 26.15s

RandomForestClassifier
Train Accuracy: 0.86553
Test Accuracy: 0.74993
Training Time: 36.33s


# Summary

In [18]:
# AdaBoost: one row per experiment, ordered by mean cross-validation accuracy
# NOTE(review): the output saved with this cell appears ordered by `holdout`,
# not `cv_score` - re-run the cell or confirm the intended sort key
pd.DataFrame.from_records(
    data = adaboost_scores,
    columns = ['features', 'cv_score', 'holdout', *(f'recall_{label}' for label in range(7))],
).sort_values(by = 'cv_score')

Unnamed: 0,features,cv_score,holdout,recall_0,recall_1,recall_2,recall_3,recall_4,recall_5,recall_6
5,Surface_Cover,0.796548,0.751363,0.661111,0.606944,0.761464,0.932407,0.89537,0.786111,0.932407
0,Baseline,0.803559,0.753727,0.681944,0.650463,0.745716,0.926389,0.893519,0.791204,0.935648
6,Rock_Size,0.800252,0.754289,0.673148,0.630093,0.761927,0.92963,0.893981,0.780556,0.932407
2,Ordinal_Keep,0.793241,0.756122,0.674537,0.621296,0.751274,0.927778,0.876389,0.768519,0.93287
3,Climatic_Zone,0.802104,0.757244,0.671296,0.624537,0.756369,0.933333,0.893981,0.798611,0.936574
4,Geologic_Zone,0.802171,0.760679,0.683796,0.636574,0.741084,0.936574,0.894444,0.784722,0.937963
1,Ordinal_Drop,0.790395,0.762287,0.664352,0.622222,0.735989,0.922685,0.882407,0.768056,0.937037


In [19]:
# Extra Trees: one row per experiment, ordered by mean cross-validation accuracy
# NOTE(review): the output saved with this cell appears ordered by `holdout`,
# not `cv_score` - re-run the cell or confirm the intended sort key
pd.DataFrame.from_records(
    data = extratrees_scores,
    columns = ['features', 'cv_score', 'holdout', *(f'recall_{label}' for label in range(7))],
).sort_values(by = 'cv_score')

Unnamed: 0,features,cv_score,holdout,recall_0,recall_1,recall_2,recall_3,recall_4,recall_5,recall_6
0,Baseline,0.884914,0.778078,0.786574,0.734259,0.866142,0.971759,0.961111,0.903704,0.970833
3,Climatic_Zone,0.885575,0.778364,0.783333,0.739815,0.862899,0.972222,0.960648,0.906944,0.973148
6,Rock_Size,0.884319,0.778601,0.784722,0.736111,0.857341,0.971759,0.961574,0.906944,0.971759
5,Surface_Cover,0.885112,0.778725,0.78287,0.740741,0.863363,0.97037,0.9625,0.905093,0.970833
4,Geologic_Zone,0.886898,0.779639,0.78287,0.74213,0.869384,0.971296,0.960648,0.90787,0.974074
2,Ordinal_Keep,0.885906,0.779647,0.781019,0.743981,0.863826,0.970833,0.959259,0.909722,0.972685
1,Ordinal_Drop,0.8867,0.782291,0.7875,0.741204,0.862436,0.972222,0.962963,0.908333,0.972222


In [20]:
# Bagging: one row per experiment, ordered by mean cross-validation accuracy
# NOTE(review): the output saved with this cell appears ordered by `holdout`,
# not `cv_score` - re-run the cell or confirm the intended sort key
pd.DataFrame.from_records(
    data = bagging_scores,
    columns = ['features', 'cv_score', 'holdout', *(f'recall_{label}' for label in range(7))],
).sort_values(by = 'cv_score')

Unnamed: 0,features,cv_score,holdout,recall_0,recall_1,recall_2,recall_3,recall_4,recall_5,recall_6
0,Baseline,0.855812,0.753724,0.769907,0.670833,0.837888,0.964352,0.934259,0.85,0.963426
6,Rock_Size,0.854687,0.756777,0.764352,0.675,0.840667,0.9625,0.941204,0.838889,0.960185
5,Surface_Cover,0.855679,0.757124,0.775,0.669444,0.83233,0.96713,0.946296,0.843056,0.956481
3,Climatic_Zone,0.853628,0.757696,0.768056,0.656481,0.838351,0.965278,0.944444,0.844907,0.95787
4,Geologic_Zone,0.854554,0.758159,0.764815,0.661111,0.842057,0.966667,0.941204,0.849074,0.956944
2,Ordinal_Keep,0.856406,0.759352,0.771296,0.663889,0.843909,0.961574,0.94213,0.851852,0.960185
1,Ordinal_Drop,0.854158,0.760734,0.772222,0.685185,0.825845,0.962963,0.942593,0.83287,0.957407


In [22]:
# Random Forest: one row per experiment, ordered by mean cross-validation accuracy
# NOTE(review): the output saved with this cell appears ordered by `holdout`,
# not `cv_score` - re-run the cell or confirm the intended sort key
pd.DataFrame.from_records(
    data = random_scores,
    columns = ['features', 'cv_score', 'holdout', *(f'recall_{label}' for label in range(7))],
).sort_values(by = 'cv_score')

Unnamed: 0,features,cv_score,holdout,recall_0,recall_1,recall_2,recall_3,recall_4,recall_5,recall_6
1,Ordinal_Drop,0.864806,0.748467,0.77037,0.701852,0.819824,0.973611,0.953241,0.872222,0.9625
0,Baseline,0.863947,0.748949,0.767593,0.701852,0.819824,0.971759,0.950926,0.869444,0.966204
3,Climatic_Zone,0.864079,0.749818,0.766667,0.702778,0.821214,0.971296,0.949074,0.873611,0.963889
6,Rock_Size,0.865534,0.749926,0.767593,0.710185,0.824456,0.969907,0.946759,0.873611,0.966204
2,Ordinal_Keep,0.864211,0.7502,0.764352,0.706944,0.82075,0.973148,0.949537,0.873148,0.961574
4,Geologic_Zone,0.863947,0.75029,0.7625,0.701852,0.82075,0.971296,0.949537,0.876852,0.964815
5,Surface_Cover,0.86474,0.751031,0.763426,0.710185,0.819824,0.971296,0.950463,0.873148,0.964815
