In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Print the interpreter version that `python` resolves to in the shell.
!python --version

Python 3.7.4


In [2]:
import sys

# Add the parent directory to the module search path.
# NOTE(review): presumably so the local sklearn_nature_inspired_algorithms
# package can be imported without installing it — confirm.
sys.path.append('..')

In [3]:
from sklearn.metrics import classification_report

# Hyper-Parameter Tuning Using Nature-Inspired Algorithms

Load the [Covertype](https://archive.ics.uci.edu/ml/datasets/Covertype) dataset.

In [4]:
import random

from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_covtype

# Fixed seed so the subsample, the split and every metric reported below
# are reproducible after Restart Kernel -> Run All.
SEED = 42
# Fraction of the full dataset kept, to make the search tractable.
SAMPLE_FRACTION = 0.01

X, y = fetch_covtype(return_X_y=True)

# Shrink the dataset to 1% of its original size.
# range(len(X)) covers every row index; the previous upper bound of
# len(X) - 1 silently made the last row impossible to select.
# A dedicated Random instance keeps the draw independent of global RNG state.
rng = random.Random(SEED)
rows_id = rng.sample(range(len(X)), int(len(X) * SAMPLE_FRACTION))

X = X[rows_id, :]
y = y[rows_id]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

print(f'train size - {len(X_train)}\ntest size - {len(X_test)}')

train size - 4648
test size - 1162


Define the parameter grid.

In [5]:
# Search space for the tuning run: each hyper-parameter name maps to the
# range of candidate values that may be tried for it.
param_grid = dict(
    n_estimators=range(20, 400, 20),
    max_depth=range(2, 100, 2),
    min_samples_split=range(2, 40, 2),
)

### NatureInspiredSearch

In [28]:
from sklearn_nature_inspired_algorithms.model_selection.nature_inspired_search_cv import NatureInspiredSearchCV

from sklearn.ensemble import RandomForestClassifier

In [None]:
%%time

# Base estimator whose hyper-parameters are tuned; seeded so each candidate
# forest is built deterministically.
clf = RandomForestClassifier(random_state=42)

# Search over param_grid with 5-fold cross-validation, ranking candidates
# by accuracy.
# NOTE(review): max_n_gen / max_stagnating_gen look like a generation cap and
# an early-stopping patience — confirm against the library documentation.
nia_search = NatureInspiredSearchCV(
    clf,
    param_grid,
    scoring='accuracy',
    cv=5,
    max_n_gen=100,
    max_stagnating_gen=5,
    verbose=0,
)

nia_search.fit(X_train, y_train)

Fitting 5 folds for some of the 17689 candidates, which might total in 88445 fits


In [31]:
# Display the hyper-parameter combination stored on the fitted search object.
nia_search.best_params_

{'n_estimators': 200, 'max_depth': 80, 'min_samples_split': 4}

In [30]:
# Train a fresh forest on the training split using the parameters found by
# the search (fit returns the fitted estimator itself).
clf = RandomForestClassifier(random_state=42, **nia_search.best_params_).fit(X_train, y_train)

# Predict the held-out split and print the per-class precision/recall/F1 report.
y_pred = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           1       0.77      0.79      0.78       402
           2       0.82      0.84      0.83       599
           3       0.64      0.77      0.70        62
           4       1.00      0.20      0.33         5
           5       0.60      0.25      0.35        12
           6       0.58      0.34      0.43        32
           7       0.89      0.64      0.74        50

    accuracy                           0.79      1162
   macro avg       0.76      0.55      0.60      1162
weighted avg       0.79      0.79      0.78      1162

