### Importing libraries

In [1]:
from NiaPy.algorithms.basic import FireflyAlgorithm, BatAlgorithm, ParticleSwarmOptimization
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score
from imblearn.under_sampling import RandomUnderSampler

from preprocessing import get_train_test_data
from benchmark import ClassificationBenchmark
from optimizer import optimize
import warnings

warnings.filterwarnings("ignore")

### Loading and preparing training and testing data

In [3]:
%%time

# Build the train/test split with the project's preprocessing helper, which is
# given the two CSV file names below and returns four objects.
# NOTE(review): how the two files are combined is defined in preprocessing.py — confirm there.
X_train, X_test, y_train, y_test = get_train_test_data(
    'train_transaction.csv',
    'train_identity.csv',
)

all_columns:
cat_features:
num_features:
CPU times: user 2min 32s, sys: 2min 32s, total: 5min 5s
Wall time: 5min 30s


In [4]:
# Randomly under-sample the training split only; the test split is left untouched.
# A fixed random_state keeps the retained rows identical on every
# Restart & Run All, so downstream scores are comparable between runs.
# This rebinds X_train / y_train, so the unsampled training split is no longer
# reachable under these names after the cell has run.
X_train, y_train = RandomUnderSampler(random_state=42).fit_resample(X_train, y_train)

## Let's check the baseline F1 score of a Random Forest classifier without using nature-inspired algorithms

In [None]:
# Baseline: a random forest trained on every available column.
# random_state is fixed so this reference score is reproducible and can be
# compared fairly with the score obtained on the selected columns later on.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Last expression of the cell: displayed as the cell output.
f1_score(y_test, y_pred)

### Optimizing columns using Firefly algorithm and Decision Tree Classifier

In [6]:
%%time

# Benchmark built from the DecisionTreeClassifier class and accuracy_score,
# limited to the first 30000 training samples and the first 10000 test samples.
firefly_decision_tree_benchmark = ClassificationBenchmark(
    DecisionTreeClassifier,
    accuracy_score,
    X_train[:30000], y_train[:30000],
    X_test[:10000], y_test[:10000],
)

# Run the project's optimize() helper with a default-configured FireflyAlgorithm.
# NOTE(review): the meaning of the trailing 100 is defined in optimizer.py — confirm there.
firefly_decision_tree_columns = optimize(
    firefly_decision_tree_benchmark,
    FireflyAlgorithm(),
    100,
)

  0%|          | 0/5 [00:02<?, ?it/s]

KeyboardInterrupt



### Optimizing columns using Firefly algorithm and Logistic Regression

In [5]:
%%time

# Benchmark built from the LogisticRegression class and accuracy_score,
# limited to the first 30000 training samples and the first 10000 test samples.
firefly_logistic_regression_benchmark = ClassificationBenchmark(
    LogisticRegression,
    accuracy_score,
    X_train[:30000], y_train[:30000],
    X_test[:10000], y_test[:10000],
)

# Run the project's optimize() helper with a default-configured FireflyAlgorithm.
# NOTE(review): the meaning of the trailing 100 is defined in optimizer.py — confirm there.
firefly_logistic_regression_columns = optimize(
    firefly_logistic_regression_benchmark,
    FireflyAlgorithm(),
    100,
)

### Optimizing columns using Firefly algorithm and Random Forest Classifier

In [None]:
%%time

# Benchmark built from the RandomForestClassifier class and accuracy_score,
# limited to the first 30000 training samples and the first 10000 test samples.
firefly_random_forest_benchmark = ClassificationBenchmark(
    RandomForestClassifier,
    accuracy_score,
    X_train[:30000], y_train[:30000],
    X_test[:10000], y_test[:10000],
)

# Run the project's optimize() helper with a default-configured FireflyAlgorithm.
# The result is used further down to index the columns of X_train / X_test.
# NOTE(review): the meaning of the trailing 100 is defined in optimizer.py — confirm there.
firefly_random_forest_columns = optimize(
    firefly_random_forest_benchmark,
    FireflyAlgorithm(),
    100,
)

### Optimizing columns using Bat Algorithm and Random Forest Classifier

In [None]:
%%time

# Benchmark built from the RandomForestClassifier class and accuracy_score,
# limited to the first 30000 training samples and the first 10000 test samples.
# The first argument is the classifier class, as in the Firefly cells; the
# nature-inspired algorithm belongs in the optimize() call below, not here.
bat_random_forest_benchmark = ClassificationBenchmark(RandomForestClassifier,
                                                      accuracy_score,
                                                      X_train[:30000],
                                                      y_train[:30000],
                                                      X_test[:10000],
                                                      y_test[:10000])

# Search with a default-configured BatAlgorithm, so this cell really measures
# the Bat Algorithm rather than repeating the Firefly run.
# NOTE(review): the meaning of the trailing 100 is defined in optimizer.py — confirm there.
bat_random_forest_columns = optimize(bat_random_forest_benchmark, BatAlgorithm(), 100)

### Optimizing columns using Particle Swarm Optimization and Random Forest Classifier

In [None]:
%%time

# Benchmark built from the RandomForestClassifier class and accuracy_score,
# limited to the first 30000 training samples and the first 10000 test samples.
# The first argument is the classifier class, as in the Firefly cells; the
# nature-inspired algorithm belongs in the optimize() call below, not here.
particle_swarm_random_forest_benchmark = ClassificationBenchmark(RandomForestClassifier,
                                                                 accuracy_score,
                                                                 X_train[:30000],
                                                                 y_train[:30000],
                                                                 X_test[:10000],
                                                                 y_test[:10000])

# Search with a default-configured ParticleSwarmOptimization, so this cell
# really measures PSO rather than repeating the Firefly run.
# NOTE(review): the meaning of the trailing 100 is defined in optimizer.py — confirm there.
particle_swarm_random_forest_columns = optimize(particle_swarm_random_forest_benchmark,
                                                ParticleSwarmOptimization(),
                                                100)

### Finally, let's check the F1 score using the columns selected by the Firefly algorithm with the Random Forest classifier

In [None]:
# Retrain a random forest using only the columns chosen by the
# Firefly + Random Forest search, then score it on the full test split.
# random_state is fixed so the reported score is reproducible across runs
# rather than varying with the forest's internal randomness.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train[firefly_random_forest_columns], y_train)
y_pred = clf.predict(X_test[firefly_random_forest_columns])

# Last expression of the cell: displayed as the cell output.
f1_score(y_test, y_pred)