In [62]:
# LIBRARIES

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import sklearn.metrics as met
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, KFold

In [63]:
# DATASET

df = pd.read_excel("DryBeanDataset/Dry_Bean_Dataset.xlsx")
df = df.drop_duplicates(ignore_index=True)
df.rename(columns = {'AspectRation':'AspectRatio'}, inplace = True)
df = df.astype({'Area': 'float64'})
df = df.astype({'Class': 'string'})
df.info()
df.describe() # df[df['Class']=='BOMBAY'].describe()
labels = ['BARBUNYA', 'BOMBAY', 'CALI', 'DERMASON', 'HOROZ', 'SEKER', 'SIRA']
colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple', 'tab:brown', 'tab:pink']
features = df.columns.to_list();
features.remove('Class');
#features = ['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'AspectRatio', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 
#            'Extent', 'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2','ShapeFactor3', 'ShapeFactor4'];

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13543 entries, 0 to 13542
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Area             13543 non-null  float64
 1   Perimeter        13543 non-null  float64
 2   MajorAxisLength  13543 non-null  float64
 3   MinorAxisLength  13543 non-null  float64
 4   AspectRatio      13543 non-null  float64
 5   Eccentricity     13543 non-null  float64
 6   ConvexArea       13543 non-null  int64  
 7   EquivDiameter    13543 non-null  float64
 8   Extent           13543 non-null  float64
 9   Solidity         13543 non-null  float64
 10  roundness        13543 non-null  float64
 11  Compactness      13543 non-null  float64
 12  ShapeFactor1     13543 non-null  float64
 13  ShapeFactor2     13543 non-null  float64
 14  ShapeFactor3     13543 non-null  float64
 15  ShapeFactor4     13543 non-null  float64
 16  Class            13543 non-null  string 
dtypes: float64(1

In [65]:
# TRAIN TEST SPLIT

X = df.iloc[:,:-1]
y = df.iloc[:,-1:]
X_train, X_test, y_train, y_test  = train_test_split(X, y, test_size=0.33)
df_train = pd.merge(X_train, y_train, left_index=True, right_index=True)
df_test = pd.merge(X_test, y_test, left_index=True, right_index=True)

# Feature Scaling 
sc = MinMaxScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)


In [71]:
PIPE = Pipeline([[ 'over', SMOTE() ] , [ 'classifier', MLPClassifier() ]]) #[ None, SMOTE(), ADASYN(), SMOTETomek(), SMOTEENN()]

models = {
    "Multi-Layer Perceptron": MLPClassifier(
    alpha=1e-5, 
    learning_rate='constant',
    learning_rate_init=0.001,    
    max_iter=500,
    tol=1e-4,
    verbose=True,
    momentum=0.9,
    early_stopping=False,
    ),
}

param_grids = {
    "Multi-Layer Perceptron": {
        "classifier__hidden_layer_sizes": [(64, 64, 64)], 
        "classifier__activation": ['tanh'],                     # {'relu',}
        "classifier__solver": ['adam'],                          # {'sgd',}
                      }
}

results = []
for name, model in models.items():
    grid_search = GridSearchCV(
        estimator=Pipeline([[ 'over', SMOTE() ] , [ 'classifier', model ]]),
        param_grid=param_grids[name],
        scoring = 'accuracy',
        n_jobs = None,
        refit = True,        
        cv=KFold(n_splits=5, shuffle=True),        
        verbose = 4,
        return_train_score=True,
        ).fit(X_train_scaled, y_train.values.ravel())
    result = {"model": name, "cv_results": pd.DataFrame(grid_search.cv_results_)}
    results.append(result)

y_pred_test = grid_search.predict(X_test_scaled) #best_estimator_.
print(met.classification_report(y_pred_test, y_test));

cv_score = grid_search.best_score_
test_score = grid_search.score(X_test_scaled, y_test)

#print(cv_score)
#print(test_score)
#print(results)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Iteration 1, loss = 1.31043604
Iteration 2, loss = 0.43922247
Iteration 3, loss = 0.29150611
Iteration 4, loss = 0.24604976
Iteration 5, loss = 0.21834926
Iteration 6, loss = 0.20270323
Iteration 7, loss = 0.19380577
Iteration 8, loss = 0.18596151
Iteration 9, loss = 0.18282589
Iteration 10, loss = 0.17979900
Iteration 11, loss = 0.17983046
Iteration 12, loss = 0.17836143
Iteration 13, loss = 0.17910170
Iteration 14, loss = 0.17730025
Iteration 15, loss = 0.17374523
Iteration 16, loss = 0.18559681
Iteration 17, loss = 0.17220019
Iteration 18, loss = 0.17082790
Iteration 19, loss = 0.17314574
Iteration 20, loss = 0.17671898
Iteration 21, loss = 0.16809709
Iteration 22, loss = 0.16987734
Iteration 23, loss = 0.16788911
Iteration 24, loss = 0.16832524
Iteration 25, loss = 0.16833323
Iteration 26, loss = 0.16938636
Iteration 27, loss = 0.16557501
Iteration 28, loss = 0.16659350
Iteration 29, loss = 0.16441342
Iteration 30, loss = 0

In [69]:
print(cv_score)
print(test_score)
print(results)

0.9254919041067182
0.9239373601789709
[{'model': 'Multi-Layer Perceptron', 'cv_results':    mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0      10.663768      4.097885         0.002383        0.000544   
1       6.486375      1.831066         0.002659        0.000567   
2       7.998705      2.810508         0.002762        0.000383   
3       6.407062      1.257112         0.005509        0.006576   
4      12.396868      2.174270         0.002784        0.000382   
5       5.901058      1.397603         0.002796        0.000754   
6      12.469470      0.416854         0.002697        0.000398   
7       5.431089      0.903493         0.002566        0.000503   

  param_classifier__activation param_classifier__hidden_layer_sizes  \
0                         relu                         (16, 16, 16)   
1                         relu                         (16, 16, 16)   
2                         relu                            (8, 8, 8)   
3                      