In [13]:
import pandas as pd
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler

def _load_features(csv_path, label):
    """Read one genre's feature CSV and tag every row with its class label.

    Parameters
    ----------
    csv_path : str
        Location of the feature CSV, relative to the notebook.
    label : str
        Value written into the new 'Class' column.

    Returns
    -------
    pandas.DataFrame
        The table minus its first two rows and first column, with 'Class'
        as the last column.
    """
    features = pd.read_csv(csv_path)
    features['Class'] = label
    # Skip the first two rows and the first column of the file.
    # NOTE(review): presumably header residue / a saved index -- confirm against the CSV.
    return features.iloc[2:, 1:]


# Forward slashes avoid the invalid '\d' / '\s' escape sequences of the
# original literals and resolve on Windows as well as POSIX systems.
elec = _load_features('../dataset generation/small_elec_features.csv', 'elec')
rock = _load_features('../dataset generation/small_rock_features.csv', 'rock')

# Stack both genres and shuffle the rows; the fixed seed makes the shuffle
# (and everything downstream of it) repeatable across kernel restarts.
df = pd.concat([elec, rock])
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

y_data = df['Class']
x_data = df.drop('Class', axis = 1)

# One indicator column per class ('elec', 'rock').
class_data = pd.get_dummies(df['Class'], drop_first = False)

# Rescale every feature column to [0, 1].
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test
# split, so test-set statistics influence the scaling -- consider fitting on
# the training portion only.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(x_data)
x_df = DataFrame(scaled)

# Re-attach the indicator columns; both frames share the 0..n-1 index that
# reset_index produced, so the rows line up.
df = pd.concat([x_df, class_data], axis = 1)
df = df.dropna()
print(df.head())
print(df.shape)

          0         1         2         3         4         5         6  \
0  0.009260  0.025503  0.019094  0.014224  0.032437  0.003717  0.058152   
1  0.006261  0.012419  0.010239  0.010072  0.127611  0.138799  0.093747   
2  0.007663  0.019670  0.020438  0.009324  0.053243  0.003222  0.018844   
3  0.008903  0.019660  0.009521  0.009112  0.060589  0.005921  0.070113   
4  0.008775  0.021878  0.012157  0.010505  0.043952  0.013075  0.110120   

          7         8         9  ...       510       511       512       513  \
0  0.043434  0.021202  0.021000  ...  0.147422  0.003550  0.253650  0.317012   
1  0.175846  0.014050  0.092933  ...  0.055086  0.008524  0.030414  0.032118   
2  0.071196  0.012977  0.026407  ...  0.219999  0.004419  0.125304  0.166158   
3  0.054841  0.015270  0.053664  ...  0.190545  0.017517  0.144769  0.097741   
4  0.074704  0.059289  0.035793  ...  0.144829  0.006712  0.173966  0.130756   

        514       515       516       517  elec  rock  
0  0.295858 

In [14]:
from sklearn.model_selection import train_test_split

# Indicator columns act as the (two-column) target; everything else is a feature.
y_cols = ['rock', 'elec']
y_data = df[y_cols]
# Drop by column label. The previous form passed the y_data frame itself as
# the labels, which only worked because iterating a DataFrame yields its
# column names.
x_data = df.drop(columns = y_cols)

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# (and therefore the reported metrics) reproducible between runs.
x_training_data, x_test_data, y_training_data, y_test_data = train_test_split(
    x_data, y_data, test_size = 0.1, random_state = 42
)

In [15]:
from sklearn.metrics import classification_report
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with two hidden layers (200 and 100 units), sigmoid
# activations, trained by mini-batch SGD with a fixed seed.
mlp = MLPClassifier(
    hidden_layer_sizes = (200,100),
    activation = 'logistic',
    solver = 'sgd',
    learning_rate_init = 0.3,
    batch_size = 300,
    max_iter = 500,
    random_state = 42,
)
mlp.fit(x_training_data, y_training_data)
predictions = mlp.predict(x_test_data)

# Evaluate on the held-out rows. Header and value are emitted on separate
# lines, exactly as two consecutive print calls would.
print("Confusion Matrix:", multilabel_confusion_matrix(y_test_data, predictions), sep = "\n")
print("\nAccuracy:", accuracy_score(y_test_data, predictions))
print("\nMSE:", mean_squared_error(y_test_data, predictions))
print("\nClassification Report:", classification_report(y_test_data, predictions), sep = "\n")

Confusion Matrix:
[[[85  8]
  [17 90]]

 [[90 17]
  [ 8 85]]]

Accuracy: 0.875

MSE: 0.125

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.84      0.88       107
           1       0.83      0.91      0.87        93

   micro avg       0.88      0.88      0.88       200
   macro avg       0.88      0.88      0.87       200
weighted avg       0.88      0.88      0.88       200
 samples avg       0.88      0.88      0.88       200



In [5]:
from sklearn.model_selection import GridSearchCV, KFold

# Hyperparameters that are actually searched: 3 x 3 x 3 = 27 combinations.
learning_rate_init = [0.2, 0.3, 0.4]
max_iter = [300, 500, 700]
hidden_layer_sizes = [(200,100), (256,64,8,2), (200,50)]

# Settings held constant across every candidate (single-element lists).
solver = ['sgd']
activation = ['logistic']
batch_size = [300]
random_state = [42]

# Search space handed to GridSearchCV, keyed by MLPClassifier argument name.
param_grid = {
    'learning_rate_init': learning_rate_init,
    'max_iter': max_iter,
    'hidden_layer_sizes': hidden_layer_sizes,
    'solver': solver,
    'random_state': random_state,
    'activation': activation,
    'batch_size': batch_size,
}

# Exhaustive search over the grid with a default KFold splitter, run on the
# full feature/target frames; verbose=10 logs every individual fit.
grid = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    cv=KFold(),
    verbose=10,
)

grid_results = grid.fit(x_data, y_data)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV 1/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 1/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd;, score=0.885 total time=   1.7s
[CV 2/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 2/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd;, score=0.858 total time=   1.4s
[CV 3/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 3/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_

In [7]:
# Summarise the winning grid-search candidate.
best_params = grid_results.best_params_

print("Optimal Parameters:", best_params)
print("\nOptimal Hidden Layer Sizes:", best_params['hidden_layer_sizes'])
print("\nOptimal Learning Rate:", best_params['learning_rate_init'])
print("\nOptimal Number of Epochs:", best_params['max_iter'])
# best_score_ is the cross-validated score of the best candidate, averaged
# over the folds -- not a score on the held-out test split.
print("\nMax Accuracy:", grid_results.best_score_)

Optimal Paramaters: {'activation': 'logistic', 'batch_size': 300, 'hidden_layer_sizes': (200, 100), 'learning_rate_init': 0.2, 'max_iter': 300, 'random_state': 42, 'solver': 'sgd'}

Optimal Hidden Layer Sizes: (200, 100)

Optimal Learning Rate: 0.2

Optimal Number of Epochs: 300

Max Accuracy: 0.8843834586466166
