In [1]:
import pandas as pd
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler

elec = pd.read_csv('..\dataset generation\small_elec_features.csv')
elec['Class'] = 'elec'
elec = elec.iloc[2:, 1:]
#print(elec.head())
#print(elec.shape)

rock = pd.read_csv('..\dataset generation\small_rock_features.csv')
rock['Class'] = 'rock'
rock = rock.iloc[2:, 1:]
#print(rock.head())
#print(rock.shape)

df = pd.concat([elec, rock])
df = df.sample(frac=1).reset_index(drop=True)
#print(df.head())
#print(df.shape)

col = df.columns.get_loc('mfcc')
mfcc = df.iloc[:, col:col+140]
clas = df['Class']
df = pd.concat([mfcc, clas], axis=1)

y_data = df['Class']
x_data = df.drop('Class', axis = 1)

class_data = pd.get_dummies(df['Class'], drop_first = False)

scaler = MinMaxScaler()
scaled = scaler.fit_transform(x_data)
x_df = DataFrame(scaled)

df = pd.concat([x_df, class_data], axis = 1)
df = df.dropna()
print(df.head())
print(df.shape)

          0         1         2         3         4         5         6  \
0  0.003726  0.006590  0.112600  0.064279  0.181075  0.144366  0.137185   
1  0.023029  0.022019  0.182357  0.082678  0.111561  0.103441  0.108151   
2  0.008742  0.005628  0.559984  0.058026  0.251543  0.150085  0.182118   
3  0.002963  0.008525  0.095692  0.062252  0.125961  0.096252  0.107540   
4  0.027465  0.015992  0.155092  0.052292  0.117690  0.073475  0.146549   

          7         8         9  ...       132       133       134       135  \
0  0.108869  0.325683  0.226048  ...  0.192042  0.201825  0.162472  0.140605   
1  0.090657  0.176594  0.109216  ...  0.086641  0.075689  0.082185  0.050522   
2  0.094231  0.139303  0.131240  ...  0.110658  0.130908  0.124006  0.108014   
3  0.109592  0.130127  0.115679  ...  0.307643  0.525281  0.449779  0.385350   
4  0.081661  0.147089  0.143395  ...  0.115965  0.197621  0.152140  0.126997   

        136       137       138       139  elec  rock  
0  0.166513 

In [2]:
from sklearn.model_selection import train_test_split

y_cols = ['rock', 'elec']
y_data = df[y_cols]
x_data = df.drop(y_data, axis = 1)

x_training_data, x_test_data, y_training_data, y_test_data = train_test_split(x_data, y_data, test_size = 0.1)

In [4]:
from sklearn.metrics import classification_report
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.3, batch_size = 300, hidden_layer_sizes = (70,40), max_iter = 500)
mlp.fit(x_training_data, y_training_data)
predictions = mlp.predict(x_test_data)

print("Confusion Matrix:")
print(multilabel_confusion_matrix(y_test_data, predictions))
print("\nAccuracy:", accuracy_score(y_test_data, predictions))
print("\nMSE:", mean_squared_error(y_test_data, predictions))
print("\nClassification Report:")
print(classification_report(y_test_data, predictions))

Confusion Matrix:
[[[95  7]
  [23 75]]

 [[76 22]
  [ 7 95]]]

Accuracy: 0.85

MSE: 0.1475

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.77      0.83        98
           1       0.81      0.93      0.87       102

   micro avg       0.85      0.85      0.85       200
   macro avg       0.86      0.85      0.85       200
weighted avg       0.86      0.85      0.85       200
 samples avg       0.85      0.85      0.85       200



  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
from sklearn.model_selection import GridSearchCV, KFold

learning_rate_init = [0.2, 0.3, 0.4]
max_iter = [300, 500, 700]
hidden_layer_sizes = [(70,40), (32,8), (12,3)]
solver = ['sgd']
random_state = [42]
activation = ['logistic']
batch_size = [300]


# Make a dictionary of the grid search parameters
param_grid = dict(learning_rate_init=learning_rate_init, max_iter=max_iter, hidden_layer_sizes=hidden_layer_sizes, solver=solver, random_state=random_state, activation=activation, batch_size=batch_size)

# Build and fit the GridSearchCV
grid = GridSearchCV(estimator=mlp, param_grid=param_grid,
                    cv=KFold(), verbose=10)

grid_results = grid.fit(x_data, y_data)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV 1/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(70, 40), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 1/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(70, 40), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd;, score=0.823 total time=   0.2s
[CV 2/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(70, 40), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 2/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(70, 40), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd;, score=0.882 total time=   0.5s
[CV 3/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(70, 40), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 3/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(70, 40), learning_rate_init=0.2, ma

In [8]:
print("Optimal Paramaters:", grid_results.best_params_)
print("\nOptimal Hidden Layer Sizes:", grid_results.best_params_['hidden_layer_sizes'])
print("\nOptimal Learning Rate:", grid_results.best_params_['learning_rate_init'])
print("\nOptimal Number of Epochs:", grid_results.best_params_['max_iter'])
print("\nMax Accuracy:", grid_results.best_score_)

Optimal Paramaters: {'activation': 'logistic', 'batch_size': 300, 'hidden_layer_sizes': (32, 8), 'learning_rate_init': 0.3, 'max_iter': 300, 'random_state': 42, 'solver': 'sgd'}

Optimal Hidden Layer Sizes: (32, 8)

Optimal Learning Rate: 0.3

Optimal Number of Epochs: 300

Max Accuracy: 0.8623646616541354
