In [1]:
import pandas as pd
from pandas import DataFrame

df = pd.read_csv('../dataset generation/data/fma_metadata/tracks.csv')
genre = df['track.7']
id = df['Unnamed: 0']
df = pd.concat([id, genre], axis = 1)
df = df.iloc[2:, :].reset_index(drop=True)
df.columns = ['track_id', 'genre']

folk = df.loc[df['genre'] == 'Folk'].sample(n=1000).reset_index(drop=True)
instrumental = df.loc[df['genre'] == 'Instrumental'].sample(n=1000).reset_index(drop=True)
hip_hop = df.loc[df['genre'] == 'Hip-Hop'].sample(n=1000).reset_index(drop=True)
elec = df.loc[df['genre'] == 'Electronic'].sample(n=1000).reset_index(drop=True)
rock = df.loc[df['genre'] == 'Rock'].sample(n=1000).reset_index(drop=True)

df = pd.concat([folk, instrumental, hip_hop, elec, rock])
df = df.sample(frac=1).reset_index(drop=True)
print(df.head())
print(df.shape)

  track_id         genre
0   111936       Hip-Hop
1   143291  Instrumental
2   111230  Instrumental
3     6608       Hip-Hop
4   115891  Instrumental
(5000, 2)


  df = pd.read_csv('../dataset generation/data/fma_metadata/tracks.csv')


In [2]:
pd.options.mode.chained_assignment = None

features = pd.read_csv('../dataset generation/data/fma_metadata/features.csv')
features = features.iloc[3:, :].reset_index(drop=True)
features.head()

full_features = pd.DataFrame()

for i in df['track_id']:
    f = features.loc[features['feature'] == i]
    f['genre'] = df.loc[df['track_id'] == i, 'genre'].iloc[0]
    full_features = pd.concat([full_features, f])

full_features = full_features.iloc[:, 1:].reset_index(drop=True)

  features = pd.read_csv('../dataset generation/data/fma_metadata/features.csv')


In [3]:
from sklearn.preprocessing import MinMaxScaler

y_data = full_features['genre']
x_data = full_features.drop('genre', axis = 1)

class_data = pd.get_dummies(full_features['genre'], drop_first = False)

scaler = MinMaxScaler()
scaled = scaler.fit_transform(x_data)
x_df = DataFrame(scaled)

full_features = pd.concat([x_df, class_data], axis = 1)

In [77]:
from sklearn.model_selection import train_test_split

y_cols = ['Folk', 'Instrumental', 'Hip-Hop', 'Rock', 'Electronic']
y_data = full_features[y_cols]
x_data = full_features.drop(y_data, axis = 1)

x_training_data, x_test_data, y_training_data, y_test_data = train_test_split(x_data, y_data, test_size = 0.1)

In [78]:
from sklearn.metrics import classification_report
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.3, batch_size = 300, hidden_layer_sizes = (200,100), max_iter = 500)
mlp.fit(x_training_data, y_training_data)
predictions = mlp.predict(x_test_data)

print("Confusion Matrix:")
print(multilabel_confusion_matrix(y_test_data, predictions))
print("\nAccuracy:", accuracy_score(y_test_data, predictions))
print("\nMSE:", mean_squared_error(y_test_data, predictions))
print("\nClassification Report:")
print(classification_report(y_test_data, predictions))

Confusion Matrix:
[[[316  12]
  [ 35  51]]

 [[257  67]
  [ 10  80]]

 [[311  11]
  [ 21  71]]

 [[335   1]
  [ 46  32]]

 [[331  15]
  [ 34  34]]]

Accuracy: 0.6207729468599034

MSE: 0.12173913043478261

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.59      0.68        86
           1       0.54      0.89      0.68        90
           2       0.87      0.77      0.82        92
           3       0.97      0.41      0.58        78
           4       0.69      0.50      0.58        68

   micro avg       0.72      0.65      0.68       414
   macro avg       0.78      0.63      0.67       414
weighted avg       0.78      0.65      0.67       414
 samples avg       0.63      0.65      0.64       414



  _warn_prf(average, modifier, msg_start, len(result))


In [79]:
from sklearn.model_selection import GridSearchCV, KFold

learning_rate_init = [0.2, 0.3, 0.4]
max_iter = [300, 500, 700]
hidden_layer_sizes = [(200,100), (256,64,8,2), (200,50)]
solver = ['sgd']
random_state = [42]
activation = ['logistic']
batch_size = [300]


# Make a dictionary of the grid search parameters
param_grid = dict(learning_rate_init=learning_rate_init, max_iter=max_iter, hidden_layer_sizes=hidden_layer_sizes, solver=solver, random_state=random_state, activation=activation, batch_size=batch_size)

# Build and fit the GridSearchCV
grid = GridSearchCV(estimator=mlp, param_grid=param_grid,
                    cv=KFold(), verbose=10)

grid_results = grid.fit(x_data, y_data)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV 1/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 1/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd;, score=0.382 total time=   3.1s
[CV 2/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 2/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd;, score=0.346 total time=   4.7s
[CV 3/5; 1/27] START activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_init=0.2, max_iter=300, random_state=42, solver=sgd
[CV 3/5; 1/27] END activation=logistic, batch_size=300, hidden_layer_sizes=(200, 100), learning_rate_

In [83]:
print("Optimal Paramaters:", grid_results.best_params_)
print("\nOptimal Hidden Layer Sizes:", grid_results.best_params_['hidden_layer_sizes'])
print("\nOptimal Learning Rate:", grid_results.best_params_['learning_rate_init'])
print("\nOptimal Number of Epochs:", grid_results.best_params_['max_iter'])
print("\nMax Accuracy:", grid_results.best_score_)

Optimal Paramaters: {'activation': 'logistic', 'batch_size': 300, 'hidden_layer_sizes': (200, 100), 'learning_rate_init': 0.2, 'max_iter': 300, 'random_state': 42, 'solver': 'sgd'}

Optimal Hidden Layer Sizes: (200, 100)

Optimal Learning Rate: 0.2

Optimal Number of Epochs: 300

Max Accuracy: 0.41895207450717464
