Random Forest Hyperparameter Experiment

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score,confusion_matrix

# Load the standardized dataset
df_standardized = pd.read_csv('/Users/qilaawg/Documents/cleaned_dataset_standardized.csv')

# Drop irrelevant columns
df = df_standardized.drop(columns=[ 'year','time_signature','loudness','acousticness'])

# Define the target variable and features
X = df.drop(columns=['genre'])  # 'genre' is a target variable
y = df['genre']

# Feature selection using PCA
pca = PCA(n_components=10)  # Select top 10 components
X_selected = pca.fit_transform(X)

# Check explained variance ratio
print("Explained variance ratio by selected components:", pca.explained_variance_ratio_)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Define a function to print accuracy, precision, recall, and F1-score for a model
def print_metrics(model_name, y_test, y_pred):
    print(f"\n{model_name} Performance Metrics:")
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Precision: {precision:.2%}")
    print(f"Recall: {recall:.2%}")
    print(f"F1 Score: {f1:.2%}")
    print(classification_report(y_test, y_pred))

# Initialize classifiers
svm_model = SVC()
rf_model = RandomForestClassifier()


Explained variance ratio by selected components: [0.20109803 0.13215618 0.11590122 0.10264597 0.09798721 0.09393141
 0.08655728 0.08026483 0.05313664 0.03632123]


In [2]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [None],
    'min_samples_leaf': [1]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)

Best cross-validation score for Random Forest: 0.8010749423832164
Best parameters for Random Forest: {'max_depth': None, 'min_samples_leaf': 1, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 83.27%
Precision: 83.19%
Recall: 83.27%
F1 Score: 83.06%
              precision    recall  f1-score   support

    acoustic       0.77      0.72      0.74      2364
    alt-rock       0.78      0.65      0.71      2329
       blues       0.83      0.70      0.76      2375
   classical       0.93      0.95      0.94      2371
     country       0.70      0.69      0.70      2395
       dance       0.82      0.73      0.77      2367
   dancehall       0.80      0.91      0.85      2385
       disco       0.81      0.78      0.80      2429
         edm       0.85      0.91      0.88      2415
         emo       0.79      0.77      0.78      2398
       forro       0.74      0.82      0.78      2317
      guitar       0.92      0.95      0.93      2387
     hip-hop      

In [2]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [40],
    'min_samples_leaf': [2]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7885026576102855
Best parameters for Random Forest: {'max_depth': 40, 'min_samples_leaf': 2, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 82.08%
Precision: 82.08%
Recall: 82.08%
F1 Score: 81.86%
              precision    recall  f1-score   support

    acoustic       0.76      0.70      0.73      2364
    alt-rock       0.78      0.64      0.70      2329
       blues       0.83      0.69      0.75      2375
   classical       0.92      0.94      0.93      2371
     country       0.68      0.69      0.69      2395
       dance       0.82      0.71      0.76      2367
   dancehall       0.79      0.90      0.84      2385
       disco       0.80      0.77      0.78      2429
         edm       0.84      0.90      0.87      2415
         emo       0.77      0.75      0.76      2398
       forro       0.72      0.82      0.77      2317
      guitar       0.91      0.93      0.92      2387
     hip-hop       0

In [3]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [40],
    'min_samples_leaf': [4]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7549293938943367
Best parameters for Random Forest: {'max_depth': 40, 'min_samples_leaf': 4, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 78.95%
Precision: 79.06%
Recall: 78.95%
F1 Score: 78.65%
              precision    recall  f1-score   support

    acoustic       0.72      0.66      0.69      2364
    alt-rock       0.76      0.58      0.66      2329
       blues       0.81      0.62      0.70      2375
   classical       0.90      0.93      0.91      2371
     country       0.64      0.64      0.64      2395
       dance       0.81      0.66      0.72      2367
   dancehall       0.74      0.88      0.80      2385
       disco       0.77      0.73      0.75      2429
         edm       0.80      0.87      0.84      2415
         emo       0.74      0.72      0.73      2398
       forro       0.67      0.80      0.73      2317
      guitar       0.90      0.91      0.90      2387
     hip-hop       0

In [4]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [40],
    'min_samples_leaf': [6]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7186478612484481
Best parameters for Random Forest: {'max_depth': 40, 'min_samples_leaf': 6, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 75.21%
Precision: 75.42%
Recall: 75.21%
F1 Score: 74.77%
              precision    recall  f1-score   support

    acoustic       0.68      0.60      0.63      2364
    alt-rock       0.72      0.51      0.60      2329
       blues       0.78      0.56      0.65      2375
   classical       0.87      0.91      0.89      2371
     country       0.58      0.59      0.58      2395
       dance       0.78      0.61      0.68      2367
   dancehall       0.69      0.86      0.77      2385
       disco       0.73      0.69      0.71      2429
         edm       0.77      0.84      0.80      2415
         emo       0.69      0.68      0.68      2398
       forro       0.62      0.76      0.69      2317
      guitar       0.87      0.89      0.88      2387
     hip-hop       0

In [5]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [40],
    'min_samples_leaf': [8]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6866461434709328
Best parameters for Random Forest: {'max_depth': 40, 'min_samples_leaf': 8, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 71.94%
Precision: 72.18%
Recall: 71.94%
F1 Score: 71.30%
              precision    recall  f1-score   support

    acoustic       0.64      0.53      0.58      2364
    alt-rock       0.69      0.45      0.55      2329
       blues       0.74      0.49      0.59      2375
   classical       0.85      0.89      0.87      2371
     country       0.55      0.55      0.55      2395
       dance       0.76      0.54      0.63      2367
   dancehall       0.65      0.85      0.74      2385
       disco       0.68      0.65      0.67      2429
         edm       0.72      0.82      0.77      2415
         emo       0.65      0.64      0.64      2398
       forro       0.58      0.75      0.65      2317
      guitar       0.85      0.87      0.86      2387
     hip-hop       0

In [6]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [40],
    'min_samples_leaf': [10]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6587513784801654
Best parameters for Random Forest: {'max_depth': 40, 'min_samples_leaf': 10, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 69.28%
Precision: 69.48%
Recall: 69.28%
F1 Score: 68.46%
              precision    recall  f1-score   support

    acoustic       0.61      0.50      0.55      2364
    alt-rock       0.65      0.40      0.50      2329
       blues       0.71      0.44      0.55      2375
   classical       0.83      0.88      0.85      2371
     country       0.51      0.52      0.52      2395
       dance       0.73      0.49      0.59      2367
   dancehall       0.62      0.84      0.72      2385
       disco       0.66      0.62      0.64      2429
         edm       0.70      0.80      0.74      2415
         emo       0.62      0.60      0.61      2398
       forro       0.56      0.73      0.63      2317
      guitar       0.82      0.85      0.84      2387
     hip-hop       

In [None]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [60],
    'min_samples_leaf': [2]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7886179029769765
Best parameters for Random Forest: {'max_depth': 60, 'min_samples_leaf': 2, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 82.20%
Precision: 82.18%
Recall: 82.20%
F1 Score: 81.97%
              precision    recall  f1-score   support

    acoustic       0.76      0.70      0.73      2364
    alt-rock       0.79      0.64      0.71      2329
       blues       0.83      0.69      0.75      2375
   classical       0.92      0.94      0.93      2371
     country       0.69      0.69      0.69      2395
       dance       0.82      0.71      0.76      2367
   dancehall       0.79      0.90      0.84      2385
       disco       0.80      0.77      0.78      2429
         edm       0.84      0.90      0.87      2415
         emo       0.77      0.76      0.77      2398
       forro       0.72      0.82      0.77      2317
      guitar       0.91      0.94      0.92      2387
     hip-hop       0

: 

In [2]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [60],
    'min_samples_leaf': [4]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7550184476748172
Best parameters for Random Forest: {'max_depth': 60, 'min_samples_leaf': 4, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 78.83%
Precision: 78.96%
Recall: 78.83%
F1 Score: 78.52%
              precision    recall  f1-score   support

    acoustic       0.72      0.65      0.69      2364
    alt-rock       0.76      0.58      0.65      2329
       blues       0.81      0.62      0.70      2375
   classical       0.90      0.92      0.91      2371
     country       0.64      0.65      0.64      2395
       dance       0.81      0.66      0.73      2367
   dancehall       0.74      0.88      0.80      2385
       disco       0.77      0.73      0.75      2429
         edm       0.81      0.87      0.84      2415
         emo       0.73      0.72      0.73      2398
       forro       0.67      0.80      0.73      2317
      guitar       0.90      0.91      0.90      2387
     hip-hop       0

In [3]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [60],
    'min_samples_leaf': [8]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6866723314898167
Best parameters for Random Forest: {'max_depth': 60, 'min_samples_leaf': 8, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 71.97%
Precision: 72.21%
Recall: 71.97%
F1 Score: 71.34%
              precision    recall  f1-score   support

    acoustic       0.63      0.54      0.58      2364
    alt-rock       0.69      0.45      0.54      2329
       blues       0.74      0.50      0.60      2375
   classical       0.84      0.89      0.87      2371
     country       0.54      0.55      0.55      2395
       dance       0.75      0.54      0.63      2367
   dancehall       0.66      0.85      0.74      2385
       disco       0.69      0.65      0.67      2429
         edm       0.73      0.82      0.77      2415
         emo       0.65      0.64      0.64      2398
       forro       0.58      0.75      0.66      2317
      guitar       0.85      0.87      0.86      2387
     hip-hop       0

In [4]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [60],
    'min_samples_leaf': [6]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.718155443950981
Best parameters for Random Forest: {'max_depth': 60, 'min_samples_leaf': 6, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 75.17%
Precision: 75.36%
Recall: 75.17%
F1 Score: 74.72%
              precision    recall  f1-score   support

    acoustic       0.67      0.59      0.63      2364
    alt-rock       0.72      0.51      0.60      2329
       blues       0.77      0.55      0.64      2375
   classical       0.87      0.90      0.88      2371
     country       0.58      0.59      0.59      2395
       dance       0.79      0.61      0.69      2367
   dancehall       0.69      0.86      0.77      2385
       disco       0.73      0.69      0.71      2429
         edm       0.77      0.84      0.80      2415
         emo       0.69      0.68      0.69      2398
       forro       0.62      0.77      0.69      2317
      guitar       0.87      0.89      0.88      2387
     hip-hop       0.

In [None]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [80],
    'min_samples_leaf': [2]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7887750544151079
Best parameters for Random Forest: {'max_depth': 80, 'min_samples_leaf': 2, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 82.17%
Precision: 82.16%
Recall: 82.17%
F1 Score: 81.95%
              precision    recall  f1-score   support

    acoustic       0.76      0.70      0.73      2364
    alt-rock       0.78      0.64      0.70      2329
       blues       0.83      0.69      0.75      2375
   classical       0.92      0.94      0.93      2371
     country       0.68      0.69      0.69      2395
       dance       0.82      0.71      0.76      2367
   dancehall       0.78      0.90      0.84      2385
       disco       0.80      0.77      0.78      2429
         edm       0.84      0.90      0.87      2415
         emo       0.77      0.76      0.77      2398
       forro       0.72      0.82      0.77      2317
      guitar       0.91      0.94      0.92      2387
     hip-hop       0

: 

In [2]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [60],
    'min_samples_leaf': [10]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6588351845860319
Best parameters for Random Forest: {'max_depth': 60, 'min_samples_leaf': 10, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 69.32%
Precision: 69.51%
Recall: 69.32%
F1 Score: 68.50%
              precision    recall  f1-score   support

    acoustic       0.61      0.50      0.55      2364
    alt-rock       0.65      0.40      0.50      2329
       blues       0.71      0.45      0.55      2375
   classical       0.83      0.88      0.85      2371
     country       0.51      0.51      0.51      2395
       dance       0.72      0.49      0.58      2367
   dancehall       0.63      0.84      0.72      2385
       disco       0.65      0.62      0.63      2429
         edm       0.70      0.80      0.75      2415
         emo       0.62      0.60      0.61      2398
       forro       0.56      0.73      0.64      2317
      guitar       0.83      0.85      0.84      2387
     hip-hop       

In [3]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [80],
    'min_samples_leaf': [4]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7548874849415942
Best parameters for Random Forest: {'max_depth': 80, 'min_samples_leaf': 4, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 78.84%
Precision: 78.99%
Recall: 78.84%
F1 Score: 78.54%
              precision    recall  f1-score   support

    acoustic       0.72      0.66      0.69      2364
    alt-rock       0.76      0.58      0.66      2329
       blues       0.81      0.62      0.71      2375
   classical       0.89      0.92      0.91      2371
     country       0.64      0.65      0.64      2395
       dance       0.81      0.66      0.73      2367
   dancehall       0.74      0.88      0.80      2385
       disco       0.76      0.73      0.75      2429
         edm       0.80      0.87      0.83      2415
         emo       0.74      0.72      0.73      2398
       forro       0.67      0.81      0.73      2317
      guitar       0.90      0.91      0.90      2387
     hip-hop       0

In [4]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [80],
    'min_samples_leaf': [6]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7183440269664645
Best parameters for Random Forest: {'max_depth': 80, 'min_samples_leaf': 6, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 75.17%
Precision: 75.35%
Recall: 75.17%
F1 Score: 74.72%
              precision    recall  f1-score   support

    acoustic       0.67      0.59      0.63      2364
    alt-rock       0.72      0.51      0.60      2329
       blues       0.77      0.55      0.64      2375
   classical       0.87      0.91      0.89      2371
     country       0.58      0.59      0.59      2395
       dance       0.78      0.60      0.68      2367
   dancehall       0.69      0.86      0.77      2385
       disco       0.72      0.69      0.70      2429
         edm       0.77      0.84      0.80      2415
         emo       0.70      0.68      0.69      2398
       forro       0.62      0.77      0.69      2317
      guitar       0.88      0.89      0.88      2387
     hip-hop       0

In [5]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [80],
    'min_samples_leaf': [8]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6860908613723031
Best parameters for Random Forest: {'max_depth': 80, 'min_samples_leaf': 8, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 71.97%
Precision: 72.16%
Recall: 71.97%
F1 Score: 71.32%
              precision    recall  f1-score   support

    acoustic       0.64      0.54      0.58      2364
    alt-rock       0.68      0.45      0.54      2329
       blues       0.74      0.49      0.59      2375
   classical       0.84      0.89      0.87      2371
     country       0.54      0.55      0.55      2395
       dance       0.75      0.54      0.63      2367
   dancehall       0.66      0.85      0.74      2385
       disco       0.68      0.65      0.66      2429
         edm       0.73      0.82      0.77      2415
         emo       0.65      0.63      0.64      2398
       forro       0.59      0.75      0.66      2317
      guitar       0.85      0.87      0.86      2387
     hip-hop       0

In [6]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [80],
    'min_samples_leaf': [10]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6591966435408319
Best parameters for Random Forest: {'max_depth': 80, 'min_samples_leaf': 10, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 69.17%
Precision: 69.36%
Recall: 69.17%
F1 Score: 68.35%
              precision    recall  f1-score   support

    acoustic       0.61      0.49      0.55      2364
    alt-rock       0.64      0.40      0.49      2329
       blues       0.71      0.44      0.55      2375
   classical       0.83      0.88      0.85      2371
     country       0.51      0.52      0.51      2395
       dance       0.73      0.49      0.58      2367
   dancehall       0.62      0.84      0.71      2385
       disco       0.65      0.62      0.63      2429
         edm       0.70      0.80      0.75      2415
         emo       0.62      0.60      0.61      2398
       forro       0.56      0.73      0.63      2317
      guitar       0.83      0.85      0.84      2387
     hip-hop       

In [None]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [100],
    'min_samples_leaf': [2]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7886283787059087
Best parameters for Random Forest: {'max_depth': 100, 'min_samples_leaf': 2, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 82.22%
Precision: 82.22%
Recall: 82.22%
F1 Score: 82.01%
              precision    recall  f1-score   support

    acoustic       0.77      0.71      0.73      2364
    alt-rock       0.78      0.64      0.70      2329
       blues       0.83      0.68      0.75      2375
   classical       0.92      0.94      0.93      2371
     country       0.69      0.69      0.69      2395
       dance       0.82      0.71      0.76      2367
   dancehall       0.79      0.90      0.84      2385
       disco       0.80      0.77      0.78      2429
         edm       0.84      0.90      0.87      2415
         emo       0.77      0.76      0.77      2398
       forro       0.72      0.82      0.77      2317
      guitar       0.91      0.94      0.92      2387
     hip-hop       

: 

In [2]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [100],
    'min_samples_leaf': [4]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7548979642378528
Best parameters for Random Forest: {'max_depth': 100, 'min_samples_leaf': 4, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 78.83%
Precision: 78.95%
Recall: 78.83%
F1 Score: 78.52%
              precision    recall  f1-score   support

    acoustic       0.72      0.65      0.68      2364
    alt-rock       0.76      0.58      0.65      2329
       blues       0.82      0.62      0.71      2375
   classical       0.90      0.92      0.91      2371
     country       0.64      0.64      0.64      2395
       dance       0.80      0.66      0.72      2367
   dancehall       0.74      0.87      0.80      2385
       disco       0.77      0.73      0.75      2429
         edm       0.80      0.87      0.84      2415
         emo       0.74      0.72      0.73      2398
       forro       0.67      0.80      0.73      2317
      guitar       0.90      0.91      0.90      2387
     hip-hop       

In [3]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [100],
    'min_samples_leaf': [6]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7180401973494461
Best parameters for Random Forest: {'max_depth': 100, 'min_samples_leaf': 6, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 75.20%
Precision: 75.42%
Recall: 75.20%
F1 Score: 74.75%
              precision    recall  f1-score   support

    acoustic       0.68      0.59      0.63      2364
    alt-rock       0.73      0.51      0.60      2329
       blues       0.78      0.56      0.65      2375
   classical       0.87      0.90      0.89      2371
     country       0.59      0.59      0.59      2395
       dance       0.78      0.60      0.68      2367
   dancehall       0.69      0.86      0.77      2385
       disco       0.72      0.69      0.70      2429
         edm       0.76      0.84      0.80      2415
         emo       0.69      0.68      0.68      2398
       forro       0.62      0.76      0.68      2317
      guitar       0.87      0.89      0.88      2387
     hip-hop       

In [4]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [100],
    'min_samples_leaf': [8]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.686468034537923
Best parameters for Random Forest: {'max_depth': 100, 'min_samples_leaf': 8, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 71.97%
Precision: 72.22%
Recall: 71.97%
F1 Score: 71.34%
              precision    recall  f1-score   support

    acoustic       0.64      0.54      0.59      2364
    alt-rock       0.68      0.46      0.55      2329
       blues       0.74      0.49      0.59      2375
   classical       0.85      0.89      0.87      2371
     country       0.54      0.55      0.55      2395
       dance       0.76      0.54      0.63      2367
   dancehall       0.65      0.85      0.74      2385
       disco       0.69      0.65      0.67      2429
         edm       0.73      0.82      0.77      2415
         emo       0.65      0.63      0.64      2398
       forro       0.58      0.75      0.65      2317
      guitar       0.85      0.87      0.86      2387
     hip-hop       0

In [5]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [100],
    'min_samples_leaf': [10]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6592385538656229
Best parameters for Random Forest: {'max_depth': 100, 'min_samples_leaf': 10, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 69.26%
Precision: 69.44%
Recall: 69.26%
F1 Score: 68.43%
              precision    recall  f1-score   support

    acoustic       0.60      0.49      0.54      2364
    alt-rock       0.64      0.40      0.49      2329
       blues       0.71      0.45      0.55      2375
   classical       0.83      0.88      0.85      2371
     country       0.51      0.52      0.51      2395
       dance       0.73      0.48      0.58      2367
   dancehall       0.63      0.84      0.72      2385
       disco       0.65      0.62      0.64      2429
         edm       0.69      0.80      0.74      2415
         emo       0.62      0.60      0.61      2398
       forro       0.56      0.73      0.63      2317
      guitar       0.82      0.85      0.84      2387
     hip-hop      

In [None]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [20],
    'min_samples_leaf': [2]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

In [None]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [20],
    'min_samples_leaf': [4]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

In [2]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [20],
    'min_samples_leaf': [6]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.7036030168661418
Best parameters for Random Forest: {'max_depth': 20, 'min_samples_leaf': 6, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 73.16%
Precision: 73.45%
Recall: 73.16%
F1 Score: 72.63%
              precision    recall  f1-score   support

    acoustic       0.66      0.56      0.60      2364
    alt-rock       0.70      0.48      0.57      2329
       blues       0.75      0.51      0.61      2375
   classical       0.86      0.90      0.88      2371
     country       0.53      0.57      0.55      2395
       dance       0.77      0.57      0.65      2367
   dancehall       0.66      0.86      0.74      2385
       disco       0.70      0.66      0.68      2429
         edm       0.74      0.83      0.78      2415
         emo       0.68      0.66      0.67      2398
       forro       0.60      0.75      0.67      2317
      guitar       0.84      0.88      0.86      2387
     hip-hop       0

In [3]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [20],
    'min_samples_leaf': [8]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6746396006097989
Best parameters for Random Forest: {'max_depth': 20, 'min_samples_leaf': 8, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 70.34%
Precision: 70.56%
Recall: 70.34%
F1 Score: 69.62%
              precision    recall  f1-score   support

    acoustic       0.62      0.51      0.56      2364
    alt-rock       0.66      0.42      0.51      2329
       blues       0.72      0.47      0.57      2375
   classical       0.84      0.89      0.87      2371
     country       0.50      0.53      0.51      2395
       dance       0.73      0.52      0.60      2367
   dancehall       0.63      0.84      0.72      2385
       disco       0.66      0.63      0.64      2429
         edm       0.72      0.80      0.75      2415
         emo       0.64      0.62      0.63      2398
       forro       0.57      0.74      0.64      2317
      guitar       0.83      0.86      0.84      2387
     hip-hop       0

In [2]:
#Random Forest
rf_param_grid = {
    'n_estimators': [800],
    'max_depth': [20],
    'min_samples_leaf': [10]
}

rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_param_grid, cv=5, scoring='accuracy')
rf_grid_search.fit(X_train, y_train)

print("Best cross-validation score for Random Forest:", rf_grid_search.best_score_)
print("Best parameters for Random Forest:", rf_grid_search.best_params_)

# Train the best Random Forest model
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
print_metrics("Random Forest Classifier", y_test, y_pred_rf)
conf_matrix=confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix for Random Forest Classifier\n",conf_matrix)

Best cross-validation score for Random Forest: 0.6504379498458853
Best parameters for Random Forest: {'max_depth': 20, 'min_samples_leaf': 10, 'n_estimators': 800}

Random Forest Classifier Performance Metrics:
Accuracy: 68.09%
Precision: 68.29%
Recall: 68.09%
F1 Score: 67.22%
              precision    recall  f1-score   support

    acoustic       0.59      0.47      0.53      2364
    alt-rock       0.63      0.38      0.48      2329
       blues       0.69      0.43      0.53      2375
   classical       0.82      0.88      0.85      2371
     country       0.47      0.51      0.49      2395
       dance       0.72      0.47      0.57      2367
   dancehall       0.60      0.83      0.70      2385
       disco       0.64      0.60      0.62      2429
         edm       0.69      0.78      0.73      2415
         emo       0.61      0.59      0.60      2398
       forro       0.55      0.72      0.63      2317
      guitar       0.81      0.84      0.83      2387
     hip-hop       