In [2]:
import librosa
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

genres = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

data_path = './dataset'

In [3]:
features_list = []
labels = []

def extract_features(file_path):
    y, sr = librosa.load(file_path, duration=30)
    features = {}

    # Spectral features
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    features['spectral_centroid_mean'] = np.mean(spectral_centroids)
    features['spectral_centroid_std'] = np.std(spectral_centroids)

    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    features['spectral_rolloff_mean'] = np.mean(spectral_rolloff)
    features['spectral_rolloff_std'] = np.std(spectral_rolloff)

    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    features['spectral_bandwidth_mean'] = np.mean(spectral_bandwidth)
    features['spectral_bandwidth_std'] = np.std(spectral_bandwidth)

    # Zero crossing rate
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    features['zcr_mean'] = np.mean(zcr)
    features['zcr_std'] = np.std(zcr)

    # RMS Energy
    rms = librosa.feature.rms(y=y)[0]
    features['rms_mean'] = np.mean(rms)
    features['rms_std'] = np.std(rms)

    # MFCCs (13 coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    for i in range(13):
        features[f'mfcc_{i+1}_mean'] = np.mean(mfccs[i])
        features[f'mfcc_{i+1}_std'] = np.std(mfccs[i])

    # Chroma features
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    for i in range(12):
        features[f'chroma_{i+1}_mean'] = np.mean(chroma[i])
        features[f'chroma_{i+1}_std'] = np.std(chroma[i])

    # Tempo
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    features['tempo'] = tempo

    # Spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    for i in range(7):
        features[f'contrast_{i+1}_mean'] = np.mean(contrast[i])
        features[f'contrast_{i+1}_std'] = np.std(contrast[i])

    return features

for genre in genres:
    genre_path = os.path.join(data_path, genre)
    
    for filename in os.listdir(genre_path):
        if filename.endswith('.wav'):
            file_path = os.path.join(genre_path, filename)
            try:
                features = extract_features(file_path)
                features_list.append(features)
                labels.append(genre)
            except Exception as e:
                print(e)

df = pd.DataFrame(features_list)
df['genre'] = labels


  y, sr = librosa.load(file_path, duration=30)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)





In [4]:
df.head()

Unnamed: 0,spectral_centroid_mean,spectral_centroid_std,spectral_rolloff_mean,spectral_rolloff_std,spectral_bandwidth_mean,spectral_bandwidth_std,zcr_mean,zcr_std,rms_mean,rms_std,...,contrast_3_std,contrast_4_mean,contrast_4_std,contrast_5_mean,contrast_5_std,contrast_6_mean,contrast_6_std,contrast_7_mean,contrast_7_std,genre
0,1784.416546,360.194166,3806.41865,949.383832,2002.657106,292.966459,0.083066,0.02769,0.130192,0.053201,...,4.620148,18.363125,3.628891,18.918739,3.486795,17.190236,2.521577,39.667577,3.032167,blues
1,1529.871314,613.197888,3548.986873,1725.338349,2038.612143,462.482296,0.056044,0.038059,0.095892,0.048724,...,4.812807,19.187871,4.284377,18.324039,3.508001,17.50448,3.131714,37.518985,5.817207,blues
2,1552.637786,395.649207,3041.089944,884.842654,1747.382028,275.954142,0.076301,0.031738,0.175494,0.052465,...,5.001119,19.41725,4.188353,20.344435,3.784576,18.314965,2.922925,39.035077,3.268984,blues
3,1070.110059,429.542379,2185.061787,1222.37231,1596.244204,408.212651,0.033309,0.020569,0.141139,0.079623,...,4.22404,18.636217,3.315072,18.490758,3.328274,16.790485,2.301882,34.278016,4.630242,blues
4,1835.507008,585.947299,3581.003346,1253.847208,1748.367477,297.422362,0.1015,0.044199,0.09154,0.048011,...,4.101753,20.526383,4.327284,20.785517,4.070889,19.713927,3.70846,37.338804,5.124857,blues


In [5]:
df.to_csv('data.csv',index=False)

In [6]:
df.columns

Index(['spectral_centroid_mean', 'spectral_centroid_std',
       'spectral_rolloff_mean', 'spectral_rolloff_std',
       'spectral_bandwidth_mean', 'spectral_bandwidth_std', 'zcr_mean',
       'zcr_std', 'rms_mean', 'rms_std', 'mfcc_1_mean', 'mfcc_1_std',
       'mfcc_2_mean', 'mfcc_2_std', 'mfcc_3_mean', 'mfcc_3_std', 'mfcc_4_mean',
       'mfcc_4_std', 'mfcc_5_mean', 'mfcc_5_std', 'mfcc_6_mean', 'mfcc_6_std',
       'mfcc_7_mean', 'mfcc_7_std', 'mfcc_8_mean', 'mfcc_8_std', 'mfcc_9_mean',
       'mfcc_9_std', 'mfcc_10_mean', 'mfcc_10_std', 'mfcc_11_mean',
       'mfcc_11_std', 'mfcc_12_mean', 'mfcc_12_std', 'mfcc_13_mean',
       'mfcc_13_std', 'chroma_1_mean', 'chroma_1_std', 'chroma_2_mean',
       'chroma_2_std', 'chroma_3_mean', 'chroma_3_std', 'chroma_4_mean',
       'chroma_4_std', 'chroma_5_mean', 'chroma_5_std', 'chroma_6_mean',
       'chroma_6_std', 'chroma_7_mean', 'chroma_7_std', 'chroma_8_mean',
       'chroma_8_std', 'chroma_9_mean', 'chroma_9_std', 'chroma_10_mean',
   

In [7]:
feature_columns = [col for col in df.columns if col != 'genre']

In [8]:
X = df[feature_columns]
y = df['genre']

In [9]:
X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

In [10]:
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(probability=True, random_state=42)
lr_clf = LogisticRegression(random_state=42, max_iter=20000)

In [11]:
rf_clf.fit(X_train,y_train)
y_pred =rf_clf.predict(X_test)
    # Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.7450


In [12]:
gb_clf.fit(X_train, y_train)
y_pred =gb_clf.predict(X_test)
    # Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.6800


In [13]:
svm_clf.fit(X_train, y_train)
y_pred =svm_clf.predict(X_test)
    # Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.3500


In [14]:
lr_clf.fit(X_train, y_train)
y_pred =lr_clf.predict(X_test)
    # Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.7500


STOP: TOTAL NO. OF F,G EVALUATIONS EXCEEDS LIMIT

You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
# Voting classifier (ensemble)
ensemble_clf = VotingClassifier(
    estimators=[
        ('rf', rf_clf),
        ('gb', gb_clf),
        ('svm', svm_clf),
        ('lr', lr_clf)
    ],
    voting='soft'  # Use probability-based voting
)

ensemble_clf.fit(X_train, y_train)

STOP: TOTAL NO. OF F,G EVALUATIONS EXCEEDS LIMIT

You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0,1,2
,estimators,"[('rf', ...), ('gb', ...), ...]"
,voting,'soft'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,loss,'log_loss'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,3
,min_impurity_decrease,0.0

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,20000


In [16]:
y_pred =ensemble_clf.predict(X_test)
    # Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.7700


In [17]:
joblib.dump(rf_clf, 'models/rf_clf.joblib')
joblib.dump(gb_clf, 'models/gb_clf.joblib')
joblib.dump(svm_clf, 'models/svm_clf.joblib')
joblib.dump(lr_clf, 'models/lr_clf.joblib')
joblib.dump(ensemble_clf, 'models/ensemble_clf.joblib')

['models/ensemble_clf.joblib']