In [12]:
import librosa
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

In [2]:
# Genre labels used in this notebook; the extraction cell also uses each one
# as a folder name under Data/genres_original/.
genres = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

# Collects one [feature_vector, genre] pair per successfully processed clip.
dataset = list()

In [3]:
# Build the training table: one fixed-length feature vector per .wav clip,
# paired with the genre folder it was read from.
dataset.clear()  # start empty so re-running this cell does not append duplicate rows

for genre in genres:
    folder = f"Data/genres_original/{genre}"
    print(f"\n🎵 Processing genre: {genre}")

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order (and therefore the seeded train/test split later on) reproducible.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".wav"):
            continue
        file_path = os.path.join(folder, filename)

        try:
            # Load at most the first 30 seconds; sr=None keeps the file's own sample rate.
            # Named `audio` (not `y`) so it cannot be confused with the label vector `y`.
            audio, sr = librosa.load(file_path, sr=None, duration=30)

            # 13 MFCCs, each summarised by its mean and variance along axis 1.
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
            mfccs_mean = np.mean(mfccs, axis=1)
            mfccs_var = np.var(mfccs, axis=1)

            # Chroma rows averaged along axis 1.
            chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
            chroma_mean = np.mean(chroma, axis=1)

            # Spectral descriptors, each collapsed to a single mean value.
            spec_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr))
            spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sr))
            rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sr))
            zcr = np.mean(librosa.feature.zero_crossing_rate(audio))

            # Only the tempo estimate is kept; the beat positions are discarded.
            tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)

            # Combine into one flat feature vector (np.hstack accepts the mix of
            # 1-D arrays and scalars).
            features = np.hstack([
                mfccs_mean, mfccs_var,
                chroma_mean,
                spec_centroid, spec_bw, rolloff, zcr, tempo
            ])

            dataset.append([features, genre])
            print(f"✅ Extracted features from: {filename}")

        except Exception as e:
            # Best-effort: an unreadable clip is skipped rather than aborting the
            # whole run, but the reason is reported instead of being swallowed.
            print(f"❌ Failed on: {filename}, Skipping! ({type(e).__name__}: {e})")



🎵 Processing genre: blues
✅ Extracted features from: blues.00000.wav
✅ Extracted features from: blues.00001.wav
✅ Extracted features from: blues.00002.wav
✅ Extracted features from: blues.00003.wav
✅ Extracted features from: blues.00004.wav
✅ Extracted features from: blues.00005.wav
✅ Extracted features from: blues.00006.wav
✅ Extracted features from: blues.00007.wav
✅ Extracted features from: blues.00008.wav
✅ Extracted features from: blues.00009.wav
✅ Extracted features from: blues.00010.wav
✅ Extracted features from: blues.00011.wav
✅ Extracted features from: blues.00012.wav
✅ Extracted features from: blues.00013.wav
✅ Extracted features from: blues.00014.wav
✅ Extracted features from: blues.00015.wav
✅ Extracted features from: blues.00016.wav
✅ Extracted features from: blues.00017.wav
✅ Extracted features from: blues.00018.wav
✅ Extracted features from: blues.00019.wav
✅ Extracted features from: blues.00020.wav
✅ Extracted features from: blues.00021.wav
✅ Extracted features from: 

  y, sr = librosa.load(file_path, sr=None, duration=30)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


✅ Extracted features from: jazz.00055.wav
✅ Extracted features from: jazz.00056.wav
✅ Extracted features from: jazz.00057.wav
✅ Extracted features from: jazz.00058.wav
✅ Extracted features from: jazz.00059.wav
✅ Extracted features from: jazz.00060.wav
✅ Extracted features from: jazz.00061.wav
✅ Extracted features from: jazz.00062.wav
✅ Extracted features from: jazz.00063.wav
✅ Extracted features from: jazz.00064.wav
✅ Extracted features from: jazz.00065.wav
✅ Extracted features from: jazz.00066.wav
✅ Extracted features from: jazz.00067.wav
✅ Extracted features from: jazz.00068.wav
✅ Extracted features from: jazz.00069.wav
✅ Extracted features from: jazz.00070.wav
✅ Extracted features from: jazz.00071.wav
✅ Extracted features from: jazz.00072.wav
✅ Extracted features from: jazz.00073.wav
✅ Extracted features from: jazz.00074.wav
✅ Extracted features from: jazz.00075.wav
✅ Extracted features from: jazz.00076.wav
✅ Extracted features from: jazz.00077.wav
✅ Extracted features from: jazz.00

In [4]:
# Turn the collected [feature_vector, genre] pairs into a table:
# one numbered column per feature, plus a trailing 'genre' label column.
df = pd.DataFrame(np.array([pair[0] for pair in dataset]))
df['genre'] = np.array([pair[1] for pair in dataset])

# Feature matrix = every column except the label; target = the label column.
X = df.drop(columns='genre')
y = df['genre']

# Display the labels as the cell output.
y

0      blues
1      blues
2      blues
3      blues
4      blues
       ...  
994     rock
995     rock
996     rock
997     rock
998     rock
Name: genre, Length: 999, dtype: object

In [5]:
# Hold out 20% of the clips for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train_raw, y_test_raw = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# Learn the genre -> integer mapping from the training labels only, then apply
# the same mapping to both splits.
le = LabelEncoder()
le.fit(y_train_raw)
y_train = le.transform(y_train_raw)
y_test = le.transform(y_test_raw)


In [6]:
# Baseline: XGBoost with default hyperparameters and a fixed seed.
model = XGBClassifier(random_state=1)
model.fit(X_train, y_train)

# Score the baseline on the held-out test split.
y_pred = model.predict(X_test)
baseline_accuracy = accuracy_score(y_test, y_pred)

print("Accuracy: ", baseline_accuracy)


Accuracy:  0.65


In [11]:
# Hyperparameter tuning: randomized search over XGBoost settings, scored by
# 10-fold cross-validated accuracy on the training split only.
# Candidate values for each setting; the search samples n_iter combinations
# from these lists rather than trying every combination.
param_grid = {
    'n_estimators': [100, 200, 300, 500],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [4,6,8],
    'min_child_weight': [1, 3, 5],
    'subsample': [0.7, 0.85, 1.0],
    'colsample_bytree': [0.7, 0.85, 1.0],
    'reg_alpha': [0, 0.01, 0.1, 1, 10, 100],
    'reg_lambda': [0.5, 0.7, 1, 1.3]
}

random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=100,
    cv=10,
    scoring='accuracy',
    n_jobs=8,
    random_state=1,
)

random_search.fit(X_train, y_train)
best_xgb_model = random_search.best_estimator_
print("Best Parameters: ", random_search.best_params_)
# NOTE: best_score_ is the mean cross-validated accuracy on the training data,
# so it is not directly comparable to the baseline's test-set accuracy.
print("Best Accuracy: ", random_search.best_score_)

# Evaluate the tuned model on the same held-out test split as the baseline so
# the two results can be compared like for like.
print("Tuned Test Accuracy: ", accuracy_score(y_test, best_xgb_model.predict(X_test)))

Best Parameters:  {'subsample': 0.7, 'reg_lambda': 1, 'reg_alpha': 0.01, 'n_estimators': 500, 'min_child_weight': 1, 'max_depth': 8, 'learning_rate': 0.2, 'colsample_bytree': 0.85}
Best Accuracy:  0.6896518987341771
