In [17]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

In [18]:
# Load dataset
DATASET_PATH = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/genres_original"
#genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
def load_dataset(dataset_path):
    X = []
    y = []
    for folder in os.listdir(dataset_path):
        folder_path = os.path.join(dataset_path, folder)
        if os.path.isdir(folder_path):
            for filename in os.listdir(folder_path):
                file_path = os.path.join(folder_path, filename)
                if filename.endswith(".wav"):
                    # Load audio file
                    signal, sr = librosa.load(file_path, sr=22050)
                    # Extract features
                    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
                    mfccs_mean = np.mean(mfccs, axis=1)
                    # Add to dataset
                    X.append(mfccs_mean)
                    y.append(folder)
    return np.array(X), np.array(y)

X, y = load_dataset(DATASET_PATH)

In [19]:
# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [20]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [21]:
# Train machine learning model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [22]:
# Evaluate model performance
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print(f"Train accuracy: {train_accuracy}")
print(f"Test accuracy: {test_accuracy1}")

Train accuracy: 1.0
Test accuracy: 0.79


In [23]:
# Load the new audio file
new_audio_file = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/sample_1/rl.wav"
signal, sr = librosa.load(new_audio_file, sr=22050)

# Extract features
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)

# Reshape features
mfccs_mean = mfccs_mean.reshape(1, -1)

# Make prediction
predicted_label = model.predict(mfccs_mean)

# Convert predicted label to original class label
predicted_class = le.inverse_transform(predicted_label)[0]

# Debugging statements
print(f"Input shape: {mfccs_mean.shape}")
print(f"Predicted label: {predicted_label}")
print(f"Predicted class: {predicted_class}")
print(f"Label encoder classes: {le.classes_}")
print(f"Label encoder mapping: {dict(zip(le.classes_, le.transform(le.classes_)))}")


Input shape: (1, 13)
Predicted label: [3]
Predicted class: disco
Label encoder classes: ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']
Label encoder mapping: {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}


In [34]:
#Make prediction on new audio file
file_path = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/sample_1/rl.wav"
signal, sr = librosa.load(file_path, sr=22050)
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
X_new = np.array([mfccs_mean])
y_pred = model.predict(X_new)
print(f"Predicted genre: {le.inverse_transform(y_pred)}")

Predicted genre: ['disco']


In [35]:
#Make prediction on new audio file
file_path = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/sample_1/r2.wav"
signal, sr = librosa.load(file_path, sr=22050)
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
X_new = np.array([mfccs_mean])
y_pred = model.predict(X_new)
print(f"Predicted genre: {le.inverse_transform(y_pred)}")

Predicted genre: ['classical']


In [36]:
#Make prediction on new audio file
file_path = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/sample_1/r3.wav"
signal, sr = librosa.load(file_path, sr=22050)
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
X_new = np.array([mfccs_mean])
y_pred = model.predict(X_new)
print(f"Predicted genre: {le.inverse_transform(y_pred)}")

Predicted genre: ['reggae']


In [37]:
#Make prediction on new audio file
file_path = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/sample_1/r4.wav"
signal, sr = librosa.load(file_path, sr=22050)
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
X_new = np.array([mfccs_mean])
y_pred = model.predict(X_new)
print(f"Predicted genre: {le.inverse_transform(y_pred)}")

Predicted genre: ['disco']


In [38]:
#Make prediction on new audio file
file_path = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/sample_1/r5.wav"
signal, sr = librosa.load(file_path, sr=22050)
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
X_new = np.array([mfccs_mean])
y_pred = model.predict(X_new)
print(f"Predicted genre: {le.inverse_transform(y_pred)}")

Predicted genre: ['reggae']


In [39]:
#Make prediction on new audio file
file_path = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/sample_1/r6.wav"
signal, sr = librosa.load(file_path, sr=22050)
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
X_new = np.array([mfccs_mean])
y_pred = model.predict(X_new)
print(f"Predicted genre: {le.inverse_transform(y_pred)}")

Predicted genre: ['country']


In [25]:
#Make prediction on new audio file
file_path = "C:/Users/krishna kaanth reddy/Downloads/college/archive/Data/hhh/abc.wav"
signal, sr = librosa.load(file_path, sr=22050)
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
X_new = np.array([mfccs_mean])
y_pred = model.predict(X_new)
print(f"Predicted genre: {le.inverse_transform(y_pred)}")

Predicted genre: ['disco']


In [40]:
# Test model performance with cross validation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y_encoded, cv=5)
print(f"Cross-validation scores: {scores}")
print(f"Average cross-validation score: {np.mean(scores)}")

Cross-validation scores: [0.37      0.535     0.475     0.425     0.3919598]
Average cross-validation score: 0.4393919597989949


In [41]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Train SVM with RBF kernel
svm = SVC(kernel='rbf')
svm.fit(X_train, y_train)

# Evaluate on test set
y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred, target_names=le.classes_))

              precision    recall  f1-score   support

       blues       0.17      0.14      0.15        21
   classical       0.75      1.00      0.86        12
     country       0.44      0.17      0.24        24
       disco       1.00      0.05      0.09        22
      hiphop       0.33      0.40      0.36        15
        jazz       0.70      0.59      0.64        27
       metal       0.46      0.89      0.60        18
         pop       0.46      0.84      0.59        19
      reggae       0.37      0.32      0.34        22
        rock       0.23      0.30      0.26        20

    accuracy                           0.43       200
   macro avg       0.49      0.47      0.41       200
weighted avg       0.49      0.43      0.39       200

