In [None]:
# Day 2 - Improved Music Genre Classification with More Features + KNN

import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from joblib import dump, load

# 🔹 Dataset path - update this to your dataset location
DATASET_PATH = r"F:\projects\personal projects\Music-Genre-classifier\data\genre"

# 🔹 Get list of genres (folder names)
# Only sub-directories count as genres: a stray file in DATASET_PATH
# (README, thumbnail cache, ...) would otherwise be treated as a genre and
# break the later os.listdir() call on it. Sorting keeps the label order
# independent of the filesystem's listing order.
genres = sorted(
    entry
    for entry in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, entry))
)
print("🎵 Genres found:", genres)

# ✅ Function to extract multiple features from an audio file
def extract_features(file_path, duration=15):
    """Summarise an audio file as one flat feature vector.

    The vector is the concatenation, in this order, of the per-frame means of
    the MFCCs (13 coefficients), chroma, spectral contrast and zero-crossing
    rate, followed by the estimated tempo.

    Args:
        file_path: Path of the audio file to load with librosa.
        duration: Number of seconds read from the start of the file.

    Returns:
        A 1-D numpy array of features, or None when the file could not be
        processed (the error is printed, not raised, so one bad file does not
        abort a whole dataset scan).
    """
    try:
        y, sr = librosa.load(file_path, duration=duration)

        # MFCC (Mel Frequency Cepstral Coefficients) – voice texture
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_mean = np.mean(mfcc.T, axis=0)

        # Chroma – harmony/melody (like musical chords)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = np.mean(chroma.T, axis=0)

        # Spectral Contrast – sharp vs soft tones
        contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        contrast_mean = np.mean(contrast.T, axis=0)

        # Zero Crossing Rate – how noisy the signal is
        zcr = librosa.feature.zero_crossing_rate(y)
        zcr_mean = np.mean(zcr.T, axis=0)

        # Tempo – speed of the beat
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        # beat_track may hand back tempo either as a plain float or as a
        # 1-element array depending on the librosa version. Wrapping the array
        # form in another list produced a 2-D array and made np.hstack fail
        # for every file; np.ravel yields a 1-D array in both cases.
        tempo_feature = np.ravel(tempo).astype(float)

        # 🧠 Combine all features into one array (all parts are 1-D here)
        features = np.hstack([mfcc_mean, chroma_mean, contrast_mean, zcr_mean, tempo_feature])
        return features

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch scan.
        print(f"⚠️ Error processing {file_path}: {e}")
        return None

# 🔹 Create features and labels
features = []
labels = []

for genre in genres:
    genre_path = os.path.join(DATASET_PATH, genre)
    # Entries that are not folders cannot contain audio files; skip them
    # instead of crashing in os.listdir().
    if not os.path.isdir(genre_path):
        continue
    # Sorted so the sample order (and therefore the seeded train/test split)
    # does not depend on the filesystem's listing order.
    for filename in sorted(os.listdir(genre_path)):
        # Case-insensitive match so files named "*.WAV" are not dropped.
        if not filename.lower().endswith(".wav"):
            continue
        file_path = os.path.join(genre_path, filename)
        data = extract_features(file_path)
        if data is not None:
            features.append(data)
            labels.append(genre)

# Stop here with a clear message when nothing could be extracted; otherwise
# the failure only surfaces later as an obscure shape error in the scaler.
if not features:
    raise RuntimeError(
        f"No features could be extracted from any .wav file under {DATASET_PATH}"
    )

# ✅ Convert to numpy arrays
X = np.array(features)
y = np.array(labels)

print("✅ Feature matrix shape:", X.shape)
print("✅ Labels shape:", y.shape)

# ✂️ Train/Test split
# Split BEFORE scaling: fitting the scaler on the full dataset lets the test
# samples influence the mean/variance used for training (data leakage) and
# makes the reported accuracy optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🧼 Scale features so KNN works better
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics come from training data only
X_test = scaler.transform(X_test_raw)        # test data reuses those statistics
# Kept for any later code that still refers to the fully scaled matrix.
X_scaled = scaler.transform(X)

# 🤖 Train KNN model
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

# 🔍 Predict labels for the held-out test set
y_pred = model.predict(X_test)

# 📊 Evaluation: overall accuracy first, then per-class precision/recall/F1
print("\n✅ Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("\n📄 Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))

# 📉 Confusion Matrix
# Rows and columns follow the class order the model learned, so the tick
# labels line up with the matrix entries.
class_names = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_names)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=class_names,
)
disp.plot(xticks_rotation="vertical")
plt.title("🎯 Confusion Matrix")
plt.tight_layout()
plt.show()

# 🔹 Save model to disk for later use
dump(model, 'knn_music_genre_model.joblib')
# The model was trained on standardized features, so it can only be reused
# together with the fitted scaler; persist the scaler next to it.
dump(scaler, 'knn_music_genre_scaler.joblib')
print("✅ Model saved successfully!")

# ✅ Test your own songs
def predict_genre_for_song(song_path):
    """Predict and print the genre of a single audio file.

    Uses the module-level ``extract_features``, ``scaler`` and ``model``, so
    the song goes through the same feature extraction and scaling as the
    training data.

    Args:
        song_path: Path of the audio file to classify.

    Returns:
        The predicted genre label, or None when no features could be
        extracted from the file.
    """
    # Extract features for the song (same as for the training files)
    features = extract_features(song_path)

    # Nothing to predict from when extraction failed
    if features is None:
        print(f"⚠️ Could not extract features for {song_path}")
        return None

    # Scale with the scaler fitted during training; it expects a 2-D input,
    # hence the single-row list.
    features_scaled = scaler.transform([features])

    # Predict genre using the trained model
    predicted_genre = model.predict(features_scaled)[0]

    print(f"🎵 The predicted genre for '{song_path}' is: {predicted_genre}")
    return predicted_genre

# 📂 Loop through your list of songs and predict their genres
my_songs_folder = './my_songs/'  # Folder where your songs are located
if os.path.isdir(my_songs_folder):
    # Sorted for a stable output order across runs and platforms.
    for song in sorted(os.listdir(my_songs_folder)):
        # Case-insensitive match so files named "*.WAV" are included too.
        if song.lower().endswith(".wav"):
            song_path = os.path.join(my_songs_folder, song)
            predict_genre_for_song(song_path)
else:
    # The personal-songs step is optional; a missing folder should not crash
    # the run after training and evaluation have already finished.
    print(f"⚠️ Songs folder not found: {my_songs_folder}")


🎵 Genres found: ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
⚠️ Error processing F:\projects\personal projects\Music-Genre-classifier\data\genre\blues\blues.00000.wav: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 4 has 2 dimension(s)
⚠️ Error processing F:\projects\personal projects\Music-Genre-classifier\data\genre\blues\blues.00001.wav: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 4 has 2 dimension(s)
⚠️ Error processing F:\projects\personal projects\Music-Genre-classifier\data\genre\blues\blues.00002.wav: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 4 has 2 dimension(s)
⚠️ Error processing F:\projects\personal projects\Music-Genre-classifier\data\genre\blues\blues.00003.wav: all the input arrays mu

KeyboardInterrupt: 