# Extracting features
We will now extract features such as `MFCC`, `Chroma`, spectral features (`Spectral Contrast`, `Spectral Centroid`, `Spectral Bandwidth`, `Spectral Flatness`), `Zero-Crossing Rate`, `Tonnetz` and `Root Mean Square Energy`.
- For each audio file, the per-feature means are concatenated into a single feature vector, which is appended to a list named `X`. The genre name of the file is appended to the list `y`, which holds our labels.
- The try-except block is used to handle errors if Librosa cannot process the file.
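- Then these features are normalized using `StandardScaler`; the scaler is fitted on the training split only and then applied to the test split.
- The genre labels are converted to numeric labels using `LabelEncoder`.
- The data is divided into training and test sets in the ratio 80:20; this split is done before the scaling step.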


In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler  # Import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score  # Import cross_val_score
from xgboost import XGBClassifier, plot_importance
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import librosa
from sklearn.metrics import accuracy_score, classification_report
import os


# Build the dataset: one feature vector per audio file, labelled by its genre.
# `genres` and `data_dir` are expected to be defined by an earlier cell.
X = []  # Feature vectors, one 1-D numpy array per successfully processed file
y = []  # Genre name for each entry of X, in the same order

# Iterate through each genre folder and each audio file inside the folder
for genre in genres:
    genre_dir = os.path.join(data_dir, genre)
    # Keep .wav files only. os.listdir returns entries in arbitrary order, so
    # sort them: otherwise the row order of X/y (and therefore the result of
    # the later seeded train/test split) can differ between machines and runs.
    audio_files = sorted(f for f in os.listdir(genre_dir) if f.endswith('.wav'))
    for file in audio_files:
        print(f"Processing audio file: {file} from genre: {genre}")
        file_path = os.path.join(genre_dir, file)
        try:
            # Load the audio file; sr=None keeps the file's own sampling rate
            audio, sr = librosa.load(file_path, sr=None)

            # Every feature below is a matrix returned by librosa; taking the
            # mean of the transposed matrix along axis 0 collapses it to one
            # value per feature row (presumably averaging over time frames --
            # confirm against the librosa documentation).

            # MFCCs (13 coefficients requested)
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
            mfccs_mean = np.mean(mfccs.T, axis=0)

            # Chroma features
            chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
            chroma_mean = np.mean(chroma.T, axis=0)

            # Spectral Contrast
            spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
            spectral_contrast_mean = np.mean(spectral_contrast.T, axis=0)

            # Zero-Crossing Rate
            zcr = librosa.feature.zero_crossing_rate(y=audio)
            zcr_mean = np.mean(zcr.T, axis=0)

            # Root Mean Square Energy
            rmse = librosa.feature.rms(y=audio)
            rmse_mean = np.mean(rmse.T, axis=0)

            # Spectral Centroid
            spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
            spectral_centroid_mean = np.mean(spectral_centroid.T, axis=0)

            # Spectral Bandwidth
            spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr)
            spectral_bandwidth_mean = np.mean(spectral_bandwidth.T, axis=0)

            # Spectral Flatness
            spectral_flatness = librosa.feature.spectral_flatness(y=audio)
            spectral_flatness_mean = np.mean(spectral_flatness.T, axis=0)

            # Tonnetz
            tonnetz = librosa.feature.tonnetz(y=audio, sr=sr)
            tonnetz_mean = np.mean(tonnetz.T, axis=0)

            # Combine features into a single feature vector; the order here
            # fixes the column layout of X
            features = np.concatenate((
                mfccs_mean, chroma_mean, spectral_contrast_mean,
                zcr_mean, rmse_mean, spectral_centroid_mean,
                spectral_bandwidth_mean, spectral_flatness_mean,
                tonnetz_mean
            ))

            # Append features and label together, only after every step has
            # succeeded, so X and y always stay the same length
            X.append(features)
            y.append(genre)

        except Exception as e:
            # Best-effort: a file that fails to load or to yield features is
            # reported and skipped, so one bad file does not abort the run
            print(f"Error loading {file}: {e}")

In [None]:

# All files are processed: turn the accumulated Python lists into numpy arrays
X, y = np.array(X), np.array(y)

# Sanity check: report how many feature vectors and labels were collected
print(
    f"Number of feature vectors: {len(X)}",
    f"Number of labels: {len(y)}",
    sep="\n",
)

# Show the first example, but only when at least one file was processed
if len(X) != 0:
    print(
        f"Sample features: {X[0]}",
        f"Sample label: {y[0]}",
        sep="\n",
    )

In [None]:
# Split the data into training (80%) and testing (20%) sets.
# Stratify on the genre labels so each genre keeps its share in both subsets.
# Without it a genre could end up only in the test split, and
# label_encoder.transform(y_test) below would then raise on the unseen label.
# Stratification needs at least two samples of every class, so fall back to
# the plain random split when that does not hold.
_, genre_counts = np.unique(y, return_counts=True)
stratify_labels = y if genre_counts.size and genre_counts.min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Normalize the features using StandardScaler.
# The scaler is fitted on the training data only, so no statistics from the
# test set leak into the transformation; the test data is only transformed.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Label encode the genre labels: genre names -> integer class ids.
# The mapping is learned from the training labels and reused for the test
# labels so both share the same ids.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)