# In [27]:
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
import joblib

def extract_features(file_path, target_sr=None):
    """Build the feature vector for one audio file.

    The vector holds, in this order: a constant placeholder for the
    dataset's 'length' column, then mean/variance pairs for chroma STFT,
    RMS, spectral centroid, spectral bandwidth, spectral rolloff and zero
    crossing rate, then mean/variance of the harmonic and percussive
    components, the estimated tempo, and finally mean/variance of each of
    20 MFCCs -- 58 values in total.

    Args:
        file_path: Path of the audio file to analyse.
        target_sr: Sample rate handed to ``librosa.load``. ``None`` (the
            default) keeps the file's native rate.
            NOTE(review): the spectral features depend on the sample rate;
            if the scaler/model were fitted on features computed at a fixed
            rate, pass that rate here -- confirm against the training data.

    Returns:
        A 1-D float ``numpy.ndarray`` with the 58 features.
    """
    y, sr = librosa.load(file_path, sr=target_sr)

    # Placeholder for the 'length' value in the dataset; it does not play a
    # role in classification but keeps the column layout intact.
    features = [661794]

    def add_mean_var(values):
        # Every descriptor contributes its mean followed by its variance.
        features.extend([np.mean(values), np.var(values)])

    # Chroma STFT and RMS
    add_mean_var(librosa.feature.chroma_stft(y=y, sr=sr))
    add_mean_var(librosa.feature.rms(y=y))

    # Spectral features
    add_mean_var(librosa.feature.spectral_centroid(y=y, sr=sr))
    add_mean_var(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    add_mean_var(librosa.feature.spectral_rolloff(y=y, sr=sr))

    # Zero crossing rate
    add_mean_var(librosa.feature.zero_crossing_rate(y))

    # Harmonic and percussive components ('harmony' / 'perceptr')
    harmony, perceptr = librosa.effects.hpss(y)
    add_mean_var(harmony)
    add_mean_var(perceptr)

    # Tempo: newer librosa releases return it as a one-element array rather
    # than a scalar. Appending that array would make the feature list ragged
    # and break the final np.array() call, so reduce it to a plain float.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    features.append(float(np.atleast_1d(tempo)[0]))

    # MFCCs: one mean/variance pair per coefficient row.
    for coefficient in librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20):
        add_mean_var(coefficient)

    return np.array(features, dtype=float)

def predict_genre(model, scaler, label_encoder, features):
    """Return the label predicted for a single feature vector.

    The vector is scaled with ``scaler.transform``, reshaped to
    ``(1, n_features, 1)`` and passed to ``model.predict``. The index of
    the highest score along axis 1 is mapped back to a label with
    ``label_encoder.inverse_transform``.

    Args:
        model: Object exposing ``predict``.
        scaler: Object exposing ``transform``.
        label_encoder: Object exposing ``inverse_transform``.
        features: One feature vector (a single sample).

    Returns:
        The first (and only) decoded label.
    """
    # Wrap the vector in a list so the scaler sees a batch of one sample.
    scaled = scaler.transform([features])
    n_features = scaled.shape[1]

    # Add a trailing axis of size one before handing the sample to the model.
    scores = model.predict(scaled.reshape(1, n_features, 1))
    best_index = np.argmax(scores, axis=1)

    return label_encoder.inverse_transform(best_index)[0]

# Example usage: extract the features of one track and classify it with the
# saved model, scaler and label encoder.
file_path = '/content/metal - Break It Down.wav'
features = extract_features(file_path)

# Restore the network and the fitted preprocessing objects from disk.
model = load_model('/content/hyperparameter_optimized.h5')
scaler = joblib.load('/content/scaler.pkl')
label_encoder = joblib.load('/content/label_encoder.pkl')

predicted_genre = predict_genre(
    model=model,
    scaler=scaler,
    label_encoder=label_encoder,
    features=features,
)
print(predicted_genre)




# Output: reggae
