In [None]:
import librosa
import pandas as pd

def extract_features(wav_file_path):
    """Extract a flat dictionary of summary audio features from one audio file.

    The file is loaded with ``librosa.load`` (default arguments). The result holds,
    in this order: the number of samples, the mean and variance of several
    frame-level descriptors, the mean and variance of the harmonic and percussive
    components, the estimated tempo, and the mean and variance of 20 MFCCs.
    The key order is preserved deliberately: the dictionary is later turned into a
    one-row DataFrame whose column order is the insertion order.

    Args:
        wav_file_path: Path of the audio file to analyse.

    Returns:
        A dict mapping feature name to value, or ``None`` if loading or feature
        extraction fails (the error is printed rather than raised).
    """
    try:
        # Load the audio file
        audio, sr = librosa.load(wav_file_path)

        # Compute every descriptor exactly once; both statistics reuse the result.
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
        rms = librosa.feature.rms(y=audio)
        centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
        bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
        zero_crossings = librosa.feature.zero_crossing_rate(y=audio)
        harmonic = librosa.effects.harmonic(audio)
        percussive = librosa.effects.percussive(audio)

        # librosa 0.10 moved tempo estimation out of librosa.beat; the old alias
        # emits a warning and is announced for removal in librosa 1.0. Prefer the
        # new location and fall back to the alias on older installations.
        estimate_tempo = getattr(librosa.feature, "tempo", None)
        if estimate_tempo is None:
            estimate_tempo = librosa.beat.tempo

        features = {
            'length': len(audio),
            'chroma_stft_mean': chroma.mean(),
            'chroma_stft_var': chroma.var(),
            'rms_mean': rms.mean(),
            'rms_var': rms.var(),
            'spectral_centroid_mean': centroid.mean(),
            'spectral_centroid_var': centroid.var(),
            'spectral_bandwidth_mean': bandwidth.mean(),
            'spectral_bandwidth_var': bandwidth.var(),
            'rolloff_mean': rolloff.mean(),
            'rolloff_var': rolloff.var(),
            'zero_crossing_rate_mean': zero_crossings.mean(),
            'zero_crossing_rate_var': zero_crossings.var(),
            'harmony_mean': harmonic.mean(),
            'harmony_var': harmonic.var(),
            'perceptr_mean': percussive.mean(),
            'perceptr_var': percussive.var(),
            'tempo': estimate_tempo(y=audio, sr=sr)[0],
        }

        # One (mean, var) pair per MFCC coefficient, numbered from 1.
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
        for i, coefficient in enumerate(mfccs, start=1):
            features[f'mfcc{i}_mean'] = coefficient.mean()
            features[f'mfcc{i}_var'] = coefficient.var()

        return features
    except Exception as e:
        # Best-effort contract: callers check for None instead of catching.
        print("Error occurred while extracting features:", e)
        return None

# Example usage: extract the features of one track from the genres_original folder
wav_file_path = '/content/drive/MyDrive/Music Genre Classification/genres_original/metal/metal.00020.wav'
features = extract_features(wav_file_path)

if features is None:
    # extract_features already printed the underlying error
    print("Failed to extract features from the WAV file.")
else:
    # Every value is a scalar, so an explicit one-element index is needed
    # to build a single-row frame.
    features_df = pd.DataFrame(features, index=[0])
    print(features_df)


	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  'tempo': librosa.beat.tempo(y=audio, sr=sr)[0],


   length  chroma_stft_mean  chroma_stft_var  rms_mean   rms_var  \
0  661504          0.496008          0.07287  0.117267  0.000866   

   spectral_centroid_mean  spectral_centroid_var  spectral_bandwidth_mean  \
0             2657.967496          189113.304146              2345.687162   

   spectral_bandwidth_var  rolloff_mean  ...  mfcc16_mean  mfcc16_var  \
0            32712.829762    5358.28696  ...      5.08249   27.755531   

   mfcc17_mean  mfcc17_var  mfcc18_mean  mfcc18_var  mfcc19_mean  mfcc19_var  \
0    -10.67766   26.512857     3.868277   25.586306    -4.939446   24.455713   

   mfcc20_mean  mfcc20_var  
0     3.259207   25.145039  

[1 rows x 58 columns]


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

# Load the pre-trained deep learning model from its .h5 file.
# `model` is a module-level global; predict_genre below reads it directly.
model = tf.keras.models.load_model('/content/92percentmodel.h5')
# Function to predict genre from extracted features
def predict_genre(features):
    """Predict the genre label for one or more feature vectors.

    Uses the module-level ``model``.

    Args:
        features: Feature values. A 1-D list/tuple/array is treated as a single
            sample and promoted to a one-row batch; a 2-D array is treated as
            one sample per row. Any other input type is handed to
            ``model.predict`` unchanged.

    Returns:
        The genre name (str) when the model returns one row of scores, a list of
        genre names (one per row) when it returns several, or ``None`` if
        prediction fails (the error is printed rather than raised).
    """
    try:
        # NOTE(review): no feature scaling is applied here. If the model was
        # trained on normalized features, the same fitted scaler has to be
        # applied before predicting - confirm against the training code.
        batch = features
        if isinstance(features, (list, tuple, np.ndarray)):
            batch = np.atleast_2d(np.asarray(features))

        # Predict genre using the model
        prediction = np.asarray(model.predict(batch))

        # Take the highest-scoring class per row. A flat argmax over the whole
        # array would only be correct for a single row.
        class_indices = np.atleast_1d(np.argmax(prediction, axis=-1))

        # Map the predicted class index to genre label
        genre_labels = [
            'Blues', 'Classical', 'Country', 'Disco', 'HipHop',
            'Jazz', 'Metal', 'Pop', 'Reggae', 'Rock'
        ]
        predicted = [genre_labels[index] for index in class_indices]

        # Keep the original single-sample contract: one row -> plain string.
        return predicted[0] if len(predicted) == 1 else predicted
    except Exception as e:
        print("Error occurred while predicting genre:", e)
        return None

# Convenience wrapper: run predict_genre on a features DataFrame and print the outcome
def predict_genre_from_features(features_df):
    """Predict and print the genre for a DataFrame of extracted features.

    The frame is converted with ``to_numpy()`` and passed to ``predict_genre``.
    Nothing is returned; the outcome (predicted label, failure notice, or error
    message) is printed.

    Args:
        features_df: Object exposing ``to_numpy()``, e.g. the one-row features
            DataFrame.
    """
    try:
        genre = predict_genre(features_df.to_numpy())

        # A falsy result (None) means predict_genre could not produce a label.
        if not genre:
            print("Failed to predict genre.")
        else:
            print("Predicted genre:", genre)
    except Exception as err:
        print("Error occurred:", err)

# Example usage
# Relies on `features_df`, the one-row DataFrame built in the feature-extraction cell.
# That name is only bound when extraction succeeded, so run that cell first.
predict_genre_from_features(features_df)


Predicted genre: Pop


In [None]:
# Prediction from manually supplied feature values
import numpy as np
import tensorflow as tf

# Load the pre-trained deep learning model.
# This reloads the same .h5 file as the earlier cell and rebinds the global `model`,
# which predict_genre_from_input below reads.
model = tf.keras.models.load_model('/content/92percentmodel.h5')

# Function to predict genre from individual features
def predict_genre_from_input(length, chroma_stft_mean, chroma_stft_var, rms_mean, rms_var,
                             spectral_centroid_mean, spectral_centroid_var, spectral_bandwidth_mean,
                             spectral_bandwidth_var, rolloff_mean, rolloff_var, zero_crossing_rate_mean,
                             zero_crossing_rate_var, harmony_mean, harmony_var, perceptr_mean, perceptr_var,
                             tempo, mfcc1_mean, mfcc1_var, mfcc2_mean, mfcc2_var, mfcc3_mean, mfcc3_var,
                             mfcc4_mean, mfcc4_var, mfcc5_mean, mfcc5_var, mfcc6_mean, mfcc6_var, mfcc7_mean,
                             mfcc7_var, mfcc8_mean, mfcc8_var, mfcc9_mean, mfcc9_var, mfcc10_mean, mfcc10_var,
                             mfcc11_mean, mfcc11_var, mfcc12_mean, mfcc12_var, mfcc13_mean, mfcc13_var,
                             mfcc14_mean, mfcc14_var, mfcc15_mean, mfcc15_var, mfcc16_mean, mfcc16_var,
                             mfcc17_mean, mfcc17_var, mfcc18_mean, mfcc18_var, mfcc19_mean, mfcc19_var,
                             mfcc20_mean, mfcc20_var):
    """Predict a genre from 58 individually supplied feature values.

    The arguments are packed, in signature order, into a single-row array that
    is passed to the module-level ``model``.

    Returns:
        The genre name with the highest model score, or ``None`` if prediction
        fails (the error is printed rather than raised).
    """
    # Class index -> genre label
    labels = (
        'Blues', 'Classical', 'Country', 'Disco', 'HipHop',
        'Jazz', 'Metal', 'Pop', 'Reggae', 'Rock',
    )
    try:
        # Same order as the signature; the model receives the values as-is.
        feature_row = [
            length,
            chroma_stft_mean, chroma_stft_var,
            rms_mean, rms_var,
            spectral_centroid_mean, spectral_centroid_var,
            spectral_bandwidth_mean, spectral_bandwidth_var,
            rolloff_mean, rolloff_var,
            zero_crossing_rate_mean, zero_crossing_rate_var,
            harmony_mean, harmony_var,
            perceptr_mean, perceptr_var,
            tempo,
            mfcc1_mean, mfcc1_var,
            mfcc2_mean, mfcc2_var,
            mfcc3_mean, mfcc3_var,
            mfcc4_mean, mfcc4_var,
            mfcc5_mean, mfcc5_var,
            mfcc6_mean, mfcc6_var,
            mfcc7_mean, mfcc7_var,
            mfcc8_mean, mfcc8_var,
            mfcc9_mean, mfcc9_var,
            mfcc10_mean, mfcc10_var,
            mfcc11_mean, mfcc11_var,
            mfcc12_mean, mfcc12_var,
            mfcc13_mean, mfcc13_var,
            mfcc14_mean, mfcc14_var,
            mfcc15_mean, mfcc15_var,
            mfcc16_mean, mfcc16_var,
            mfcc17_mean, mfcc17_var,
            mfcc18_mean, mfcc18_var,
            mfcc19_mean, mfcc19_var,
            mfcc20_mean, mfcc20_var,
        ]
        scores = model.predict(np.array([feature_row]))

        # Only one row was submitted, so the flat argmax is the class index.
        return labels[np.argmax(scores)]
    except Exception as err:
        print("Error occurred while predicting genre:", err)
        return None

# Example usage
# Feed one hand-entered feature vector to predict_genre_from_input (keyword form,
# one mean/variance pair per line).
predicted_genre = predict_genre_from_input(
    length=66149,
    chroma_stft_mean=0.265236974, chroma_stft_var=0.083201207,
    rms_mean=0.042729616, rms_var=0.000206827,
    spectral_centroid_mean=2207.345907, spectral_centroid_var=199852.0379,
    spectral_bandwidth_mean=2183.78897, spectral_bandwidth_var=57374.92353,
    rolloff_mean=4472.197829, rolloff_var=817302.1916,
    zero_crossing_rate_mean=0.100991587, zero_crossing_rate_var=0.00114156,
    harmony_mean=-2.92E-05, harmony_var=0.000807024,
    perceptr_mean=-2.38E-05, perceptr_var=0.000535287,
    tempo=161.4990234,
    mfcc1_mean=-219.4849701, mfcc1_var=1027.824219,
    mfcc2_mean=96.54521179, mfcc2_var=390.881958,
    mfcc3_mean=-19.1740818, mfcc3_var=213.9888153,
    mfcc4_mean=48.72191238, mfcc4_var=101.8740387,
    mfcc5_mean=-17.60900497, mfcc5_var=96.43577576,
    mfcc6_mean=13.07938862, mfcc6_var=86.95465851,
    mfcc7_mean=-14.81746769, mfcc7_var=55.30488586,
    mfcc8_mean=12.5872221, mfcc8_var=56.38642502,
    mfcc9_mean=-26.74412918, mfcc9_var=43.41917801,
    mfcc10_mean=16.76107597, mfcc10_var=33.215065,
    mfcc11_mean=-13.62707329, mfcc11_var=28.61709785,
    mfcc12_mean=7.229446411, mfcc12_var=57.26486969,
    mfcc13_mean=-7.467717171, mfcc13_var=60.75220108,
    mfcc14_mean=1.515818954, mfcc14_var=56.58996964,
    mfcc15_mean=-13.74907207, mfcc15_var=60.15317535,
    mfcc16_mean=2.883044243, mfcc16_var=90.80250549,
    mfcc17_mean=-12.67630005, mfcc17_var=100.091423,
    mfcc18_mean=-0.636478841, mfcc18_var=136.4545593,
    mfcc19_mean=-7.841148853, mfcc19_var=91.96411896,
    mfcc20_mean=-6.169021606, mfcc20_var=101.2950897,
)

# A falsy result (None) means the prediction function reported an error.
if not predicted_genre:
    print("Failed to predict genre.")
else:
    print("Predicted genre:", predicted_genre)




Predicted genre: Pop


In [None]:
# Provided feature values, bound as module-level names (one mean/variance pair per line)
length = 66149
chroma_stft_mean, chroma_stft_var = 0.458405405, 0.074251935
rms_mean, rms_var = 0.281134576, 0.000989014
spectral_centroid_mean, spectral_centroid_var = 2576.742609, 99402.06565
spectral_bandwidth_mean, spectral_bandwidth_var = 2057.826757, 43261.36774
rolloff_mean, rolloff_var = 4821.03572, 477254.3997
zero_crossing_rate_mean, zero_crossing_rate_var = 0.15954402, 0.001309733
harmony_mean, harmony_var = -7.87E-05, 0.033878487
perceptr_mean, perceptr_var = -0.000353842, 0.017359992
tempo = 92.28515625
mfcc1_mean, mfcc1_var = 26.3681221, 198.711731
mfcc2_mean, mfcc2_var = 88.54369354, 196.2245789
mfcc3_mean, mfcc3_var = -55.95457458, 139.2305298
mfcc4_mean, mfcc4_var = 54.98015594, 131.5715332
mfcc5_mean, mfcc5_var = -15.47372532, 75.48957062
mfcc6_mean, mfcc6_var = 25.20189667, 78.37755585
mfcc7_mean, mfcc7_var = -18.45191574, 70.73079681
mfcc8_mean, mfcc8_var = 30.10690689, 43.34566879
mfcc9_mean, mfcc9_var = -13.56456852, 68.17417145
mfcc10_mean, mfcc10_var = 15.94600773, 36.49228668
mfcc11_mean, mfcc11_var = -13.82757187, 37.44901657
mfcc12_mean, mfcc12_var = 9.812178612, 68.3144455
mfcc13_mean, mfcc13_var = -13.63992405, 63.24339676
mfcc14_mean, mfcc14_var = 4.452778339, 42.78804398
mfcc15_mean, mfcc15_var = -8.288208008, 43.14927292
mfcc16_mean, mfcc16_var = 7.867574692, 30.88098335
mfcc17_mean, mfcc17_var = -10.36344242, 29.13957787
mfcc18_mean, mfcc18_var = -3.454397917, 26.41159821
mfcc19_mean, mfcc19_var = -8.083053589, 45.22610855
mfcc20_mean, mfcc20_var = -3.777705193, 25.2562809

# Predict genre; arguments are passed by keyword so each value is tied to its
# parameter name rather than to its position.
predicted_genre = predict_genre_from_input(
    length=length,
    chroma_stft_mean=chroma_stft_mean, chroma_stft_var=chroma_stft_var,
    rms_mean=rms_mean, rms_var=rms_var,
    spectral_centroid_mean=spectral_centroid_mean, spectral_centroid_var=spectral_centroid_var,
    spectral_bandwidth_mean=spectral_bandwidth_mean, spectral_bandwidth_var=spectral_bandwidth_var,
    rolloff_mean=rolloff_mean, rolloff_var=rolloff_var,
    zero_crossing_rate_mean=zero_crossing_rate_mean, zero_crossing_rate_var=zero_crossing_rate_var,
    harmony_mean=harmony_mean, harmony_var=harmony_var,
    perceptr_mean=perceptr_mean, perceptr_var=perceptr_var,
    tempo=tempo,
    mfcc1_mean=mfcc1_mean, mfcc1_var=mfcc1_var,
    mfcc2_mean=mfcc2_mean, mfcc2_var=mfcc2_var,
    mfcc3_mean=mfcc3_mean, mfcc3_var=mfcc3_var,
    mfcc4_mean=mfcc4_mean, mfcc4_var=mfcc4_var,
    mfcc5_mean=mfcc5_mean, mfcc5_var=mfcc5_var,
    mfcc6_mean=mfcc6_mean, mfcc6_var=mfcc6_var,
    mfcc7_mean=mfcc7_mean, mfcc7_var=mfcc7_var,
    mfcc8_mean=mfcc8_mean, mfcc8_var=mfcc8_var,
    mfcc9_mean=mfcc9_mean, mfcc9_var=mfcc9_var,
    mfcc10_mean=mfcc10_mean, mfcc10_var=mfcc10_var,
    mfcc11_mean=mfcc11_mean, mfcc11_var=mfcc11_var,
    mfcc12_mean=mfcc12_mean, mfcc12_var=mfcc12_var,
    mfcc13_mean=mfcc13_mean, mfcc13_var=mfcc13_var,
    mfcc14_mean=mfcc14_mean, mfcc14_var=mfcc14_var,
    mfcc15_mean=mfcc15_mean, mfcc15_var=mfcc15_var,
    mfcc16_mean=mfcc16_mean, mfcc16_var=mfcc16_var,
    mfcc17_mean=mfcc17_mean, mfcc17_var=mfcc17_var,
    mfcc18_mean=mfcc18_mean, mfcc18_var=mfcc18_var,
    mfcc19_mean=mfcc19_mean, mfcc19_var=mfcc19_var,
    mfcc20_mean=mfcc20_mean, mfcc20_var=mfcc20_var,
)

# Print predicted genre (a falsy result means the prediction failed)
if not predicted_genre:
    print("Failed to predict genre.")
else:
    print("Predicted genre:", predicted_genre)


Predicted genre: Pop
