### Importing Libraries

In [1]:
import os
import librosa
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.image import resize

In [2]:
#Loading Model
model = tf.keras.models.load_model("model/tripathi_model.h5")

In [3]:
classes = ['blues', 'classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']

### Single Audio Preprocessing

In [11]:
# Load and preprocess audio data
def load_and_preprocess_data(file_path, target_shape=(150, 150)):
    data = []
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    # Perform preprocessing (e.g., convert to Mel spectrogram and resize)
    # Define the duration of each chunk and overlap
    chunk_duration = 4  # seconds
    overlap_duration = 2  # seconds
    print(sample_rate)
                
    # Convert durations to samples
    chunk_samples = chunk_duration * sample_rate
    overlap_samples = overlap_duration * sample_rate
                
    # Calculate the number of chunks
    num_chunks = int(np.ceil((len(audio_data) - chunk_samples) / (chunk_samples - overlap_samples))) + 1
                
    # Iterate over each chunk
    for i in range(num_chunks):
                    # Calculate start and end indices of the chunk
        start = i * (chunk_samples - overlap_samples)
        end = start + chunk_samples
                    
                    # Extract the chunk of audio
        chunk = audio_data[start:end]
                    
                    # Compute the Mel spectrogram for the chunk
        mel_spectrogram = librosa.feature.melspectrogram(y=chunk, sr=sample_rate)
                    
                #mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
        mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
        data.append(mel_spectrogram)
    
    return np.array(data)

### Model Prediction

In [16]:
from IPython.display import Audio
# file_path = "../mfcc_genre_detection/datasets/reggae/reggae.00033.wav"
# file_path = "audio1_enhenced.wav"
file_path = "test.wav"
# y, sr = librosa.load(file_path, sr=44100)
# Audio(data=y, rate=sr)
#Processing Test File
X_test = load_and_preprocess_data(file_path)
# print(X_test.shape)
prediction = model.predict(X_test)

genres = {
    "Blues": 0,
    "Classical": 0,
    "Country": 0,
    "Disco": 0,
    "HipHop": 0,
    "Jazz": 0,
    "Metal": 0,
    "Pop": 0,
    "Reggae": 0,
    "Rock": 0,
}
genres_list = list(genres.keys())

for i in range(prediction.shape[0]):
    # print(np.argmax(prediction[i]))
    for idx, v in enumerate(prediction[i]):
        # print('val', genres['Reggae'])
        genres[genres_list[idx]] = ((genres[genres_list[idx]] * 3 )+ v) / 4
        # genres[genres_list[idx]] = genres[genres_list[idx]]+ v 


print(genres)

print("<<========= Genre Detection Accuracy =======>>")
total_val = 0
for genre in genres:
    print("{}\t\t==> {}%".format(genre, round(genres[genre] * 100, 2)))
    total_val = total_val +  round(genres[genre] * 100, 2)

print('total value',total_val)

22050
{'Blues': 2.455440859241325e-07, 'Classical': 7.19126872859535e-10, 'Country': 2.6487469101625256e-06, 'Disco': 0.00041205812478795534, 'HipHop': 0.44067096039137976, 'Jazz': 1.7965058486871723e-07, 'Metal': 1.0291056324755685e-05, 'Pop': 0.5366565873524766, 'Reggae': 0.0024198757358396693, 'Rock': 0.002009231722174808}
Blues		==> 0.0%
Classical		==> 0.0%
Country		==> 0.0%
Disco		==> 0.04%
HipHop		==> 44.07%
Jazz		==> 0.0%
Metal		==> 0.0%
Pop		==> 53.67%
Reggae		==> 0.24%
Rock		==> 0.2%
total value 98.22


In [46]:
#Model Prediction
def model_prediction(X_test):
    y_pred = model.predict(X_test)
    predicted_categories = np.argmax(y_pred,axis=1)
    unique_elements, counts = np.unique(predicted_categories, return_counts=True)
    print(y_pred.shape)
    max_count = np.max(counts)
    print(max_count)
    max_elements = unique_elements[counts == max_count]
    print(max_elements)
    return max_elements[0]

In [58]:
#Model Prediction
c_index = model_prediction(X_test)
print(f"Model Prediction :: Music Genre --> {classes[c_index]}")

(15, 10)
6
[1]
Model Prediction :: Music Genre --> classical
