In [2]:
import json
import os
import math
import librosa
from tensorflow.keras.models import load_model
import numpy as np

In [24]:
# Audio parameters used when slicing a track into segments.
SAMPLE_RATE = 22050
TRACK_DURATION = 30  # seconds
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE

# Audio clip that the rest of the notebook classifies.
file_path = "../mfcc_genre_detection/datasets/jazz/jazz.00069.wav"

# Saved Keras model that produces the per-genre scores.
model = load_model("./model/cnn__genre_detection.h5")


def extract_mfcc(file_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Cut one audio file into equal segments and return the MFCCs of each.

    The segment length is derived from the module-level ``SAMPLES_PER_TRACK``
    divided by ``num_segments``; the audio itself is loaded with
    ``sr=SAMPLE_RATE``.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        num_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.
        n_fft: FFT window size forwarded to ``librosa.feature.mfcc``.
        hop_length: Hop size forwarded to ``librosa.feature.mfcc``.
        num_segments: Number of consecutive segments to cut from the signal.

    Returns:
        A list with one entry per kept segment. Each entry is the transposed
        MFCC matrix converted to nested Python lists. Segments whose number of
        MFCC vectors differs from ``ceil(samples_per_segment / hop_length)``
        (e.g. because the signal is too short) are left out, so the list may
        hold fewer than ``num_segments`` entries.
    """
    segment_len = int(SAMPLES_PER_TRACK / num_segments)
    expected_frames = math.ceil(segment_len / hop_length)

    signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

    segments = []
    for index in range(num_segments):
        # Sample window covered by this segment.
        begin = index * segment_len
        chunk = signal[begin : begin + segment_len]

        # Transposed so that len() counts MFCC vectors rather than coefficients.
        frames = librosa.feature.mfcc(
            y=chunk,
            sr=sample_rate,
            n_mfcc=num_mfcc,
            n_fft=n_fft,
            hop_length=hop_length,
        ).T

        # Skip segments that do not have the expected number of vectors.
        if len(frames) != expected_frames:
            continue
        segments.append(frames.tolist())

    return segments


# Build the model input: one MFCC matrix per segment kept by extract_mfcc.
val = np.array(extract_mfcc(file_path=file_path, num_segments=10))
print(val.shape)
# Add a trailing axis of length 1 (presumably the channel axis the model
# expects -- confirm against the model's input shape).
val = np.expand_dims(val, axis=-1)

# Running totals; `genres` and `sum_accuracy` are filled in by a later cell.
percentage = accuracy = sum_accuracy = min_acc = 0
genres = dict.fromkeys(
    [
        "Blues",
        "Classical",
        "Country",
        "Disco",
        "HipHop",
        "Jazz",
        "Metal",
        "Pop",
        "Reggae",
        "Rock",
    ],
    0,
)

# Score every segment in a single batch.
prediction = model.predict(val)
print(prediction.shape)

(10, 130, 13)
(10, 10)


In [25]:
# Sum the per-segment scores in `prediction` into one total per genre.
#
# The accumulators are reset at the top of this cell so that running the cell
# again starts from zero instead of adding the same predictions onto totals
# left over from an earlier run.
genres = dict.fromkeys(genres, 0)
sum_accuracy = 0

# Column of the genre whose score is tracked separately in `sum_accuracy`.
# Looked up by name rather than hard-coded as 5; this assumes the order of the
# keys in `genres` matches the order of the model's output columns.
target_index = list(genres).index("Jazz")

for segment_scores in prediction:
    print(segment_scores)
    for index, genre in enumerate(genres):
        genres[genre] = genres[genre] + segment_scores[index]
        if index == target_index:
            print(segment_scores[index])
            sum_accuracy = sum_accuracy + segment_scores[index]

[5.8380736e-04 4.0116995e-03 4.7107856e-03 6.5299573e-05 1.8987552e-05
 9.8658586e-01 3.2076656e-04 1.6371845e-04 2.1634418e-05 3.5174545e-03]
0.98658586
[5.2855270e-05 5.0110513e-05 4.0461167e-05 6.4545156e-06 1.0854468e-06
 9.9963355e-01 1.2750542e-05 3.9651245e-06 1.8704253e-06 1.9694795e-04]
0.99963355
[7.68614933e-04 2.66436309e-01 1.47365285e-02 1.92790467e-04
 1.29684107e-04 6.94877625e-01 2.99931708e-04 3.81287676e-03
 9.03005930e-05 1.86553709e-02]
0.6948776
[0.00338391 0.03166066 0.7225048  0.00203272 0.00222801 0.05572223
 0.00372411 0.06920528 0.00147338 0.10806488]
0.055722233
[1.3824921e-03 1.5683946e-01 1.9117825e-02 1.9455800e-04 1.2407605e-04
 7.8765613e-01 4.4318859e-04 3.5824594e-03 5.0482340e-05 3.0609265e-02]
0.7876561
[8.36836919e-03 6.78463280e-02 1.17246136e-01 6.22071209e-04
 1.10585266e-03 7.48295546e-01 1.61824608e-03 3.67478654e-02
 9.96001880e-04 1.71535145e-02]
0.74829555
[2.6486032e-03 2.0644916e-02 2.7299946e-02 7.1423029e-04 8.7864185e-04
 8.9644724e-01

In [26]:
# Report the accumulated per-genre totals as percentages.
print('sum_accuracy ==>', sum_accuracy)

print('<<========= Genre Detection Accuracy =======>>')
print(genres)

# Each total is the sum of one score per predicted segment, so the percentage
# is the mean over segments times 100. Dividing by the actual number of
# predicted segments (instead of multiplying by a fixed 10) keeps the figures
# correct when fewer or more than 10 segments were scored.
num_predicted_segments = len(prediction)

total_val = 0
for genre in genres:
    share = genres[genre] * 100 / num_predicted_segments
    print('{}\t\t==> {}%'.format(genre, round(share, 2)))
    total_val = total_val + genres[genre]

# Sanity check: the printed output shows this close to the number of segments.
print('total_val ==>',total_val)

sum_accuracy ==> 8.012447115033865
{'Blues': 0.01791323409997858, 'Classical': 0.5549905630796275, 'Country': 0.9404911054916738, 'Disco': 0.006302460954429989, 'HipHop': 0.0048047398280459674, 'Jazz': 8.012447115033865, 'Metal': 0.012815086361115391, 'Pop': 0.11817820123951606, 'Reggae': 0.004754828957061363, 'Rock': 0.3273026292445138}
Blues		==> 0.18%
Classical		==> 5.55%
Country		==> 9.4%
Disco		==> 0.06%
HipHop		==> 0.05%
Jazz		==> 80.12%
Metal		==> 0.13%
Pop		==> 1.18%
Reggae		==> 0.05%
Rock		==> 3.27%
total_val ==> 9.999999964289827


In [104]:
# Scratch check: print the position of every genre name in `genres`.
# Note that this cell rebinds `genres` to a fresh dictionary of zero totals.
nilai = np.arange(0,10)
genres = {
    "Blues": 0,
    "Classical": 0,
    "Country": 0,
    "Disco": 0,
    "HipHop": 0,
    "Jazz": 0,
    "Metal": 0,
    "Pop": 0,
    "Reggae": 0,
    "Rock": 0,
}

# enumerate() over a dict yields (position, key) pairs. The loop variables are
# named accordingly and deliberately avoid `val`, which holds the MFCC input
# array used for prediction elsewhere in the notebook.
for index, genre in enumerate(genres):
    print(index)
    print(genre)

0
Blues
1
Classical
2
Country
3
Disco
4
HipHop
5
Jazz
6
Metal
7
Pop
8
Reggae
9
Rock
