In [364]:
from keras.models import load_model
import pathlib
from pathlib import Path
import os
import librosa, librosa.display
import numpy as np
import pickle
from sklearn.metrics import classification_report
from sklearn import preprocessing
import numpy, scipy, matplotlib.pyplot as plt, IPython.display as ipd
import re
import csv

# Set TensorFlow's native log-level environment variable for this process
# (presumably '2' hides INFO and WARNING messages — confirm against the TF docs).
# NOTE(review): this assignment runs after `keras` has already been imported
# at the top of this cell; the variable is normally set before TensorFlow is
# imported, so confirm that it still takes effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

In [365]:
# ---- configuration ---------------------------------------------------------

# Directory holding the trained model and its companion artefacts.
input_root = './../../../models/'
input_whole = input_root + 'cnn_dong_model_whole.h5'
input_label = input_root + 'label.pkl'
input_test_paths = input_root + 'test_paths.pkl'

# Destinations for the per-song predictions (pickle and CSV flavours).
output_test_results = input_root + 'predictions_cnn.pkl'
output_test_results_csv = input_root + 'predictions_cnn.csv'

# Song used whenever a helper is called without an explicit path.
default_song = Path("../../../audio/testfiles/GTZAN/genres/rock/rock.00003.wav")

# Excerpt settings.
duration = 2.97  # length of one excerpt, in seconds
sr = 22050       # intended sampling rate; audio at another rate should be resampled to it

# Mel-spectrogram settings.
fft_window_points = 512
hop_size = fft_window_points // 2        # 50% overlap between consecutive frames
fft_window_dur = fft_window_points / sr  # window length in seconds (about 23 ms)
n_mels = 64
fmax = 1500                              # maximum frequency considered

In [366]:
# Restore everything needed for inference: the trained network, the pickled
# label object (its `classes_` attribute names the predicted class further
# down) and the pickled collection of songs to predict.
# Both pickle files are unpickled as-is, so they must come from a trusted source.
model = load_model(input_whole)

with open(input_label, 'rb') as label_file:
    lb = pickle.load(label_file)

with open(input_test_paths, 'rb') as paths_file:
    paths = pickle.load(paths_file)

In [367]:
def load_song(song = default_song, offset = 0):
    """Load one excerpt of a song and convert it into a model input.

    Parameters
    ----------
    song : path-like
        Audio file to read. Defaults to ``default_song``.
    offset : int or float
        Excerpt index counted in units of ``duration``: the excerpt starts
        ``duration * offset`` seconds into the file and lasts ``duration``
        seconds.

    Returns
    -------
    numpy.ndarray
        The mel spectrogram of the excerpt with one axis inserted at
        position 0 and one at position 3.
    """
    start = duration * offset
    # Request the configured sampling rate explicitly so the audio always
    # matches the window/hop settings derived from `sr`. The returned rate
    # gets its own name: assigning to `sr` here would shadow the module-level
    # setting and leave resampling to librosa's default.
    y, loaded_sr = librosa.load(song, sr=sr, mono=True, offset=start,
                                duration=duration)
    m_sp = librosa.feature.melspectrogram(y=y, sr=loaded_sr,
                                          n_fft=fft_window_points,
                                          hop_length=hop_size,
                                          n_mels=n_mels,
                                          fmax=fmax)
    # Add the leading and trailing singleton axes.
    m_sp = np.expand_dims(m_sp, 0)
    m_sp = np.expand_dims(m_sp, 3)
    return m_sp
        
def get_songs_num_offsets(song = default_song, num = 1):
    """Build the model inputs for the first `num` excerpts of `song`.

    Calls `load_song` once per excerpt index 0, 1, ..., num - 1 and returns
    the results as a list in that order (empty when `num` is 0).
    """
    return [load_song(song = song, offset = excerpt_index)
            for excerpt_index in range(num)]

In [368]:
def predict_song(song = default_song, number_of_offsets = 1):
    """Predict class scores for a song, averaged over consecutive excerpts.

    Parameters
    ----------
    song : path-like
        Audio file to classify. Defaults to ``default_song``.
    number_of_offsets : int
        How many consecutive excerpts (starting at the beginning of the
        file) are fed to the model.

    Returns
    -------
    numpy.ndarray or list
        The element-wise mean of the per-excerpt ``model.predict`` outputs,
        with the same shape as a single prediction. An empty list is
        returned when ``number_of_offsets`` is 0.
    """
    samples = get_songs_num_offsets(song = song, num = number_of_offsets)
    if not samples:
        # Nothing to predict on; keep the historical empty-list result.
        return []
    predictions = [model.predict(sample) for sample in samples]
    # Unweighted mean over excerpts. The previous accumulation
    # (`pr_n + prediction / 2`) counted the first excerpt fully and every
    # later one only half, so it was neither a mean nor normalised.
    return np.mean(np.stack(predictions), axis=0)


In [369]:
# Column names of the predictions CSV: file name, one score column per genre,
# and finally the name of the highest-scoring class.
header = [
    'filename',
    'blues', 'classical', 'country', 'disco', 'hiphop',
    'jazz', 'metal', 'pop', 'reggae', 'rock',
    'argmax',
]

In [370]:
# Predict every test song, collect the rows in `p_songs` and stream them to
# the CSV file as they are produced.
p_songs = []
# A single handle for the whole run replaces reopening the file in append
# mode for every row.
with open(output_test_results_csv, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header)
    for path in paths:
        # predict_song returns a batch of one prediction; take that row.
        prediction = predict_song(song = path, number_of_offsets = 1)[0]
        # Name of the highest-scoring class, with any brackets or single
        # quotes stripped from its text form. The raw string avoids the
        # invalid-escape warning the old pattern literal triggered.
        predicted_label = re.sub(r"[\[\]']", '',
                                 str(lb.classes_[prediction.argmax(axis=-1)]))
        # One score column per class, whatever the number of classes.
        row = (path.name, *prediction, predicted_label)
        p_songs.append(row)
        writer.writerow(row)
        # Keep finished rows on disk even if a later song fails.
        csv_file.flush()

In [371]:
# Persist the collected prediction rows next to the CSV, as a pickle.
with open(output_test_results, 'wb') as results_file:
    pickle.dump(p_songs, results_file)

In [379]:
# Inspection only: show the container type holding the prediction rows.
type(p_songs)

list