In [421]:
# Standard library
import csv
import os
import pathlib
import pickle
import re
from pathlib import Path

# This has to be set before Keras pulls in TensorFlow: setting it after the
# import is too late to affect the messages emitted while the library loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # 2 = hide INFO and WARNING messages

# Third-party
import IPython.display as ipd
import librosa, librosa.display
import matplotlib.pyplot as plt
import numpy
import numpy as np
import scipy
from keras.models import load_model
from sklearn import preprocessing
from sklearn.metrics import classification_report

In [422]:
# parameters

# -- audio / spectrogram settings --
sr = 22050  # target sampling rate; if a file's rate is different, resample it to this
duration = 2.97  # length of one excerpt, passed to librosa.load as `duration`
fft_window_points = 512
fft_window_dur = fft_window_points / sr  # window length in seconds (about 23 ms)
hop_size = fft_window_points // 2  # half a window: 50% overlap between consecutive frames
n_mels = 64
fmax = 1500  # maximum frequency considered

# -- input locations --
input_root = './../../../models/'
input_whole = f'{input_root}cnn_dong_model_whole.h5'
input_label = f'{input_root}label.pkl'
input_test_paths = f'{input_root}test_paths.pkl'
default_song = Path("../../../audio/testfiles/GTZAN/genres/rock/rock.00003.wav")

# -- output locations --
output_test_results = f'{input_root}predictions_cnn.pkl'
output_test_results_csv = f'{input_root}predictions_cnn.csv'

In [423]:
# Load the saved model, the label object and the paths of the songs to predict.
def _read_pickle(pickle_path):
    """Return the object stored in the pickle file at ``pickle_path``.

    Unpickling can run arbitrary code, so only point this at files you trust.
    """
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)


model = load_model(input_whole)
lb = _read_pickle(input_label)
paths = _read_pickle(input_test_paths)

In [424]:
def load_song(song = default_song, offset = 0):
    """Turn one fixed-length excerpt of an audio file into a mel spectrogram batch.

    Parameters
    ----------
    song : path-like
        Audio file to read; defaults to ``default_song``.
    offset : int or float
        Excerpt index in units of ``duration``: excerpt ``k`` starts
        ``k * duration`` seconds into the file.

    Returns
    -------
    numpy.ndarray
        The mel spectrogram of the excerpt with one axis of length 1 added in
        front and one at the end, i.e. ``(1, n_mels, frames, 1)``.
    """
    start_seconds = duration * offset
    # Ask for the configured sampling rate explicitly instead of relying on
    # librosa's default happening to match it. The returned rate gets its own
    # name so it cannot shadow the module-level ``sr``.
    y, sample_rate = librosa.load(song, sr=sr, mono=True,
                                  offset=start_seconds, duration=duration)
    m_sp = librosa.feature.melspectrogram(y=y, sr=sample_rate,
                                          n_fft=fft_window_points,
                                          hop_length=hop_size, n_mels=n_mels,
                                          fmax=fmax)
    # Add the leading and trailing singleton axes in one step.
    return m_sp[np.newaxis, ..., np.newaxis]
        
def get_songs_num_offsets(song = default_song, num = 1):
    """Load the first ``num`` consecutive excerpts of ``song``.

    Excerpt ``k`` is obtained with ``load_song(song=song, offset=k)`` for
    ``k = 0 .. num - 1``; the results are returned as a list in that order.
    """
    return [load_song(song = song, offset = excerpt_index)
            for excerpt_index in range(num)]

In [425]:
def predict_song(song = default_song, number_of_offsets = 1):
    """Predict a song by averaging the model output over several excerpts.

    Parameters
    ----------
    song : path-like
        Audio file to classify; defaults to ``default_song``.
    number_of_offsets : int
        How many consecutive excerpts (starting at the beginning of the file)
        to run through the model.

    Returns
    -------
    numpy.ndarray or list
        The element-wise mean of ``model.predict`` over all excerpts, with the
        same shape as a single prediction. An empty list is returned when
        ``number_of_offsets`` yields no excerpts.
    """
    samples = get_songs_num_offsets(song = song, num = number_of_offsets)
    if not samples:
        # Nothing to predict on; keep the historical empty-list result.
        return []
    predictions = [model.predict(sample) for sample in samples]
    # A true mean: the earlier running update (previous + new / 2) kept the
    # first prediction at full weight and merely added half of each later one.
    return np.mean(predictions, axis=0)


In [426]:
# Column names of the results CSV: the file name, the mean librosa features,
# twenty MFCC means, one score per genre and finally the predicted label.
header = (
    'filename chroma_stft spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'.split()
    + [f'mfcc{number}' for number in range(1, 21)]
    + ' pred.blues pred.classical pred.country pred.disco pred.hiphop pred.jazz pred.metal pred.pop pred.reggae pred.rock pred.argmax'.split()
)
header

['filename',
 'chroma_stft',
 'spectral_centroid',
 'spectral_bandwidth',
 'rolloff',
 'zero_crossing_rate',
 'mfcc1',
 'mfcc2',
 'mfcc3',
 'mfcc4',
 'mfcc5',
 'mfcc6',
 'mfcc7',
 'mfcc8',
 'mfcc9',
 'mfcc10',
 'mfcc11',
 'mfcc12',
 'mfcc13',
 'mfcc14',
 'mfcc15',
 'mfcc16',
 'mfcc17',
 'mfcc18',
 'mfcc19',
 'mfcc20',
 'pred.blues',
 'pred.classical',
 'pred.country',
 'pred.disco',
 'pred.hiphop',
 'pred.jazz',
 'pred.metal',
 'pred.pop',
 'pred.reggae',
 'pred.rock',
 'pred.argmax']

In [427]:
# For every test song: predict with the CNN, compute the mean of several
# librosa features, and store one row per song both in the CSV file and in
# ``p_songs`` (pickled in a later cell).
p_songs = []
# The file is opened once for the header and all rows; the ``with`` block
# closes (and flushes) it even if a song fails half-way through.
with open(output_test_results_csv, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(header)
    for path in paths:
        prediction = predict_song(song = path, number_of_offsets = 1)[0]
        # Keep the returned rate in its own name so the module-level ``sr``
        # setting is not overwritten by this loop.
        y, song_sr = librosa.load(path, sr=sr, mono=True, duration=30)
        summary_features = [
            librosa.feature.chroma_stft(y=y, sr=song_sr),
            librosa.feature.spectral_centroid(y=y, sr=song_sr),
            librosa.feature.spectral_bandwidth(y=y, sr=song_sr),
            librosa.feature.spectral_rolloff(y=y, sr=song_sr),
            librosa.feature.zero_crossing_rate(y),
        ]
        mfcc = librosa.feature.mfcc(y=y, sr=song_sr)
        # Build the row as a list of fields. Joining everything with spaces
        # and splitting again broke the columns for file names that contain
        # whitespace. Values are formatted exactly as before (f-string).
        row = [path.name]
        row += [f'{np.mean(feature)}' for feature in summary_features]
        row += [f'{np.mean(coefficient)}' for coefficient in mfcc]
        row += [f'{score}' for score in prediction]
        row.append(str(lb.classes_[prediction.argmax(axis=-1)]))
        writer.writerow(row)
        p_songs.append(row)

In [428]:
# Persist the collected rows so they can be reloaded without re-running the predictions.
with open(output_test_results, 'wb') as pickle_file:
    pickle.dump(p_songs, pickle_file)

In [431]:
# Quick inspection: show the container type of the collected prediction rows.
type(p_songs)

list