In [None]:
import os
from tqdm import tqdm
from matplotlib import cm, colors, pyplot as plt
from matplotlib.colors import ListedColormap

import IPython 
from IPython.display import display

import numpy as np
import pandas as pd

import librosa 
import librosa.display
import random

import tensorflow as tf
import tensorflow.python.keras as k

from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

import warnings
warnings.simplefilter("ignore")
%matplotlib inline

In [None]:
# Single seed shared by every random number generator and data split in this notebook.
SEED = 42

In [None]:
# Seed every source of randomness used in the notebook.
# `random.seed` must be *called*; assigning to it (as was done before) replaces
# the function with an int and leaves Python's RNG unseeded.
random.seed(SEED)
np.random.seed(SEED)

# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this only affects
# Python processes launched after this point, not the already-running kernel.
os.environ['PYTHONHASHSEED'] = '0'

tf.random.set_seed(SEED)

# Restrict TensorFlow to a single thread for intra-op and inter-op parallelism.
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)

sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)

# Make Keras use the session configured above.
k.backend.set_session(sess)

In [None]:
# Load the pre-computed feature table (one row per audio segment).
data = pd.read_csv('../input/gtzan-dataset-music-genre-classification/Data/features_3_sec.csv')
# `display` and `DataFrame.info` emit their output themselves and return None,
# so wrapping them in print() only added a stray "None" line.
display(data.head())
data.info()

In [None]:
def signal(amplitudes, sample_rate):
    """Print basic size information about an audio signal and plot its waveform.

    Args:
        amplitudes: Audio samples (e.g. the first value returned by ``librosa.load``).
        sample_rate: Sampling rate of ``amplitudes`` in Hz.
    """
    n_points = len(amplitudes)
    print(f"{n_points} points, {n_points / sample_rate} sec, sr {sample_rate}")
    plt.figure(figsize=(5, 5))
    # The plot handle is not needed; it used to be bound to a local called
    # `signal`, which shadowed this function inside its own body.
    librosa.display.waveplot(amplitudes, sr=sample_rate, alpha=0.4)
    plt.title("Signal")

In [None]:
def play_signal(amplitudes, sample_rate):
    """Build an IPython audio widget for the given samples.

    Args:
        amplitudes: Audio samples to play.
        sample_rate: Sampling rate of ``amplitudes`` in Hz.

    Returns:
        An ``IPython.display.Audio`` object; the notebook renders it as a
        player when it is the last expression of a cell.
    """
    player = IPython.display.Audio(data=amplitudes, rate=sample_rate)
    return player

In [None]:
def spectrogram(amplitudes, sample_rate):
    """Plot the dB-scaled magnitude of the short-time Fourier transform.

    Args:
        amplitudes: Audio samples.
        sample_rate: Sampling rate of ``amplitudes`` in Hz; used for the axes.
    """
    plt.figure(figsize=(5, 5))
    stft = librosa.stft(amplitudes)
    # The STFT is complex-valued; take the magnitude before converting to dB.
    stft_db = librosa.amplitude_to_db(np.abs(stft))
    librosa.display.specshow(data=stft_db, sr=sample_rate, x_axis='time', y_axis='hz')
    # Title typo ("Spectogram") fixed.
    plt.title("Spectrogram")
    plt.colorbar()

In [None]:
def normalize(amplitudes, axis=0):
    """Min-max scale the input along ``axis`` using scikit-learn's ``minmax_scale``.

    Args:
        amplitudes: Array-like of values to rescale.
        axis: Axis along which to scale (forwarded to ``minmax_scale``).

    Returns:
        The rescaled values as returned by ``minmax_scale``.
    """
    scaled = minmax_scale(amplitudes, axis=axis)
    return scaled

def spectral_centroids(amplitudes, sample_rate, plt_off=False):
    """Plot the normalized spectral centroid curve on top of the waveform.

    Args:
        amplitudes: Audio samples.
        sample_rate: Sampling rate of ``amplitudes`` in Hz.
        plt_off: Accepted for backward compatibility; currently not used.
    """
    centroids = librosa.feature.spectral_centroid(y=amplitudes, sr=sample_rate)[0]

    plt.figure(figsize=(5, 5))
    frame_ids = range(len(centroids))
    # frames_to_time assumes sr=22050 unless told otherwise; pass the real
    # rate so the curve lines up with the waveform for any sampling rate.
    times = librosa.frames_to_time(frame_ids, sr=sample_rate)

    librosa.display.waveplot(amplitudes, sr=sample_rate, alpha=0.4)
    # The centroid is min-max scaled so it shares the waveform's y-range.
    plt.plot(times, normalize(centroids), color='r')
    plt.title("Spectral centroids")

In [None]:
def spectral_rolloff(amplitudes, sample_rate):
    """Plot the normalized spectral rolloff curve on top of the waveform.

    Args:
        amplitudes: Audio samples (must support ``amplitudes + 0.01``).
        sample_rate: Sampling rate of ``amplitudes`` in Hz.
    """
    # The 0.01 offset is kept from the original code; presumably it avoids
    # all-zero frames -- TODO confirm.
    rolloff = librosa.feature.spectral_rolloff(y=amplitudes + 0.01, sr=sample_rate)[0]

    plt.figure(figsize=(5, 5))
    frame_ids = range(len(rolloff))
    # frames_to_time assumes sr=22050 unless told otherwise; pass the real
    # rate so the curve lines up with the waveform for any sampling rate.
    times = librosa.frames_to_time(frame_ids, sr=sample_rate)
    librosa.display.waveplot(amplitudes, sr=sample_rate, alpha=0.4)
    plt.plot(times, normalize(rolloff), color='r')
    plt.title("Spectral rolloff")

In [None]:
def spectral_bandwidth(amplitudes, sample_rate):
    """Plot normalized spectral bandwidth curves (p = 2, 3, 4) over the waveform.

    Args:
        amplitudes: Audio samples (must support ``amplitudes + 0.01``).
        sample_rate: Sampling rate of ``amplitudes`` in Hz.
    """
    # The 0.01 offset is kept from the original code; presumably it avoids
    # all-zero frames -- TODO confirm.
    shifted = amplitudes + 0.01
    # The first call relies on librosa's default order, labelled "p = 2" in the legend.
    bandwidth_2 = librosa.feature.spectral_bandwidth(y=shifted, sr=sample_rate)[0]
    bandwidth_3 = librosa.feature.spectral_bandwidth(y=shifted, sr=sample_rate, p=3)[0]
    bandwidth_4 = librosa.feature.spectral_bandwidth(y=shifted, sr=sample_rate, p=4)[0]
    frame_ids = range(len(bandwidth_2))
    # frames_to_time assumes sr=22050 unless told otherwise; pass the real
    # rate so the curves line up with the waveform for any sampling rate.
    times = librosa.frames_to_time(frame_ids, sr=sample_rate)

    plt.figure(figsize=(5, 5))
    librosa.display.waveplot(amplitudes, sr=sample_rate, alpha=0.4)
    plt.plot(times, normalize(bandwidth_2), color='r')
    plt.plot(times, normalize(bandwidth_3), color='g')
    plt.plot(times, normalize(bandwidth_4), color='y')
    plt.title("Spectral bandwidth")
    plt.legend(('p = 2', 'p = 3', 'p = 4'))

In [None]:
def melspectrogram(amplitudes, sample_rate):
    """Compute a 128-band mel spectrogram of the signal and display it.

    Args:
        amplitudes: Audio samples.
        sample_rate: Sampling rate of ``amplitudes`` in Hz; also sets the
            upper mel frequency limit to ``sample_rate // 2``.
    """
    mel_spec = librosa.feature.melspectrogram(
        y=amplitudes,
        sr=sample_rate,
        n_mels=128,
        fmin=20,
        fmax=sample_rate // 2,
    )

    plt.figure(figsize=(5, 5))
    librosa.display.specshow(mel_spec, sr=sample_rate, x_axis='time')
    plt.xlabel("time")
    plt.colorbar()
    plt.title("Melspectrogram")

In [None]:
def chromagram(amplitudes, sample_rate):
    """Compute the STFT-based chromagram of the signal and display it.

    Args:
        amplitudes: Audio samples.
        sample_rate: Sampling rate of ``amplitudes`` in Hz.
    """
    hop_length = 512
    chroma = librosa.feature.chroma_stft(y=amplitudes, sr=sample_rate, hop_length=hop_length)

    plt.figure(figsize=(5, 5))
    # Pass the real sampling rate: without `sr`, specshow derives the time
    # axis from its default rate, which is wrong for any other sample_rate.
    librosa.display.specshow(chroma, sr=sample_rate, x_axis='time', y_axis='chroma',
                             hop_length=hop_length, cmap='coolwarm')
    plt.title("Chromagram")
    plt.colorbar()

In [None]:
def PCEN(amplitudes, sample_rate):
    """Display the PCEN (per-channel energy normalization) of a mel spectrogram.

    Args:
        amplitudes: Audio samples.
        sample_rate: Sampling rate of ``amplitudes`` in Hz.
    """
    # Step 1: 128-band mel spectrogram of the signal.
    mel_settings = dict(
        sr=sample_rate,
        n_mels=128,
        hop_length=512,
        n_fft=2048,
        fmin=20,
        fmax=sample_rate // 2,
    )
    mel_spec = librosa.feature.melspectrogram(y=amplitudes, **mel_settings)

    # Step 2: PCEN with the same hop length as the mel spectrogram.
    pcen_settings = dict(
        sr=sample_rate,
        hop_length=512,
        gain=0.6,
        bias=0.1,
        power=0.2,
        time_constant=0.4,
        eps=1e-9,
    )
    pcen_spec = librosa.core.pcen(mel_spec, **pcen_settings)

    plt.figure(figsize=(5, 5))
    librosa.display.specshow(pcen_spec, sr=sample_rate, x_axis='time')
    plt.title("PCEN")
    plt.colorbar()

In [None]:
# Load one example track and run every visualization defined above on it.
amplitudes, sample_rate = librosa.load("../input/gtzan-dataset-music-genre-classification/Data/genres_original/blues/blues.00000.wav")
for show in (
    signal,
    spectrogram,
    spectral_centroids,
    spectral_rolloff,
    spectral_bandwidth,
    melspectrogram,
    chromagram,
    PCEN,
):
    show(amplitudes, sample_rate)
# Last expression of the cell, so the notebook renders the audio player.
play_signal(amplitudes, sample_rate)

In [None]:
# Distinct values of the label column, in order of first appearance.
all_labels = pd.unique(data['label'])
print('Classes:', len(all_labels))

In [None]:
# Learn the label -> integer id mapping from the label column.
labels = data['label']
label_encoder = LabelEncoder()
label_encoder.fit(labels)

In [None]:
# Integer class id for every row, encoded in one vectorized call instead of
# one `transform` call per row. Plain assignment appends the column at the end
# when it is new (where `insert(60, ...)` put it for a 60-column frame) and
# overwrites it on a re-run instead of raising "already exists".
data['label_id'] = label_encoder.transform(data['label']).astype(int)

# 'filename' is split on '.', and parts 0, 1 and 3 are re-joined; part 2 (used
# later as the segment index) is dropped, leaving the name of the full track.
# A name with fewer than four parts raises IndexError, as before.
filename_parts = [str(name).split('.') for name in data['filename']]
filename_full = ['.'.join((parts[0], parts[1], parts[3])) for parts in filename_parts]
if 'filename_full' in data.columns:
    # Re-running the cell: refresh the existing column in place.
    data['filename_full'] = filename_full
else:
    data.insert(1, 'filename_full', filename_full)

In [None]:
# Model inputs are every column except the file identifiers, the clip length
# and the two target columns.
# NOTE(review): rows are segments of longer tracks (see 'filename_full'), and a
# random row-level split can place segments of one track in both sets.
non_feature_columns = ['filename', 'filename_full', 'length', 'label', 'label_id']
features_full = data.drop(columns=non_feature_columns)
target_full = data['label_id'].astype('int')
features, features_test, target, target_test = train_test_split(
    features_full,
    target_full,
    test_size=0.25,
    random_state=SEED,
    shuffle=True,
)

In [None]:
# Fit the scaler on the training split only and reuse its statistics for the
# test split. The original row index is kept so that `features` /
# `features_test` stay aligned with `target` / `target_test` by label, not
# just by position.
scaler = StandardScaler()
features = pd.DataFrame(
    scaler.fit_transform(features), columns=features.columns, index=features.index
)
features_test = pd.DataFrame(
    scaler.transform(features_test), columns=features_test.columns, index=features_test.index
)

In [None]:
def ranking(y, title='', xlabel='', ylabel=''):
    """Draw a horizontal bar chart of a Series with bars colored by value.

    Bars are drawn top to bottom in the order of ``y``; each bar is annotated
    with its value rounded to four decimals.

    Args:
        y: pandas Series; the index supplies the bar labels, the values the
            bar lengths.
        title: Plot title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
    """
    values = y.values
    y_pos = np.arange(len(y), 0, -1)
    plt.figure(figsize=(6, 6))
    cmap = cm.viridis_r
    # One normalization for both the bar colors and the colorbar. The upper
    # bound is clamped at 0 so an empty, all-zero or all-negative input
    # neither divides by zero nor produces an inverted range.
    upper = max(float(np.max(values)), 0.0) if len(values) else 0.0
    norm = colors.Normalize(0, upper)
    bar_colors = cmap(norm(values))
    plt.barh(y_pos, width=y, height=0.6, left=None, align='center', color=bar_colors, alpha=0.8)
    # The mappable is not attached to any Axes, so say explicitly which Axes
    # the colorbar belongs to (recent matplotlib versions require this).
    plt.colorbar(cm.ScalarMappable(norm=norm, cmap=cmap), ax=plt.gca())
    plt.yticks(y_pos, y.index)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    for pos, value in zip(y_pos, y.round(4)):
        # Negative bars are annotated at x = 0 rather than at their tip.
        plt.annotate(str(value), xy=(value if value > 0 else 0, pos), va='center')

In [None]:
def accuracy_for_each_class(target_test, preds, class_names=None):
    """Compute, for each true class, the percentage of samples predicted correctly.

    Samples are grouped by their *true* label. The previous implementation
    grouped by the predicted label, which yields per-class precision rather
    than per-class accuracy.

    Args:
        target_test: Array-like of true integer class ids. Only positions are
            used, so a pandas Series with a shuffled index is fine.
        preds: Array-like of predicted integer class ids, same length and
            order as ``target_test``.
        class_names: Optional sequence of class names, where position ``i``
            names class id ``i``. Defaults to ``label_encoder.classes_``.

    Returns:
        DataFrame indexed by class name with columns ``accuracy`` (percentage
        in [0, 100], or -1 for a class with no true samples) and ``class``.

    Raises:
        ValueError: If a true label is negative, or is not smaller than the
            number of class names.
    """
    names = list(label_encoder.classes_ if class_names is None else class_names)
    n_classes = len(names)

    y_true = np.asarray(target_test, dtype=int).ravel()
    y_pred = np.asarray(preds).ravel()
    hits = y_true == y_pred

    # Per-class counts of all samples and of correctly predicted samples.
    class_total = np.bincount(y_true, minlength=n_classes)
    class_correct = np.bincount(y_true[hits], minlength=n_classes)

    # np.maximum avoids a division by zero for classes that never occur;
    # those classes are reported as -1 by the np.where below.
    accuracy = np.where(
        class_total > 0,
        100 * class_correct / np.maximum(class_total, 1),
        -1.0,
    )

    table = pd.DataFrame({'accuracy': accuracy, 'class': names})
    table.index = table['class']

    return table

In [None]:
def keras_model(optimizer='rmsprop', init='glorot_uniform'):
    """Build and compile the fully connected genre classifier.

    The network stacks five blocks of Dense(relu) -> Dropout(0.4) ->
    BatchNormalization with 1024, 512, 256, 128 and 64 units, followed by a
    10-way softmax layer. The input width is read from the global
    ``features`` frame at call time.

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        init: Kernel initializer used by every hidden Dense layer.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    hidden_units = (1024, 512, 256, 128, 64)

    layers = []
    for depth, units in enumerate(hidden_units):
        dense_options = {'activation': 'relu', 'kernel_initializer': init}
        if depth == 0:
            # Only the first layer declares the input shape.
            dense_options['input_shape'] = (features.shape[1],)
        layers.append(k.layers.Dense(units, **dense_options))
        layers.append(k.layers.Dropout(0.4))
        layers.append(k.layers.BatchNormalization())
    layers.append(k.layers.Dense(10, activation='softmax'))

    model = k.models.Sequential(layers)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [None]:
# Train the classifier on the scaled training features and plot every metric
# recorded in the training history.
classifier_settings = dict(
    build_fn=keras_model,
    verbose=0,
    optimizer='rmsprop',
    epochs=100,
    batch_size=128,
    init='glorot_uniform',
)
model = KerasClassifier(**classifier_settings)
history = model.fit(features, target)

history_table = pd.DataFrame(history.history)
history_table.plot(figsize=(12, 6))
plt.xlabel("epochs")
plt.ylabel("loss")
plt.show()

In [None]:
# Overall accuracy on the held-out test split.
preds = model.predict(features_test)
accuracy = accuracy_score(y_true=target_test, y_pred=preds)
print('accuracy score = {:.4f}'.format(accuracy))

In [None]:
# Per-class accuracy, best class first, drawn as a ranked bar chart.
class_accuracy = accuracy_for_each_class(target_test, preds)
ranked_accuracy = class_accuracy.sort_values(by='accuracy', ascending=False)['accuracy']
ranking(
    ranked_accuracy,
    title='Accuracy for each class',
    xlabel='accuracy',
    ylabel='class',
)


In [None]:
def pcen_make_data(path, part, n_mels):
    """Compute the PCEN mel spectrogram of one tenth of an audio file.

    The file is loaded, cut into ten equal slices by sample count, and slice
    number ``part`` is trimmed with ``librosa.effects.trim`` before the mel
    spectrogram and PCEN are computed.

    Args:
        path: Path of the audio file to load.
        part: Zero-based index of the tenth to use; anything ``int()`` accepts.
        n_mels: Number of mel bands.

    Returns:
        The array returned by ``librosa.core.pcen`` for the selected slice.
    """
    part = int(part)
    amplitudes, sample_rate = librosa.load(path)

    # Boundaries of the requested tenth, in samples.
    n_samples = len(amplitudes)
    start = int(n_samples * (part) / 10)
    stop = int(n_samples * (part + 1) / 10)
    segment = amplitudes[start:stop]

    # trim returns (trimmed_signal, index); only the signal is needed.
    trimmed = librosa.effects.trim(segment)[0]

    mel_settings = dict(
        sr=sample_rate,
        n_mels=n_mels,
        hop_length=512,
        n_fft=2048,
        fmin=20,
        fmax=sample_rate // 2,
    )
    mel_spec = librosa.feature.melspectrogram(y=trimmed, **mel_settings)

    pcen_settings = dict(
        sr=sample_rate,
        hop_length=512,
        gain=0.6,
        bias=0.1,
        power=0.2,
        time_constant=0.4,
        eps=1e-9,
    )
    return librosa.core.pcen(mel_spec, **pcen_settings)

In [None]:
# Preview the 64-band PCEN of slice 3 of one example track.
example_pcen = pcen_make_data('../input/gtzan-dataset-music-genre-classification/Data/genres_original/blues/blues.00000.wav', 3, 64)
fig = plt.figure(figsize=(20, 3))
fig.suptitle("PCEN")
plt.imshow(example_pcen)

In [None]:
n_mels = 64
# Work on a copy: plain assignment only aliases `data`, so the inserts below
# would also modify `data` and make this cell fail on a re-run with
# "cannot insert pcen_0, already exists".
data_with_pcen = data.copy()
for n in range(n_mels):
    # Float placeholder, because the columns are later filled with float means.
    data_with_pcen.insert(1 + n, "pcen_" + str(n), 0.0)

In [None]:
# Fill the pcen_* columns with the per-band time average of each segment's
# PCEN. The PCEN is computed once per row; the previous version reloaded the
# audio and recomputed it n_mels times per row to read one band at a time.
genres_dir = '../input/gtzan-dataset-music-genre-classification/Data/genres_original/'
# jazz.00054.wav is replaced by jazz.00053.wav, as in the original code
# (presumably because the file cannot be read -- TODO confirm).
skipped_track = genres_dir + 'jazz/jazz.00054.wav'
substitute_track = genres_dir + 'jazz/jazz.00053.wav'

pcen_columns = ["pcen_" + str(n) for n in range(n_mels)]
pcen_means = np.zeros((len(data_with_pcen), n_mels))

row_names = zip(data_with_pcen['filename'], data_with_pcen['filename_full'])
for i, (filename, filename_full) in enumerate(tqdm(row_names, total=len(data_with_pcen))):
    # The genre directory is the first dot-separated part of the track name.
    path = genres_dir + filename_full.split('.')[0] + '/' + filename_full
    if path == skipped_track:
        path = substitute_track
    # The second-to-last dot-separated part of 'filename' is the slice index.
    part = filename.split('.')[-2]
    pcen_means[i] = np.mean(pcen_make_data(path, part, n_mels), axis=1)

data_with_pcen[pcen_columns] = pcen_means

In [None]:
# Save the table with the PCEN columns to CSV, without the row index.
data_with_pcen.to_csv(path_or_buf="./data_with_pcen.csv", index=False)

In [None]:
# Same split as before, now on the table that also holds the PCEN columns:
# inputs are every column except the identifiers, the length and the targets.
non_feature_columns = ['filename', 'filename_full', 'length', 'label', 'label_id']
features_full = data_with_pcen.drop(columns=non_feature_columns)
target_full = data_with_pcen['label_id'].astype('int')
features, features_test, target, target_test = train_test_split(
    features_full,
    target_full,
    test_size=0.25,
    random_state=SEED,
    shuffle=True,
)

In [None]:
# Fit the scaler on the training split only and reuse its statistics for the
# test split. The original row index is kept so that `features` /
# `features_test` stay aligned with `target` / `target_test` by label, not
# just by position.
scaler = StandardScaler()
features = pd.DataFrame(
    scaler.fit_transform(features), columns=features.columns, index=features.index
)
features_test = pd.DataFrame(
    scaler.transform(features_test), columns=features_test.columns, index=features_test.index
)

In [None]:
# Retrain the same architecture on the PCEN-augmented features and plot every
# metric recorded in the training history.
classifier_settings = dict(
    build_fn=keras_model,
    verbose=0,
    optimizer='rmsprop',
    epochs=100,
    batch_size=128,
    init='glorot_uniform',
)
model = KerasClassifier(**classifier_settings)
history = model.fit(features, target)

history_table = pd.DataFrame(history.history)
history_table.plot(figsize=(12, 6))
plt.xlabel("epochs")
plt.ylabel("loss")
plt.show()

In [None]:
# Overall accuracy of the PCEN-augmented model on the held-out test split.
preds = model.predict(features_test)
accuracy = accuracy_score(y_true=target_test, y_pred=preds)
print('accuracy score = {:.4f}'.format(accuracy))

In [None]:
# Per-class accuracy of the PCEN-augmented model, best class first.
class_accuracy = accuracy_for_each_class(target_test, preds)
ranked_accuracy = class_accuracy.sort_values(by='accuracy', ascending=False)['accuracy']
ranking(
    ranked_accuracy,
    title='Accuracy for each class',
    xlabel='accuracy',
    ylabel='class',
)