## mel spec demo

In [27]:
# Miscellaneous
import os
import glob
import time
from natsort import natsorted

# Scientific
import numpy as np
import scipy
import librosa
# from skimage.transform import resize
from beatbrain.metrics import ncc
from beatbrain import utils

# Visualization
import IPython.display as ipd
import matplotlib.pyplot as plt
import seaborn as sns
from beatbrain.display import show_spec

In [8]:
def show_audio(audio, **kwargs):
    ipd.display(ipd.Audio(audio, **kwargs))

In [9]:
AUDIO_PATH = "home/pengfei/yunyi/beatbrain/data/fma/audio/test/006/006329.mp3"

In [10]:
#AUDIO_PATH = "../data/fma/audio/000/000005.mp3"
START = 0
DURATION = 5
SAMPLE_RATE = 32768
N_FFT = 4096
HOP_LENGTH = 256
N_MELS = 512
CHUNK_SIZE = 640
RESAMPLE_TYPE = 'kaiser_fast'

In [None]:
audio, sr = librosa.load(AUDIO_PATH, sr=SAMPLE_RATE,
                         offset=START, duration=DURATION,
                         res_type=RESAMPLE_TYPE)
show_audio(audio, rate=sr)

In [None]:
spec = np.abs(librosa.stft(audio, n_fft=N_FFT, hop_length=HOP_LENGTH))
show_spec(spec, scale_fn=librosa.amplitude_to_db, title="STFT Spectrogram")

## Data processing 
### get genre labels from csv

In [28]:
from pathlib import Path
import pandas as pd
#home/pengfei/yunyi/beatbrain/data/fma/numpy
#metadata = Path("/home/cds_data/fma/metadata")
metadata = Path("/home/pengfei/yunyi/beatbrain/data/fma/metadata")
tracks = pd.read_csv(metadata.joinpath('tracks.csv'), header=[0, 1, 2])
tracks = tracks.droplevel(2, axis=1)
tracks.columns = tracks.columns.set_levels(['track_id', *tracks.columns.levels[0][1:]], level=0)
tracks.columns = tracks.columns.set_levels(['', *tracks.columns.levels[1][1:]], level=1)
tracks.set_index("track_id", inplace=True)
tracks = tracks[tracks["set", "subset"] == "small"]  # Only include songs from fma_small dataset
tracks = tracks[pd.notnull(tracks["track", "genre_top"])] 

In [29]:
train = tracks.index[tracks['set', 'split'] == 'training']
val = tracks.index[tracks['set', 'split'] == 'validation']
test = tracks.index[tracks['set', 'split'] == 'test']
print('{} training examples, {} validation examples, {} testing examples'.format(*map(len, [train, val, test])))

6400 training examples, 800 validation examples, 800 testing examples


In [30]:
tracks=tracks.drop(['album', 'artist'], axis=1)

In [31]:
set(tracks['track', 'genre_top'])#8 classes

{'Electronic',
 'Experimental',
 'Folk',
 'Hip-Hop',
 'Instrumental',
 'International',
 'Pop',
 'Rock'}

In [32]:
### get one excerpt per category
for i in set(set(tracks['track', 'genre_top'])):
    print(tracks[tracks['track', 'genre_top']==i].head(1).index[0])

148
10
2
140
10250
1482
666
182


## Prep GMM (kmeans init)
##### one excerpt per category

In [35]:
example_ls=[666,10250,182,10,140,148,1482,2]
import numpy as np
X_mean=[]
#Y=[]
for fpath in glob.glob('/home/pengfei/yunyi/beatbrain/data/fma/numpy/*/*'):
    track_id=int(fpath[-10:-4])
    if track_id in example_ls:
        print(track_id)
        example_ls.remove(track_id)
        nps=utils.load_arrays(fpath)
        #only take the first
        f1=np.mean(nps[0],axis=1)
        f2=np.std(nps[0], axis=1)
        f3=np.median(nps[0],axis=1)
        npz=np.concatenate((f1,f2,f3), axis=None)
        X_mean.append(npz)
X_mean=np.array(X_mean)
np.save('X_example.npy', X_mean) 

10
2
666
182
140
148
10250
1482


In [37]:
X_mean.shape

(8, 1536)

In [52]:
npz=utils.load_arrays("/home/pengfei/yunyi/beatbrain/data/fma/numpy/000/000002.npz")
npz[-1].shape#(band,time)

(512, 640)

## Prep ML X,Y

In [50]:
import numpy as np
##########prep data#########################
X=[]
Y=[]
####normalize
#_min,_max=float('inf'),-float('inf')

#for fpath in glob.glob('/home/cds_data/fma/numpy/*/*'):
for fpath in glob.glob('home/pengfei/yunyi/beatbrain/data/fma/numpy/*/*'):
    #based on track_id get test/val/train
    track_id=int(fpath[-10:-4])
    ########use fma predefined split####
    #split=tracks.loc[track_id]["set"]["split"]
    genre=tracks.loc[track_id]["track"]["genre_top"]
    #load data
    nps=utils.load_arrays(fpath)
    for npz in nps:
        #mean
        f1=np.mean(npz,axis=1)
        #median
        f2=np.std(npz, axis=1)
        #std
        f3=np.median(npz,axis=1)
        #append 
        npz=np.concatenate((f1,f2,f3), axis=None)
        X.append(npz)
        Y.append(genre)
        ###########normalize code(unused)
        #_min=min(np.amin(np),_min)
        #_max=max(np.amix(np),_max)
#prep all data
X,Y=np.array(X),np.array(Y)
#####normalize(unused)
#X=(X-_min)/(_max-_min)

In [52]:
#change y to categorical int
lookupTable, Y = np.unique(Y, return_inverse=True)

In [53]:
np.save('X_all_1.npy', X) # save
np.save('Y_all_1.npy', Y)

## Load pre-trained ML X,Y

In [38]:
###########load from local if pre##############
X=np.load("X_all_1.npy")
Y=np.load("Y_all_1.npy")

In [39]:
print(X.shape,Y.shape)
print(set([X[i].shape for i in range(len(X))]))

(43323, 1536) (43323,)
{(1536,)}


#### data split

In [40]:
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(X[:10000],Y[:10000],test_size=0.3,random_state=0)
X_train, X_test, y_train, y_test = X[:10000],X[10000:12000],Y[:10000],Y[10000:12000]

In [41]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((10000, 1536), (2000, 1536), (10000,), (2000,))

## Decision Tree

In [24]:
from sklearn import tree
leaf_ls=[1,3,5,7,10,25]
for leaf_num in leaf_ls:
    print("min leaves="+str(leaf_num))
    model=tree.DecisionTreeClassifier(min_samples_leaf=leaf_num)
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test) # Predicting labels for our test set using trained
    print(classification_report(y_test, y_pred))

min leaves=1
              precision    recall  f1-score   support

           0       0.27      0.22      0.24       297
           1       0.29      0.24      0.26       401
           2       0.22      0.38      0.28       146
           3       0.25      0.16      0.20       277
           4       0.18      0.40      0.25       134
           5       0.28      0.16      0.20       312
           6       0.14      0.14      0.14       304
           7       0.26      0.42      0.32       129

    accuracy                           0.23      2000
   macro avg       0.24      0.27      0.24      2000
weighted avg       0.24      0.23      0.23      2000

min leaves=3
              precision    recall  f1-score   support

           0       0.27      0.28      0.27       297
           1       0.30      0.27      0.28       401
           2       0.20      0.33      0.25       146
           3       0.21      0.15      0.17       277
           4       0.20      0.42      0.27       13

## logistic regression

In [22]:
from sklearn.linear_model import LogisticRegression
def LR_exp(c):
    print("Regularization parameter(C)="+str(c))
    model = LogisticRegression(C=c,  multi_class='multinomial', solver='lbfgs')
    model.fit(X_train, y_train) # Training the model
    y_pred = model.predict(X_test) # Predicting labels for our test set using trained
    print(classification_report(y_test, y_pred))

In [23]:
C_param_range = [0.001,0.01,0.1,1,10,100]
for c in C_param_range:
    LR_exp(c)

Regularization parameter(C)=0.001
              precision    recall  f1-score   support

           0       0.38      0.23      0.28       297
           1       0.29      0.06      0.10       401
           2       0.23      0.43      0.30       146
           3       0.29      0.20      0.24       277
           4       0.19      0.72      0.30       134
           5       0.32      0.13      0.19       312
           6       0.19      0.24      0.21       304
           7       0.36      0.71      0.48       129

    accuracy                           0.26      2000
   macro avg       0.28      0.34      0.26      2000
weighted avg       0.29      0.26      0.23      2000

Regularization parameter(C)=0.01
              precision    recall  f1-score   support

           0       0.38      0.29      0.33       297
           1       0.28      0.13      0.18       401
           2       0.30      0.45      0.36       146
           3       0.32      0.38      0.35       277
           

## SVM

In [20]:
from sklearn.svm import SVC
def SVM_exp(c):
    print("Regularization parameter(C)="+str(c))
    model = SVC(C=c, gamma='auto', kernel='rbf')
    model.fit(X_train, y_train)# Training SVM
    y_pred = model.predict(X_test) # Predicting labels for our test set using trained
    print(classification_report(y_test, y_pred))

In [8]:
C_param_range = [0.001,0.01,0.1,1,10,100]
for c in C_param_range:
    SVM_exp(c)

Regularization parameter(C)=0.001
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       328
           1       0.00      0.00      0.00       380
           2       0.00      0.00      0.00       431
           3       0.00      0.00      0.00       316
           4       0.15      1.00      0.26       456
           5       0.00      0.00      0.00       340
           6       0.00      0.00      0.00       426
           7       0.00      0.00      0.00       323

    accuracy                           0.15      3000
   macro avg       0.02      0.12      0.03      3000
weighted avg       0.02      0.15      0.04      3000

Regularization parameter(C)=0.01
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       328
           1       0.00      0.00      0.00       380
           2       0.00      0.00      0.00       431
           3       0.00      0.00      0.00       316
           

In [21]:
C_param_range = [150,200]
for c in C_param_range:
    SVM_exp(c)

Regularization parameter(C)=150
              precision    recall  f1-score   support

           0       0.42      0.42      0.42       297
           1       0.32      0.22      0.26       401
           2       0.40      0.48      0.44       146
           3       0.44      0.41      0.43       277
           4       0.24      0.60      0.34       134
           5       0.53      0.27      0.36       312
           6       0.20      0.20      0.20       304
           7       0.44      0.65      0.53       129

    accuracy                           0.35      2000
   macro avg       0.37      0.41      0.37      2000
weighted avg       0.37      0.35      0.35      2000

Regularization parameter(C)=200
              precision    recall  f1-score   support

           0       0.42      0.41      0.41       297
           1       0.31      0.23      0.26       401
           2       0.40      0.48      0.44       146
           3       0.45      0.41      0.43       277
           4  

## KNN

In [22]:
from sklearn.neighbors import KNeighborsClassifier
def KNN_exp(n):
    print("n_neighbors="+str(n))
    model = KNeighborsClassifier(n_neighbors=n) #
    model.fit(X_train, y_train) # Training the model
    # Evaluate the model:
    y_pred = model.predict(X_test) # Predicting labels for our test set using trained
    print(classification_report(y_test, y_pred))    

In [23]:
for n in [5,10,20,40]:
    KNN_exp(n)

n_neighbors=5
              precision    recall  f1-score   support

           0       0.31      0.26      0.29       297
           1       0.28      0.25      0.26       401
           2       0.36      0.45      0.40       146
           3       0.25      0.30      0.27       277
           4       0.20      0.31      0.24       134
           5       0.46      0.40      0.43       312
           6       0.24      0.16      0.19       304
           7       0.47      0.62      0.53       129

    accuracy                           0.31      2000
   macro avg       0.32      0.35      0.33      2000
weighted avg       0.31      0.31      0.31      2000

n_neighbors=10
              precision    recall  f1-score   support

           0       0.36      0.30      0.32       297
           1       0.26      0.21      0.24       401
           2       0.39      0.47      0.43       146
           3       0.28      0.38      0.32       277
           4       0.19      0.32      0.24      

## Guassian Naive Bayes

In [24]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train, y_train) # Training the model
y_pred = model.predict(X_test) # Predicting labels for our test set using trained
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.24      0.05      0.09       297
           1       0.36      0.04      0.07       401
           2       0.15      0.45      0.23       146
           3       0.31      0.50      0.39       277
           4       0.28      0.67      0.39       134
           5       0.31      0.06      0.10       312
           6       0.19      0.10      0.13       304
           7       0.24      0.91      0.38       129

    accuracy                           0.24      2000
   macro avg       0.26      0.35      0.22      2000
weighted avg       0.27      0.24      0.18      2000



## GMM
#### w/o kmeans mean init

In [25]:
from sklearn.mixture import GaussianMixture
model=GaussianMixture(n_components=8,)
model.fit(X_train)
y_pred=model.predict(X_test)

In [26]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       297
           1       0.25      0.02      0.04       401
           2       0.00      0.00      0.00       146
           3       0.00      0.00      0.00       277
           4       0.00      0.00      0.00       134
           5       0.00      0.00      0.00       312
           6       0.15      0.99      0.27       304
           7       0.00      0.00      0.00       129

    accuracy                           0.15      2000
   macro avg       0.05      0.13      0.04      2000
weighted avg       0.07      0.15      0.05      2000



#### bad because unsupervised learning, y_train not used,but can try init mean based on its genre (1 song from each genre)
#### w kmeans init:

In [42]:
means=np.load('X_example.npy')
from sklearn.mixture import GaussianMixture
model=GaussianMixture(n_components=8,init_params='kmeans',means_init=means)
model.fit(X_train)
y_pred=model.predict(X_test)
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.15      1.00      0.26       297
           1       0.00      0.00      0.00       401
           2       0.00      0.00      0.00       146
           3       0.00      0.00      0.00       277
           4       0.00      0.00      0.00       134
           5       0.00      0.00      0.00       312
           6       0.00      0.00      0.00       304
           7       0.00      0.00      0.00       129

    accuracy                           0.15      2000
   macro avg       0.02      0.12      0.03      2000
weighted avg       0.02      0.15      0.04      2000



## DL-data prepare

In [24]:
#####data processing######
import numpy as np
X=[]
Y=[]
####normalize
_min,_max=float('inf'),-float('inf')
for fpath in glob.glob('/home/pengfei/yunyi/beatbrain/data/fma/numpy/*/*'):
    #based on track_id get test/val/train
    track_id=int(fpath[-10:-4])
    ########use fma predefined split####
    #split=tracks.loc[track_id]["set"]["split"]
    genre=tracks.loc[track_id]["track"]["genre_top"]
    #load data
    nps=utils.load_arrays(fpath)
    for npz in nps:
        if npz.shape!=(512, 640):
            print(track_id)
        else:
            X.append(npz)
            Y.append(genre)
            ###########normalize code(unused)
            _min=min(np.amin(npz),_min)
            _max=max(np.amax(npz),_max)

98565
98567
98569


#### above is track_id whose mel spec with wrong duration

In [25]:
set([X[i].shape for i in range(len(X))])

{(512, 640)}

In [27]:
#####normalize(unused)
X=(X-_min)/(_max-_min)
#change y to categorical int
lookupTable, Y = np.unique(Y, return_inverse=True)

In [28]:
X=np.array(X)
Y=np.array(Y)

In [30]:
np.save('X_dl.npy',X) # save
np.save('Y_dl.npy',Y)
np.save('min_dl.npy',_min) # save
np.save('max_dl.npy',_max)

In [31]:
"""
X=np.load('X_dl.npy') # save
Y=np.load('Y_dl.npy')
_min=np.load('min.npy') # save
_max=np.load('max.npy')
"""

"\nX=np.load('X_dl.npy') # save\nY=np.load('Y_dl.npy')\n_min=np.load('min.npy') # save\n_max=np.load('max.npy')\n"

In [32]:
X.shape,Y.shape

((43320, 512, 640), (43320,))

### DL-GPU device setting

In [1]:
import tensorflow as tf

In [2]:
import os

In [3]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
os.environ['VISIBLE_CUDA_DEVICES'] = "0"
# tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [4]:
physical_devices

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [5]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dropout, Dense, TimeDistributed
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, LSTM
from sklearn.utils.class_weight import compute_class_weight
from tqdm import tqdm
import pickle

## Load pre-trained DL-X,DL-Y

In [6]:
import numpy as np
X=np.load('X_dl.npy') # save
Y=np.load('Y_dl.npy')

## Recurrent

In [7]:
def get_recurrent_model(input_shape):
    #shape of RNN (n,time,feat)
    model=Sequential()
    model.add(LSTM(128,return_sequences=True,input_shape=input_shape))
    model.add(LSTM(128,return_sequences=True))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(64,activation="relu")))
    model.add(TimeDistributed(Dense(32,activation="relu")))
    model.add(TimeDistributed(Dense(16,activation="relu")))
    model.add(TimeDistributed(Dense(8,activation="relu")))
    model.add(Flatten())
    model.add(Dense(8,activation="softmax"))
    #model.summary()
    model.compile(loss="categorical_crossentropy",optimizer="adam",metrics=["acc"])
    return model

In [8]:
############# for rec ############################
X=X.reshape(X.shape[0],X.shape[1],X.shape[2])

In [9]:
X.shape

(43320, 512, 640)

In [10]:
from tensorflow.keras.utils import to_categorical
Y_b = to_categorical(Y)

In [11]:
Y_b.shape

(43320, 8)

In [12]:
input_shape=(X.shape[1],X.shape[2])
model=get_recurrent_model(input_shape)
#checkpoint=ModelCheckpoint("rnn.model",monitor="val_acc",verbose=1,mode="max",
#                           save_best_only=True,save_weights_only=False,periods=1)
model.fit(X[:35000],Y_b[:35000],epochs=10,batch_size=32,shuffle=True,
          validation_data=(X[35000:],Y_b[35000:])
         # ,callbacks=[checkpoint]
         )
#model.save("rnn.model")

Train on 35000 samples, validate on 8320 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f43680d3ad0>

In [44]:
#model.save("rnn.model")
model=get_recurrent_model(input_shape)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 512, 128)          393728    
_________________________________________________________________
lstm_3 (LSTM)                (None, 512, 128)          131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512, 128)          0         
_________________________________________________________________
time_distributed_4 (TimeDist (None, 512, 64)           8256      
_________________________________________________________________
time_distributed_5 (TimeDist (None, 512, 32)           2080      
_________________________________________________________________
time_distributed_6 (TimeDist (None, 512, 16)           528       
_________________________________________________________________
time_distributed_7 (TimeDist (None, 512, 8)           

## Conv

In [46]:
def get_conv_model(input_shape):
    model=Sequential()
    #model.add(Conv2D(16,(3,3),activation="relu",strides=(1,1),padding="same"))
    model.add(Conv2D(16,(3,3),activation="relu",strides=(1,1),padding="same"))
    model.add(Conv2D(16,(3,3),activation="relu",strides=(1,1),padding="same"))
    model.add(Conv2D(16,(3,3),activation="relu",strides=(1,1),padding="same"))
    model.add(MaxPool2D(2,2))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(128,activation="relu"))
    model.add(Dense(64,activation="relu"))
    model.add(Dense(8,activation="softmax"))
    #model.summary()
    model.compile(loss="categorical_crossentropy",optimizer="adam",metrics=["acc"])
    return model

In [14]:
############for conv ##########################
X=X.reshape(X.shape[0],X.shape[1],X.shape[2],1)
input_shape=(X.shape[1],X.shape[2],1)

model=get_conv_model(input_shape)
###train and evel
#class_weight=compute_class_weight("balanced",np.unique(y),y_flat)
checkpoint=ModelCheckpoint("conv.model",monitor="val_acc",verbose=1,mode="max",
                           save_best_only=True,save_weights_only=False,periods=1)
model.fit(X[:35000],Y_b[:35000],epochs=10,batch_size=32,shuffle=True,
          validation_data=(X[35000:],Y_b[35000:]),callbacks=[checkpoint])
model.save("conv.model")

Train on 35000 samples, validate on 8320 samples
Epoch 1/10
Epoch 00001: val_acc improved from -inf to 0.41791, saving model to conv.model
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: conv.model/assets
Epoch 2/10
Epoch 00002: val_acc did not improve from 0.41791
Epoch 3/10


KeyboardInterrupt: 

In [47]:
model=get_conv_model(input_shape)
model.summary()

ValueError: This model has not yet been built. Build the model first by calling `build()` or calling `fit()` with some data, or specify an `input_shape` argument in the first layer(s) for automatic build.