In [None]:
# Free-text banner describing the purpose of this notebook. It is only stored
# in a module-level string; none of the cells visible here read it.
SCRIPT_INTRODUCTION = """
   ==================== [_Kopie von ESP_Pipe003_evaluationJustRecognition_REFACTORED_NEWnn] ====================
  This Script was intended to build a recognition evaluation, for the onsets, using SVM models. Copy this notebooks as a reference
  to make it for other models. 
  ==================== ==================[ INFO ] ==============================
"""

import librosa
from google.colab import drive
import os,sys,re,pandas as pd,numpy as np
import glob
import logging
from sympy import Interval
import warnings
# torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch
import torch.optim as optim

In [None]:
# Mount Google Drive; every path below is resolved relative to the mount point.
ROOT_DIR = "/content/drive"
drive.mount(ROOT_DIR)

# Dataset root and the experiment folder the model files are loaded from.
MUSIC_DIR = os.path.join(ROOT_DIR,'My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums')
EXP_PIPE_DATA = os.path.join(MUSIC_DIR,'pipe005_multiplemodelsdata_corrected_over60')

# Audio and annotation sub-folders of the dataset root.
AUDIO_DIR = os.path.join(MUSIC_DIR,'audio','drum_only')
ANNOTATIONS_DIR = os.path.join(MUSIC_DIR,'annotations','class')
ANNOTATIONS_DIR_TEST = os.path.join(ANNOTATIONS_DIR,'test')

# Collect every .txt file found in the test annotations folder and display the list.
annotations_test_filepaths = glob.glob(f"{ANNOTATIONS_DIR_TEST}/*.txt")
annotations_test_filepaths

Mounted at /content/drive


['/content/drive/My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums/annotations/class/test/MusicDelta_Hendrix_class.txt',
 '/content/drive/My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums/annotations/class/test/MusicDelta_SwingJazz_class.txt',
 '/content/drive/My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums/annotations/class/test/MusicDelta_FreeJazz_class.txt',
 '/content/drive/My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums/annotations/class/test/MusicDelta_Beatles_class.txt',
 '/content/drive/My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums/annotations/class/test/MusicDelta_Country1_class.txt',
 '/content/drive/My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums/annotations/class/test/MusicDelta_SpeedMetal_class.txt',
 '/content/drive/My Drive/Maestria DM y KDD/Especializacion tesis/MDBDrums/MDB Drums/annotations/class/test/MusicDelta_Punk_class.txt',
 '/content/drive/My Dri

# Change the working directory to the ./utils folder, or use sys.path.insert(0, "path_to_utils")

In [None]:
%cd "/content/drive/My Drive/Colab Notebooks/tesis_esp/"
import eval_utils
import pred_utils

/content/drive/My Drive/Colab Notebooks/tesis_esp


# Load models

In [None]:
class ModelWrapper():
  """
  Adapter that converts the 1/0 output of a binary classifier into string labels.

  Attributes:
    model: the wrapped classifier; only its ``predict`` method is used.
    target_label [str]: label emitted where the model predicts 1.
    predictions: raw output of the last ``model.predict`` call (None before the first call).
    predictions_labelized: labelled output of the last ``predict`` call (None before the first call).
  """
  def __init__(self, model,target_label):
    """
    This is just for converting the binary predictions to a string
    Params:
      model [binary_clf]: any binary classifier that will apply the predict
                          method (output must be either 1 or 0)
      target_label [str]: convert the 1 to target_label ; the zeros to "OTHER" (both strings)

    """
    self.model = model
    self.target_label = target_label
    self.predictions = None
    # Initialised here so the attribute exists before the first predict() call.
    self.predictions_labelized = None

  def prediction_to_label(self, predictions):
    """
    Map binary predictions to labels: 1 -> target_label, 0 -> "OTHER".

    Params:
      predictions [array-like]: values that must all be 0 or 1. Lists, numpy
                                arrays and tensor-like objects exposing
                                ``detach()`` are accepted.
    Return:
      np.ndarray of strings with the same shape as ``predictions``
    Raises:
      ValueError: if at least one value is neither 0 nor 1.
    """
    # Tensor-like outputs are detached and moved to host memory before numpy sees them.
    if hasattr(predictions, "detach"):
      predictions = predictions.detach().cpu().numpy()
    # Without this, a plain list would be compared as a whole ([1, 0] == 1 is False).
    predictions = np.asarray(predictions)
    is_target = predictions == 1
    is_other = predictions == 0
    labels = np.select([is_target, is_other],[self.target_label,"OTHER"], default="ERROR")
    # Count invalid entries from the masks, not from the "ERROR" string, so a
    # target_label that happens to be "ERROR" is not misreported.
    n_invalid = int(np.count_nonzero(~(is_target | is_other)))
    if n_invalid > 0:
      raise ValueError(f"[ERROR] {n_invalid} prediction(s) are neither 0 nor 1; check your predicted output")
    return labels

  def predict(self, X):
    """
    Params:
      X [np.ndarray,pd.DataFrame]: input for predicting; forwarded unchanged to ``model.predict``
    Return:
      predictions_labelized
    """
    self.predictions = self.model.predict(X)
    self.predictions_labelized = self.prediction_to_label(self.predictions)
    return self.predictions_labelized


In [None]:
class Net(nn.Module):
    """
    Convolutional binary classifier: two conv + max-pool stages followed by
    three fully connected layers with a sigmoid on the output.

    Besides the layers, the module carries what ``predict`` needs at inference
    time: the input normalizer, the decision threshold and the label names.
    """
    def __init__(self,nchannels,nclasses, unique_labels, meanstd_normalize, prediction_threshold):
        """
        nchannels: number of input channels of the first convolution.
        nclasses: number of output units of the last linear layer.
        unique_labels: list of labels to be mapped; example: ['OTHER','KD']
        meanstd_normalize: mapping of keyword arguments unpacked into
                      torchvision.transforms.Normalize (used for z-scaling the
                      data); ``inplace`` is set here, so it must not be in the mapping.
        prediction_threshold: float in the (0,1) interval; outputs >= threshold
                      become 1.0, the rest 0.0. Pass None to make ``predict``
                      return the raw sigmoid outputs.
        """
        super().__init__()
        # Stored as a 1-element tensor so it broadcasts against the network output.
        self.prediction_threshold = (
            None if prediction_threshold is None else torch.tensor([prediction_threshold])
        )
        # Normalizer applied to the inputs inside predict().
        self.meanstd_normalizer = torchvision.transforms.Normalize(**meanstd_normalize, inplace=False)
        # Only used to turn predicted indices into readable labels.
        self.unique_labels = unique_labels
        # other attributes
        self.nchannels = nchannels
        self.nclasses = nclasses
        self.conv1 = nn.Conv2d(self.nchannels, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 2000 is the flattened size after both conv/pool stages for the
        # 513x17 inputs documented in predict() (16 channels * 125 * 1).
        self.fc1 = nn.Linear(2000, 120)
        self.dropout1 = nn.Dropout(p=0.5, inplace=False)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, self.nclasses)

    def forward(self, x):
        """
        Forward pass.
        params:
          x -> tensor laid out as (batch, nchannels, height, width)
        return: sigmoid outputs, one row per sample and ``nclasses`` columns
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten all dimensions except batch
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return torch.sigmoid(x)

    def map_idx2labels(self,mapped_labels,unique_labels):
        """
        Translate class indices into label names.
        params:
          mapped_labels -> iterable of indices; each entry may be a Python or
                           numpy number or a single-element tensor (any dtype)
          unique_labels -> sequence of label names indexed by class index
        return: list with one label name per entry of ``mapped_labels``
        """
        # int() is required: a float tensor cannot be used directly as an index.
        return [unique_labels[int(mapped_lab)] for mapped_lab in mapped_labels]

    def predict(self,x_batch, predictions_to_labels = False):
        """
        Final prediction function
        params:
          x_batch -> array-like with three dimensions (data_len x 513 x 17 per
                     the original notes); a single channel axis is added here
          predictions_to_labels -> if True, return label names instead of numbers
        return:
          - predictions_to_labels False: tensor of 0./1. values (or raw sigmoid
            outputs when no threshold is set)
          - predictions_to_labels True: list of label names. A single-output
            network maps its thresholded 0/1 value; a multi-output network maps
            the index of its largest output.
        raises:
          ValueError: labels requested from a single-output network that has no
                      prediction threshold.
        """
        # Put the data on the same device as the weights.
        device = next(self.parameters()).device
        data_tensor = torch.as_tensor(x_batch, dtype=torch.float32, device=device)
        N,H,W = data_tensor.shape
        data_tensor = data_tensor.reshape(N,1,H,W)
        # normalize data
        data_tensor = self.meanstd_normalizer(data_tensor)
        # Inference must be deterministic: switch dropout off and skip autograd,
        # then restore whatever mode the module was in.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                probabilities = self(data_tensor)
        finally:
            self.train(was_training)
        predictions = probabilities
        if self.prediction_threshold is not None:
            threshold = self.prediction_threshold.to(probabilities.device)
            predictions = (probabilities >= threshold).float()
        if predictions_to_labels:
            if probabilities.shape[1] > 1:
                class_indices = probabilities.argmax(dim=1)
            elif self.prediction_threshold is not None:
                class_indices = predictions.reshape(-1).long()
            else:
                raise ValueError("prediction_threshold is required to map a single-output prediction to labels")
            predictions = self.map_idx2labels(class_indices,self.unique_labels)
        return predictions



# You can find models in ./models, remember to change the paths. 

In [None]:
# Which family of saved models to evaluate: "SVC" or "NN".
model_type = "NN"

# Both branches load one model per drum type, in the order TT, CY, KD, SD, HH, OT,
# from EXP_PIPE_DATA; only the file-name pattern differs.
if model_type == "SVC":
  print("[INFO ]Loading SVC models")
  # Per the original note: each SVC model has its own parameters, reachable via
  # e.g. model_hh.best_estimator_
  model_tt, model_cy, model_kd, model_sd, model_hh, model_ot = (
      pred_utils.load_model(os.path.join(EXP_PIPE_DATA, f'v2_svc_model_{tag}.joblib'))
      for tag in ("TT", "CY", "KD", "SD", "HH", "OT")
  )
elif model_type == "NN":
  print("[INFO ]Loading NN  models")
  model_tt, model_cy, model_kd, model_sd, model_hh, model_ot = (
      pred_utils.load_model(os.path.join(EXP_PIPE_DATA, f'BinaryClassBCEnn_model_{tag}.pth'))
      for tag in ("TT", "CY", "KD", "SD", "HH", "OT")
  )
else:
  raise ValueError("Param: model_type must be 'SVC' or 'NN' for now, please choose a valid model")

# Raw (unwrapped) models, kept in the same order they were loaded.
models_list = [model_tt,model_cy,model_kd,model_sd,model_hh,model_ot]

[INFO ]Loading NN  models


## Convert model outputs from 1 / 0 to target label / OTHER

In [None]:

# Pair every loaded model with the label it should emit for a positive (1)
# prediction, and wrap it so predict() returns strings instead of 1/0.
models_list = [
    ModelWrapper(binary_model, drum_label)
    for binary_model, drum_label in (
        (model_tt, "TT"),
        (model_cy, "CY"),
        (model_kd, "KD"),
        (model_sd, "SD"),
        (model_hh, "HH"),
        (model_ot, "OT"),
    )
]


In [None]:
# Evaluation recipe followed by the cells below:
#   step 1: load the annotations dataset you want to scan
#   step 2: load the corresponding .wav
#   step 3: for each sound of the annotations, add the label (broken into sub-steps)
#   step 4: use the eval-metrics class
config_signal_params = dict(
    hop_size=256,
    n_fft=1024,
    desired_signal_size_for_padding=4096,
    seconds_window=0.05,
)
# Expose each setting under its own name as well.
hop_size = config_signal_params["hop_size"]
n_fft = config_signal_params["n_fft"]
desired_signal_size_for_padding = config_signal_params["desired_signal_size_for_padding"]
seconds_window = config_signal_params["seconds_window"]

In [None]:
# step1

def pipeline_generate_datasets(annotation_path: str):
  """
  --------------------------------------------------------------------------------
  Build, for one annotated song, the table of annotated onsets and the table
  of model predictions made at those same onsets. This is the preparation
  step before computing precision and recall.
  --------------------------------------------------------------------------------
  args:
    annotation_path -> path of the annotation .txt file; the matching audio
                       file is looked up inside AUDIO_DIR
  return:
    df_annotation -> annotations loaded into a dataframe, with the added
                     columns "onset_sample" and "annotation_path"
    df_predicted -> predictions from the models, with the columns
                    "onset_time", "predicted_drumtype" and "annotation_path"
  Relies on the notebook-level names AUDIO_DIR, config_signal_params and
  models_list; to evaluate other models, change config_signal_params and
  models_list and leave the rest as is.
  """
  # Step 1: annotations for this song.
  df_annotation = eval_utils.load_labels(annotation_path, set2df=True)

  # Step 2: locate the audio that belongs to the annotation and read its sampling rate.
  wav_path = eval_utils.search_correspondingpath_given_annotation(
      annotation_path_txt=annotation_path,
      audio_directory=AUDIO_DIR,
  )
  _, sampling_rate = librosa.load(wav_path)

  # Step 3: express the annotated onset times as sample positions; they are
  # handed to the detector as preset onsets (an onset detection method could
  # be used here instead).
  onset_seconds = df_annotation["onset_time"].astype("float")
  df_annotation["onset_sample"] = (onset_seconds * sampling_rate).astype("int")

  # Step 4: run the drum-type models on those onsets and tabulate the result.
  detector = pred_utils.DrumTypesDetector(
      config_signal_params,
      sklearn_models_list=models_list,
      flatten_data=False,
  )
  detector(wav_path, presettled_onsets=df_annotation["onset_sample"].tolist())
  df_predicted = pd.DataFrame(
      detector.list_formatted_onsets,
      columns=["onset_time", "predicted_drumtype"],
  )

  # Tag both tables with the annotation file they come from.
  df_predicted["annotation_path"] = annotation_path
  df_annotation["annotation_path"] = annotation_path
  return df_annotation, df_predicted
# now compute the metrics




# Run prediction  pipeline for all songs 

In [None]:
# build a huge dataset with this pipeline
# Run the per-song pipeline on every test annotation file and keep the
# resulting dataframes so they can be stacked afterwards.
total_files2process = len(annotations_test_filepaths)
annotation_list, predicted_list = [], []
counter = 0
for annotation_path in annotations_test_filepaths:
  # Fraction of files finished before this one starts.
  print("Proportion of processed:", round(counter / total_files2process, 2))
  df_annotation, df_predicted = pipeline_generate_datasets(annotation_path)
  annotation_list += [df_annotation]
  predicted_list += [df_predicted]
  counter = counter + 1

# Stack the per-song tables into one annotations table and one predictions table.
df_annotation_all, df_predicted_all = pd.concat(annotation_list), pd.concat(predicted_list)

Proportion of processed: 0.0
Proportion of processed: 0.09
Proportion of processed: 0.18
Proportion of processed: 0.27
Proportion of processed: 0.36
Proportion of processed: 0.45
Proportion of processed: 0.55
Proportion of processed: 0.64
Proportion of processed: 0.73
Proportion of processed: 0.82
Proportion of processed: 0.91


In [None]:
# Preview the first rows of the stacked annotations table.
df_annotation_all.head()

Unnamed: 0,onset_time,drum_type,onset_sample,annotation_path
0,0.02,KD,441,/content/drive/My Drive/Maestria DM y KDD/Espe...
1,0.020408,HH,449,/content/drive/My Drive/Maestria DM y KDD/Espe...
2,0.29,KD,6394,/content/drive/My Drive/Maestria DM y KDD/Espe...
3,0.295918,HH,6524,/content/drive/My Drive/Maestria DM y KDD/Espe...
4,0.57,SD,12568,/content/drive/My Drive/Maestria DM y KDD/Espe...


In [None]:
# Preview the first rows of the stacked predictions table.
df_predicted_all.head()

Unnamed: 0,onset_time,predicted_drumtype,annotation_path
0,0.02,[OTHER],/content/drive/My Drive/Maestria DM y KDD/Espe...
1,0.02,[CY],/content/drive/My Drive/Maestria DM y KDD/Espe...
2,0.02,[KD],/content/drive/My Drive/Maestria DM y KDD/Espe...
3,0.02,[OTHER],/content/drive/My Drive/Maestria DM y KDD/Espe...
4,0.02,[HH],/content/drive/My Drive/Maestria DM y KDD/Espe...


In [None]:
def unlist_labels(x):
  """
  Return the label held by a one-element container such as ['KD'].

  A value that is already a plain string is returned unchanged. This keeps
  the cell safe to re-run: taking x[0] of an already-unwrapped label would
  otherwise cut it down to its first character ('OTHER' -> 'O').
  """
  if isinstance(x, str):
    return x
  return x[0]
# Replace every predicted_drumtype entry with the value unlist_labels returns for it, then preview.
df_predicted_all["predicted_drumtype"] = df_predicted_all["predicted_drumtype"].map(unlist_labels)
df_predicted_all.head()

Unnamed: 0,onset_time,predicted_drumtype,annotation_path
0,0.02,OTHER,/content/drive/My Drive/Maestria DM y KDD/Espe...
1,0.02,CY,/content/drive/My Drive/Maestria DM y KDD/Espe...
2,0.02,KD,/content/drive/My Drive/Maestria DM y KDD/Espe...
3,0.02,OTHER,/content/drive/My Drive/Maestria DM y KDD/Espe...
4,0.02,HH,/content/drive/My Drive/Maestria DM y KDD/Espe...


In [None]:
# List the distinct labels present in the predictions.
df_predicted_all["predicted_drumtype"].unique()

array(['OTHER', 'CY', 'KD', 'HH', 'OT', 'SD', 'TT'], dtype=object)

# Once you have predictions and annotations, compute the required metrics for each drum_type

In [None]:
# Matching tolerance, in seconds, handed to the metrics as a sample count.
# The criterion comes from onset detection, but several per-drum models are
# being scored here: if the HH is annotated at 0.02 s and the SD at 0.03 s, the
# time window lets the HH be recognised at 0.03 s and the SD at 0.02 s, and both
# are still correct because they belong to the same STFT / sound. There is
# therefore little need to tune this value (0.01 is the minimum in recognition).
seconds_criterion = 0.03
# Same for all of our songs (see what librosa.load returns).
sampling_rate = 44100 // 2
samples_criterion = int(seconds_criterion * sampling_rate)

# The predictions table lacks onset_sample, and its label column is copied to
# the name "drum_type", matching the annotations table.
df_predicted_all["onset_sample"] = (df_predicted_all["onset_time"] * sampling_rate).astype("int")
df_predicted_all["drum_type"] = df_predicted_all["predicted_drumtype"]

# All drum types to score.
drum_types_list = ["KD","SD","HH","CY","OT","TT"]

# One metrics dict per drum type, tagged with the drum type it belongs to.
metrics_dict_list = []
for drum_type in drum_types_list:
  compute_metrics = eval_utils.ComputeMetrics(
      true_labels=df_annotation_all,
      predicted_labels=df_predicted_all,
      samples_criterion=samples_criterion,
      filter_drumtype=drum_type,
  )
  metrics_dict = compute_metrics()
  metrics_dict["drum_type"] = drum_type
  metrics_dict_list += [metrics_dict]

In [None]:
# Display the per-drum-type metrics collected above.
metrics_dict_list

[{'recall': 0.989749430523918,
  'precision': 0.9340136054421768,
  'f1_score': 0.9610741206738507,
  'drum_type': 'KD'},
 {'recall': 0.9496296296296296,
  'precision': 0.9091891891891892,
  'f1_score': 0.9289694985352404,
  'drum_type': 'SD'},
 {'recall': 0.8445945945945946,
  'precision': 0.7437266417512013,
  'f1_score': 0.7909577572912337,
  'drum_type': 'HH'},
 {'recall': 0.8816326530612245,
  'precision': 0.5798082346305696,
  'f1_score': 0.6995532648213374,
  'drum_type': 'CY'},
 {'recall': 0.0625,
  'precision': 0.041666666666666664,
  'f1_score': 0.05,
  'drum_type': 'OT'},
 {'recall': 0.13333333333333333,
  'precision': 0.1724137931034483,
  'f1_score': 0.15037593984962408,
  'drum_type': 'TT'}]