<a href="https://colab.research.google.com/github/jwang44/Plucking-Style-Detection/blob/main/Exps_on_subset_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; the dataset and pickle paths used in
# the cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Preparation

In [None]:
# Install Essentia (imported in the next cell) via a shell escape.
# NOTE(review): the version is not pinned, so a re-run may pull a different
# release; consider `%pip install essentia==<known-good version>`.
!pip install essentia

In [11]:
import numpy as np
import pickle
import glob, os

from collections import Counter
from statistics import stdev

import essentia
from essentia.standard import *

## String number estimation using Dataset1
Monophonic string-number estimation using the non-chord (single-note) portion of Dataset1.


#### Extract features from single-note recordings and save the features and labels to pickle files

In [9]:
# Essentia algorithm instances, created once here and reused for every frame
# by the feature-extraction loop.
w = Windowing(type = 'hann')  # Hann window applied to each frame
spectrum = Spectrum()  # spectrum of the windowed frame
mfcc = MFCC()  # MFCC extractor with default parameters
spectral_peaks = SpectralPeaks()  # NOTE(review): not called by any live code in the visible cells

In [None]:
# Walk every recording matched by the pattern below. The "[!hords]" character
# class skips directories whose name ends in one of the letters h, o, r, d, s,
# which (per the original note) leaves out the chord portion of dataset1.
AUDIO_DIR = '/content/drive/MyDrive/IDMT-SMT-GUITAR_V2/dataset1/*[!hords]/audio/*.wav'

# One entry per audio file: the aggregated feature vector plus the labels
# decoded from the file name.
data, string_labels, pitch_labels, fret_labels = [], [], [], []

i = 0  # number of files processed; stays 0 when the pattern matches nothing
for i, audio_file in enumerate(glob.iglob(AUDIO_DIR), start=1):
    # File names look like G53-40100-1111-00001.wav; the second dash-separated
    # field ("40100") packs pitch ("40"), string ("1") and fret ("00").
    label = os.path.basename(audio_file).split('-')[1]
    pitch_label, string_label, fret_label = label[:2], label[2], label[3:]

    # Frame-level MFCCs are collected into a pool under the key 'mfcc'.
    audio = MonoLoader(filename=audio_file)()
    pool = essentia.Pool()
    frames = FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True)
    for frame in frames:
        _, mfcc_coeffs = mfcc(spectrum(w(frame)))
        pool.add('mfcc', mfcc_coeffs)

    # Summarise the frame-level features with per-file statistics.
    aggregator = PoolAggregator(defaultStats = ['min', 'max', 'median', 'mean', 'var', 'skew', 'kurt'])
    aggr_pool = aggregator(pool)

    # Flatten all aggregated descriptors into one vector; the leading empty
    # float64 array keeps the result dtype the same as incremental concatenation.
    feature = np.concatenate(
        [np.array([])] + [aggr_pool[key] for key in aggr_pool.descriptorNames()]
    )

    data.append(feature)
    string_labels.append(string_label)
    pitch_labels.append(pitch_label)
    fret_labels.append(fret_label)

    # Lightweight progress indicator.
    if i % 100 == 0:
        print(i)


In [13]:
# Sanity check: features and all three label lists must have the same length.
print(*map(len, (data, string_labels, pitch_labels, fret_labels)), sep='\n')

312
312
312
312


In [14]:
# Class balance of each label type (string, pitch, fret), one Counter per line.
print(
    *(Counter(labels) for labels in (string_labels, pitch_labels, fret_labels)),
    sep='\n',
)

Counter({'1': 52, '2': 52, '5': 52, '6': 52, '4': 52, '3': 52})
Counter({'51': 12, '50': 12, '52': 12, '64': 12, '67': 12, '56': 12, '55': 12, '61': 12, '66': 12, '60': 12, '65': 12, '59': 12, '57': 12, '62': 12, '47': 8, '49': 8, '48': 8, '46': 8, '45': 8, '53': 8, '63': 8, '54': 8, '68': 8, '69': 8, '71': 8, '70': 8, '58': 8, '42': 4, '40': 4, '41': 4, '43': 4, '44': 4, '72': 4, '74': 4, '73': 4, '75': 4, '76': 4})
Counter({'07': 24, '11': 24, '02': 24, '09': 24, '10': 24, '08': 24, '01': 24, '05': 24, '00': 24, '06': 24, '12': 24, '03': 24, '04': 24})


In [15]:
# Persist the extracted features and the string/pitch labels so that later
# sections can reload them instead of re-running the extraction loop.
for pkl_path, payload in (
    ('/content/drive/MyDrive/dataset1_pkl/MFCC.pkl', data),
    ('/content/drive/MyDrive/dataset1_pkl/STR_LABEL.pkl', string_labels),
    ('/content/drive/MyDrive/dataset1_pkl/PITCH_LABEL.pkl', pitch_labels),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(payload, file)

#### Load features and labels

In [33]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC, LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.utils import shuffle
from statistics import stdev

In [16]:
# Reload the MFCC features and string labels written by the extraction section.
# NOTE: pickle.load can execute arbitrary code; only load files you wrote yourself.
with open('/content/drive/MyDrive/dataset1_pkl/MFCC.pkl', 'rb') as file:
    data = pickle.load(file)
with open('/content/drive/MyDrive/dataset1_pkl/STR_LABEL.pkl', 'rb') as file:
    string_labels = pickle.load(file)

In [18]:
# Stack the per-file feature vectors into a single 2-D array (one row per file).
data = np.vstack(data)

# Convert the string labels from text categories to integer class ids.
le = LabelEncoder()
le.fit(string_labels)
string_labels = le.transform(string_labels)

#### Train monophonic string number classifier

In [62]:
# Hold out 20% of the samples as a test set; shuffling uses a fixed seed.
X, y = data, string_labels
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

In [None]:
# Hyper-parameter grid for the SVC step; the "svc__" prefix addresses the step
# that make_pipeline names after the estimator class.
parameters = {
    'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svc__C': [0.2, 1, 5],
    'svc__gamma': ['scale', 'auto'],
}

# Standardise the features, then classify with an SVM.
model = make_pipeline(StandardScaler(), SVC())

# 10-fold grid search on the training split; refit=True retrains the best
# configuration on the whole training split afterwards.
clf = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=10,
    refit=True,
    n_jobs=-1,
    return_train_score=True,
)
clf.fit(X_train, y_train)

In [None]:
# Full per-candidate results of the grid search (train/validation scores, timings).
clf.cv_results_

In [54]:
# Score of the refit best estimator on the held-out test split.
clf.score(X_test, y_test)

0.9365079365079365

In [52]:
# Hyper-parameter combination selected by the grid search.
clf.best_params_

{'svc__C': 5, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}

In [63]:
# Sanity check: re-estimate the accuracy of the grid-search winner (C=5,
# gamma='scale'; SVC's default kernel is 'rbf') with a plain shuffled 10-fold CV.
#
# The per-fold splits use their own names (X_fold_train / y_fold_train) so the
# training split held in X_train / y_train is never overwritten. Reusing those
# names inside the loop would leave them pointing at the last fold's subset,
# and every re-run of this cell would then start from a smaller training set.
X = X_train
y = y_train
model = make_pipeline(StandardScaler(), SVC(C=5, gamma='scale'))
kf = KFold(n_splits=10, shuffle=True, random_state=42)
train_accus = []
val_accus = []
for train_index, val_index in kf.split(X):
    X_fold_train, X_val = X[train_index], X[val_index]
    y_fold_train, y_val = y[train_index], y[val_index]
    model.fit(X_fold_train, y_fold_train)
    train_accus.append(model.score(X_fold_train, y_fold_train))
    val_accus.append(model.score(X_val, y_val))
train_accus = np.array(train_accus)
val_accus = np.array(val_accus)

# Refit on the complete training split before scoring the held-out test set, so
# the reported test accuracy does not depend on which fold happened to run last.
model.fit(X, y)

print("-------------Monophonic String Number Classification---------------")
print("train accu: ", train_accus.mean(), "Std dev: ", stdev(train_accus))
print("val accu: ", val_accus.mean(), "Std dev: ", stdev(val_accus))
print("test accu: ", model.score(X_test, y_test))

-------------Monophonic String Number Classification---------------
train accu:  1.0 Std dev:  0.0
val accu:  0.9756666666666666 Std dev:  0.02871958939147313
test accu:  0.9365079365079365


## Monophonic Pitch Estimation

#### Estimate pitch on dataset1 single notes

In [67]:
# Reload the pitch labels written by the feature-extraction section.
# NOTE: pickle.load can execute arbitrary code; only load files you wrote yourself.
with open('/content/drive/MyDrive/dataset1_pkl/PITCH_LABEL.pkl', 'rb') as file:
    pitch_labels = pickle.load(file)

In [106]:
AUDIO_DIR = '/content/drive/MyDrive/IDMT-SMT-GUITAR_V2/dataset1/*[!hords]/audio/*.wav'
from essentia.standard import MultiPitchKlapuri, PitchMelodia, PitchYin

# Pitch estimation on the single-note recordings: estimate per-frame pitches,
# convert them to MIDI note numbers, and take the most frequent frame result as
# the prediction for the file.
i = 0        # number of files processed; stays 0 when the pattern matches nothing
correct = 0  # number of files whose predicted note equals the label

for i, audio_file in enumerate(glob.iglob(AUDIO_DIR), start=1):
  # Ground truth comes from the file name itself (e.g. G53-40100-1111-00001.wav
  # -> "40100" -> pitch 40). Reading it positionally from a separately stored
  # label list would silently misalign if glob returned the files in a
  # different order than when that list was built.
  pitch_label = int(os.path.basename(audio_file).split('-')[1][0:2])

  audio = MonoLoader(filename=audio_file)()
  mp = MultiPitchKlapuri()
  freqs = mp(audio) # this is a list of arrays (one per frame)

  notes = []
  for freq_array in freqs:
    # A frame without any pitch estimate carries no vote; counting it could
    # make an empty result the "most common" one and break the lookup below.
    if len(freq_array) == 0:
      continue
    # Hz -> MIDI note number (A4 = 440 Hz = note 69), rounded to the nearest semitone.
    note_array = np.round(12*np.log2(freq_array/440)+69)
    notes.append(tuple(note_array))

  # Majority vote over frames; only the first note of the winning frame is
  # compared, since the recordings are treated as monophonic.
  if notes:
    pitch_pred = max(set(notes), key=notes.count)
    if pitch_label == pitch_pred[0]:
      correct+=1

  if i%50==0:
    print(i)

if i:
  print("Pitch Estimation accuracy: ", correct/i)
else:
  # Avoid a ZeroDivisionError when the glob pattern matches no files.
  print("Pitch Estimation accuracy: undefined (no audio files matched)")


50
100
150
200
250
300
Pitch Estimation accuracy:  0.9935897435897436
