# Initialization

In [0]:
from google.colab import drive

# System
import os
from pathlib import Path
import random
from shutil import copyfile
from datetime import datetime
import pandas as pd
import numpy as np

# Sound
import librosa

# AI
import fastai
from fastai.vision import *

# Visual
import matplotlib.pyplot as plt
from IPython.display import Audio,display
import librosa.display

#from itertools import islice
#from scipy.fftpack import fft
#import mir_eval
#from scipy.signal import get_window
#import json

In [0]:
# Mount Google Drive at /content/drive; ROOT (the project folder) lives under this mount point.
drive.mount('/content/drive')

# Global variables and functions

In [0]:
# Project folder on the mounted Google Drive; the data paths in this notebook are built from it.
ROOT = Path('/content/drive/My Drive/Colab Notebooks/Chord Recognizer')

# One list of integers per chord name. gen_some_chords adds each list
# element-wise to the last len(list) entries of [40,45,50,55,59,64] to obtain
# the pitches it synthesizes (presumably fret numbers on top of the MIDI notes
# of standard guitar tuning - TODO confirm).
# The identifier is misspelled but is referenced by other cells, so it is kept.
CHORD_SPECIFICTS = { 
  'Am': [0,2,2,1,0],
  'G' : [3,2,0,0,3,3],
  'A' : [0,2,2,2,0],
  'C' : [3,2,0,1,0],
  'D' : [0,2,3,2],
  'Dm': [0,2,3,1],
  'E' : [0,2,2,1,0,0],
  'F' : [1,3,3,2,1,1],
  'H7': [0,2,1,2,0,2],
  'Em': [0,2,2,0,0,0]
}

# Audio / spectrogram settings shared by the loaders and signal_to_log_mel_spec.
SR = 16000        # sample rate passed to librosa.load and used as the default `sr`
N_FFT = 1000      # n_fft passed to librosa.feature.melspectrogram
HOP_LENGHT = 250  # hop_length passed to melspectrogram (name misspelled, kept because it is referenced)
N_MELS = 64       # n_mels passed to melspectrogram
F_MIN = 20        # fmin passed to melspectrogram
F_MAX = 2000      # fmax passed to melspectrogram
IMG_SIZE=64       # `size` handed to ImageDataBunch.from_folder in the training section

In [0]:
# Plotting
def plot_signal(signal, sr, figsize=(12,3), title = None):
  """Draw the waveform of `signal` in a new matplotlib figure and show it.

  Args:
    signal: 1-D audio samples accepted by librosa's waveform plotter.
    sr: sample rate of `signal`; used to scale the time axis.
    figsize: (width, height) passed to `plt.figure`.
    title: optional figure title; anything matplotlib can render (str, int, ...).
      When None, no title is set.
  """
  # `waveplot` was deprecated in librosa 0.9 and removed in 0.10 in favour of
  # `waveshow`. Keep using `waveplot` where it exists so the output is unchanged
  # on the librosa version the notebook was written for, and fall back otherwise.
  draw_wave = getattr(librosa.display, 'waveplot', None)
  if draw_wave is None:
    draw_wave = librosa.display.waveshow

  plt.figure(figsize=figsize)
  draw_wave(signal, sr=sr)
  plt.xlabel('Time (s)')
  plt.ylabel('Magnitude')
  # `is not None` (rather than truthiness) so that a title of 0 is still shown.
  if title is not None:
    plt.title(title)
  plt.show()

# Clip processing
def get_chord_instances(clip, count, sr, bpm):
  """Cut `clip` into `count` consecutive, beat-long slices.

  Each slice is `sr * 60 // bpm` samples long and slice k starts right where
  slice k-1 ended. Slices that reach past the end of `clip` come back shorter
  (or empty), exactly as ordinary slicing does.

  Returns:
    A list with `count` slices of `clip`.
  """
  samples_per_beat = sr * 60 // bpm
  instances = []
  for k in range(count):
    begin = k * samples_per_beat
    instances.append(clip[begin:begin + samples_per_beat])
  return instances

# Spectrograms
def signal_to_log_mel_spec(signal, sr = SR):
    """Convert an audio signal into a log-scaled mel spectrogram.

    The mel spectrogram is computed with the notebook-wide settings N_FFT,
    HOP_LENGHT, N_MELS, F_MIN and F_MAX, using power=1.0 (magnitude rather than
    power), and is then converted to decibels relative to its own maximum
    (`ref=np.max`).

    Args:
      signal: 1-D audio samples.
      sr: sample rate of `signal`.

    Returns:
      The array returned by `librosa.amplitude_to_db`.
    """
    # The audio must be passed as the keyword `y`: librosa >= 0.10 made the
    # arguments of melspectrogram keyword-only, and `y=` also works on the
    # older releases.
    mel_spec = librosa.feature.melspectrogram(y=signal,
                                              sr=sr,
                                              n_fft=N_FFT,
                                              hop_length=HOP_LENGHT,
                                              n_mels=N_MELS,
                                              power=1.0,
                                              fmin=F_MIN,
                                              fmax=F_MAX)

    return librosa.amplitude_to_db(mel_spec, ref=np.max)
    
def save_mel_spec(mel_spec, dst_path, fname):
  """Store `mel_spec` as the image `<fname>.png` inside the directory `dst_path`.

  The array is handed to `plt.imsave` unchanged.
  """
  target = dst_path.joinpath(fname + '.png')
  plt.imsave(target, mel_spec)

# Clips generation
def _int_or_none(name):
  """Return int(name), or None when `name` is not an integer literal."""
  try:
    return int(name)
  except ValueError:
    return None


def who_can_play(pitches,
                 bad = {2,4,6,7,8,10,11,12,13,15,16,19,20,23,24,25,27,28,30,32,35,36},
                 data_path=ROOT/'structurized-nsynth'):
  """Find the instruments that have a sample for every pitch in `pitches`.

  Expects the layout `data_path/<instrument>/<pitch>/<id>.wav`, where
  <instrument> and <id> are integers (the ids are presumably NSynth
  velocities, as the callers name them - TODO confirm).

  Args:
    pitches: iterable of pitch numbers; each must exist as a sub-folder name.
    bad: instrument numbers to ignore. Only read, never modified.
    data_path: root folder (a Path) of the structured sample library.

  Returns:
    dict mapping instrument number -> {pitch: set of sample ids} for every
    instrument that is not in `bad` and covers all of `pitches`.
  """
  result = dict()

  for entry in os.listdir(data_path):
    # Skip anything that is not an instrument folder (e.g. .ipynb_checkpoints,
    # .DS_Store) instead of crashing on int().
    instrument = _int_or_none(entry)
    if instrument is None or instrument in bad:
      continue

    pitches_dict = dict()
    for p in pitches:
      pitch_dir = data_path/entry/str(p)
      if not os.path.isdir(pitch_dir):
        break
      sample_ids = set()
      for f in os.listdir(pitch_dir):
        sample_id = _int_or_none(os.path.splitext(f)[0])
        if sample_id is not None:
          sample_ids.add(sample_id)
      # A pitch folder without usable samples cannot be played either.
      if not sample_ids:
        break
      pitches_dict[p] = sample_ids

    if len(pitches_dict) == len(pitches):
      result[instrument] = pitches_dict

  return result

def gen_chord(pitches_and_velocities,
              instrument,
              seconds=1.0,
              sr = SR,
              start_pad = 1200,
              gaps=[320]*5,
              data_path=ROOT/'structurized-nsynth'):
  """Synthesize a strummed chord by overlaying single-note samples.

  Note k is loaded from `data_path/<instrument>/<pitch>/<velocity>.wav` and is
  delayed by the sum of the first k entries of `gaps`, so the notes start one
  after another. The mix is then shifted right by `start_pad` samples and cut
  to `seconds` seconds.

  Args:
    pitches_and_velocities: sequence of (pitch, velocity) pairs, in strum order.
    instrument: instrument folder name/number.
    seconds: length of the returned signal in seconds.
    sr: sample rate used for the buffer size and the final cut. The files are
      loaded at their native rate (`sr=None`), so they must already be at `sr`.
    start_pad: number of zero samples inserted before the chord.
    gaps: delays in samples between consecutive notes; only read, never
      modified. Should have one entry fewer than there are notes.
    data_path: root folder (a Path) of the structured sample library.

  Returns:
    1-D numpy array of `int(sr * seconds)` samples (fewer if the mix is shorter).

  Raises:
    ValueError: from numpy, when a note sample is not exactly `sr * 4` samples
      long (the mix buffer assumes 4-second notes).
  """
  strings = [librosa.load(data_path/'{}/{}/{}.wav'.format(instrument,p,v), sr=None)[0]
             for (p,v) in pitches_and_velocities]

  # dtype=int keeps the sum an integer even for an empty `gaps` (np.sum([]) is
  # 0.0, which np.zeros would reject as a shape).
  total_gap = int(np.sum(gaps, dtype=int))
  result = np.zeros(sr * 4 + total_gap)

  for i, string in enumerate(strings):
    lead = int(np.sum(gaps[:i], dtype=int))
    # Leading zeros delay the note; trailing zeros bring it to the buffer length.
    c = np.pad(string, (lead, total_gap - lead), 'constant', constant_values=(0,0))
    result = np.add(result, c)

  result = np.pad(result, (start_pad, 0), 'constant', constant_values=(0,0))
  return result[:int(sr*seconds)]


def gen_some_chords(chord_class,
                    count,
                    seconds = 1.0,
                    sr = SR,
                    flip=False,
                    pad_max  = 0.1875,
                    gaps_min = 0.0125,
                    gaps_max = 0.0375,
                    dest_root=None,
                    n_fft = None,
                    hop_length = None,
                    n_mels = None,
                    save=False):
  """Generate `count` randomized synthetic instances of one chord.

  The chord's pitches are the CHORD_SPECIFICTS entry added to the last entries
  of [40,45,50,55,59,64]. Instruments able to play all pitches are used in
  ascending order, cycling when `count` exceeds their number. Every instance
  gets a random sample per pitch, random gaps between notes and a random start
  padding. Instances are either saved as log-mel spectrogram images or plotted
  and played inline.

  Args:
    chord_class: key of CHORD_SPECIFICTS.
    count: number of instances to generate.
    seconds: length of each instance in seconds.
    sr: sample rate used for generation, spectrograms and playback.
    flip: reverse the note order.
    pad_max: upper bound of the random start padding, in seconds.
    gaps_min, gaps_max: bounds of the random delay between notes, in seconds.
    dest_root: folder under which `<chord_class>/` is created; required when
      `save` is True.
    n_fft, hop_length, n_mels: accepted for compatibility but unused; the
      spectrogram settings come from the module-level constants.
    save: True -> write spectrogram PNGs; False -> plot and play each instance.

  Raises:
    ValueError: if `save` is True without `dest_root`, or if no instrument can
      play the chord.
  """
  if save and dest_root is None:
    raise ValueError('dest_root is required when save=True')

  specs = CHORD_SPECIFICTS[chord_class]
  standard = np.array([40,45,50,55,59,64])[-len(specs):]
  pitches = standard + specs

  if flip:
    pitches = np.flip(pitches)

  instruments = who_can_play(pitches)
  if count > 0 and not instruments:
    # np.resize on an empty list would silently produce zeros and fail later
    # with an unrelated KeyError.
    raise ValueError('no instrument can play chord {}'.format(chord_class))
  instruments_classes = sorted(instruments)
  indicies = np.resize(instruments_classes, count)

  # Loop-invariant conversions from seconds to samples.
  gaps_count = len(pitches)-1
  gaps_min_samples = int(sr*gaps_min)
  gaps_max_samples = int(sr*gaps_max)
  pad_max_samples  = int(sr*pad_max)

  if save:
    dest_dir = Path(dest_root)/chord_class
    # parents=True: dest_root itself may not exist yet.
    dest_dir.mkdir(parents=True, exist_ok=True)

  for c in range(count):
    instrument = indicies[c]
    pitches_and_velocities = [(p, random.choice(tuple(v)))
                              for p, v in instruments[instrument].items()]

    gaps = [random.randint(gaps_min_samples, gaps_max_samples) for _gap in range(gaps_count)]
    pad  = random.randint(0, pad_max_samples)
    # Forward `sr` so a non-default sample rate is honoured end to end.
    chord = gen_chord(pitches_and_velocities, instrument,
                      seconds=seconds, sr=sr, start_pad=pad, gaps=gaps)

    if save:
      # Running index + instrument number keeps the names unique per folder.
      fname = '{}_{}'.format(len(os.listdir(dest_dir)), instrument)
      mel_spec = signal_to_log_mel_spec(chord, sr)
      save_mel_spec(mel_spec, dest_dir, fname)
    else:
      plot_signal(chord, sr, (12,1.5), title=instrument)
      display(Audio(chord, rate=sr))

# Recorded data sandbox

The goal of the recorded-data sandbox is to find an appropriate start padding for each recorded clip and to check visually that every chord instance has an appropriate shape.

In this case every chord was recorded 60 times with sr = 48 kHz and bpm = 60. The start padding is approximately 2 s.

In [0]:
# Read the clip index, then decode one recording per listed chord, resampled to SR.
df = pd.read_csv(ROOT/'recorded/data.csv')
chord_classes = df['chord'].to_list()
audio_dir = ROOT/'recorded/audio'
clips = [librosa.load(audio_dir/'{}.mp4'.format(name), sr=SR)[0]
         for name in chord_classes]

In [0]:
# Start padding in seconds per chord, taken from the 'pad' column of data.csv.
# Earlier hand-written values, kept for reference:
#paddings = {'A':[1.8],'Am':[1.8],'C':[1.85],'D':[1.82],'Dm':[1.9],'E':[1.85],'Em':[1.8],'F':[1.8],'G':[1.8],'H7':[1.88]}
paddings = dict(zip(df['chord'], df['pad']))

In [0]:
# Already checked by eye: A, Am, C, D, Dm, E, F, G, H7, Em

# Pick one recording, drop its start padding and cut it into 60 beat-long instances.
i = 0
chord_class = chord_classes[i]
start_in_secs = paddings[chord_class]
start = int(start_in_secs * SR)
clip = clips[i][start:]

chord_instances = get_chord_instances(clip, 60, SR, 60)

plot_signal(clip, SR, (20,4), chord_class)

In [0]:
# Plot each of the 60 instances separately, titled with its index, to inspect its shape.
for j in range(60):
  chord_instance = chord_instances[j]
  plot_signal(chord_instance,sr=SR,figsize=(6,1.5), title=j)

In [0]:
# Look at and listen to a single instance (index j) of the currently selected chord.
j = 14
chord_instance = chord_instances[j]

plot_signal(chord_instance,SR,(12,1.5),chord_class)
# Last expression of the cell: renders an inline audio player.
Audio(chord_instance,rate=SR)

In [0]:
# Export the selected instance as a WAV file.
# librosa.output was removed in librosa 0.8; on newer versions fall back to
# SciPy's WAV writer, which librosa.output.write_wav used internally.
if hasattr(librosa, 'output'):
  librosa.output.write_wav(ROOT/'chord_instance.wav', chord_instance, SR)
else:
  from scipy.io import wavfile
  wavfile.write(str(ROOT/'chord_instance.wav'), SR, chord_instance)

# Recorded data to spectrograms

In [0]:
# Output folder for the spectrograms of the recorded clips.
REC_ROOT = ROOT/'recorded/spectrograms-64x64'

# Clip index: chord name and start padding (seconds) per recording.
df = pd.read_csv(ROOT/'recorded/data.csv')
chord_classes = df['chord'].to_list()
paddings = dict(zip(df['chord'], df['pad']))

# One decoded recording per chord, resampled to SR.
recorded_audio_dir = ROOT/'recorded/audio'
clips = [librosa.load(recorded_audio_dir/'{}.mp4'.format(name), sr=SR)[0]
         for name in chord_classes]

In [0]:
# Make sure there is one output folder per chord under REC_ROOT
# (parents are created as needed; existing folders are left alone).
for chord_class in chord_classes:
  chord_specs_path = REC_ROOT/chord_class
  chord_specs_path.mkdir(parents=True, exist_ok=True)

In [0]:
# Cut every recording into 60 beat-long instances and save each one as a
# log-mel spectrogram image named <instance index>.png in its chord folder.
for i in range(len(clips)):
  chord_class = chord_classes[i]

  # Drop the silence before the first chord.
  start_in_secs = paddings[chord_class]
  start = int(start_in_secs * SR)
  clip = clips[i][start:]

  chord_specs_path = REC_ROOT/chord_class
  chord_specs_path.mkdir(parents=True, exist_ok=True)
  chord_instances = get_chord_instances(clip,60,SR,60)

  # Name files by instance index rather than by the current folder size:
  # re-running the cell then overwrites 0.png..59.png instead of appending
  # 60.png.., and stray files in the folder cannot shift the numbering.
  for n, chord_instance in enumerate(chord_instances):
    mel_spec = signal_to_log_mel_spec(chord_instance, SR)
    save_mel_spec(mel_spec, chord_specs_path, str(n))

# Generate spectrograms


In [0]:
# Instrument numbers excluded from generation, written as ranges; this is the
# same set as the `bad` default of who_can_play.
#bad = [2,4,6-8,10-13,15-16,19-20,23-25,27-28,30,32,35-36]
# Output folder for the spectrograms of the generated chords.
GEN_ROOT = ROOT/'generated/spectrograms-64x64'

In [0]:
# For every chord, generate 140 instances in the listed note order and 140 with
# the order flipped, printing a timestamp before each batch.
TS_FORMAT = "%d/%m/%Y %H:%M:%S"

for c in CHORD_SPECIFICTS.keys():
  for flip in (False, True):
    label = ('Flip', c) if flip else (c,)
    print(*label, datetime.now().strftime(TS_FORMAT))
    gen_some_chords(c, 140, flip=flip, dest_root=GEN_ROOT, save=True)
print("END", datetime.now().strftime(TS_FORMAT))

# Mixing datasets


In [0]:
# `classes` is needed here but is otherwise only assigned in the Training
# section, so a fresh Restart & Run All failed with a NameError. The sorted
# chord names are exactly ['A','Am','C','D','Dm','E','Em','F','G','H7'].
classes = sorted(CHORD_SPECIFICTS)

# Train on the generated spectrograms, validate on recorded ones.
# NOTE(review): these folders differ from the GEN_ROOT/REC_ROOT used elsewhere
# in the notebook - confirm they still exist.
data_bunch = ImageDataBunch.from_folder(ROOT/'recorded/spectrograms_generated-64x64',train=".",valid_pct=0.3,classes=classes)
data_test = ImageDataBunch.from_folder(ROOT/'recorded/spectrograms-64x64',train=".",valid_pct=0.3,classes=classes)
# Use the recorded validation split as the validation data of the training bunch...
data_bunch.valid_dl = data_test.valid_dl
# ...and expose the recorded training split as data_test's validation data.
data_test.valid_dl = data_test.train_dl
# NOTE(review): replaces the test loader with one built from an empty dict - intent unclear, confirm.
data_test.test_dl = DeviceDataLoader.create({})

In [0]:
from shutil import rmtree

# Share of the recorded images per class that goes to train / valid; the rest is test.
rec_train_pct = 0.25
rec_valid_pct = 0.25
# Share of the generated images per class that is added to the training set.
gen_test_pct  = 0.3

REC_COUNT_PER_CLASS = 60
GEN_COUTN_PER_CLASS = 280

# NOTE(review): these differ from the folders the generation/export cells write
# to ('generated/...' and 'recorded/...') - confirm the data was moved here.
GEN_ROOT = ROOT/'data/spectrograms_generated-64x64'
REC_ROOT = ROOT/'data/spectrograms-64x64'

SPLIT_ROOT = Path('tmp/recognizer')

# Start from an empty split folder. Without this, re-running the cell reshuffles
# and copies on top of the previous split, so the same image can end up in both
# train and test.
rmtree(SPLIT_ROOT, ignore_errors=True)

train_count = int(REC_COUNT_PER_CLASS*rec_train_pct)
valid_count = int(REC_COUNT_PER_CLASS*rec_valid_pct)
gen_train_count = int(GEN_COUTN_PER_CLASS*gen_test_pct)

for c in classes:
  # Recorded images are named 0.png .. (REC_COUNT_PER_CLASS-1).png.
  fnames = ["%d.png" % i for i in range(REC_COUNT_PER_CLASS)]
  random.shuffle(fnames)

  splits = [
    ("train", fnames[: train_count]),
    ("valid", fnames[train_count : train_count+valid_count]),
    ("test",  fnames[train_count+valid_count :]),
  ]

  for dirname, data in splits:
    split_dir = SPLIT_ROOT/dirname/c
    split_dir.mkdir(parents=True, exist_ok=True)
    for fname in data:
      copyfile(REC_ROOT/c/fname, split_dir/fname)

  # Top up the training set with a random subset of the generated images.
  # Only .png files are considered; sorting first makes the shuffle independent
  # of the directory listing order.
  gen_fnames = sorted(f for f in os.listdir(GEN_ROOT/c) if f.endswith('.png'))
  random.shuffle(gen_fnames)

  for fname in gen_fnames[:gen_train_count]:
    copyfile(GEN_ROOT/c/fname, SPLIT_ROOT/'train'/c/fname)

# Training


In [0]:
# Chord class labels: the CHORD_SPECIFICTS keys in alphabetical order.
classes=['A', 'Am', 'C', 'D', 'Dm', 'E', 'Em', 'F', 'G', 'H7' ]

In [0]:
# Training data: the generated spectrograms, batch size 32, images sized to IMG_SIZE.
# valid_pct=0 is passed, i.e. no share of these images is requested for validation.
# NOTE(review): the learner below reports `accuracy` and builds confusion
# matrices - confirm which validation data it is expected to use.
data_bunch = ImageDataBunch.from_folder(ROOT/'data/spectrograms_generated-64x64', valid_pct=0, bs=32,size=IMG_SIZE)

In [0]:
# Preview a batch of training images, then display the data bunch's class list.
data_bunch.show_batch(3)
data_bunch.classes

In [0]:
# Stage 1: ResNet-18 learner tracked with accuracy; frozen, then fit_one_cycle(2).
learn = cnn_learner(data_bunch, models.resnet18, metrics=accuracy)
learn.freeze()
learn.fit_one_cycle(2)

In [0]:
# Confusion matrix of the learner after the frozen stage.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [0]:
# Unfreeze the model, run the learning-rate finder and plot what it recorded.
learn.unfreeze()
learn.lr_find()
learn.recorder.plot()

In [0]:
# Stage 2: fit_one_cycle(3) on the unfrozen model with max_lr = slice(5e-4, 5e-3)
# (presumably chosen from the LR-finder plot - TODO confirm).
learn.fit_one_cycle(3, max_lr=slice(5e-4, 5e-3))

In [0]:
# Confusion matrix of the learner after the unfrozen stage.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [0]:
# Evaluate the trained learner on data_test's validation loader, reporting accuracy.
# NOTE(review): data_test is assigned both in the "Mixing datasets" section and
# in a later cell of this section - confirm which one is meant to exist here.
learn.validate(data_test.valid_dl,metrics=[accuracy])

In [0]:
# Save the learner under the name '13' and export it.
learn.save('13')
learn.export()

In [0]:
# Evaluation data from the split folder tmp/recognizer: 'train' as the training
# folder and 'test' as the validation folder, batch size 32.
data_test = ImageDataBunch.from_folder('tmp/recognizer',train="train",valid='test',bs=32)
# Earlier variant reading directly from the Drive folders, kept for reference:
#data_test = ImageDataBunch.from_folder(ROOT/'recorded',train="spectrograms_generated-2000HZ",valid='spectrograms-2000HZ',classes=classes)

In [0]:
# Fresh ResNet-18 learner on data_test, then load the saved model '13' into it.
# NOTE(review): the load path points at 'recorded/spectrograms_generated-2000HZ/models',
# not at the data folder the learner was created on when '13' was saved above -
# confirm this is the intended checkpoint.
learn = cnn_learner(data_test, models.resnet18, metrics=accuracy)
learn.load(ROOT/'recorded/spectrograms_generated-2000HZ/models/13');

In [0]:
# Evaluate the loaded model on data_test's validation loader (the 'test' folder of the split), reporting accuracy.
learn.validate(data_test.valid_dl,metrics=[accuracy])