In [10]:
from midiutil.MidiFile import MIDIFile
from itertools import combinations
from time import sleep
import subprocess
import numpy as np
import soundfile as sf
import os
import sounddevice as sd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import csv

In [11]:
# Functions for onset detecti    midi_instructions = track, channel, time, duration,volume[0], tempo[0]on

def rcoeff_(snippet):
    return np.corrcoef(snippet, np.arange(len(snippet)))[0, 1]

rcoeff = np.vectorize(rcoeff_, signature='(n)->()')

def divide_chunks(signal, intervals, snippet_size):
# yield successive n-sized chunks from l.
    for i in range(0, len(signal) - snippet_size, intervals):
        # looping till the end of the array
        yield signal[i:i + snippet_size]
        
def max_to_one(arr):
    '''Scale the maximum of an array to one'''
    return np.divide(arr, np.max(np.absolute(arr)))

def detect_onset_given_entire_signal(signal):
    '''
    Onset detection function. Could be replaced with a better one.
    Returns two signals, 
    - a short one (fixed at length 4096) for a standard note evaluation algorithm, 
    - a long one (usually 3*4096) for making spectrograms and applying machine learning algorithms like CNN.
    (Why not standardise them, return the longer one instead?)
    Onset detection is carried out every 32 points so we do not have to calculate the r-value of 4096 points at every point.
    '''
#     print(len(signal))
    signal_cs = np.cumsum(np.absolute(signal))
    signal_cs = np.add(signal_cs, np.random.randn(len(signal_cs)))
#     print(len(signal_cs))
    signal_chunk_sum = signal_cs[4096:] - signal_cs[:-4096]
    snippets = np.array(list(divide_chunks(signal_chunk_sum, 32, 4096)))
    signal_rcoeff = rcoeff(snippets)

#     print("loc")
#     print(signal_rcoeff)
#     print(np.where(signal_rcoeff>0.8)[0][0])
    location = np.where(signal_rcoeff>0.8)[0][0]
    start_pt, end_pt = location*32+7168, location*32+7168+4096
    map_start_pt = location*32+7168-3*4096
    if map_start_pt < 0:
        map_start_pt = 0
        
    return signal[start_pt:end_pt], signal[map_start_pt:end_pt]

In [12]:
def generate_midi_file(com,midi_instructions):
    # generate and save midifile
    mf = MIDIFile(len(com)+1)  # don't know why need one extra
    
    track, channel, time, duration, volume, tempo = midi_instructions
    mf.addTempo(track,time,tempo)
    for i in range(len(com)):
        mf.addNote(track, channel, com[i], time, duration, volume)

    code = []
    for num in com:
        code.append(num)
    while len(code) < 4:
        code.append("x")

    filename = "C4_{}_{}_{}_{}".format(code[0], code[1], code[2], code[3])
    # first argument not necessary - convention to be updated
    with open("midifile_sch/{}.mid".format(filename), "wb") as outf:
        mf.writeFile(outf)
        
    return filename

In [13]:
def convert_midi_to_signal(filename,filename_wav,num):
    '''
    Given filename, create soundfile.
    It uses fluidsynth to convert the midifile from the directory into a soundfile in another directory.
    Installation of fluidsynth is necessary to run this.
    https://github.com/FluidSynth/fluidsynth/wiki/BuildingWithCMake
    returns mono signal - could we use stereo information for evaluation someday?
    '''
    soundfont_dir = "~/soundfonts/GeneralUser\ GS\ v1.471.sf2"
    subprocess.run("fluidsynth -F soundfile_sch/{}.wav {} midifile_sch/{}.mid"
                   .format(filename_wav,soundfont_dir,filename),shell=True)
    sleep(0.2)
    audio,sr = sf.read("soundfile_sch/{}.wav".format(filename_wav))
    if type(audio[0]) != float or type(audio[0]) != int: 
        # if stereo (expected)
        signal = audio[:,0]  
    else:    
        # if mono (but I have yet to try this out)
        signal = audio[:]
    assert (signal[0] != float or signal[0] != int)
    return signal

In [14]:
def add_noise(signal):
    noise = 0.01 * np.array(np.random.randn(len(signal)))
    return np.add(signal, noise)

In [15]:
def save_array(filename, signal_to_map, num):
    fmin = librosa.core.note_to_hz("B3")
    cqt_array = librosa.cqt(signal_to_map, sr=44100, hop_length=2**7, bins_per_octave=36, fmin=fmin, n_bins=108)
    CQT = librosa.magphase(cqt_array)[0]
    output = ""
    for i in list(CQT):
        for j in list(i):
            output += str(j)
            output += ","
    with open("database_{}.csv".format(num), "a") as file:
        writer = csv.writer(file, quoting=csv.QUOTE_MINIMAL, delimiter=" ")
        writer.writerow(filename + "," + output)
    file.close()

In [16]:
def process(comb, midi_instructions, num):
    filename = generate_midi_file(comb, midi_instructions)
    filename_wav = filename + "_{}_{}".format(midi_instructions[-2], midi_instructions[-1])
    signal = convert_midi_to_signal(filename, filename_wav, num)
    signal_to_ayse, signal_to_map = detect_onset_given_entire_signal(signal)
    save_array(filename, signal_to_map, num)

In [20]:
def test_generate_sample(com = None):
    scales = range(5, 6)
    no_notes = range(2, 5)
    
    channel = 0
    time = 0.5
    track = 1
    duration = 2
    volume = [60, 120]
    tempo = [80, 120]
    
    count = 0
    if com == None:
            for j in no_notes:
                comb = [list(x) for x in combinations(range(60, 84), j)]
                for i in comb:
                    for x in volume:
                        for y in tempo:
                            midi_instructions = track, channel, time, duration, x, y
                            process(i, midi_instructions, int(count/100))
                            count += 1
    else:
        midi_instructions = track, channel, time, duration,volume[0], tempo[0]
        process(com, midi_instructions, int(count/100))


In [286]:
test_generate_sample(com=[60,65])

In [3]:
with open("database.csv", "r") as file:
    reader = csv.reader(file, delimiter=",") 
    data = []
    label = []
    for row in reader:
        print(row)

['C 4 _ 6 0 _ 6 5 _ x _ x ', ' 5 . 1 8 4 8 2 6 3 9 0 9 3 e - 0 5 ', ' 5 . 1 7 5 9 6 5 8 4 0 8 7 e - 0 5 ', ' 5 . 1 5 1 6 2 6 9 2 6 4 4 e - 0 5 ', ' 5 . 1 1 2 5 6 2 4 8 1 4 3 e - 0 5 ', ' 5 . 0 5 9 9 3 8 6 7 1 2 1 e - 0 5 ', ' 4 . 9 9 5 2 4 1 1 9 3 5 2 e - 0 5 ', ' 4 . 9 2 0 1 5 6 3 9 0 0 3 e - 0 5 ', ' 4 . 8 3 6 4 3 6 8 8 9 0 3 e - 0 5 ', ' 4 . 7 4 5 7 6 4 4 7 3 8 5 e - 0 5 ', ' 4 . 6 4 9 6 2 6 8 1 7 0 5 e - 0 5 ', ' 4 . 5 4 9 2 1 9 6 1 6 4 3 e - 0 5 ', ' 4 . 4 4 5 3 8 6 4 0 3 2 7 e - 0 5 ', ' 4 . 3 3 8 6 0 5 6 8 6 5 6 e - 0 5 ', ' 4 . 2 2 9 0 2 6 7 1 7 4 3 e - 0 5 ', ' 4 . 1 1 6 5 4 6 0 1 0 6 4 e - 0 5 ', ' 4 . 0 0 0 9 1 8 3 5 4 3 6 e - 0 5 ', ' 3 . 8 8 1 8 8 5 4 8 4 0 8 e - 0 5 ', ' 3 . 7 5 9 3 0 4 5 3 9 9 1 e - 0 5 ', ' 3 . 6 3 3 2 6 4 4 5 6 5 7 e - 0 5 ', ' 3 . 5 0 4 4 2 0 7 0 7 8 5 e - 0 5 ', ' 3 . 3 8 6 9 0 0 6 0 1 6 3 e - 0 5 ', ' 3 . 4 9 9 4 7 1 0 6 0 9 5 e - 0 5 ', ' 2 . 7 5 6 5 5 1 0 8 3 0 9 e - 0 5 ', ' 2 . 5 9 2 3 3 1 9 2 7 5 9 e - 0 5 ', ' 2 . 8 0 9 9 3 5 7 8 6 1 6 e - 0 5

In [21]:
 test_generate_sample()