In [5]:
# general imports
import numpy as np
import pandas as pd

# scipy imports
from scipy.fftpack import fft
from scipy.io import wavfile

# custom classes
%run classes/measure.py
%run classes/note.py

def closest_duration(duration):
    """Snap ``duration`` to the nearest key of ``duration_to_notes``.

    ``duration_to_notes`` is not defined in this cell; it is presumably
    provided by one of the ``%run`` scripts above (TODO confirm).

    Args:
        duration: Numeric duration to snap.

    Returns:
        The key of ``duration_to_notes`` with the smallest absolute
        difference to ``duration``, as a NumPy scalar.
    """
    candidates = np.array(list(duration_to_notes))
    nearest = np.argmin(np.abs(candidates - duration))
    return candidates[nearest]

class Music:
    
    def __init__(self, 
                 time_signature=(4, 4),
                 tempo=120,
                 ver_number="0.00"):
            
        self.time_signature = time_signature
        self.tempo = tempo
        self.unit = 60 / (tempo * 4) # Finest resolution is 16th notes
        self.unit_duration = tempo / 60
        self.ver_number = ver_number # version number of decoder
        
    def find_peaks(self, sound, separation, min_volume_level):

        # return value of peak positions and signal strength
        peaks = list()

        # initializing variables
        max_prev_i = np.argmax(sound[:separation])
        max_next_i = np.argmax(sound[separation + 1: 2 * separation]) + separation + 1
        max_prev   = sound[max_prev_i]
        max_next   = sound[max_next_i]

        for i in range(separation, len(sound) - separation - 1):

            # Determining the maximum value in the previous window
            if sound[i - 1] > max_prev:
                max_prev_i = i - 1
                max_prev   = sound[max_prev_i]
            elif i - max_prev_i > separation:
                max_prev_i = np.argmax(sound[i - separation: i - 1]) + i - separation
                max_prev   = sound[max_prev_i]

            # Determining the maximum value in the next window
            if sound[i + separation + 1] > max_next:
                max_next_i = i + separation + 1
                max_next   = sound[max_next_i]
            elif max_next_i == i:
                max_next_i = np.argmax(sound[i + 1: i + separation + 1]) + i + 1
                max_next = sound[max_next_i]

            # Determining if the current point is a peak
            if sound[i] > max_prev and sound[i] > max_next and sound[i] > min_volume_level:
                if len(peaks) == 0 or i - peaks[-1][0] > separation:
                    peaks.append((i, sound[i]))
        return peaks

    def read(self, input_path, is_wav_format=True):
        self.input_path = input_path
        if is_wav_format:
            self.sample_rate, self.raw = wavfile.read(input_path)
        self.chan1, self.chan2 = list(map(list, zip(*self.raw)))
        self.duration = len(self.raw) / self.sample_rate
        
    def get_input_path(self):
        return self.input_path
        
    def compile_music(self, separation=3000, min_volume_level=5000, max_pitch=4000, stength_cutoff=0.75, use_chan1=True):
        self.measures = list()
        
        if use_chan1:
            peaks = self.find_peaks(self.chan1, separation, min_volume_level)
            notes = self.get_notes(self.chan1, peaks, separation, max_pitch, stength_cutoff)
        notes = self.filter_groups(notes)
        notes = self.filter_nearby_times(notes)
        return notes
    
    def get_notes(self, sound, peaks, separation, max_pitch, stength_cutoff):
        notes = list()
        for peak, loudness in peaks:
            
            inspection_zone = sound[peak: peak + separation]
            fft_data = np.abs(fft(inspection_zone))

            conversion_factor = self.sample_rate / len(fft_data)
            max_signal = max(fft_data)
            resonant_freqs = (-fft_data).argsort()
            timestamp = peak / self.sample_rate

            for freq in resonant_freqs:
                signal = fft_data[freq]
                if signal < stength_cutoff * max_signal:
                    break
                if freq * conversion_factor < max_pitch:
                    note = Note(freq * conversion_factor, signal, loudness, timestamp)
                    notes.append(note.getInfo())
        notes = pd.DataFrame(notes, columns=["time", "id", "signal", "pitch", "given_pitch",
                                             "loudness", "note", "octave", "alter"])
        return notes

    # picks the loudest frequency for a certain time
    def filter_groups(self, notes):
        ret = pd.DataFrame(columns=notes.columns)
        groups = notes.groupby("time")

        for key, note in groups:
            
            if len(note) == 1:
                ret = ret.append(note)
            else:
                to_delete = list()
                index_offset = min(note.index)
                for i in range(index_offset, len(note) + index_offset):
                    for j in range(i + 1, len(note) + index_offset):
#                         if abs(note.id[i] - note.id[j]) < 2:
                        to_delete.append(i if note.loudness[i] < note.loudness[j] else j)
                ret = ret.append(note.drop(to_delete))
        return ret
    
    # checks nearby notes for validation
    def filter_nearby_times(self, notes):
        self.start_offset = notes.iloc[0].time
        notes["time"]     = notes["time"] - self.start_offset
        notes["duration"] = notes.time.shift(-1) - notes.time
        notes["duration"] = notes.duration.map(closest_duration)
        notes["typ"]      = notes.duration.map(lambda x: duration_to_notes[x]["name"])
        return notes
        
    def addMeasure(self, measure):
        self.measures.append(measure)

In [6]:
# Decode the sample recording with the default settings
# (4/4 time signature, tempo 120) and keep the detected notes.
music = Music()
music.read('sounds/wav/cello_pluck/expert/bach.wav')
notes = music.compile_music()

In [7]:
notes

Unnamed: 0,time,id,signal,pitch,given_pitch,loudness,note,octave,alter,duration,typ
0,0.000000,32,1.015916e+07,96.0,98.00,9994,G,2,0,0.25,sixteenth
1,0.309729,32,6.764939e+06,96.0,98.00,5652,G,2,0,0.25,sixteenth
2,0.567375,32,4.442607e+06,96.0,98.00,7766,G,2,0,0.50,eighth
3,1.032292,47,1.215697e+07,240.0,233.08,12794,A,3,1,0.25,sixteenth
4,1.353563,46,1.823645e+07,224.0,220.00,23237,A,3,0,0.25,sixteenth
5,1.683625,47,1.391885e+07,240.0,233.08,13849,A,3,1,0.25,sixteenth
6,1.966375,51,4.247976e+06,288.0,293.66,8572,D,4,0,0.25,sixteenth
7,2.053646,42,2.476690e+06,176.0,174.61,7327,F,3,0,0.25,sixteenth
9,2.309354,47,1.231114e+07,240.0,233.08,12303,A,3,1,0.25,sixteenth
10,2.620292,51,3.037446e+06,288.0,293.66,5820,D,4,0,0.25,sixteenth
