In [5]:
import numpy as np
import wave
import struct


class NoteDetector:
    
    def __init__(self, path):
        self.frequencies = [65.41, 69.3, 73.42, 77.78, 82.41, 87.31, 92.5, 98.0, 103.8, 110.0, 116.5, 123.5,
         130.8, 138.6, 146.8, 155.6, 164.8, 174.6, 185.0, 196.0, 207.7, 220.0, 233.1, 246.9,
         261.6, 277.2, 293.7, 311.1, 329.6, 349.2, 370.0, 392.0, 415.3, 440.0, 466.2, 493.9,
         523.3, 554.4, 587.3, 622.3, 659.3, 698.5, 740.0, 784.0, 830.6, 880.0, 932.3, 987.8,
         1047.0, 1109.0, 1175.0, 1245.0, 1319.0, 1397.0, 1480.0, 1568.0, 1661.0, 1760.0, 1865.0, 1976.0,
         2093.00, 2349.32, 2637.02, 2793.83, 3135.96, 3520.00, 3951.07,
         4186.01, 4698.63, 5274.04, 5587.65, 6271.93, 7040.00, 7902.13]
        
        self.notes = ['C2', 'C2#', 'D2', 'E2b', 'E2', 'F2', 'F2#', 'G2', 'G2#', 'A2', 'B2b', 'B2', 
         'C3', 'C3#', 'D3', 'E3b', 'E3', 'F3', 'F3#', 'G3', 'G3#', 'A3', 'B3b', 'B3', 
         'C4', 'C4#', 'D4', 'E4b', 'E4', 'F4', 'F4#', 'G4', 'G4#', 'A4', 'B4b', 'B4', 
         'C5', 'C5#', 'D5', 'E5b', 'E5', 'F5', 'F5#', 'G5', 'G5#', 'A5', 'B5b', 'B5', 
         'C6', 'C6#', 'D6', 'E6b', 'E6', 'F6', 'F6#', 'G6', 'G6#', 'A6', 'B6b', 'B6',
         'C7', 'D7', 'E7', 'F7', 'G7', 'A7', 'B7',
         'C8', 'D8', 'E8', 'F8', 'G8', 'A8', 'B8']
                
        self.output = []
        
        self.read_file(path)
        self.process_file()        
        
    
    def find_nearest(self, value):
        idx = (np.abs(self.frequencies - value)).argmin()
        return self.notes[idx]
    
    def read_file(self, path):
        sound_file = wave.open(path, 'r')
        file_length = sound_file.getnframes()    # number of audio frames

        input_sound = np.zeros(file_length)
        for i in range(file_length):
            data = sound_file.readframes(1)
            data = struct.unpack("hh", data)
            input_sound[i] = int(data[0])
            
        input_sound = np.divide(input_sound, float(2**15)) # Normalize data: (-1, 1)
        self.input_sound = input_sound
        
    def process_file(self):
        window_size = 2205    # Size of window to be used for detecting silence
        sampling_freq = 44100   # Sampling frequency of audio signal
        threshold = 600
        
        sound = np.square(self.input_sound)  # can also use np.abs
        output_frequencies = []
        dft = []
        i = 0
        j = 0
        k = 0    

        while i <= (len(sound) - window_size):
            s = 0.0
            j = 0
            while j <= window_size:
                s += sound[i + j]
                j += 1
            # detecting the silence waves
            if s < threshold:
                if (i - k) > (window_size * 4):
                    # applying fast fourier transform
                    dft = np.fft.fft(self.input_sound[k:i])
                    dft = np.argsort(dft) # indexes of sorted order
                    # find smallest index at (-1, 0, 1)
                    if dft[0] > dft[-1] and dft[1] > dft[-1]:
                        i_max = dft[-1]
                    elif dft[1] > dft[0] and dft[-1] > dft[0]:
                        i_max = dft[0]
                    else:
                        i_max = dft[1]
                    # calculating frequency
                    output_frequencies.append((i_max * sampling_freq) / (i - k))
                    k = i + 1
            i += window_size
        [self.output.append(self.find_nearest(frequency)) for frequency in output_frequencies]

            
    def get_output(self):
        return self.output
    


n = NoteDetector('melody-1.wav')
print(n.get_output())

['C2', 'C2', 'C2', 'C2', 'C2', 'F3', 'D3', 'D3', 'A3', 'E4b', 'E6b', 'D5', 'D5', 'B4b', 'B3b', 'B6b', 'B5b', 'B4b', 'E3', 'E3b', 'B3', 'B4b', 'B4b', 'B4b', 'C5#', 'B4b', 'B4b', 'B4b', 'B4b', 'B5', 'E3b', 'D3', 'A3', 'B5b', 'B3b', 'B3b', 'C5', 'B4b', 'B4b', 'B4b', 'B4b', 'B4b', 'D3', 'E3b', 'B3b', 'B4b', 'A3', 'C5', 'B4b', 'C5#', 'B4b', 'G4#', 'G4#', 'G4', 'F4#', 'B6b', 'F4#', 'A4', 'F4#', 'F5']


In [6]:
n2 = NoteDetector('melody-2.wav')
print(n2.get_output())

['C2', 'C2', 'C2', 'C2', 'C2', 'C2', 'C2', 'C2', 'E3', 'C4', 'F4', 'F4#', 'G4#', 'C3', 'G6#', 'E6b', 'E6b', 'D3', 'E3b', 'B4b', 'C5#', 'C5', 'B4', 'B4', 'C5#', 'C5#', 'G3', 'G3#', 'B3b', 'C4', 'B3b', 'C5#', 'C5', 'B4', 'B4', 'A4', 'B4b', 'C4', 'C4', 'E3b', 'C4', 'G6#', 'G6#', 'G3', 'C4', 'G6#', 'C4', 'C4', 'C3', 'F4', 'F4', 'C3', 'E6b', 'C4', 'C4#', 'C4', 'C4', 'C4#', 'C4', 'E3']
