In [None]:
import numpy as np
import matplotlib.pyplot as plt

import zipfile #read files
import os #go to directories
import json #read the .json filetype data

from scipy import signal #use specterogram function
from scipy.io import wavfile #read .wav filetype data

from sklearn.preprocessing import StandardScaler #scale/fit the data
from sklearn.model_selection import ShuffleSplit #randomize training and testing data /couldn't find any othe way for numpy arrays
from sklearn.neural_network import MLPClassifier #model: neural network 

In [None]:
# The function spectrogram_values reads the .wav file and returns basic values required for the interpretation.
def spectrogram_values(file, filteri):
    """Compute the spectrogram of a .wav file and keep only its loud time segments.

    Parameters
    ----------
    file : str or file-like
        The .wav file, passed to ``scipy.io.wavfile.read``.
    filteri : float
        Intensity threshold. A time segment is kept only when its largest
        spectrogram value is strictly greater than this threshold.

    Returns
    -------
    tuple
        ``(rate, sample, sample_frequencies, segment_times, spectrogram, intensity)``.
        ``rate`` and ``sample`` are returned exactly as read from the file;
        ``segment_times``, the columns of ``spectrogram`` and ``intensity``
        contain only the segments that passed the threshold.
    """
    rate, sample = wavfile.read(file)

    # Multi-channel files are read as (n_samples, n_channels). Feeding that
    # array to signal.spectrogram would transform along the channel axis, so
    # the channels are averaged into one signal first. Mono data is passed
    # through unchanged.
    mono = sample if sample.ndim == 1 else sample.mean(axis=1)
    sample_frequencies, segment_times, spectrogram = signal.spectrogram(mono, fs=rate, nperseg=1024)

    # Peak value over all frequencies for every time segment.
    intensity = np.amax(spectrogram, axis=0)
    chosen = intensity > filteri

    return rate, sample, sample_frequencies, segment_times[chosen], spectrogram[:, chosen], intensity[chosen]

In [None]:
#The function midi_to_note converts a given MIDI value to the corresponding note.
_NOTE_NAMES = ("C", "C#/Db", "D", "D#/Eb", "E", "F",
               "F#/Gb", "G", "G#/Ab", "A", "A#/Bb", "B")


def midi_to_note(midi):
    """Return the note name (with octave) of an integer MIDI value.

    Parameters
    ----------
    midi : int
        MIDI note number. Values from 21 to 108 (inclusive) are accepted.

    Returns
    -------
    str
        The note name followed by its octave number, e.g. ``"A0"`` for 21,
        ``"C4"`` for 60 and ``"C8"`` for 108, or the string ``"no"`` when
        ``midi`` lies outside the accepted range.
    """
    if not (21 <= midi <= 108):
        return "no"

    # Octaves change at every multiple of 12 and MIDI 12 is C0, so MIDI 24 is
    # C1. The previous "midi-24 <= 0" test put 24 into octave 0 by mistake.
    octave = midi // 12 - 1
    return _NOTE_NAMES[midi % 12] + str(octave)

In [None]:
#The function prepare prepares the raw data for interpretation.
def prepare(file, sample, pitch, pitch_value=None):
    """Append the loud spectrogram time buckets of one .wav file to ``sample``.

    Parameters
    ----------
    file : str
        The .wav file, forwarded to ``spectrogram_values``.
    sample : list
        Extended in place with one row per retained time bucket.
    pitch : list or None
        When not None, extended in place with one label per row added to
        ``sample``. Pass None when only the features are needed.
    pitch_value : optional
        Label stored for every row of this file. When omitted, the label is
        read from the notebook-level variable ``value`` (``value['pitch']``),
        which is where this function has always taken it from; passing it
        explicitly removes that hidden dependency.

    Returns
    -------
    None
        The results are delivered through the ``sample`` and ``pitch`` lists.
    """
    # Only the filtered spectrogram (index 4 of the returned tuple) is used.
    # 1e6 is the intensity threshold handed to spectrogram_values.
    spectrogram = spectrogram_values(file, 1e6)[4]
    time_buckets = spectrogram.transpose()  # one row per time bucket

    if pitch is not None and len(time_buckets) > 0:
        if pitch_value is None:
            # Legacy behaviour: rely on the caller's loop variable `value`.
            pitch_value = value['pitch']
        pitch.extend([pitch_value] * len(time_buckets))
    sample.extend(time_buckets)
        

In [None]:
#Reading the data in the .json file
# The dataset folder is addressed relative to the working directory, and the
# context manager closes the file handle again after reading.
DATA_DIR = 'nsynth-test'
with open(os.path.join(DATA_DIR, 'examples.json')) as handle:
    json_file = handle.read()
data = json.loads(json_file)

#Creating sample and pitch arrays and preparing them to be used.
sample = []
pitch = []

# NOTE: the loop variable has to stay named `value`, because prepare() reads
# the label of the current file from the global value['pitch'].
for key, value in data.items():
    prepare(os.path.join(DATA_DIR, 'audio', key + '.wav'), sample, pitch)

features = np.asarray(sample)
labels = np.asarray(pitch)

#Randomizing the data so the test and train sets will be "shuffled".
# The split indices are actually applied here: 70% of the rows are used for
# training and the remaining 30% are held out for testing.
shuffle_split = ShuffleSplit(n_splits=1, test_size=.3, random_state=0)
train_index, test_index = next(shuffle_split.split(features))

# The scaler is fitted on the training rows only, so no statistics of the
# held-out rows leak into training; it is then applied to every row.
scaler = StandardScaler().fit(features[train_index])
sample = scaler.transform(features)

X_train, y_train = sample[train_index], labels[train_index]
X_test, y_test = sample[test_index], labels[test_index]

In [None]:
#Creating the model (this part takes some time)
mlp = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)

#Evaluating the accuracy of the model
# The first score is computed on the training data, the second one on the
# test data, and each line is labelled accordingly.
print("Accuracy of the model (Training): ",str(mlp.score(X_train, y_train)))
print("Accuracy of the model (Testing): ",str(mlp.score(X_test, y_test)))

In [None]:
#Making a prediction
# Other files from the same audio folder that can be tried instead:
#   keyboard_electronic_001-057-100.wav
#   organ_electronic_001-090-100.wav
#   vocal_synthetic_003-105-100.wav
#   brass_acoustic_006-030-075.wav
prediction_file = os.path.join('nsynth-test', 'audio', 'bass_synthetic_033-022-050.wav')

prediction = []
prepare(prediction_file, prediction, None)

if len(prediction) == 0:
    # prepare() adds nothing when no time bucket passes its intensity
    # threshold; scaler.transform would raise on the empty list.
    print("No time bucket of the file passed the intensity filter, so no prediction can be made")
else:
    scaled = scaler.transform(prediction)
    midi = mlp.predict(scaled)

    # Only the prediction for the first retained time bucket is reported.
    note = midi_to_note(midi[0])
    if note=="no":
        # midi_to_note accepts MIDI values from 21 to 108 (inclusive).
        print("Value outside of the interval [21,108]")
    else:
        print("MIDI Value: ",midi[0])
        print("Corresponding Note: ", note)