$\newcommand{\xv}{\mathbf{x}}
\newcommand{\Xv}{\mathbf{X}}
\newcommand{\yv}{\mathbf{y}}
\newcommand{\zv}{\mathbf{z}}
\newcommand{\av}{\mathbf{a}}
\newcommand{\Wv}{\mathbf{W}}
\newcommand{\wv}{\mathbf{w}}
\newcommand{\tv}{\mathbf{t}}
\newcommand{\Tv}{\mathbf{T}}
\newcommand{\muv}{\boldsymbol{\mu}}
\newcommand{\sigmav}{\boldsymbol{\sigma}}
\newcommand{\phiv}{\boldsymbol{\phi}}
\newcommand{\Phiv}{\boldsymbol{\Phi}}
\newcommand{\Sigmav}{\boldsymbol{\Sigma}}
\newcommand{\Lambdav}{\boldsymbol{\Lambda}}
\newcommand{\half}{\frac{1}{2}}
\newcommand{\argmax}[1]{\underset{#1}{\operatorname{argmax}}}
\newcommand{\argmin}[1]{\underset{#1}{\operatorname{argmin}}}$

Basic TODO:
 - Get some audio samples
 - Figure out how we are gonna read them in
 - Figure out if we are gonna do fourier analysis or recurrent nn

In [1]:
import os
import random
import soundfile as sf
from sklearn.neural_network import MLPClassifier
from numpy.fft import rfft,rfftfreq
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def ReadAudioData(filename):
    samples,samplerate = sf.read(filename)
    return samples

In [4]:
classes = {'trumpet':0,'violin':1,'guitar':2,'piano':3,'saxaphone':4,
           0:'trumpet',1:'violin',2:'guitar',3:'piano',4:'saxaphone',}

In [5]:
def SplitSamples(samples, widthOfSlice = 2000):
    return np.reshape(samples[:len(samples)-len(samples)%widthOfSlice],(-1,widthOfSlice))

In [6]:
a = np.array([1,2,3,4,5,6,7,8,9])
SplitSamples(a,4)

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [7]:
def DeleteSamplesLowAmplitude(samples):
    threshold = np.mean(samples) * np.std(samples)
    return np.delete(samples,np.where(np.mean(samples,axis=1) < threshold),axis=0)

In [8]:
def FourierTransform(samples):
    return np.abs(rfft(samples))

In [9]:
def FileToData(filename):
    samples,samplerate = sf.read(filename)
    samples = SplitSamples(samples)
    samples = DeleteSamplesLowAmplitude(samples)
    samples = FourierTransform(samples)
    return samples

In [10]:
def GenerateData():
    trumpetTrain,trumpetTest = GenerateDataFromLabel('trumpet')
    violinTrain,violinTest = GenerateDataFromLabel('violin')
    guitarTrain,guitarTest = GenerateDataFromLabel('guitar')
    pianoTrain,pianoTest = GenerateDataFromLabel('piano')
    saxaphoneTrain,saxaphoneTest = GenerateDataFromLabel('saxaphone')
    train = np.vstack((trumpetTrain,violinTrain,guitarTrain,pianoTrain,saxaphoneTrain))
    test = np.vstack((trumpetTest,violinTest,guitarTest,pianoTest,saxaphoneTest))
    traindata = train[:,1:]
    testdata = test[:,1:]
    traintargets = train[:,0].astype(int)
    testtargets = test[:,0].astype(int)
    return traindata,traintargets,testdata,testtargets

def GenerateDataFromLabel(label):
    train = []
    test = []
    for file in os.listdir('samples/' + label):
        if(file.endswith('.wav')):
            if(random.choice([True,True,False])):
               train += FileToData('samples/' + label + '/' + file).tolist()
            else:
               test += FileToData('samples/' + label + '/' + file).tolist()
    train = np.array(train)
    test = np.array(test)
    train = np.insert(train,0,classes[label],axis=1)
    test = np.insert(test,0,classes[label],axis=1)
    return train,test

In [11]:
def CreateModel():
    print('Generating data...')
    trainingData,trainingTargets,testingData,testingTargets = GenerateData()
    print('Finished generating data. Training model...')
    classifier = MLPClassifier(solver='lbfgs',hidden_layer_sizes=(200,50))
    classifier.fit(trainingData,trainingTargets)
    print('Finished training model. Gathering statistics...')
    predicted = classifier.predict(testingData)
    totalNumberCorrect = np.sum(predicted == testingTargets)
    print('total % correct on testing data:',totalNumberCorrect / len(testingTargets))
    predictedVsTargets = np.vstack((predicted,testingTargets)).T
    for c in range(5):
        subset = predictedVsTargets[predictedVsTargets[:,0] == c]
        print('\t' + classes[c].upper())
        print(classes[c],'% correct:',
              "%.2f" % (100*len(subset[subset[:,0]==subset[:,1]]) / len(subset)))
        print('% guessed piano:',
              "%.2f" % (100*len(subset[subset[:,1]==classes['piano']])/len(subset)))
        print('% guessed violin:',
              "%.2f" % (100*len(subset[subset[:,1]==classes['violin']])/len(subset)))
        print('% guessed trumpet:',
              "%.2f" % (100*len(subset[subset[:,1]==classes['trumpet']])/len(subset)))
        print('% guessed guitar:',
              "%.2f" % (100*len(subset[subset[:,1]==classes['guitar']])/len(subset)))
        print('% guessed saxaphone:',
              "%.2f" % (100*len(subset[subset[:,1]==classes['saxaphone']])/len(subset)))

    return classifier

In [None]:
def ClassifyAudioFile(classifier,filename):
    vectors = FileToData(filename)
    if len(vectors) < 1:
        return -1
    predicted = classifier.predict(vectors)
    mostPredicted = np.argmax(np.bincount(predicted))
    return classes[mostPredicted]

In [None]:
classifier = CreateModel()

Generating data...
Finished generating data. Training model...

In [27]:
print('Violin?',ClassifyAudioFile(classifier,'samples/violin/a2.wav'))
print('Guitar?',ClassifyAudioFile(classifier,'samples/guitar/353492__matteshaus__guitchord1.wav'))
print('Trumpet?',ClassifyAudioFile(classifier,'samples/trumpet/357326__mtg__trumpet-b3-bad-richness.wav'))

Violin? violin
Guitar? guitar
Trumpet? trumpet


In [40]:
print('Piano?',ClassifyAudioFile(classifier,'samples/piano/39206__jobro__piano-ff-058.wav'))

Piano? trumpet


In [20]:
classifier = CreateModel()

Generating data...
Finished generating data. Training model...
Finished training model. Gathering statistics...
total % correct on testing data: 0.945274576593
	TRUMPET
trumpet % correct: 96.09
% guessed piano: 0.25
% guessed violin: 2.24
% guessed trumpet: 96.09
% guessed guitar: 0.30
% guessed saxaphone: 1.12
	VIOLIN
violin % correct: 97.49
% guessed piano: 0.07
% guessed violin: 97.49
% guessed trumpet: 0.90
% guessed guitar: 0.48
% guessed saxaphone: 1.06
	GUITAR
guitar % correct: 88.99
% guessed piano: 6.19
% guessed violin: 2.20
% guessed trumpet: 1.62
% guessed guitar: 88.99
% guessed saxaphone: 1.00
	PIANO
piano % correct: 19.72
% guessed piano: 19.72
% guessed violin: 0.66
% guessed trumpet: 1.60
% guessed guitar: 66.10
% guessed saxaphone: 11.92
	SAXAPHONE
saxaphone % correct: 91.07
% guessed piano: 1.45
% guessed violin: 1.55
% guessed trumpet: 1.18
% guessed guitar: 4.75
% guessed saxaphone: 91.07
