### Machine Learning for Audio data
##### Pushkar Sheth
##### https://phsheth.github.io

#### Load the Python libraries

In [None]:
from __future__ import print_function, division

import thinkdsp
import thinkplot

import numpy
import numpy as np
import matplotlib.pyplot as plt

from time import perf_counter
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.multiclass import OneVsRestClassifier

%matplotlib inline

#### Load the audio data of Mixer Grinder captured using mobile phone

In [None]:
# Read the three recordings, one per mixer speed setting, in speed order.
mixer_speed_1_raw, mixer_speed_2_raw, mixer_speed_3_raw = (
    thinkdsp.read_wave(wav_path)
    for wav_path in (
        'mixer_speed_1.wav',
        'mixer_speed_2.wav',
        'mixer_speed_3.wav',
    )
)

#### Play the three speeds of Mixer Grinder audio files

In [None]:
# Build the playable audio object for the speed-1 recording; as the cell's
# last expression its value is what the notebook renders as output.
mixer_speed_1_raw.make_audio()

In [None]:
# Build the playable audio object for the speed-2 recording; as the cell's
# last expression its value is what the notebook renders as output.
mixer_speed_2_raw.make_audio()

In [None]:
# Build the playable audio object for the speed-3 recording; as the cell's
# last expression its value is what the notebook renders as output.
mixer_speed_3_raw.make_audio()

#### Plot the three mixer audio clips

In [None]:
# Plot the speed-1 recording: the wave's `ts` on the x-axis against its `ys`
# on the y-axis (presumably sample times vs. amplitudes -- confirm in thinkdsp).
plt.plot(mixer_speed_1_raw.ts,mixer_speed_1_raw.ys)
plt.show()

In [None]:
# Plot the speed-2 recording: the wave's `ts` on the x-axis against its `ys`
# on the y-axis (presumably sample times vs. amplitudes -- confirm in thinkdsp).
plt.plot(mixer_speed_2_raw.ts,mixer_speed_2_raw.ys)
plt.show()

In [None]:
# Plot the speed-3 recording: the wave's `ts` on the x-axis against its `ys`
# on the y-axis (presumably sample times vs. amplitudes -- confirm in thinkdsp).
plt.plot(mixer_speed_3_raw.ts,mixer_speed_3_raw.ys)
plt.show()

#### Split the audio data into segments of 0.5 seconds

In [None]:
# Cut five back-to-back windows from the speed-1 recording; each call is
# segment(start, 0.5) with starts 0.5 apart, so the windows do not overlap.
(
    mixer_speed_1_raw_dataset1,
    mixer_speed_1_raw_dataset2,
    mixer_speed_1_raw_dataset3,
    mixer_speed_1_raw_dataset4,
    mixer_speed_1_raw_dataset5,
) = (
    mixer_speed_1_raw.segment(start, 0.5)
    for start in (0.0, 0.5, 1.0, 1.5, 2.0)
)

In [None]:
# Cut six back-to-back windows from the speed-2 recording; each call is
# segment(start, 0.5) with starts 0.5 apart, so the windows do not overlap.
(
    mixer_speed_2_raw_dataset1,
    mixer_speed_2_raw_dataset2,
    mixer_speed_2_raw_dataset3,
    mixer_speed_2_raw_dataset4,
    mixer_speed_2_raw_dataset5,
    mixer_speed_2_raw_dataset6,
) = (
    mixer_speed_2_raw.segment(start, 0.5)
    for start in (0.0, 0.5, 1.0, 1.5, 2.0, 2.5)
)

In [None]:
# Cut four back-to-back windows from the speed-3 recording; each call is
# segment(start, 0.5) with starts 0.5 apart, so the windows do not overlap.
(
    mixer_speed_3_raw_dataset1,
    mixer_speed_3_raw_dataset2,
    mixer_speed_3_raw_dataset3,
    mixer_speed_3_raw_dataset4,
) = (
    mixer_speed_3_raw.segment(start, 0.5)
    for start in (0.0, 0.5, 1.0, 1.5)
)

#### Convert the audio segments to spectra

In [None]:
# One spectrum per speed-1 segment, produced in segment order.
(
    mixer_speed_1_raw_dataset1_spectrum,
    mixer_speed_1_raw_dataset2_spectrum,
    mixer_speed_1_raw_dataset3_spectrum,
    mixer_speed_1_raw_dataset4_spectrum,
    mixer_speed_1_raw_dataset5_spectrum,
) = (
    segment.make_spectrum()
    for segment in (
        mixer_speed_1_raw_dataset1,
        mixer_speed_1_raw_dataset2,
        mixer_speed_1_raw_dataset3,
        mixer_speed_1_raw_dataset4,
        mixer_speed_1_raw_dataset5,
    )
)

In [None]:
# One spectrum per speed-2 segment, produced in segment order.
(
    mixer_speed_2_raw_dataset1_spectrum,
    mixer_speed_2_raw_dataset2_spectrum,
    mixer_speed_2_raw_dataset3_spectrum,
    mixer_speed_2_raw_dataset4_spectrum,
    mixer_speed_2_raw_dataset5_spectrum,
    mixer_speed_2_raw_dataset6_spectrum,
) = (
    segment.make_spectrum()
    for segment in (
        mixer_speed_2_raw_dataset1,
        mixer_speed_2_raw_dataset2,
        mixer_speed_2_raw_dataset3,
        mixer_speed_2_raw_dataset4,
        mixer_speed_2_raw_dataset5,
        mixer_speed_2_raw_dataset6,
    )
)

In [None]:
# One spectrum per speed-3 segment, produced in segment order.
(
    mixer_speed_3_raw_dataset1_spectrum,
    mixer_speed_3_raw_dataset2_spectrum,
    mixer_speed_3_raw_dataset3_spectrum,
    mixer_speed_3_raw_dataset4_spectrum,
) = (
    segment.make_spectrum()
    for segment in (
        mixer_speed_3_raw_dataset1,
        mixer_speed_3_raw_dataset2,
        mixer_speed_3_raw_dataset3,
        mixer_speed_3_raw_dataset4,
    )
)

### Create repeated, noise-augmented data sets for ML model training and testing

In [None]:
def incrsd(data, sdp, rng=None):
    """Return ``data`` with zero-mean Gaussian noise added to every element.

    The noise standard deviation is ``abs(max(data) * sdp)``: it is scaled by
    the single largest value in ``data``, not by each element individually.

    Parameters
    ----------
    data : array_like
        Values to perturb. Any shape is accepted; one independent noise
        sample is drawn per element. Empty input is returned without noise.
    sdp : float
        Noise standard deviation expressed as a multiple of ``max(data)``.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so ``np.random.seed`` still controls the result.

    Returns
    -------
    The result of ``data + noise``; ``data`` itself is not modified.
    """
    # np.max raises on an empty array; with nothing to perturb the scale is
    # irrelevant, so fall back to 0.0 and let an empty noise array be drawn.
    peak = np.max(data) if np.size(data) else 0.0
    scale = np.abs(peak * sdp)
    draw = np.random.normal if rng is None else rng.normal
    # Size the noise by the full shape rather than len(data): for 1-D input
    # this is the same draw, while multi-dimensional input gets per-element
    # noise instead of a broadcast error or one row of noise reused.
    return data + draw(0, scale, np.shape(data))

## Generate the label row ("truth table") for one sample
## l1, l2, l3 = indicator values for mixer speed levels 1, 2 and 3
def truthtablegen_level(l1, l2, l3):
    """Pack the three level indicators into a length-3 NumPy array.

    The values are stored in argument order, ``[l1, l2, l3]``. The notebook
    calls this with one-hot triples such as ``(1, 0, 0)``.
    """
    return np.array((l1, l2, l3))

In [None]:
# Noise scale handed to incrsd() as `sdp`: each augmented copy gets Gaussian
# noise whose standard deviation is sdevmodel times the segment's peak
# spectrum amplitude.
sdevmodel = 2.0

In [None]:
# Augment each speed-1 segment: 120 independently noised copies of its
# amplitude spectrum, generated segment by segment in order.
(
    mixer_speed_1_rds1_df,
    mixer_speed_1_rds2_df,
    mixer_speed_1_rds3_df,
    mixer_speed_1_rds4_df,
    mixer_speed_1_rds5_df,
) = (
    [incrsd(spectrum.amps, sdevmodel) for _copy in range(120)]
    for spectrum in (
        mixer_speed_1_raw_dataset1_spectrum,
        mixer_speed_1_raw_dataset2_spectrum,
        mixer_speed_1_raw_dataset3_spectrum,
        mixer_speed_1_raw_dataset4_spectrum,
        mixer_speed_1_raw_dataset5_spectrum,
    )
)
# Matching labels: 120 rows of [1, 0, 0] (speed level 1) per segment.
(
    mixer_speed_1_rds1_tt,
    mixer_speed_1_rds2_tt,
    mixer_speed_1_rds3_tt,
    mixer_speed_1_rds4_tt,
    mixer_speed_1_rds5_tt,
) = (
    [truthtablegen_level(1, 0, 0) for _copy in range(120)]
    for _segment in range(5)
)

In [None]:
# Augment each speed-2 segment: 120 independently noised copies of its
# amplitude spectrum, generated segment by segment in order.
(
    mixer_speed_2_rds1_df,
    mixer_speed_2_rds2_df,
    mixer_speed_2_rds3_df,
    mixer_speed_2_rds4_df,
    mixer_speed_2_rds5_df,
    mixer_speed_2_rds6_df,
) = (
    [incrsd(spectrum.amps, sdevmodel) for _copy in range(120)]
    for spectrum in (
        mixer_speed_2_raw_dataset1_spectrum,
        mixer_speed_2_raw_dataset2_spectrum,
        mixer_speed_2_raw_dataset3_spectrum,
        mixer_speed_2_raw_dataset4_spectrum,
        mixer_speed_2_raw_dataset5_spectrum,
        mixer_speed_2_raw_dataset6_spectrum,
    )
)
# Matching labels: 120 rows of [0, 1, 0] (speed level 2) per segment.
(
    mixer_speed_2_rds1_tt,
    mixer_speed_2_rds2_tt,
    mixer_speed_2_rds3_tt,
    mixer_speed_2_rds4_tt,
    mixer_speed_2_rds5_tt,
    mixer_speed_2_rds6_tt,
) = (
    [truthtablegen_level(0, 1, 0) for _copy in range(120)]
    for _segment in range(6)
)

In [None]:
# Augment each speed-3 segment: 120 independently noised copies of its
# amplitude spectrum, generated segment by segment in order.
(
    mixer_speed_3_rds1_df,
    mixer_speed_3_rds2_df,
    mixer_speed_3_rds3_df,
    mixer_speed_3_rds4_df,
) = (
    [incrsd(spectrum.amps, sdevmodel) for _copy in range(120)]
    for spectrum in (
        mixer_speed_3_raw_dataset1_spectrum,
        mixer_speed_3_raw_dataset2_spectrum,
        mixer_speed_3_raw_dataset3_spectrum,
        mixer_speed_3_raw_dataset4_spectrum,
    )
)
# Matching labels: 120 rows of [0, 0, 1] (speed level 3) per segment.
(
    mixer_speed_3_rds1_tt,
    mixer_speed_3_rds2_tt,
    mixer_speed_3_rds3_tt,
    mixer_speed_3_rds4_tt,
) = (
    [truthtablegen_level(0, 0, 1) for _copy in range(120)]
    for _segment in range(4)
)

In [None]:
# Sanity check: the list holds 120 noisy spectra, so the shape should be
# (120, number of amplitude values per spectrum).
np.shape(mixer_speed_1_rds1_df)

In [None]:
# Sanity check: 120 label rows of three indicators each, i.e. (120, 3).
np.shape(mixer_speed_1_rds1_tt)

In [None]:
# Feature matrix: transpose each augmented set so samples run along the last
# axis, then let np.block join the 15 sets side by side (speed 1, 2, 3 order).
x = np.block([
    np.transpose(samples)
    for samples in (
        mixer_speed_1_rds1_df,
        mixer_speed_1_rds2_df,
        mixer_speed_1_rds3_df,
        mixer_speed_1_rds4_df,
        mixer_speed_1_rds5_df,
        mixer_speed_2_rds1_df,
        mixer_speed_2_rds2_df,
        mixer_speed_2_rds3_df,
        mixer_speed_2_rds4_df,
        mixer_speed_2_rds5_df,
        mixer_speed_2_rds6_df,
        mixer_speed_3_rds1_df,
        mixer_speed_3_rds2_df,
        mixer_speed_3_rds3_df,
        mixer_speed_3_rds4_df,
    )
])
# Label matrix, assembled in exactly the same set order so that column i of
# y labels column i of x.
y = np.block([
    np.transpose(labels)
    for labels in (
        mixer_speed_1_rds1_tt,
        mixer_speed_1_rds2_tt,
        mixer_speed_1_rds3_tt,
        mixer_speed_1_rds4_tt,
        mixer_speed_1_rds5_tt,
        mixer_speed_2_rds1_tt,
        mixer_speed_2_rds2_tt,
        mixer_speed_2_rds3_tt,
        mixer_speed_2_rds4_tt,
        mixer_speed_2_rds5_tt,
        mixer_speed_2_rds6_tt,
        mixer_speed_3_rds1_tt,
        mixer_speed_3_rds2_tt,
        mixer_speed_3_rds3_tt,
        mixer_speed_3_rds4_tt,
    )
])

In [None]:
# Sanity check: both matrices are laid out with one column per sample, so
# their second dimensions must match (15 sets x 120 copies = 1800); y has
# three rows, one per speed level.
np.shape(x),np.shape(y)

In [None]:
# Flip back to one row per sample before wrapping in DataFrames:
# features in mixer_df, the three label indicators in mixer_tt_df.
mixer_df = pd.DataFrame(x.T)
mixer_tt_df = pd.DataFrame(y.T)

In [None]:
# Preview the first rows of the feature table.
mixer_df.head()

In [None]:
# Preview the first rows of the label table (three indicator columns).
mixer_tt_df.head()

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the shuffle
# reproducible.
# NOTE(review): rows are noisy copies of only 15 base segments, so copies of
# the same segment can fall in both train and test -- the reported accuracy
# may be optimistic. Consider a segment-grouped split; confirm intent.
x_train, x_test, y_train, y_test = train_test_split(
    mixer_df,
    mixer_tt_df,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Sanity check: training features and labels must have the same row count.
np.shape(x_train), np.shape(y_train)

In [None]:
# Sanity check: test features and labels must have the same row count.
np.shape(x_test), np.shape(y_test )

In [None]:
# Wrap a default XGBoost classifier in a one-vs-rest scheme so that each of
# the three label columns is learned by its own binary model, then train it.
classifier_multilabel_xgb = OneVsRestClassifier(estimator=XGBClassifier())
classifier_multilabel_xgb.fit(X=x_train, y=y_train)

In [None]:
# Predict the label indicators for the held-out rows.
y_pred_xgb = classifier_multilabel_xgb.predict(x_test)

In [None]:
# Score the predictions. With multi-column indicator labels, accuracy_score
# reports subset accuracy: a row counts as correct only if all three
# indicators match.
accuracy_xgb = accuracy_score(y_true=y_test, y_pred=y_pred_xgb)
print("Accuracy of XGBoost Classifier: %.2f%%" % (accuracy_xgb * 100.0))