In [0]:
import librosa
import numpy as np
import pandas as pd
from google.colab import drive

In [0]:
drive.mount('/content/drive', force_remount=True)

# Building Dataset

In [0]:
def process(file, label):
  try:
    print("load file {}".format(file))
    # here kaiser_fast is a technique used for faster extraction
    
    X, sample_rate = librosa.load(file, res_type='kaiser_fast') 
    X = librosa.resample(X, sample_rate, 16000)
      
    duration = librosa.get_duration(X, sample_rate)
    stft = np.abs(librosa.stft(X))
    mfccs_40 = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)    
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate,n_mels=128,fmax=8000).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
    sr=sample_rate).T,axis=0)
    row = pd.Series({'file_path':file,'mfccs_40':mfccs_40, 'chroma':chroma, 'mel':mel, 'contrast':contrast, 'tonnetz':tonnetz, 'duration':duration,'label':label},name=7)
    return row
  
  except Exception as e:
    print("Error encountered while parsing file: ", file)
    
def download_files(path, label, max=150):
  index = 0
  for file in glob.glob(path):
    if index <=max:
      row = process(file, label)
      df.loc[len(df)] = row
      index = index +1
      

In [0]:
#compute
import glob, os
import pandas as pd
import numpy as np

df = pd.DataFrame(columns=['file_path', 'mfccs_40', 'chroma', 'mel','contrast','tonnetz', 'duration','label'])
df = df.fillna(0) # with 0s rather than NaNs

download_files("drive/My Drive/amd-training/answering-machine/*.wav", label="beep")
download_files("drive/My Drive/amd-training/careangel-recordings/*.wav", label="beep")
download_files("drive/My Drive/amd-training/soundbible_beep_sounds/*.mp3", label="beep")

download_files("drive/My Drive/amd-training/speech-recordings/*.wav", label="speech")
download_files("drive/My Drive/amd-training/answeringmachinemessages/*.mp3", label="speech")

df = df.drop_duplicates(subset=['file_path'], keep=False)
df.dropna(inplace=True)



load file drive/My Drive/amd-training/answering-machine/8cc2e697-18ac-490d-b111-c90350708684.wav
load file drive/My Drive/amd-training/answering-machine/e5030474-e71d-42ef-ae19-9de293a60c05.wav
load file drive/My Drive/amd-training/answering-machine/86e364ff-93a6-44af-8118-080024e6b45d.wav
load file drive/My Drive/amd-training/answering-machine/55b654e5-7d9f-4132-bc98-93e576b2d665.wav
load file drive/My Drive/amd-training/answering-machine/77310681-9ccf-4da7-bf55-dda0815dcf04-2.wav
load file drive/My Drive/amd-training/answering-machine/abb95162-9f09-4b35-a890-6f4aa0e2b66b.wav
load file drive/My Drive/amd-training/answering-machine/bccc4780-02f8-11e9-aa3d-ad1a095d8d72.wav
load file drive/My Drive/amd-training/answering-machine/22071349-44d6-448d-8d76-834db8d97475.wav
load file drive/My Drive/amd-training/answering-machine/cbbc6d2a-5bf3-4ca4-9920-a12ad6d75d9e.wav
load file drive/My Drive/amd-training/answering-machine/e54d1f7e-d1c9-4982-80e9-eeb56e714840.wav
load file drive/My Drive/amd



load file drive/My Drive/amd-training/answering-machine/1b1d6385-7a20-4c3b-9759-b216cb358c51 (1).wav
load file drive/My Drive/amd-training/answering-machine/4d668e28-28d9-480a-9075-f0b4069a7020.wav
load file drive/My Drive/amd-training/answering-machine/ec0dfded-d54e-4c96-838a-8c2b64157469-1.wav
load file drive/My Drive/amd-training/answering-machine/ec0dfded-d54e-4c96-838a-8c2b64157469.wav
load file drive/My Drive/amd-training/answering-machine/rec-8587b299448c48989063c49ee5cf9d59-20190503T080106.wav
load file drive/My Drive/amd-training/answering-machine/rec-fa42988185744bb0a675ec0049d122d5-20190503T080614.wav
load file drive/My Drive/amd-training/answering-machine/rec-7f3a67f622bd47ac902dfbf4a3a89377-20190503T125031.wav
load file drive/My Drive/amd-training/answering-machine/rec-0b63afe6e14a4547ac2e5d75fb64d4bc-20190503T131537.wav
load file drive/My Drive/amd-training/answering-machine/rec-39427f912abe42d29d4fb65312c8697e-20190503T125506.wav
load file drive/My Drive/amd-training/car



load file drive/My Drive/amd-training/soundbible_beep_sounds/Electronic_Chime-KevanGC-495939803.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Computer Error Alert-SoundBible.com-783113881.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Eas Beep-SoundBible.com-238025417.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Bleep-SoundBible.com-1927126940.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Beep-SoundBible.com-923660219.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Morse Code-SoundBible.com-810471357.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Beep Ping-SoundBible.com-217088958.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Busy Signal-SoundBible.com-1695161320.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Radio Interruption-SoundBible.com-1434341263.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Truck Air Brakes-SoundB



load file drive/My Drive/amd-training/soundbible_beep_sounds/Censor Beep-SoundBible.com-250233510.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Grocery Scanning-SoundBible.com-1637675042.mp3




load file drive/My Drive/amd-training/soundbible_beep_sounds/Checkout Scanner Beep-SoundBible.com-593325210.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Microwave Alarm-SoundBible.com-382933696.mp3




load file drive/My Drive/amd-training/soundbible_beep_sounds/Beep 2-SoundBible.com-1798581971.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Beep-SoundBible.com-1689177436.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Backing Up-SoundBible.com-788937884.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Horn Honk-SoundBible.com-1634776698.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Strange Beeping-SoundBible.com-2088039238.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Alarm Clock-SoundBible.com-437257341.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Pager Beeps-SoundBible.com-260751720.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Fuzzy Beep-SoundBible.com-1580329899.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Radar Detector Beeps-SoundBible.com-892148482.mp3
load file drive/My Drive/amd-training/soundbible_beep_sounds/Short Beep Tone-Sou

In [0]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 368 entries, 0 to 367
Data columns (total 8 columns):
file_path    368 non-null object
mfccs_40     368 non-null object
chroma       368 non-null object
mel          368 non-null object
contrast     368 non-null object
tonnetz      368 non-null object
duration     368 non-null float64
label        368 non-null object
dtypes: float64(1), object(7)
memory usage: 25.9+ KB


# DF Processing

In [0]:
df.head()

Unnamed: 0,file_path,mfccs_40,chroma,mel,contrast,tonnetz,duration,label
0,drive/My Drive/amd-training/answering-machine/...,"[-379.78920197198744, 109.52652158523611, -63....","[0.27032481878700204, 0.05348226776316049, 0.0...","[0.005562816976167255, 0.0003082504894551795, ...","[17.815662982405097, 9.270500351294086, 14.366...","[0.016557670236403872, -0.044106911332447894, ...",0.227528,beep
1,drive/My Drive/amd-training/answering-machine/...,"[-503.14043618726924, 97.91657901160586, -88.7...","[0.04605066635072472, 0.23692643368296615, 1.0...","[7.806247901451935e-05, 2.3955645604629927e-05...","[17.19046710455086, 12.898032765815296, 13.740...","[0.04201544101511034, 0.027156067648276636, 0....",0.284172,beep
2,drive/My Drive/amd-training/answering-machine/...,"[-493.4307644356875, 99.41443683761632, -85.67...","[0.05874194507054588, 0.23422284828411624, 1.0...","[0.00014522694641017108, 0.0001266829324090013...","[18.060194986066143, 11.580904339789774, 12.10...","[0.05363053816254126, 0.009149971251823444, -0...",0.284989,beep
3,drive/My Drive/amd-training/answering-machine/...,"[-260.5614328799362, 94.07030790222211, -120.4...","[0.2623837806852157, 1.0, 0.2666184448982902, ...","[0.040916334065404784, 0.029089462654893983, 0...","[12.5846937008122, 9.913521267798984, 11.24649...","[-0.011873148568373328, 0.007187880250277481, ...",0.121633,beep
4,drive/My Drive/amd-training/answering-machine/...,"[-432.81462243071866, 123.00607628658472, -66....","[0.23865130126866835, 0.05035802591554152, 0.0...","[6.847455416667602e-05, 0.00011353229587751057...","[12.118921583096693, 10.282943235291603, 13.13...","[0.030742900690135584, -0.03863135972705621, 0...",0.228889,beep


In [0]:
def one_hot_encode(df):
  #convert label seriers to ints
  from sklearn import preprocessing
  le = preprocessing.LabelEncoder()
  df['label'] = le.fit_transform(df['label'].astype(str))

  print(df.label.value_counts())
  le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
  print(le_name_mapping)
  class_names = le.classes_
  return df

def remove_long_samples(df, duration=10):
  df = df.copy()
  df = df.drop(df[ df['duration']>duration].index)
  df.reset_index(drop=True, inplace=True)
  return df
  

In [0]:
df = one_hot_encode(df)

1    196
0    172
Name: label, dtype: int64
{'beep': 0, 'speech': 1}


In [0]:
df = remove_long_samples(df)

In [0]:
df.label.value_counts()

0    168
1    121
Name: label, dtype: int64

In [0]:
import pickle
import datetime

def generateFeaturesLabels(features_list, df):
  total_features_len = np.sum([len(df[feature][0]) for feature in features_list])
  print("total number of features",total_features_len)
  features, labels = np.empty((0,total_features_len)), np.empty(0)
  for index, row in df.iterrows():
    a = []
    for feature in features_list:
      a.append(row[feature])
      
    features = np.vstack([features,np.hstack(a)])
    labels = np.append(labels, row["label"])
  return np.array(features), np.array(labels, dtype = np.int)

def score(model, X_test, y_test):
  print("Score:",model.score(X_test, y_test))

  cross_val_scores = cross_val_score(model, X, y, cv=5, scoring='f1_macro')
  print("cross_val_scores:", cross_val_scores)
  print("Accuracy: %0.2f (+/- %0.2f)" % (cross_val_scores.mean(), cross_val_scores.std() * 2))

  predictions = model.predict(X_test)

  print(metrics.accuracy_score(y_test, predictions))
  print(metrics.classification_report(y_test, predictions))
  print(metrics.confusion_matrix(y_test, predictions))
  print("")


def save_model(model):
  filename = "{}-{}.pkl".format(model.__class__.__name__,datetime.datetime.now().strftime("%Y%m%dT%H%M"))
  pickle.dump(model, open(filename, 'wb'))

  from google.colab import files
  files.download(filename)

## Training With only feature (mfccs_40)

In [0]:
#working model
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics


features = ['mfccs_40']
X, y = generateFeaturesLabels(features, df)

total number of features 40


### Training

## Train Classifiers

In [0]:
# Code source: Gaël Varoquaux
#              Andreas Müller
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def train_classifiers(X, y):
  trained_classifiers = []
  classifiers = [
      KNeighborsClassifier(3),
      SVC(kernel="linear", C=0.025),
      SVC(gamma=2, C=1),
      GaussianProcessClassifier(1.0 * RBF(1.0)),
      DecisionTreeClassifier(max_depth=5),
      RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
      MLPClassifier(alpha=1),
      AdaBoostClassifier(),
      GaussianNB(),
      QuadraticDiscriminantAnalysis()]

  names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
           "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
           "Naive Bayes", "QDA"]
  i = 0
  for classifier in classifiers:
    print("***"+ names[i] + "*****")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    fit_model = classifier.fit(X_train, y_train)
    trained_classifiers.append(fit_model)
    
    print("Score:",fit_model.score(X_test, y_test))

    cross_val_scores = cross_val_score(fit_model, X, y, cv=5, scoring='f1_macro')
    print("cross_val_scores:", cross_val_scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (cross_val_scores.mean(), cross_val_scores.std() * 2))

    predictions = fit_model.predict(X_test)

    print(metrics.accuracy_score(y_test, predictions))
    print(metrics.classification_report(y_test, predictions))
    print(metrics.confusion_matrix(y_test, predictions))
    print("")
    i = i + 1
  return trained_classifiers

In [0]:
X = list(df.mfccs_40.values)
y = df.label.values
classifiers = train_classifiers(X, y)

***Nearest Neighbors*****
Score: 0.9270833333333334
cross_val_scores: [0.89588235 0.96484848 0.87898659 0.82860007 0.75248139]
Accuracy: 0.86 (+/- 0.14)
0.9270833333333334
              precision    recall  f1-score   support

           0       0.98      0.90      0.94        58
           1       0.86      0.97      0.91        38

   micro avg       0.93      0.93      0.93        96
   macro avg       0.92      0.94      0.93        96
weighted avg       0.93      0.93      0.93        96

[[52  6]
 [ 1 37]]

***Linear SVM*****
Score: 0.8645833333333334
cross_val_scores: [0.93171296 0.98233323 0.91251885 0.96401515 0.61449275]
Accuracy: 0.88 (+/- 0.27)
0.8645833333333334
              precision    recall  f1-score   support

           0       0.92      0.84      0.88        58
           1       0.79      0.89      0.84        38

   micro avg       0.86      0.86      0.86        96
   macro avg       0.86      0.87      0.86        96
weighted avg       0.87      0.86      0.87 

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Score: 0.9479166666666666
cross_val_scores: [0.86117647 0.98233323 0.91314765 0.84920635 0.73990874]
Accuracy: 0.87 (+/- 0.16)
0.9479166666666666
              precision    recall  f1-score   support

           0       0.98      0.93      0.96        58
           1       0.90      0.97      0.94        38

   micro avg       0.95      0.95      0.95        96
   macro avg       0.94      0.95      0.95        96
weighted avg       0.95      0.95      0.95        96

[[54  4]
 [ 1 37]]

***Decision Tree*****
Score: 0.90625
cross_val_scores: [0.85748792 0.96446078 0.85939394 0.90949508 0.62975564]
Accuracy: 0.84 (+/- 0.23)
0.90625
              precision    recall  f1-score   support

           0       0.95      0.90      0.92        58
           1       0.85      0.92      0.89        38

   micro avg       0.91      0.91      0.91        96
   macro avg       0.90      0.91      0.90        96
weighted avg       0.91      0.91      0.91        96

[[52  6]
 [ 3 35]]

***Random Fore



Score: 0.9270833333333334




cross_val_scores: [0.89464286 0.98233323 0.91251885 0.96401515 0.82320099]
Accuracy: 0.92 (+/- 0.11)
0.9270833333333334
              precision    recall  f1-score   support

           0       0.98      0.90      0.94        58
           1       0.86      0.97      0.91        38

   micro avg       0.93      0.93      0.93        96
   macro avg       0.92      0.94      0.93        96
weighted avg       0.93      0.93      0.93        96

[[52  6]
 [ 1 37]]

***AdaBoost*****
Score: 0.90625
cross_val_scores: [0.91274771 0.9469997  0.82424242 0.94631083 0.71994759]
Accuracy: 0.87 (+/- 0.17)
0.90625
              precision    recall  f1-score   support

           0       0.95      0.90      0.92        58
           1       0.85      0.92      0.89        38

   micro avg       0.91      0.91      0.91        96
   macro avg       0.90      0.91      0.90        96
weighted avg       0.91      0.91      0.91        96

[[52  6]
 [ 3 35]]

***Naive Bayes*****
Score: 0.9270833333333334

In [0]:
import glob
correct = []
incorrect = []

def predict_from_file_mfccs(wav_file, model, show_results=False):  
  X, sample_rate = librosa.load(wav_file, res_type='kaiser_fast')
  mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
  X = [mfccs]
  
  file_cat = wav_file.split("/")[-2]
  prediction = model.predict(X)
  
  if show_results:
    print(file_cat)
    print(prediction)
    print("\n")
  
  is_correct = False
  if (file_cat == "beeps" and prediction == 0) or (file_cat == "non-beeps" and prediction == 1):
    is_correct = True

  return is_correct

def run_predictions_mfccs(classifiers):
  for classifier in classifiers:
    num_correct = 0

    print("predction using classifier", classifier)
    path = glob.glob("drive/My Drive/amd-training/sample-files/*/*")
    for file in path:
      is_correct = predict_from_file_mfccs(file, classifier)
      num_correct += is_correct

    print("{} correct out of {}".format(num_correct,len(path)) )


### Predictions

In [0]:
run_predictions_mfccs(classifiers)

predction using classifier KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')
37 correct out of 38
predction using classifier SVC(C=0.025, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
31 correct out of 38
predction using classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=2, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
9 correct out of 38
predction using classifier GaussianProcessClassifier(copy_X_train=True,
             kernel=1**2 * RBF(length_scale=1), max_iter_predict=100,
             multi_class='one_vs_rest', n_jobs=None,
             n_restarts_optimizer=0,

### Test Audio Samples

In [0]:
import IPython.display as ipd
#beep
file = "drive/My Drive/amd-training/sample-files/beeps/rec-c96ee63d346b4c678eb4f57721197398-20190129T160336-beep.wav"
ipd.Audio(file)
print(predict_from_file(file, fit_model))

drive/My Drive/amd-training/sample-files/beeps/rec-c96ee63d346b4c678eb4f57721197398-20190129T160336-beep.wav
[0]


None


In [0]:
import IPython.display as ipd
#beep
file = "drive/My Drive/amd-training/sample-files/non-beeps/rec-c96ee63d346b4c678eb4f57721197398-20190129T160017.wav"
ipd.Audio(file)
print(predict_from_file(file, fit_model))

drive/My Drive/amd-training/sample-files/non-beeps/rec-c96ee63d346b4c678eb4f57721197398-20190129T160017.wav
[0]


None


## Training With mutliple features

In [162]:
features = ['mfccs_40','chroma', 'mel', 'contrast','tonnetz']
X, y = generateFeaturesLabels(features, df)

total number of features 193


In [163]:
multi_feature_classifiers = train_classifiers(X, y)

***Nearest Neighbors*****
Score: 0.8129032258064516
cross_val_scores: [0.93543956 0.85900542 0.76080833 0.87971781 0.8083404 ]
Accuracy: 0.85 (+/- 0.12)
0.8129032258064516
              precision    recall  f1-score   support

           0       0.93      0.68      0.78        77
           1       0.75      0.95      0.84        78

   micro avg       0.81      0.81      0.81       155
   macro avg       0.84      0.81      0.81       155
weighted avg       0.84      0.81      0.81       155

[[52 25]
 [ 4 74]]

***Linear SVM*****
Score: 0.9354838709677419
cross_val_scores: [0.94651189 0.89284086 0.85857995 0.93487395 0.86015038]
Accuracy: 0.90 (+/- 0.07)
0.9354838709677419
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        77
           1       0.94      0.94      0.94        78

   micro avg       0.94      0.94      0.94       155
   macro avg       0.94      0.94      0.94       155
weighted avg       0.94      0.94      0.94 



Score: 0.8903225806451613




cross_val_scores: [0.89318182 0.90372141 0.79096179 0.90209381 0.82697674]
Accuracy: 0.86 (+/- 0.09)
0.8903225806451613
              precision    recall  f1-score   support

           0       0.95      0.82      0.88        77
           1       0.84      0.96      0.90        78

   micro avg       0.89      0.89      0.89       155
   macro avg       0.90      0.89      0.89       155
weighted avg       0.90      0.89      0.89       155

[[63 14]
 [ 3 75]]

***AdaBoost*****
Score: 0.8903225806451613
cross_val_scores: [0.96805257 0.93605442 0.84732645 0.96768215 0.91396855]
Accuracy: 0.93 (+/- 0.09)
0.8903225806451613
              precision    recall  f1-score   support

           0       0.95      0.82      0.88        77
           1       0.84      0.96      0.90        78

   micro avg       0.89      0.89      0.89       155
   macro avg       0.90      0.89      0.89       155
weighted avg       0.90      0.89      0.89       155

[[63 14]
 [ 3 75]]

***Naive Bayes*****
Sco



In [0]:
def predict_multiple_features_from_file(file, model,show_results=False):
  X, sample_rate = librosa.load(file, res_type='kaiser_fast') 
  X = librosa.resample(X, sample_rate, 16000)

  duration = librosa.get_duration(X, sample_rate)
  stft = np.abs(librosa.stft(X))
  mfccs_40 = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
  chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)    
  mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate,n_mels=128,fmax=8000).T,axis=0)
  contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
  tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
  sr=sample_rate).T,axis=0)

  a = [mfccs_40, chroma, mel, contrast, tonnetz]
  
  
  total_len = 0
  for f in a:
    total_len += f.shape[0]
 
  features = np.vstack([np.empty((0,total_len)),np.hstack(a)])
  
  file_cat = file.split("/")[-2]
  prediction = model.predict(features)
  
  if show_results:
    print(file)
    print(prediction)
    
  is_correct = False
  if (file_cat == "beeps" and prediction == 0) or (file_cat == "non-beeps" and prediction == 1):
    is_correct = True

  return is_correct
  
  
def run_multi_features_predictions(classifiers):
  for classifier in classifiers:
    num_correct = 0

    print("predction using classifier", classifier)
    path = glob.glob("drive/My Drive/amd-training/sample-files/*/*")
    for file in path:
      is_correct = predict_multiple_features_from_file(file, classifier)
      num_correct += is_correct

    print("{} correct out of {}".format(num_correct,len(path)) )


In [0]:
run_multi_features_predictions(multi_feature_classifiers)

predction using classifier KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')
36 correct out of 38
predction using classifier SVC(C=0.025, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
35 correct out of 38
predction using classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=2, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
10 correct out of 38
predction using classifier GaussianProcessClassifier(copy_X_train=True,
             kernel=1**2 * RBF(length_scale=1), max_iter_predict=100,
             multi_class='one_vs_rest', n_jobs=None,
             n_restarts_optimizer=0

### Refactor

Here, we're trying to see if combining the features into one array
TLDR, no difference

In [0]:
def padarray(A, size):
    t = size - len(A)
    return np.pad(A, pad_width=(0, t), mode='constant')

In [0]:
l = 0
features_list = ['mfccs_40', 'chroma', 'mel', 'contrast', 'tonnetz']
for feature in features_list:
  row = df[feature][0]
  z = len(row)
  if z > l:
    l = z    
l

b = []
c = []
for index, row in df.iterrows():
  a = []
  for feature in features_list:
    f = row[feature]
    if len(f) < l:
      a.append(padarray(f, l))
    else:
      a.append(row[feature])
  c.append(row['label'])
    
  b.append(a)

In [0]:
b = np.array(b)
c = np.array(c)
b.shape,c.shape

((279, 5, 128), (279,))

In [0]:
# X = np.array([[ x1, x2 ]])
# print(X.shape)

nsamples, nx, ny = b.shape
B = b.reshape((nsamples,nx*ny))
print(B.shape)

X_train, X_test, y_train, y_test = train_test_split(B, c, test_size=0.33, random_state=42)

# clf1 = RandomForestClassifier(n_estimators=100, max_depth=2,random_state=0)
# clf1.fit(B, c)
# X,y

(279, 640)


In [0]:
clf = RandomForestClassifier(n_estimators=100, max_depth=2,random_state=42)
fit_model = clf.fit(X_train, y_train)
print("Score:",fit_model.score(X_test, y_test))

Score: 0.9247311827956989


In [0]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((186, 640), (93, 640), (186,), (93,))

In [0]:
def predict_from_file(file):
  X, sample_rate = librosa.load(file, res_type='kaiser_fast') 
  duration = librosa.get_duration(X, sample_rate)
  stft = np.abs(librosa.stft(X))
  mfccs_40 = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
  chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)    
  mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate,n_mels=128,fmax=8000).T,axis=0)
  contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
  tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
  sr=sample_rate).T,axis=0)

  l1 = 0
  features = [mfccs_40, chroma, mel, contrast, tonnetz]
  for feature in features:
    z1 = len(feature)
    if z1 > l1:
      l1 = z1    
  l1

  b1 = []
  c1 = []

  a1 = []
  for f in features:
    if len(f) < l:
      a1.append(padarray(f, l))
    else:
      a1.append(f)

  b1.append(a1)
  
  b1 = np.array(b1)
  b1.shape

  nsamples, nx, ny = b1.shape
  b1 = b1.reshape((nsamples,nx*ny))
#   print(b1.shape)
  
  prediction = clf.predict(b1)
  
  file_cat = file.split("/")[-2]
  
  print(file)
  print(prediction)
  if (file_cat == "beeps" and prediction == 0) or (file_cat == "non-beeps" and prediction == 1):
    correct.append(file)
  else:
    incorrect.append(file)
  print("\n")


In [0]:
import glob
index = 0
correct = []
incorrect = []
# loaded_model = pickle.load(open("/content/RandomForestClassifier-20190502T1202.pkl", "rb"))

for file in glob.glob("drive/My Drive/amd-training/sample-files/*/*"):
  predict_from_file(file)
  index = index+1
  
print("correct",len(correct))
print("incorrect",len(incorrect))



drive/My Drive/amd-training/sample-files/beeps/rec-838c2f2ae3ba4e72b5ec1846d42e216f-20190129T125904-beep.wav
[0]


drive/My Drive/amd-training/sample-files/beeps/rec-838c2f2ae3ba4e72b5ec1846d42e216f-20190129T134024-beep.wav
[0]


drive/My Drive/amd-training/sample-files/beeps/rec-838c2f2ae3ba4e72b5ec1846d42e216f-20190129T134058-beep.wav
[0]


drive/My Drive/amd-training/sample-files/beeps/rec-5017b54e7b76476098786a4112d9463b-20190129T134059-beep.wav
[0]


drive/My Drive/amd-training/sample-files/beeps/rec-5017b54e7b76476098786a4112d9463b-20190129T134025-speech-beep.wav
[1]


drive/My Drive/amd-training/sample-files/beeps/rec-c96ee63d346b4c678eb4f57721197398-20190129T160000-beep.wav
[1]


drive/My Drive/amd-training/sample-files/beeps/rec-c96ee63d346b4c678eb4f57721197398-20190129T160336-beep.wav
[1]


drive/My Drive/amd-training/sample-files/beeps/rec-c96ee63d346b4c678eb4f57721197398-20190129T160034-beep.wav
[0]


drive/My Drive/amd-training/sample-files/beeps/41d929b9-59d2-4634-bdea-7f

(1, 640)


In [0]:

save_model(trained_classifiers[-2])

## Audio Augmentation


This section is to expirment on using audio augmentation on dataset
augmentations are found here: https://www.kaggle.com/huseinzol05/sound-augmentation-librosa

In [0]:
def change_speed(wav_file, low=0.1, high=1):
  audio = wav_file.copy()
  # you can change low and high here
  length_change = np.random.uniform(low, high)
  speed_fac = 1.0  / length_change
  print("resample length_change = ",length_change)
  tmp = np.interp(np.arange(0,len(audio),speed_fac),np.arange(0,len(audio)),audio)
  minlen = min(audio.shape[0], tmp.shape[0])
  audio *= 0
  audio[0:minlen] = tmp[0:minlen]
  return audio

def change_pitch(wav_file, sample_rate):
  y_pitch = wav_file.copy()
  bins_per_octave = 12
  pitch_pm = 2
  pitch_change =  pitch_pm * 2*(np.random.uniform())   
  print("pitch_change = ",pitch_change)
  y_pitch = librosa.effects.pitch_shift(y_pitch.astype('float64'), 
                                      sample_rate, n_steps=pitch_change, 
                                      bins_per_octave=bins_per_octave)
  return y_pitch

def add_noise(wav_file, amt=0.005):
  y_noise = wav_file.copy()
  # you can take any distribution from https://docs.scipy.org/doc/numpy-1.13.0/reference/routines.random.html
  noise_amp = amt*np.random.uniform()*np.amax(y_noise)
  y_noise = y_noise.astype('float64') + noise_amp * np.random.normal(size=y_noise.shape[0])
  return y_noise

def stretch(wav_file, amt=1.1):
  input_length = len(wav_file)
  streching = wav_file.copy()
  streching = librosa.effects.time_stretch(streching.astype('float'), amt)
  if len(streching) > input_length:
    streching = streching[:input_length]
  else:
    streching = np.pad(streching, (0, max(0, input_length - len(streching))), "constant")
  return streching

### Audio Sample tests

In [131]:
from IPython.display import Audio

audio, sample_rate = librosa.load("/content/drive/My Drive/amd-training/sample-files/beeps/41d929b9-59d2-4634-bdea-7fd7f4f18b05.wav")
Audio(audio, rate=sample_rate)


In [0]:
Audio(change_speed(audio), rate=sample_rate)

resample length_change =  0.3702613474139048


In [0]:
Audio(change_pitch(audio), rate=sample_rate)

pitch_change =  1.5955860907844683


In [0]:
Audio(stretch(audio, 0.5), rate=sample_rate)

In [137]:
Audio(add_noise(audio, 0.01), rate=sample_rate)

### Train

we'll take the audio samples from google drive, write to the local dir of this notebook, and add augmentation for all samples. We'll have 2 sample datasets, one thats augmented, and one that is the orginal

In [0]:
import shutil
shutil.rmtree('output', ignore_errors=True)

In [0]:
def process(file, label, augment=False):
  try:
    print("load file {}".format(file))
    # here kaiser_fast is a technique used for faster extraction
    
    X, sample_rate = librosa.load(file, res_type='kaiser_fast') 
    X = librosa.resample(X, sample_rate, 16000)
    if augment:
      X = change_speed(X)
      X = change_pitch(X, sample_rate)
      X = stretch(X, float(np.random.uniform(0.5, 1, 1)))
      X = add_noise(X, float(np.random.uniform(0.1, 1, 1)))
      
    return X, 16000
  
  except Exception as e:
    print("Error encountered while parsing file: ", e)
    
def download_files(path, label, augment=False, max=150):
  index = 0
  directory = "output/"+label
  try:
    if not os.path.exists(directory):
      os.makedirs(directory)
  except:
    pass
  
  for file in glob.glob(path):
    if index <=max:
      audio, sample_rate = process(file, label, augment)
      if augment:
        file_name = "_augmented_"+file.split("/")[-1]
      else:
        file_name = file.split("/")[-1]

      librosa.output.write_wav(directory+"/"+file_name, audio, sample_rate)

In [145]:
import glob, os
import pandas as pd
import numpy as np

download_files("drive/My Drive/amd-training/answering-machine/*.wav", label="beep")
download_files("drive/My Drive/amd-training/careangel-recordings/*.wav", label="beep")

download_files("drive/My Drive/amd-training/answering-machine/*.wav", label="beep", augment=True)

download_files("drive/My Drive/amd-training/speech-recordings/*.wav", label="speech")
download_files("drive/My Drive/amd-training/answeringmachinemessages/*.mp3", label="speech")

download_files("drive/My Drive/amd-training/speech-recordings/*.wav", label="speech",augment=True)


load file drive/My Drive/amd-training/answering-machine/8cc2e697-18ac-490d-b111-c90350708684.wav
load file drive/My Drive/amd-training/answering-machine/e5030474-e71d-42ef-ae19-9de293a60c05.wav
load file drive/My Drive/amd-training/answering-machine/86e364ff-93a6-44af-8118-080024e6b45d.wav
load file drive/My Drive/amd-training/answering-machine/55b654e5-7d9f-4132-bc98-93e576b2d665.wav
load file drive/My Drive/amd-training/answering-machine/77310681-9ccf-4da7-bf55-dda0815dcf04-2.wav
load file drive/My Drive/amd-training/answering-machine/abb95162-9f09-4b35-a890-6f4aa0e2b66b.wav
load file drive/My Drive/amd-training/answering-machine/bccc4780-02f8-11e9-aa3d-ad1a095d8d72.wav
load file drive/My Drive/amd-training/answering-machine/22071349-44d6-448d-8d76-834db8d97475.wav
load file drive/My Drive/amd-training/answering-machine/cbbc6d2a-5bf3-4ca4-9920-a12ad6d75d9e.wav
load file drive/My Drive/amd-training/answering-machine/e54d1f7e-d1c9-4982-80e9-eeb56e714840.wav
load file drive/My Drive/amd

### Verify sample augmentation

In [0]:
#grab a random sample to verify
aug_file = "output/beep/_augmented_07a3d677-0fdd-4155-a804-37679c039a8e.wav"
norm_file = "output/beep/07a3d677-0fdd-4155-a804-37679c039a8e.wav"


In [147]:
audio, sample_rate = librosa.load(norm_file)
Audio(audio, rate=sample_rate)

In [148]:
audio, sample_rate = librosa.load(aug_file)
Audio(audio, rate=sample_rate)

### Test

In [169]:
#build dataframe from files in "output" directory
def process(file, label):
  try:
    print("load file {}".format(file))
    # here kaiser_fast is a technique used for faster extraction
    
    X, sample_rate = librosa.load(file, res_type='kaiser_fast') 
      
    duration = librosa.get_duration(X, sample_rate)
    stft = np.abs(librosa.stft(X))
    mfccs_40 = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)    
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate,n_mels=128,fmax=8000).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
    sr=sample_rate).T,axis=0)
    is_augmented = "_augmented_" in file
    row = pd.Series({'file_path':file,'mfccs_40':mfccs_40, 'chroma':chroma, 'mel':mel, 'contrast':contrast, 'tonnetz':tonnetz, 'duration':duration,'label':label, 'augmented': is_augmented},name=8)
    return row
  
  except Exception as e:
    print("Error encountered while parsing file: ", file)
    
    
df = pd.DataFrame(columns=['file_path', 'mfccs_40', 'chroma', 'mel','contrast','tonnetz', 'duration','label', 'augmented'])
df = df.fillna(0) # with 0s rather than NaNs

for file in glob.glob("output/beep/*.wav"):
  row = process(file,"beep")
  df.loc[len(df)] = row

for file in glob.glob("output/speech/*.wav"):
  row = process(file,"speech")
  df.loc[len(df)] = row


      

load file output/beep/rec-92ec261b62d843dca160f9a25bdb2db6-20190123T183536.wav
load file output/beep/6d0c8d00-0202-11e9-bb68-51880c8718e4.wav
load file output/beep/rec-374ab07de763469082229087a55b26c0-20190123T185223.wav
load file output/beep/rec-76f5ef4057dc4642807d4289c3944fde-20190123T123507.wav
load file output/beep/8917c4b3-51fa-4456-9384-65678f6e19f9.wav
load file output/beep/ec0dfded-d54e-4c96-838a-8c2b64157469.wav
load file output/beep/_augmented_8c625802-daa5-4a93-8f9e-337fce1903de.wav
load file output/beep/rec-d094529f933a45a69dd85f3dcc4b7d22-20190123T184559.wav
load file output/beep/_augmented_7eaeb600-0202-11e9-bb68-51880c8718e4.wav
load file output/beep/_augmented_8aa7d5ff-b936-49c2-a1d5-bcfa73587bcd-old1.wav
load file output/beep/_augmented_a210698c-5e60-48bd-9ba3-c00981b0427a.wav
load file output/beep/rec-92ec261b62d843dca160f9a25bdb2db6-20190123T184634.wav
load file output/beep/_augmented_c2823a7a-87f9-4689-9b53-d37f76964243.wav
load file output/beep/rec-fa42988185744bb

In [170]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 467 entries, 0 to 466
Data columns (total 9 columns):
file_path    467 non-null object
mfccs_40     467 non-null object
chroma       467 non-null object
mel          467 non-null object
contrast     467 non-null object
tonnetz      467 non-null object
duration     467 non-null float64
label        467 non-null object
augmented    467 non-null object
dtypes: float64(1), object(8)
memory usage: 36.5+ KB


In [171]:
#convert label seriers to ints
df = one_hot_encode(df)

1    242
0    225
Name: label, dtype: int64
{'beep': 0, 'speech': 1}


### Train using mfccs_40

In [164]:
#working model
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics


features = ['mfccs_40']
X, y = generateFeaturesLabels(features)

total number of features 40


In [165]:
classifiers = train_classifiers(X, y)

***Nearest Neighbors*****
Score: 0.8258064516129032
cross_val_scores: [0.95727273 0.87022549 0.77751196 0.89145658 0.79802124]
Accuracy: 0.86 (+/- 0.13)
0.8258064516129032
              precision    recall  f1-score   support

           0       0.88      0.75      0.81        77
           1       0.79      0.90      0.84        78

   micro avg       0.83      0.83      0.83       155
   macro avg       0.83      0.83      0.82       155
weighted avg       0.83      0.83      0.82       155

[[58 19]
 [ 8 70]]

***Linear SVM*****
Score: 0.8451612903225807
cross_val_scores: [0.83731395 0.81814043 0.81009009 0.82551595 0.82633053]
Accuracy: 0.82 (+/- 0.02)
0.8451612903225807
              precision    recall  f1-score   support

           0       0.91      0.77      0.83        77
           1       0.80      0.92      0.86        78

   micro avg       0.85      0.85      0.85       155
   macro avg       0.85      0.84      0.84       155
weighted avg       0.85      0.85      0.84 



Score: 0.8580645161290322




cross_val_scores: [0.92484295 0.88232616 0.81162874 0.90282132 0.88150122]
Accuracy: 0.88 (+/- 0.08)
0.8580645161290322
              precision    recall  f1-score   support

           0       0.89      0.82      0.85        77
           1       0.83      0.90      0.86        78

   micro avg       0.86      0.86      0.86       155
   macro avg       0.86      0.86      0.86       155
weighted avg       0.86      0.86      0.86       155

[[63 14]
 [ 8 70]]

***AdaBoost*****
Score: 0.8580645161290322
cross_val_scores: [0.89361702 0.88285941 0.8245283  0.93541667 0.88150122]
Accuracy: 0.88 (+/- 0.07)
0.8580645161290322
              precision    recall  f1-score   support

           0       0.91      0.79      0.85        77
           1       0.82      0.92      0.87        78

   micro avg       0.86      0.86      0.86       155
   macro avg       0.86      0.86      0.86       155
weighted avg       0.86      0.86      0.86       155

[[61 16]
 [ 6 72]]

***Naive Bayes*****
Sco

### Prediction using mfccs_40

In [166]:
run_predictions_mfccs(classifiers)

predction using classifier KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')
36 correct out of 38
predction using classifier SVC(C=0.025, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
34 correct out of 38
predction using classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=2, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
33 correct out of 38
predction using classifier GaussianProcessClassifier(copy_X_train=True,
             kernel=1**2 * RBF(length_scale=1), max_iter_predict=100,
             multi_class='one_vs_rest', n_jobs=None,
             n_restarts_optimizer=0

In [0]:
save_model(classifiers[2])

### Train using multiple features

In [174]:
features = ['mfccs_40','chroma', 'mel', 'contrast','tonnetz']
X, y = generateFeaturesLabels(features, df)

total number of features 193


In [175]:
multi_feature_classifiers = train_classifiers(X, y)

***Nearest Neighbors*****
Score: 0.8129032258064516
cross_val_scores: [0.93543956 0.85900542 0.76080833 0.87971781 0.8083404 ]
Accuracy: 0.85 (+/- 0.12)
0.8129032258064516
              precision    recall  f1-score   support

           0       0.93      0.68      0.78        77
           1       0.75      0.95      0.84        78

   micro avg       0.81      0.81      0.81       155
   macro avg       0.84      0.81      0.81       155
weighted avg       0.84      0.81      0.81       155

[[52 25]
 [ 4 74]]

***Linear SVM*****
Score: 0.9354838709677419
cross_val_scores: [0.94651189 0.89284086 0.85857995 0.93487395 0.86015038]
Accuracy: 0.90 (+/- 0.07)
0.9354838709677419
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        77
           1       0.94      0.94      0.94        78

   micro avg       0.94      0.94      0.94       155
   macro avg       0.94      0.94      0.94       155
weighted avg       0.94      0.94      0.94 



Score: 0.9032258064516129




cross_val_scores: [0.92484295 0.82696733 0.83379006 0.89094747 0.88150122]
Accuracy: 0.87 (+/- 0.07)
0.9032258064516129
              precision    recall  f1-score   support

           0       0.92      0.88      0.90        77
           1       0.89      0.92      0.91        78

   micro avg       0.90      0.90      0.90       155
   macro avg       0.90      0.90      0.90       155
weighted avg       0.90      0.90      0.90       155

[[68  9]
 [ 6 72]]

***AdaBoost*****
Score: 0.8903225806451613
cross_val_scores: [0.96805257 0.93605442 0.84732645 0.96768215 0.91396855]
Accuracy: 0.93 (+/- 0.09)
0.8903225806451613
              precision    recall  f1-score   support

           0       0.95      0.82      0.88        77
           1       0.84      0.96      0.90        78

   micro avg       0.89      0.89      0.89       155
   macro avg       0.90      0.89      0.89       155
weighted avg       0.90      0.89      0.89       155

[[63 14]
 [ 3 75]]

***Naive Bayes*****
Sco



### Test using Mutliple Features

In [176]:
run_multi_features_predictions(multi_feature_classifiers)

predction using classifier KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')
36 correct out of 38
predction using classifier SVC(C=0.025, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
19 correct out of 38
predction using classifier SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=2, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
29 correct out of 38
predction using classifier GaussianProcessClassifier(copy_X_train=True,
             kernel=1**2 * RBF(length_scale=1), max_iter_predict=100,
             multi_class='one_vs_rest', n_jobs=None,
             n_restarts_optimizer=0

In [0]:
save_model(multi_feature_classifiers[0])

### Tests with only using augmented files in training set

In [0]:
t = df[df['augmented'] == True];

In [0]:
#convert label seriers to ints
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
df['label'] = le.fit_transform(df['label'].astype(str))

In [0]:
print(df.label.value_counts())
le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print(le_name_mapping)
class_names = le.classes_

1    242
0    225
Name: label, dtype: int64
{'beep': 0, 'speech': 1}


In [0]:

augmented_sort = df.sort_values(by=['augmented'], ascending=False)
augmented_sort[:len(t)]
train = augmented_sort[:len(t)]
test = augmented_sort[len(t):]

In [0]:
X_train = list(train['mfccs_40'].values)
y_train = train['label'].values

X_test = list(test['mfccs_40'].values)
y_test = test['label'].values

# X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [0]:
# Code source: Gaël Varoquaux
#              Andreas Müller
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

trained_classifiers = []
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=10, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]
i = 0
for classifier in classifiers:
  print("***"+ names[i] + "*****")
  
  fit_model = classifier.fit(X_train, y_train)
  trained_classifiers.append(fit_model)

  print("Score:",fit_model.score(X_test, y_test))

  cross_val_scores = cross_val_score(fit_model, X, y, cv=5, scoring='f1_macro')
  print("cross_val_scores:", cross_val_scores)
  print("Accuracy: %0.2f (+/- %0.2f)" % (cross_val_scores.mean(), cross_val_scores.std() * 2))

  predictions = fit_model.predict(X_test)

  print(metrics.accuracy_score(y_test, predictions))
  print(metrics.classification_report(y_test, predictions))
  print(metrics.confusion_matrix(y_test, predictions))
  print("")
  i = i + 1
