In [None]:
COLAB = True

if COLAB: 
  from google.colab import drive
  drive.mount('/content/drive/')
  !unzip src.zip
  !mkdir data && mkdir data/models
  !pip install audiomentations
  speech_path, save_path = "drive/MyDrive/audio-pattern", "drive/MyDrive/audio-pattern"
else:
  speech_path, save_path = "data/raw/speech", "data/processed"

In [2]:
# Project-local code shipped in src/. The cells shown here use FeatureExtractor,
# Dataset, Loader, build_train_simple, train_nn, build_nn and build_cnn;
# Augmenter and build_lstm are imported but not referenced in those cells.
from src.data_processing import FeatureExtractor
from src.dataset import Dataset
from src.loader import Loader, Augmenter
from src.models import build_train_simple, train_nn, build_nn, build_cnn, build_lstm

# Apply seaborn's default theme.
import seaborn as sns
sns.set_theme()

# Seed NumPy's global random generator so runs from a fresh kernel repeat.
import numpy as np
from numpy.random import seed
seed(1)

# Seed TensorFlow's global random generator as well.
import tensorflow
tensorflow.random.set_seed(1)

# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and convergence warnings raised by the libraries above.
import warnings  
warnings.filterwarnings("ignore")

In [3]:
# Build the project Loader from the two paths chosen in the setup cell.
# NOTE(review): the positional True, 0, -1 arguments are not explained here —
# confirm their meaning against src/loader.py (keyword arguments would help).
loader = Loader(speech_path, save_path, True, 0, -1)
# load(False) returns a pair that is unpacked into the raw data and labels
# consumed by the feature-extraction cell below.
orig_data, orig_labels = loader.load(False)

In [4]:
def _load_features(file_name, only_mfcc, labels, raw_data=None):
    """Run FeatureExtractor for one feature file and return its training data.

    The extractor always targets f"{save_path}/features", runs verbosely and is
    asked for its training data with overwrite=False. The result is whatever
    get_training_data returns; callers unpack it as (features, labels).
    """
    extractor = FeatureExtractor(raw_data=raw_data, labels=labels,
                                 save_path=f"{save_path}/features",
                                 file_name=file_name, verbose=True,
                                 only_mfcc=only_mfcc)
    return extractor.get_training_data(overwrite=False)


# Features of the un-augmented recordings: the full set first, then MFCC only.
# Note that `orig_labels` is rebound by the first call and the rebound value is
# what the second call (and the loop below) receives.
orig_features, orig_labels = _load_features("full", False, orig_labels, raw_data=orig_data)
orig_features_mfcc, orig_labels_mfcc = _load_features("mfcc", True, orig_labels, raw_data=orig_data)
# Insert a length-1 axis at position 2 of the MFCC features.
orig_features_mfcc = np.expand_dims(orig_features_mfcc, axis=2)

augmented_data_full, augmented_data_mfcc = [], []

# One (features, labels) pair per augmentation, requested with raw_data=None.
for name in ("speed", "noise", "pitch"):
    full_data, full_labels = _load_features(f"full_augmented_{name}", False, orig_labels)
    augmented_data_full.append((full_data, full_labels))

    # MFCC-only variant, given the same extra axis as the original MFCC features.
    mfcc_data, mfcc_labels = _load_features(f"mfcc_augmented_{name}", True, orig_labels_mfcc)
    mfcc_data = np.expand_dims(mfcc_data, axis=2)
    augmented_data_mfcc.append((mfcc_data, mfcc_labels))

Filename: full found on disk

Filename: mfcc found on disk

Filename: full_augmented_speed found on disk

Filename: mfcc_augmented_speed found on disk

Filename: full_augmented_noise found on disk

Filename: mfcc_augmented_noise found on disk

Filename: full_augmented_pitch found on disk

Filename: mfcc_augmented_pitch found on disk



In [5]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.tree import DecisionTreeClassifier as Dtree
from tqdm import tqdm


def _simple_model_cv(fold_generator):
    """Score SVC, KNN and a depth-10 decision tree on every fold.

    For each ((X_train, y_train), (X_test, y_test)) pair yielded by
    `fold_generator`, the three models are passed to build_train_simple in the
    order SVC, KNN, tree, and its return value is recorded. KNN uses
    int(sqrt(len(X_train))) neighbours.

    Returns a list of three (mean, std) tuples in that same model order.
    """
    svc_scores, knn_scores, tree_scores = [], [], []
    for (X_train, y_train), (X_test, y_test) in tqdm(fold_generator):
        train, test = (X_train, y_train), (X_test, y_test)
        svc_scores.append(build_train_simple(train, test, SVC()))
        neighbours = int(len(X_train) ** 0.5)
        knn_scores.append(build_train_simple(train, test, KNN(neighbours)))
        tree_scores.append(build_train_simple(train, test, Dtree(max_depth=10)))
    return [(np.mean(fold_scores), np.std(fold_scores))
            for fold_scores in (svc_scores, knn_scores, tree_scores)]


# 5-fold generator over the original features only.
d = Dataset(original_data=(orig_features, orig_labels), augmented_data=None)
generator = d.get_cross_val_generator(5)
scores = _simple_model_cv(generator)

# Same evaluation with the augmented feature sets handed to the Dataset.
d = Dataset(original_data=(orig_features, orig_labels), augmented_data=augmented_data_full)
generator = d.get_cross_val_generator(5)
scores_aug = _simple_model_cv(generator)

100%|██████████| 5/5 [00:04<00:00,  1.05it/s]
100%|██████████| 5/5 [00:26<00:00,  5.29s/it]


In [6]:
# Print one "name : mean (std: ...)" line per classical model, first for the
# run without augmentation and then for the run with it. The first table is
# followed by a blank line, the second ends with a single newline.
simple_model_names = ["Svc", "Knn", "Dtree"]
report_sections = (
    ("Simple models cross validation scores without augmentation:\n", scores, "\n\n"),
    ("Simple models cross validation scores with augmentation:\n", scores_aug, "\n"),
)

for heading, results, terminator in report_sections:
    print(heading)
    rows = [f"\t- {a:5} : {b:0.3f} (std: {c:0.3f})" for a, (b, c) in zip(simple_model_names, results)]
    print("\n".join(rows), end=terminator)

Simple models cross validation scores without augmentation:

	- Svc   : 0.588 (std: 0.035)
	- Knn   : 0.441 (std: 0.023)
	- Dtree : 0.403 (std: 0.036)

Simple models cross validation scores with augmentation:

	- Svc   : 0.643 (std: 0.015)
	- Knn   : 0.422 (std: 0.016)
	- Dtree : 0.421 (std: 0.041)


In [7]:
# Per-fold results for the neural networks: slot 0 is filled here with the
# build_nn model, slot 1 is left empty for a later cell.
scores, scores_aug = [[], []], [[], []]

# Same 5-fold evaluation twice: without augmentation, then with the augmented
# full-feature sets.
for bucket, augmented in ((scores, None), (scores_aug, augmented_data_full)):
    d = Dataset(original_data=(orig_features, orig_labels), augmented_data=augmented)
    generator = d.get_cross_val_generator(5)
    for (X_train, y_train), (X_test, y_test) in tqdm(generator):
        outcome = train_nn((X_train, y_train), (X_test, y_test), build_nn,
                           epochs=1500, verbose=0, plot=False)
        # Element 1 of train_nn's result is what gets reported as the fold
        # score — presumably the test metric; confirm in src/models.py.
        bucket[0].append(outcome[1])

100%|██████████| 5/5 [16:12<00:00, 194.50s/it]
100%|██████████| 5/5 [51:24<00:00, 616.92s/it]


In [9]:
# Reset slot 1 of both result lists before refilling it with build_cnn results.
scores[1] = []
scores_aug[1] = []

# MFCC features, first alone and then together with the augmented MFCC sets.
# NOTE(review): the labels passed are `orig_labels`, not `orig_labels_mfcc` —
# presumably the two are identical; confirm.
cnn_runs = ((scores, None), (scores_aug, augmented_data_mfcc))
for target, augmented in cnn_runs:
    d = Dataset(original_data=(orig_features_mfcc, orig_labels), augmented_data=augmented)
    generator = d.get_cross_val_generator(5)
    for (X_train, y_train), (X_test, y_test) in tqdm(generator):
        fold_result = train_nn((X_train, y_train), (X_test, y_test), build_cnn,
                               epochs=1500, verbose=0, plot=False)
        # Keep element 1 of train_nn's result as the fold score.
        target[1].append(fold_result[1])

100%|██████████| 5/5 [19:53<00:00, 238.63s/it]
100%|██████████| 5/5 [1:06:34<00:00, 798.91s/it]


In [10]:
# Summarise the per-fold scores into NEW names instead of rebinding `scores`
# and `scores_aug`. Overwriting them made this cell non-idempotent: a second
# run would take the mean/std of the (mean, std) pairs themselves and print
# wrong numbers without any error.
nn_model_names = ["MLP", "CNN"]
nn_summary = [(np.mean(fold_scores), np.std(fold_scores)) for fold_scores in scores]
nn_summary_aug = [(np.mean(fold_scores), np.std(fold_scores)) for fold_scores in scores_aug]

print("Neural networks cross validation scores without data augmentation:\n")
print("\n".join([f"\t- {a:5} : {b:0.3f} (std: {c:0.3f})" for a, (b, c) in zip(nn_model_names, nn_summary)]), end = "\n\n")
print("Neural networks cross validation scores with data augmentation:\n")
print("\n".join([f"\t- {a:5} : {b:0.3f} (std: {c:0.3f})" for a, (b, c) in zip(nn_model_names, nn_summary_aug)]))

Neural networks cross validation scores without data augmentation:

	- MLP   : 0.611 (std: 0.031)
	- CNN   : 0.490 (std: 0.058)

Neural networks cross validation scores with data augmentation:

	- MLP   : 0.692 (std: 0.012)
	- CNN   : 0.698 (std: 0.030)
