# Assignment 02: Frequency-based audio analysis
### Marta Brasola 905305


In [2]:
# Import packages
import os
import numpy as np
from time import time
from scipy.io import wavfile as wav

import matplotlib.pyplot as plt
import IPython.display as ipd # Notebook only

# Classification tools
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# File management
from google.colab import drive
import tarfile
from shutil import copyfile

# Frequency analysis
from scipy.fft import fft, fftfreq, rfft, rfftfreq
from scipy.signal import spectrogram
from librosa.feature import melspectrogram, mfcc
# from librosa.display import specsho

Implement three distinct feature extractors based respectively on Spectrogram, Mel Spectrogram, and MFCC. Consider a single-channel audio signal, and start from the commands shown during the laboratory.

Rules:

1. Show the effects on classification performance for the dataset "free-spoken-digit-dataset" (recordings.tar).
2. No feature combination is required for this assignment.
3. You can (and should!) use Librosa for the audio description.

In [None]:
# Mount Google Drive so the dataset archive stored there is reachable (Colab only).
drive.mount('/content/gdrive')

# Extract the dataset into the current working directory.
# The absolute path matches the mount point above, so the cell does not depend
# on the current working directory being /content.
# The context manager closes the archive even if extraction fails.
# NOTE(review): extractall() trusts member paths inside the archive; only use it
# on archives from a trusted source.
with tarfile.open('/content/gdrive/MyDrive/Colab Notebooks/Digital Signal/Datasets/recordings.tar') as tar:
    tar.extractall()

In [None]:
# Read one sample recording: wav.read returns (sampling rate, samples).
sound_rate, sound_data = wav.read('recordings/0_jackson_0.wav')

# Show the sampling rate and the shape of the sample array, one per line.
print(sound_rate, sound_data.shape, sep='\n')

In [None]:
# Render an inline audio player for the loaded samples at their sampling rate (notebook only).
ipd.Audio(sound_data, rate=sound_rate)

In [None]:
# Time axis in seconds: sample index divided by the sampling rate.
sound_time = np.arange(sound_data.shape[0]) / sound_rate

# Plot the waveform with labelled axes so the figure is readable on its own.
plt.plot(sound_time, sound_data)
plt.xlabel('Time [s]')
plt.ylabel('Amplitude')
plt.title('Waveform');

In [None]:
# Placeholder for feature extractor
def identity(input):
    """Return ``input`` unchanged.

    Serves as the default (no-op) feature extractor of ``load_data``, so the
    raw signal itself is used as the feature.
    """
    return input

# Data loader
def load_data(feature_extractor=identity, normalize=False,
              data_dir='./recordings', test_size=0.1, random_state=1):
    """Load the recordings, extract features and split them into train/test sets.

    Every ``.wav`` file found in ``data_dir`` (visited in sorted order) is read,
    passed through ``feature_extractor`` and labelled with the part of its file
    name that precedes the first underscore.

    Parameters
    ----------
    feature_extractor : callable
        Function mapping the raw sample array of one file to its features.
    normalize : bool
        If True, standardise each feature with the mean and standard deviation
        computed on the training split only, and apply the same statistics to
        the test split. This requires every feature vector to have the same
        shape.
    data_dir : str
        Folder containing the ``.wav`` recordings.
    test_size : float
        Fraction of the samples reserved for the test split.
    random_state : int
        Seed of the train/test split, kept fixed for reproducibility.

    Returns
    -------
    X_train, X_test, y_train, y_test : list
        Features and labels of the training and test splits.

    Raises
    ------
    ValueError
        If ``data_dir`` contains no ``.wav`` file.
    """
    labels = []
    features = []

    for f in sorted(os.listdir(data_dir)):
        if not f.endswith('.wav'):
            continue

        # Load file and compute the requested features.
        # The sampling rate stored in the file is discarded here.
        _, signal = wav.read(os.path.join(data_dir, f))
        features.append(feature_extractor(signal))

        # Class label: text before the first underscore of the file name
        labels.append(f.split('_')[0])

    if not features:
        raise ValueError("No .wav files found in '%s'" % data_dir)

    # X: features, y: labels
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state)

    if normalize:
        # eps only guards the division against zero-variance features.
        # It must not be added to the numerator, where it would bias every
        # standardised value.
        eps = 0.001
        X_train = np.array(X_train)
        X_train_mean = X_train.mean(axis=0)
        X_train_std = X_train.std(axis=0)
        X_train = list((X_train - X_train_mean) / (X_train_std + eps))

        # The test split reuses the training statistics to avoid data leakage.
        X_test = list((np.array(X_test) - X_train_mean) / (X_train_std + eps))

    return X_train, X_test, y_train, y_test

## SVM with Spectrogram

### Feature extractor

In [None]:
def feats_spectrogram(input, rate=8000, tsize=10):
  """Fixed-length spectrogram feature vector of a single-channel signal.

  The spectrogram is computed with scipy's default settings at sampling
  frequency ``rate``. Only the first ``tsize`` columns (time segments) are
  kept; shorter spectrograms are zero-padded on the right up to ``tsize``
  columns. The resulting 2-D array is returned flattened to 1-D.
  """
  # spectrogram() returns (frequencies, times, values); only the values are used
  spec = spectrogram(input, fs=rate)[2]

  # Keep at most tsize columns, then zero-fill the missing ones
  kept = spec[:, :tsize]
  n_missing = tsize - kept.shape[1]
  padded = np.pad(kept, ((0, 0), (0, n_missing)))

  return padded.flatten()

In [None]:
# Build the standardised train/test splits from spectrogram features.
X_train, X_test, y_train, y_test = load_data(
    feature_extractor=feats_spectrogram,
    normalize=True,
)

In [None]:
# Hyper-parameter grid explored by the cross-validated search
param_grid = {
    'C': [100, 500, 1000],
    'gamma': [0.005, 0.01, 0.1, 0.5, 1.0],
}

# RBF-kernel SVM with balanced class weights, tuned by a 2-fold grid search
clf = GridSearchCV(
    estimator=SVC(kernel='rbf', class_weight='balanced'),
    param_grid=param_grid,
    cv=2,
)

# Fit every parameter combination and report the wall-clock time
t0 = time()
clf.fit(X_train, y_train)
print('Training completed in %0.3fs' % (time() - t0))

In [None]:
# Hyper-parameters of the estimator selected by the cross-validated search
print('Best parameters combination:')
print(f' C: {clf.best_estimator_.C}')
print(f' gamma: {clf.best_estimator_.gamma}')

In [None]:
# Predict the test split with the fitted search, then print the per-class report
y_pred = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Confusion matrix
print('Confusion matrix:')
cm = confusion_matrix(y_test, y_pred)
plt.imshow(cm, cmap=plt.cm.Blues);
# confusion_matrix(y_true, y_pred) puts the true classes on the rows and the
# predicted classes on the columns. imshow draws rows along the y-axis, so the
# x-axis is the prediction and the y-axis is the ground truth.
plt.xlabel('Prediction');
plt.ylabel('Ground truth');

## SVM with Mel Spectrogram

### Feature extractor

In [None]:
def feats_mel(input, rate=8000, tsize=10):
  """Fixed-length Mel-spectrogram feature vector of a single-channel signal.

  The signal is converted to floating point and passed to ``melspectrogram``
  with sampling rate ``rate``. Only the first ``tsize`` columns of the result
  are kept (presumably time frames -- confirm against the librosa docs);
  shorter results are zero-padded on the right up to ``tsize`` columns. The
  2-D array is returned flattened to 1-D.
  """
  # Multiplying by 1.0 promotes integer samples to floating point
  samples = input * 1.0
  mel_spec = melspectrogram(y=samples, sr=rate)

  # Keep at most tsize columns, then zero-fill the missing ones
  kept = mel_spec[:, :tsize]
  n_missing = tsize - kept.shape[1]
  padded = np.pad(kept, ((0, 0), (0, n_missing)))

  return padded.flatten()

In [None]:
# Build the standardised train/test splits from Mel-spectrogram features.
X_train, X_test, y_train, y_test = load_data(
    feature_extractor=feats_mel,
    normalize=True,
)

In [None]:
# Hyper-parameter grid explored by the cross-validated search
param_grid = {
    'C': [100, 500, 1000],
    'gamma': [0.005, 0.01, 0.1, 0.5, 1.0],
}

# RBF-kernel SVM with balanced class weights, tuned by a 2-fold grid search
clf = GridSearchCV(
    estimator=SVC(kernel='rbf', class_weight='balanced'),
    param_grid=param_grid,
    cv=2,
)

# Fit every parameter combination and report the wall-clock time
t0 = time()
clf.fit(X_train, y_train)
print('Training completed in %0.3fs' % (time() - t0))

In [None]:
# Hyper-parameters of the estimator selected by the cross-validated search
print('Best parameters combination:')
print(f' C: {clf.best_estimator_.C}')
print(f' gamma: {clf.best_estimator_.gamma}')

In [None]:
# Predict the test split with the fitted search, then print the per-class report
y_pred = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Confusion matrix
print('Confusion matrix:')
cm = confusion_matrix(y_test, y_pred)
plt.imshow(cm, cmap=plt.cm.Blues);
# confusion_matrix(y_true, y_pred) puts the true classes on the rows and the
# predicted classes on the columns. imshow draws rows along the y-axis, so the
# x-axis is the prediction and the y-axis is the ground truth.
plt.xlabel('Prediction');
plt.ylabel('Ground truth');

## SVM with MFCC

### Feature extractor

In [None]:
def feats_mfcc(input, rate=8000, tsize=10):
  """Fixed-length MFCC feature vector of a single-channel signal.

  The signal is converted to floating point and passed to ``mfcc`` with
  sampling rate ``rate``. Only the first ``tsize`` columns of the result are
  kept (presumably time frames -- confirm against the librosa docs); shorter
  results are zero-padded on the right up to ``tsize`` columns. The 2-D array
  is returned flattened to 1-D.
  """
  # The signal must be passed as the keyword argument ``y``: recent librosa
  # releases make the arguments of mfcc() keyword-only, so a positional call
  # raises TypeError. Multiplying by 1.0 promotes integer samples to float.
  mfccs = mfcc(y=input*1.0, sr=rate)

  # Keep at most tsize columns, then zero-fill the missing ones
  output = mfccs[:, :tsize]
  output = np.pad(output, ((0, 0), (0, tsize - output.shape[1])))

  return output.flatten()

In [None]:
# Build the standardised train/test splits from MFCC features.
X_train, X_test, y_train, y_test = load_data(
    feature_extractor=feats_mfcc,
    normalize=True,
)

In [None]:
# Hyper-parameter grid explored by the cross-validated search
param_grid = {
    'C': [100, 500, 1000],
    'gamma': [0.005, 0.01, 0.1, 0.5, 1.0],
}

# RBF-kernel SVM with balanced class weights, tuned by a 2-fold grid search
clf = GridSearchCV(
    estimator=SVC(kernel='rbf', class_weight='balanced'),
    param_grid=param_grid,
    cv=2,
)

# Fit every parameter combination and report the wall-clock time
t0 = time()
clf.fit(X_train, y_train)
print('Training completed in %0.3fs' % (time() - t0))

In [None]:
# Hyper-parameters of the estimator selected by the cross-validated search
print('Best parameters combination:')
print(f' C: {clf.best_estimator_.C}')
print(f' gamma: {clf.best_estimator_.gamma}')

In [None]:
# Predict the test split with the fitted search, then print the per-class report
y_pred = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Confusion matrix
print('Confusion matrix:')
cm = confusion_matrix(y_test, y_pred)
plt.imshow(cm, cmap=plt.cm.Blues);
# confusion_matrix(y_true, y_pred) puts the true classes on the rows and the
# predicted classes on the columns. imshow draws rows along the y-axis, so the
# x-axis is the prediction and the y-axis is the ground truth.
plt.xlabel('Prediction');
plt.ylabel('Ground truth');