In [None]:

import os
import numpy as np
import time
import joblib
from matplotlib import pyplot as plt
from sklearn import metrics

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as vgg16_preprocess
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input as resnet50_preprocess
from tensorflow.keras.preprocessing import image

from utils import FEATURES_FOLDER

In [None]:
# Directory layout under FEATURES_FOLDER.
root_dir = FEATURES_FOLDER + '/'
base_dir = root_dir + 'mel_fe/'
train_dir = base_dir + 'training/'
val_dir = base_dir + 'validation/'
test_dir = base_dir + 'test/'

# Image size requested from the generators and the generator batch size.
input_shape = (224, 224)
batch_size = 32

# Backbone network and the layer whose output is used as the feature vector.
cut_level = 'fc2'
net = 'vgg16'
# Any value of `net` other than 'vgg16' falls back to the ResNet50 preprocessing.
preprocess_input = vgg16_preprocess if net == 'vgg16' else resnet50_preprocess
# The cache file name is derived from the backbone and the cut layer so that
# features cached for a different configuration are never loaded (or
# overwritten) by mistake when `net` / `cut_level` are changed.
features_filename = 'features_{}_{}.npy'.format(net, cut_level)

In [None]:
# A single generator definition provides both splits: 80% of the images go to
# the 'training' subset and the remaining 20% to the 'validation' subset.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.2)

# Options shared by both subsets. shuffle=False keeps the batches in the same
# order as the iterator's `.classes` array, which is used as the label vector.
flow_options = dict(
    target_size=input_shape[:2],
    color_mode="rgb",
    shuffle=False,
    batch_size=batch_size,
    class_mode='sparse',
)

train_generator = datagen.flow_from_directory(base_dir, subset='training', **flow_options)

# The held-out 'validation' subset plays the role of the test set below.
test_generator = datagen.flow_from_directory(base_dir, subset='validation', **flow_options)

y_train = train_generator.classes
y_test = test_generator.classes

In [None]:
# Load the ImageNet-pretrained backbone selected by `net`.
if net == 'vgg16':
    base_model = VGG16(weights='imagenet')
elif net == 'resnet50':
    base_model = ResNet50(weights='imagenet')
else:
    # Fail loudly: otherwise `base_model` would be undefined on a fresh kernel,
    # or a stale model from an earlier run would be reused silently.
    raise ValueError("Unsupported net {!r}; expected 'vgg16' or 'resnet50'".format(net))

base_model.summary()

In [None]:
# Cut the network at `cut_level`: the new model maps the original input to the
# output of that layer. It is frozen because it is only used for inference.
base_model = keras.Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(cut_level).output,
)
base_model.trainable = False
base_model.summary()

In [None]:
def extract_features(model, datagen):
  """Run `datagen` through `model` and return one flat feature vector per sample.

  Args:
    model: object exposing a Keras-style `predict` method.
    datagen: batch generator / iterator accepted by `model.predict`.

  Returns:
    2-D array of shape (n_samples, n_features); every dimension of the model
    output after the first is flattened into the feature axis.
  """
  # `predict_generator` is deprecated and no longer available in recent Keras
  # releases; `predict` accepts generators directly.
  features = model.predict(datagen)
  return features.reshape((features.shape[0], -1))

def pre_process(features, std_scaler, pca_scaler):
  """Standardise `features` and project them with PCA.

  Pass `None` for `std_scaler` / `pca_scaler` to fit new ones on `features`
  (training data); pass already-fitted objects to reuse them (held-out data).

  Args:
    features: 2-D array of shape (n_samples, n_features).
    std_scaler: fitted scaler exposing `transform`, or None to fit a
      StandardScaler on `features`.
    pca_scaler: fitted projection exposing `transform`, or None to fit a
      PCA(0.90) on the standardised features.

  Returns:
    Tuple (transformed_features, std_scaler, pca_scaler).
  """
  if std_scaler is None:
    std_scaler = StandardScaler()
    std_scaler.fit(features)
  features = std_scaler.transform(features)

  if pca_scaler is None:
    # Fit PCA on the standardised data: that is the space it is applied to
    # below. Fitting it on the raw features would learn components for a
    # differently centred and scaled space.
    pca_scaler = PCA(0.90)
    pca_scaler.fit(features)
  features = pca_scaler.transform(features)

  return features, std_scaler, pca_scaler

def train(features, labels, model, grid_params):
  """Grid-search `model` over `grid_params` and report the outcome.

  The search uses 10-fold cross-validation scored by accuracy and runs on all
  available cores. The elapsed wall-clock time, the best parameter combination
  and its cross-validated score are printed.

  Args:
    features: training samples passed to `GridSearchCV.fit`.
    labels: training targets passed to `GridSearchCV.fit`.
    model: estimator to tune.
    grid_params: parameter grid explored by the search.

  Returns:
    The fitted GridSearchCV object.
  """
  started_at = time.time()

  search = GridSearchCV(model, grid_params, scoring='accuracy', n_jobs=-1, cv=10)
  search.fit(features, labels)

  elapsed = time.time() - started_at
  print("time_{}_train: --- {} seconds ---".format("model", elapsed))

  summary = (
      ('best_params: {}', search.best_params_),
      ('Accuracy: {}', search.best_score_),
  )
  for template, value in summary:
    print(template.format(value))

  return search


def test(clf, features, labels):
  pred = clf.predict(features)

  fig, ax = plt.subplots(figsize=(6, 6))
  disp = metrics.ConfusionMatrixDisplay(metrics.confusion_matrix(labels, pred), display_labels=range(8))
  disp.plot(ax=ax, colorbar=False,cmap='Blues')

  print('{}'.format(metrics.classification_report(labels, pred)))
  print('Accuracy: {}'.format(metrics.accuracy_score(labels, pred)))


In [None]:
features_path = os.path.join(FEATURES_FOLDER, 'deep_features', features_filename)
if os.path.isfile(features_path):
    # Reuse the features cached by a previous run.
    features = np.load(features_path)
else:
    features = extract_features(base_model, train_generator)
    # np.save does not create missing parent directories; make sure the cache
    # folder exists so the freshly extracted features can actually be stored.
    os.makedirs(os.path.dirname(features_path), exist_ok=True)
    np.save(features_path, features)

# Passing None for both transformers fits a new scaler and PCA on the training features.
X_train, std_scaler, pca_scaler = pre_process(features, None, None)
X_train.shape

# Linear SVM

In [None]:
# Values of the regularisation parameter C explored by the grid search.
svm_params = {'C': [0.01, 0.1, 1, 10]}
# LinearSVC's solver uses a random number generator; fixing the seed makes
# repeated runs of the notebook give the same result.
clf = train(X_train, y_train, LinearSVC(random_state=0), svm_params)

In [None]:
# Push the held-out split through the scaler and PCA fitted on the training features.
X_test, _, _ = pre_process(
    extract_features(base_model, test_generator),
    std_scaler,
    pca_scaler,
)
print(X_test.shape)
test(clf, X_test, y_test)

# RBF SVM

In [None]:
# Grid over the penalty C and the RBF kernel coefficient gamma.
svm_params = {
    'C': [0.01, 0.1, 1, 10],
    'gamma': [0.01, 0.001, 0.0001],
}
clf = train(X_train, y_train, SVC(kernel='rbf'), svm_params)

In [None]:
# Push the held-out split through the scaler and PCA fitted on the training features.
X_test, _, _ = pre_process(
    extract_features(base_model, test_generator),
    std_scaler,
    pca_scaler,
)
print(X_test.shape)
test(clf, X_test, y_test)

# Optional: persist the fitted scaler, PCA and classifier together (disabled).
#joblib.dump([std_scaler, pca_scaler, clf], 'best_resnet50.sav', compress=1)
#joblib.dump([std_scaler, pca_scaler, clf], 'best_vgg16.sav', compress=1)

# Neural network (MLP)

In [None]:
# Grid over the hidden-layer layout and the L2 penalty alpha.
nn_params = {
    'hidden_layer_sizes': [(512, 256), (512, 32), (512, )],
    'alpha' : [0.01, 0.03, 0.05],
}
# random_state pins the weight initialisation, the early-stopping hold-out
# split and the mini-batch sampling, so the search is reproducible across runs.
clf = train(
    X_train,
    y_train,
    MLPClassifier(activation='relu', solver='adam', batch_size=32, early_stopping=True, random_state=0),
    nn_params,
)

In [None]:
# Push the held-out split through the scaler and PCA fitted on the training features.
X_test, _, _ = pre_process(
    extract_features(base_model, test_generator),
    std_scaler,
    pca_scaler,
)
print(X_test.shape)
test(clf, X_test, y_test)