In [15]:
import numpy             as np
import pandas            as pd
import tensorflow        as tf
import matplotlib.pyplot as plt

from sklearn                 import svm
from sklearn.metrics         import plot_confusion_matrix, ConfusionMatrixDisplay, top_k_accuracy_score

In [17]:
# Run the preprocessing notebook before anything else in this notebook.
# NOTE(review): the cells below use X_train_normal, X_test_normal, y_train, y_test, y,
# labels and all_data_df, none of which are defined in this notebook itself —
# presumably they are all created by this run; confirm against preprocess_data.ipynb.
%run preprocess_data.ipynb

## SVM (linear, rbf, poly)

In [None]:
# Fit one SVC per kernel on the training split and report, on the test split,
# plain accuracy and top-2 accuracy.
#
# Changes relative to the earlier copy-pasted version of this cell:
#   * the top-2 line is now labelled with the kernel that was actually evaluated
#     (it used to read "linear" for the rbf and poly runs as well);
#   * all kernels share one regularisation strength. The poly run referenced a
#     name `c` that is not defined in any visible cell of this notebook, while
#     the other two runs used C=1.
# NOTE(review): if a different C was intended for the poly kernel, change it here.
SVM_C = 1

for kernel in ('linear', 'rbf', 'poly'):
    svm_classifier = svm.SVC(kernel=kernel,
                             probability=True,  # required for predict_proba below
                             C=SVM_C,
                             decision_function_shape='ovo').fit(X_train_normal, y_train)

    # Per-class scores are what top_k_accuracy_score ranks to decide top-2 hits.
    y_preds = svm_classifier.predict_proba(X_test_normal)

    print(f'{kernel} score (accuracy)\t: {svm_classifier.score(X_test_normal, y_test)}')
    print(f'\t{kernel} score (top-2)\t: {top_k_accuracy_score(y_test, y_preds, k=2, labels=labels)}')

# As before, svm_classifier / y_preds are left holding the poly-kernel results.

## Neural Network

In [None]:
# Seed TensorFlow's global RNG before any layer is created.
tf.random.set_seed(11235)

n_features = X_train_normal.shape[1]
n_classes = len(np.unique(y))  # one softmax unit per distinct value in y

# Options shared by every hidden layer.
hidden_kwargs = dict(activation='relu', kernel_initializer='he_uniform')

# Layer stack: 50 -> 30 -> 20 -> 10 relu units, then a softmax over the classes.
# Layers are constructed in the same order they appear in the network.
network_layers = [tf.keras.layers.Dense(50, input_dim=n_features, **hidden_kwargs)]
network_layers += [tf.keras.layers.Dense(width, **hidden_kwargs) for width in (30, 20, 10)]
network_layers.append(
    tf.keras.layers.Dense(n_classes, activation='softmax', kernel_initializer='he_uniform')
)

model = tf.keras.Sequential(network_layers)

# Track plain accuracy and top-2 accuracy; labels are integer class ids,
# hence the "sparse" loss and metrics.
model_metrics = [
    'sparse_categorical_accuracy',
    tf.metrics.SparseTopKCategoricalAccuracy(k=2),
]

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='sgd',
    metrics=model_metrics,
    steps_per_execution=50,
)

# No validation data is passed, so `history` records training-set metrics only.
history = model.fit(
    X_train_normal,
    y_train,
    epochs=200,
    batch_size=256,
    verbose=2,
)

In [None]:
# Print the final-epoch training metrics (accuracy, top-2 accuracy) and plot
# the top-2 accuracy and loss curves recorded by model.fit.
SKIPPED_EPOCHS = 3  # leading epochs left out of the plots

top2_curve = history.history['sparse_top_k_categorical_accuracy']
loss_curve = history.history['loss']

print(history.history['sparse_categorical_accuracy'][-1], '\t', \
      top2_curve[-1])

plt.rcParams.update({'figure.autolayout': True})
fig, axs = plt.subplots(2)

# Plot against real, 1-based epoch numbers. Without explicit x values the
# sliced curves would be drawn starting at 0, shifting the "epoch" axis by the
# number of skipped epochs.
epochs = range(SKIPPED_EPOCHS + 1, len(loss_curve) + 1)

axs[0].plot(epochs, top2_curve[SKIPPED_EPOCHS:])
axs[0].set_title("top-2 accuracy")

axs[1].plot(epochs, loss_curve[SKIPPED_EPOCHS:])
axs[1].set_title("\nloss")  # leading newline adds space below the upper panel
axs[1].set_xlabel('epoch')  # explicit axes call; same panel plt.xlabel targeted
plt.show()

In [None]:
class estimator:
    """Adapter that gives a probability-emitting model a label-style ``predict``.

    The wrapped ``model`` must expose ``predict(X)`` returning a 2-D array-like
    with an ``argmax`` method (one row per sample, one column per class).

    Attributes set on each instance:
        model: the wrapped model.
        _estimator_type: always ``'classifier'``; presumably what lets
            scikit-learn helpers treat this object as a classifier (verify
            against the installed scikit-learn version).
        classes_: the ``classes`` object passed to the constructor, stored as-is.
    """

    # Class-level placeholders; both are overwritten per instance in __init__.
    _estimator_type = ''
    classes_ = []

    def __init__(self, model, classes):
        """Store the wrapped model and class list and mark the instance as a classifier."""
        self.classes_ = classes
        self._estimator_type = 'classifier'
        self.model = model

    def predict(self, X):
        """Return, for each row of ``X``, the column index of the highest model output."""
        return self.model.predict(X).argmax(axis=1)

# Wrap the trained network so it can be passed to ConfusionMatrixDisplay.
class_names = list(all_data_df['label'].unique())
classifier = estimator(model, class_names)

# Draw the row-normalised confusion matrix for the test split on a 12x12 figure.
figsize = (12,12)
cm_ax = plt.subplots(figsize=figsize)[1]  # only the Axes is needed
tmp = ConfusionMatrixDisplay.from_estimator(
    estimator=classifier,
    X=X_test_normal,
    y=y_test,
    cmap='Blues',
    normalize='true',  # each row (true class) sums to 1
    ax=cm_ax,
)