# Confusion Matrix

In [None]:

def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Draw a confusion matrix as an annotated heat map and show it.

    Adapted from https://stackoverflow.com/a/50386871

    Parameters
    ----------
    cm : array-like
        2-D confusion matrix of raw counts. Rows are labelled as the true
        class and columns as the predicted class.
    target_names : sequence of str or None
        Tick labels for both axes. When None the default ticks are kept.
    title : str
        Text placed above the plot.
    cmap : matplotlib colormap or None
        Colormap for the heat map; ``plt.get_cmap('Blues')`` when None.
    normalize : bool
        If True, every row is divided by its row sum and cells are printed
        with four decimals. If False, the raw values are printed with
        thousands separators.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Imported locally so the helper is usable on a fresh kernel even if the
    # notebook's import cell has not been run yet.
    import itertools

    import matplotlib.pyplot as plt
    import numpy as np

    cm = np.asarray(cm)

    # Accuracy is always derived from the raw counts, before normalisation.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalise BEFORE drawing so the colours, the colour bar and the
        # text-contrast threshold all refer to the same numbers that are
        # printed in the cells. Rows without any samples stay at 0 instead of
        # turning into NaN through a division by zero.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than this threshold get white text, the rest black.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out only after the axis labels exist, otherwise they can be clipped.
    plt.tight_layout()
    plt.show()

# Data Pickling

In [None]:
#Import statements
import pickle

In [None]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load files that this project produced itself.
PROCESSED_DIR = '../../data/processed'


def _load_processed(name):
    """Unpickle ``<PROCESSED_DIR>/<name>.pkl`` and return the stored object."""
    with open('{}/{}.pkl'.format(PROCESSED_DIR, name), 'rb') as f:
        return pickle.load(f)


full_set = _load_processed('full_set')
train_set = _load_processed('train_set')
test_set = _load_processed('test_set')
train_set_30 = _load_processed('train_set_30')
test_set_30 = _load_processed('test_set_30')
train_set_70 = _load_processed('train_set_70')
test_set_70 = _load_processed('test_set_70')

In [None]:
# A cell only renders its LAST expression automatically, so the first preview
# must be displayed explicitly or it is silently dropped.
from IPython.display import display

display(test_set_70.head())
train_set.head()

In [None]:
# Split into X, y format: every column except the last goes into X, the last
# column becomes y.
# Column-wise .iloc selects the same data as the former double transpose
# (.T.iloc[...].T) without copying the frame twice and without coercing
# mixed column dtypes to a single common dtype.
X_train = train_set.iloc[:, :-1]
y_train = train_set.iloc[:, -1]

X_test = test_set.iloc[:, :-1]
y_test = test_set.iloc[:, -1]

In [None]:
# Quick look at the first five training labels.
y_train.head(n=5)

# Linear Classifier

In [None]:
# I'm a little suspicious about how complicated the linear kernel is - possibly more than we need.
# A multilayer perceptron with 0 layers is also a linear classifier if we need it.
from sklearn.svm import SVC

# fit() returns the estimator itself, so construction and training are chained.
linclf = SVC(kernel="linear").fit(X_train, y_train)

# Score on the held-out test split; shown as the cell output.
linclf.score(X_test, y_test)

# Multilayer Perceptron

### Experiment with various Neural Network parameters: add or remove nodes, layers and connections, vary the learning rate, epochs and momentum, and validation threshold.

In [None]:
from sklearn.neural_network import MLPClassifier

# https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
# TODO: more non default parameters? E.g. only stochastic gradient descent has been covered in lectures
# Known issue: also doesn't reach convergence before timeout with current settings
mlp = MLPClassifier(random_state=42).fit(X_train, y_train)

# Score on the held-out test split; shown as the cell output.
mlp.score(X_test, y_test)

### Trying it using keras:

In [None]:
# https://machinelearningmastery.com/tutorial-first-neural-network-python-keras/
# https://machinelearningmastery.com/build-multi-layer-perceptron-neural-network-models-keras/
from keras.models import Sequential
from keras.layers import Dense

# Size the input layer from the data instead of hard-coding 2303, so the cell
# keeps working if the number of feature columns changes.
n_features = X_train.shape[1]

# Fully connected network: three ReLU hidden layers and a 10-unit softmax output.
model = Sequential()
model.add(Dense(1024, input_dim=n_features, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

# compile the keras model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit the keras model on the dataset
model.fit(X_train, y_train, epochs=10, batch_size=100)

# evaluate the keras model and report the result (it was previously computed
# but never shown)
loss, accuracy = model.evaluate(X_test, y_test)
print('test loss={:0.4f}; test accuracy={:0.4f}'.format(loss, accuracy))

#Note, if you try running this example in an IPython or Jupyter notebook you may get an error.
#The reason is the output progress bars during training. 
#You can easily turn these off by setting verbose=0 in fit() and evaluate() calls

# 10-fold cross validation

In [None]:
# https://machinelearningmastery.com/evaluate-performance-deep-learning-models-keras/
from sklearn.model_selection import StratifiedKFold

# define 10-fold cross validation test harness
# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise a ValueError if it is set while shuffle=False. Without
# shuffling the splits are already deterministic. To get seeded shuffling
# instead, pass shuffle=True together with random_state.
kfold = StratifiedKFold(n_splits=10, shuffle=False)


# Visualize

In [None]:
# https://keras.io/visualization/
from keras.utils import plot_model
from IPython.display import SVG
from keras.utils import model_to_dot

# SVG(model_to_dot(model).create(prog='dot', format='svg'))

### Results: Use confusion matrices as well as other metrics (TP Rate, FP Rate, Precision, Recall, F Measure, ROC Area).