## Multi Layer Perceptron approach to MPF

In [6]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
import warnings
warnings.filterwarnings('ignore')

import seaborn as sns
sns.set(style='whitegrid',
        rc={'lines.linewidth': 2.5,
        'figure.figsize': (10, 8),
        'text.usetex': False,
        # 'font.family': 'sans-serif',
        # 'font.sans-serif': 'Optima LT Std',
        })

from pandas import set_option
set_option("display.max_rows", 10)
pd.options.mode.chained_assignment = None

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from scipy.stats import truncnorm

filename = 'BDOShoham.csv'
training_data = pd.read_csv(filename)
training_data

Unnamed: 0,FlowPattern,Vsl,Vsg,VisL,VisG,DenL,DenG,ST,Ang,ID
0,1,6.30000,0.0250,0.001,0.00002,1000,1.8,0.07,0.0,0.051
1,1,4.00000,0.0400,0.001,0.00002,1000,1.8,0.07,0.0,0.051
2,1,6.30000,0.0400,0.001,0.00002,1000,1.8,0.07,0.0,0.051
3,1,4.00000,0.0630,0.001,0.00002,1000,1.8,0.07,0.0,0.051
4,1,6.30000,0.0630,0.001,0.00002,1000,1.8,0.07,0.0,0.051
...,...,...,...,...,...,...,...,...,...,...
5670,5,2.70096,15.8561,0.001,0.00002,1000,1.8,0.07,-80.0,0.025
5671,5,1.58784,15.8394,0.001,0.00002,1000,1.8,0.07,-80.0,0.025
5672,5,1.05338,16.0692,0.001,0.00002,1000,1.8,0.07,-80.0,0.025
5673,5,1.05397,25.7439,0.001,0.00002,1000,1.8,0.07,-80.0,0.025


In [7]:
# Flow Patterns
# 1=DB  2=SS   3=SW
# 4=A 5=I 7=B
flowpatterns_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00','#1B4F72','#2E86C1']

flowpatterns_labels = ['DB', 'SS', 'SW', 'A', 'I', 'B']

correct_FlowPattern_labels = training_data['FlowPattern'].values

feature_vectors = training_data.drop(['FlowPattern'], axis=1)
feature_vectors.describe()

from sklearn import preprocessing

scaler = preprocessing.StandardScaler().fit(feature_vectors)
scaled_features = scaler.transform(feature_vectors)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(scaled_features, correct_FlowPattern_labels, test_size=0.2, random_state=42)

#clf = MLPClassifier(solver='lbfgs', alpha=.1, hidden_layer_sizes=(25,25,25)) # 3 hidden layers with 25 neurons each

clf = MLPClassifier(solver='sgd', momentum=0.999, alpha=.0001, hidden_layer_sizes=(25,25,25)) # 3 hidden layers with 25 neurons each

clf.fit(X_train,y_train)
conf_te = confusion_matrix(y_test, clf.predict(X_test))

In [8]:
def accuracy(conf):
    total_correct = 0.
    nb_classes = conf.shape[0]
    for i in np.arange(0,nb_classes):
        total_correct += conf[i][i]
    acc = total_correct/sum(sum(conf))
    return acc

print('Predicted accuracy: %.3f%%' % (100*accuracy(conf_te),))

predicted_labels = clf.predict(X_test)

def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """pretty print for confusion matrixes"""
    columnwidth = max([len(x) for x in labels]+[5]) # 5 is value length
    empty_cell = " " * columnwidth
    # Print header
    print ("    " + empty_cell),
    for label in labels:
        print ("%{0}s".format(columnwidth) % label),
    print
    # Print rows
    for i, label1 in enumerate(labels):
        print ("    %{0}s".format(columnwidth) % label1),
        for j in range(len(labels)):
            cell = "%{0}.1f".format(columnwidth) % cm[i, j]
            if hide_zeroes:
                cell = cell if float(cm[i, j]) != 0 else empty_cell
            if hide_diagonal:
                cell = cell if i != j else empty_cell
            if hide_threshold:
                cell = cell if cm[i, j] > hide_threshold else empty_cell
            print (cell),
        print

conf = confusion_matrix(y_test, predicted_labels)
# then print it in a pretty way
#print_cm(conf, flowpatterns_labels)

print('Flow Pattern classification accuracy = %f' % accuracy(conf))

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
print ('Accuracy:', accuracy_score(y_test, predicted_labels))
print ('F1 score:', f1_score(y_test, predicted_labels,average='weighted'))
print ('Recall:', recall_score(y_test, predicted_labels,
                              average='weighted'))
print ('Precision:', precision_score(y_test, predicted_labels,
                                    average='weighted'))
print ('\n clasification report:\n', classification_report(y_test, predicted_labels))
print ('\n confussion matrix:\n',confusion_matrix(y_test, predicted_labels))

Predicted accuracy: 92.247%
Flow Pattern classification accuracy = 0.922467
Accuracy: 0.9224669603524229
F1 score: 0.9215625343017492
Recall: 0.9224669603524229
Precision: 0.9225550015629786

 clasification report:
               precision    recall  f1-score   support

           1       0.86      0.87      0.87       102
           2       0.89      0.63      0.74        27
           3       0.89      0.93      0.91       192
           4       0.93      0.86      0.89       200
           5       0.94      0.96      0.95       593
           7       1.00      0.95      0.98        21

   micro avg       0.92      0.92      0.92      1135
   macro avg       0.92      0.87      0.89      1135
weighted avg       0.92      0.92      0.92      1135


 confussion matrix:
 [[ 89   0   0   0  13   0]
 [  0  17   8   0   2   0]
 [  1   2 178   7   4   0]
 [  0   0  12 172  16   0]
 [ 13   0   3   6 571   0]
 [  0   0   0   0   1  20]]
