In [1]:
#https://machinelearningmastery.com/how-to-calculate-precision-recall-f1-and-more-for-deep-learning-models/
# demonstration of calculating metrics for a neural network model using sklearn
from sklearn.datasets import make_circles
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Dense

# generate and prepare the dataset
def get_data(n_samples=1000, noise=0.1, n_train=500, random_state=1):
	"""Generate the two-circles dataset and split it into train and test parts.

	The split is sequential: the first ``n_train`` rows become the training
	set and the remaining ``n_samples - n_train`` rows become the test set.
	With the default arguments this yields 500 training and 500 test samples.

	Args:
		n_samples: total number of points to generate (int).
		noise: ``noise`` argument forwarded to ``make_circles``.
		n_train: number of leading rows assigned to the training set.
		random_state: seed forwarded to ``make_circles`` so the data is reproducible.

	Returns:
		Tuple ``(trainX, trainy, testX, testy)``.

	Raises:
		ValueError: if ``n_train`` would leave the train or the test set empty.
	"""
	if not 0 < n_train < n_samples:
		raise ValueError('n_train must be in the range 1..n_samples-1, got %r' % (n_train,))
	# generate dataset
	X, y = make_circles(n_samples=n_samples, noise=noise, random_state=random_state)
	# split into train and test: rows before n_train are train, the rest are test
	trainX, testX = X[:n_train, :], X[n_train:, :]
	trainy, testy = y[:n_train], y[n_train:]
	return trainX, trainy, testX, testy

# define and fit the model
def get_model(trainX, trainy, hidden_units=100, epochs=300, input_dim=2):
	"""Define, compile and fit a one-hidden-layer binary classifier.

	The network is a ``Sequential`` stack of a ReLU ``Dense`` hidden layer
	followed by a single sigmoid output unit, compiled with binary
	cross-entropy loss and the Adam optimizer, and fitted silently
	(``verbose=0``).

	Args:
		trainX: training inputs passed to ``model.fit``.
		trainy: training labels passed to ``model.fit``.
		hidden_units: width of the hidden layer.
		epochs: number of training epochs.
		input_dim: number of input features per sample; must match ``trainX``.

	Returns:
		The fitted Keras model.
	"""
	# define model
	model = Sequential()
	model.add(Dense(hidden_units, input_dim=input_dim, activation='relu'))
	# single sigmoid unit: the output is read as a probability by the caller
	model.add(Dense(1, activation='sigmoid'))
	# compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	# fit model
	model.fit(trainX, trainy, epochs=epochs, verbose=0)
	return model

# generate data: build the dataset and unpack its train and test arrays
trainX, trainy, testX, testy = get_data()
# fit model on the training arrays only; testX/testy are kept for the metric cells below
# NOTE(review): no random seed for the network is set anywhere visible here, so the
# fitted weights (and therefore the reported scores) presumably vary between runs.
model = get_model(trainX, trainy)

Using TensorFlow backend.


In [2]:
# predict probabilities for test set
yhat_probs = model.predict(testX, verbose=0)
# reduce to 1d array (the model has a single output column)
yhat_probs = yhat_probs[:, 0]
# derive crisp classes by thresholding the sigmoid output at 0.5.
# Sequential.predict_classes() is not available in newer TensorFlow/Keras
# releases; for a single sigmoid unit this threshold is its equivalent, and it
# reuses the probabilities instead of running a second forward pass.
yhat_classes = (yhat_probs > 0.5).astype('int32')
print('yhat_classes:', yhat_classes)

yhat_classes: [1 0 0 0 1 0 0 1 1 1 1 0 0 1 1 1 0 1 0 0 0 1 1 0 1 0 0 1 1 0 1 1 1 1 0 1 0
 0 1 0 0 0 1 1 0 0 0 1 0 1 1 1 0 1 1 1 0 0 0 1 1 1 0 0 1 1 1 0 0 1 0 1 0 1
 1 1 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 1 0 0 0 0 1 1 0
 1 0 0 0 0 0 0 1 0 0 1 1 1 0 1 0 0 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1
 1 0 0 1 1 1 0 1 1 0 0 0 1 1 0 1 0 1 1 1 1 0 0 1 1 1 1 0 0 0 0 1 1 1 0 1 0
 0 0 1 0 0 0 1 0 1 0 1 1 0 0 1 0 0 1 1 1 0 0 0 0 0 1 0 0 1 0 1 1 1 1 1 1 1
 1 1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 1 1 1 1 1 1 0 0 0 0 1
 0 1 0 1 1 1 0 1 0 1 1 0 1 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 1 1 1 0 0 1 0 0 0
 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 0 0 0 1 0 0 1 1 0 0 1 0 0
 1 1 0 1 1 0 0 0 1 0 0 1 0 0 0 1 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 0 0 0
 0 0 1 0 1 0 1 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 1 1 1 0 0 1 0 1 0 0
 0 0 0 0 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 1 0 1 1 1
 0 1 0 1 1 1 0 0 0 1 1 1 1 1 0 0 0 0 0 1 0 1 1 0 1 0 0 0 1 0 0 1 0 1 0 1 1
 0 0 0 1 1 

In [3]:
# Score the crisp class predictions against the true test labels.
#   accuracy  = (tp + tn) / (p + n)
#   precision = tp / (tp + fp)
#   recall    = tp / (tp + fn)
#   f1        = 2 tp / (2 tp + fp + fn)
accuracy = accuracy_score(testy, yhat_classes)
precision = precision_score(testy, yhat_classes)
recall = recall_score(testy, yhat_classes)
f1 = f1_score(testy, yhat_classes)

# report in the same order the scores were computed
for template, score in (
	('Accuracy: %f', accuracy),
	('Precision: %f', precision),
	('Recall: %f', recall),
	('F1 score: %f', f1),
):
	print(template % score)

Accuracy: 0.850000
Precision: 0.844358
Recall: 0.861111
F1 score: 0.852652


In [4]:
# Cohen's kappa and the confusion matrix are computed from the crisp classes;
# ROC AUC is computed from the predicted probabilities instead.
kappa = cohen_kappa_score(testy, yhat_classes)
auc = roc_auc_score(testy, yhat_probs)
matrix = confusion_matrix(testy, yhat_classes)

# report: kappa, then ROC AUC, then the raw confusion matrix
for template, score in (('Cohens kappa: %f', kappa), ('ROC AUC: %f', auc)):
	print(template % score)
print(matrix)

Cohens kappa: 0.699933
ROC AUC: 0.921467
[[208  40]
 [ 35 217]]


In [5]:
from sklearn import metrics
#pre_classes = np.argmax(predicted, axis=-1)
print(metrics.classification_report(testy, yhat_classes))

              precision    recall  f1-score   support

           0       0.86      0.84      0.85       248
           1       0.84      0.86      0.85       252

    accuracy                           0.85       500
   macro avg       0.85      0.85      0.85       500
weighted avg       0.85      0.85      0.85       500

