In [30]:
import numpy as np
import pandas as pd
from time import time
import matplotlib.pyplot as plt
from IPython.display import display 

import data_utils as data_utils
# Pretty display for notebooks
%matplotlib inline
pd.set_option('display.max_colwidth', -1)

dataset = pd.read_csv("datasets/dataset_clean.csv")
data_2014 = pd.read_csv("datasets/Base_De_Datos_PQRD_2014.csv")


data_2014['IDPATOLOGIA_2'] = 0
data_2014 = data_2014[dataset.columns.values]

In [31]:
import keras.backend as K

def precision(y_true, y_pred):
    """
    Precision metric.
     Only computes a batch-wise average of precision.
     Computes the precision, a metric for multi-label classification of
    how many selected items are relevant.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def recall(y_true, y_pred):
    """
    Recall metric.
     Only computes a batch-wise average of recall.
     Computes the recall, a metric for multi-label classification of
    how many relevant items are selected.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def fbeta_score(y_true, y_pred, beta=2):
    """Computes the F score.
     The F score is the weighted harmonic mean of precision and recall.
    Here it is only computed as a batch-wise average, not globally.
     This is useful for multi-label classification, where input samples can be
    classified as sets of labels. By only using accuracy (precision) a model
    would achieve a perfect score by simply assigning every class to every
    input. In order to avoid this, a metric should penalize incorrect class
    assignments as well (recall). The F-beta score (ranged from 0.0 to 1.0)
    computes this, as a weighted mean of the proportion of correct class
    assignments vs. the proportion of incorrect class assignments.
     With beta = 1, this is equivalent to a F-measure. With beta < 1, assigning
    correct classes becomes more important, and with beta > 1 the metric is
    instead weighted towards penalizing incorrect class assignments.
    """
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')
     # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score

In [32]:
from keras.models import load_model

model = load_model('weights.best.from_scratch.hdf5', custom_objects={'fbeta_score': fbeta_score})

In [33]:
import pandas as pd
import numpy as np
from keras.utils import to_categorical

dataset_validation = pd.read_csv("datasets/dataset_validation.csv")

labels_validation = dataset_validation[['RIESGO_VIDA']].values
features_validation = dataset_validation.drop(['RIESGO_VIDA'], axis = 1).values

X_validation = np.asarray(list(map(lambda row: row.reshape(7,5,1), features_validation)))
y_validation = to_categorical(labels_validation, 2)

evaluate = model.evaluate(X_validation, y_validation)
print(model.metrics_names)
print(evaluate)

['loss', 'fbeta_score']
[0.14792804222979195, 0.9405944616881969]


In [34]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=10)

for train_index, test_index in kf.split(X_validation):
    X_val = X_validation[train_index]
    y_val = y_validation[train_index]
    evaluate = model.evaluate(X_val, y_val)
    print(evaluate)


[0.14794439127395437, 0.940682524766759]
[0.14784531662991246, 0.9405701753622364]
[0.14738633567987658, 0.9406553969025295]
[0.1480191566444008, 0.9403042478310341]
[0.14800602670505109, 0.9406699320403945]
[0.1479310330632351, 0.9405799453760725]
[0.1475161283949564, 0.9409743377054558]
[0.1480269224216663, 0.9405382215269659]
[0.14790023706625605, 0.9405502690748063]
[0.14870486742696745, 0.9404181680549688]


In [48]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        title='Normalized confusion matrix'
    else:
        title='Confusion matrix'

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
    
## multiclass or binary report
## If binary (sigmoid output), set binary parameter to True
def full_multiclass_report(model,
                           x,
                           y_true,
                           classes,
                           batch_size=32,
                           binary=False):

    # 1. Transform one-hot encoded y_true into their class number
    if not binary:
        y_true = np.argmax(y_true,axis=1)
    
    # 2. Predict classes and stores in y_pred
    y_pred = model.predict_classes(x, batch_size=batch_size)
    
    # 3. Print accuracy score
    print("F2-score : "+ str(fbeta_score(y_true,y_pred)))
    
    print("")
    
    # 4. Print classification report
    print("Classification Report")
    print(classification_report(y_true,y_pred,digits=5))    
    
    # 5. Plot confusion matrix
    cnf_matrix = confusion_matrix(y_true,y_pred)
    print(cnf_matrix)
    plot_confusion_matrix(cnf_matrix,classes=classes)

In [49]:
full_multiclass_report(model,
                       X_validation,
                       y_validation,
                       classes=['LIFE AT RIST', 'LIFE NOT AT RISK'])

AttributeError: 'numpy.dtype' object has no attribute 'base_dtype'