In [1]:
import numpy as np
import os
import tensorflow as tf
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score



In [2]:
#seed for reproducability
np.random.seed(10)
tf.random.set_seed(20)

In [3]:
def enable_dropout(model, rate=None, custom_objects={}):
    """
    Enables the droput layer - used for monte carlo droput based uncertainty computation
    Note: the weights needs to be reloaded after calling this model
    >>> model = enable_dropout(model)
    >>> model.load_weights('path to model weight')
    :param model:
    :param rate:
    :param custom_objects:
    :return:
    """
    if(rate is not None): assert rate >= 0 and rate < 1, 'dropout rate is out of range'

    model_config = model.get_config()
    for i in range(len(model_config['layers'])):
        class_name = model_config['layers'][i]['class_name']
        if (class_name == 'SpatialDropout2D' or class_name =='Dropout' ):
            model_config['layers'][i]['inbound_nodes'][0][0][-1]['training'] = True
            if (rate is not None): model_config['layers'][i]['config']['rate'] = rate
            #print('dropout enabled')

    model = tf.keras.models.Model.from_config(model_config, custom_objects=custom_objects)
    return model

def load_model(file_name_prefix, model_directory, model, custom_objects=None, **kwarg):
    """
    Loads keras model saved as hd5 and json defination
    :param file_name_prefix:  prefix of the file name
    :return:
    """

    print(file_name_prefix)
    print(model_directory)

    json = os.path.join(os.getcwd(), model_directory, file_name_prefix + '.json')
    with open(json) as j:
        json_string = j.read()

    if (model is None):
        amodel = tf.keras.models.model_from_json(json_string, custom_objects=custom_objects)
    else:
        amodel = model

    if ('enable_dropout' in kwarg and kwarg['enable_dropout'] == True):
        rate = kwarg['dropout_rate'] if 'dropout_rate' in kwarg else None
        print('Loading model by enabling dropout.')
        amodel = enable_dropout(amodel, custom_objects=custom_objects, rate=rate)

    amodel.load_weights(os.path.join(model_directory, file_name_prefix + '.hd5'))
    return amodel

In [4]:
#Load the MCD bayes model by enabling dropout in test phase
model_prefix = 'intent_clf'
model_save_dir = 'savedmodel'
bayes_model_logit = load_model(model_prefix, model_save_dir, None,  enable_dropout=True, rate=0.2)
# The saved model outputs the logits score. Therefore add the softmax layer to get probability of classes
sm_layer = tf.keras.layers.Activation('softmax', name='smact') (bayes_model_logit.output) 
bayes_model = tf.keras.Model(bayes_model_logit.input, sm_layer)

intent_clf
savedmodel
Loading model by enabling dropout.


In [5]:
def predict_mcd(features_in, T=50):
    """
    Use monte carlo dropout to compute the prediction as well as uncertianty of predictions
    params T: number of monte carlo iterations
    """
  
    N_class = bayes_model.outputs[0].get_shape().as_list()[1]
    entropy_func = lambda x: -1 * np.sum(np.log(x + np.finfo(np.float32).eps) * x, axis=1)
    predictive_prob_total = np.zeros((features_in.shape[0], N_class))
    for i in range(T):
        predictive_prob_total += bayes_model.predict(features_in)

    predictive_prob_average = predictive_prob_total / (T * 1.0) 
    uncertainty = entropy_func(predictive_prob_average)
    return predictive_prob_average, uncertainty

In [6]:
##Load test inscope features

model_name ='paraphrase-distilroberta-base-v1'
npzfile = np.load('train_val_test_'+model_name+'.npz',allow_pickle=True)

val_features = npzfile['val_features']
val_labels = npzfile['val_labels']
test_features = npzfile['test_features']
test_labels = npzfile['test_labels']
label_map, label_map_inv = npzfile['label_maps']

In [7]:
# Predict on inscope test set and compute accuracy 
test_pred, uncertainty = predict_mcd(test_features)
insc_acc = accuracy_score(test_labels, test_pred.argmax(axis=1))
print('Inscore accuracy', insc_acc)

Inscore accuracy 0.9215555555555556


In [8]:
#Check in-score accuracy on the samples with lower uncertainty i.e., confident samples 
idx_good= np.where(uncertainty<1.3)
acc_conf = accuracy_score(test_labels[idx_good], test_pred.argmax(axis=1)[idx_good])
print('Accuracy=',acc_conf, ' on confident samples', len(idx_good[0]), 'out of ', len(uncertainty), 'samples')

Accuracy= 0.9497888315344909  on confident samples 4262 out of  4500 samples


In [9]:
# Use uncertainty of classify oos test set
npzfile_oos = np.load('train_val_test_oos_'+model_name+'.npz',allow_pickle=True)
train_features_oos = npzfile_oos['train_features_oos']
train_labels_oos = 1-npzfile_oos['train_labels_oos'] # set all zeros to 1
val_features_oos = npzfile_oos['val_features_oos']
val_labels_oos = 1-npzfile_oos['val_labels_oos'] # set all zeros to 1
test_features_oos = npzfile_oos['test_features_oos']
test_labels_oos =  1-npzfile_oos['test_labels_oos'] # set all zeros to 1
label_map_oos, label_map_inv_oos = npzfile_oos['label_maps_oos']

In [10]:
#create a single oos trainval set for parameter estimation by adding fraction of inscope val set(by setting 0 labels)
Nval_tune=1500
idx_temp = np.random.permutation(val_features.shape[0])[:Nval_tune]
trainval_features_oos = np.vstack([train_features_oos, val_features_oos, val_features[idx_temp] ])
trainval_labels_oos = np.concatenate([train_labels_oos, val_labels_oos, np.zeros(Nval_tune,)])

In [11]:
#trainval_pred_intent_scores_oos  denotes the score for a feature to be classified as one of the intents. 
#When the input feature does not belong to the distribution of the train set(in scope), we expect higher uncertainty associated with it.
#Therefore the the uncertainty alone  can be used to detect oos case.
trainval_pred_intent_scores_oos, trainval_uncertainty = predict_mcd(trainval_features_oos)
#Search for an optimal threshold using f1score as criterion
grid = np.arange(0,trainval_uncertainty.max(),0.1)
fscores = []
for i in grid:
    predi = (trainval_uncertainty > i ).astype(np.uint8)
    fscore = f1_score(trainval_labels_oos, predi)
    fscores.append(fscore)

In [12]:
optimal_threshold = grid[np.argmax(fscores)]
print('Uncertainty threshold that maximizes f1 score ', optimal_threshold)
import matplotlib.pyplot as plt
plt.plot(grid, fscores)
plt.xlabel('uncertainty')
plt.ylabel('f1score')
#plt,show()

Uncertainty threshold that maximizes f1 score  1.3


Text(0, 0.5, 'f1score')

In [13]:
#How this method works in deployment phase?
#When uncertainty <=optimal_threshold - use test_pred_intent_scores_oos to choose one of the intent
#When uncertaity > optimal_threshold - the feature belongs to oos class

In [14]:
#Evaluate on a combined set of  test_features_oos (labels 1) and test_features (label 0)
test_features_ins_oos = np.vstack([test_features, test_features_oos])
test_labels_ins_oos = np.concatenate([ np.zeros(test_features.shape[0]),np.ones(test_features_oos.shape[0])])
test_pred_intent_scores_ins_oos, test_uncertainty = predict_mcd(test_features_ins_oos)
test_pred_ins_oos = (test_uncertainty > optimal_threshold).astype(np.uint8)

#Compute evaluation metrics
recall = recall_score(test_labels_ins_oos, test_pred_ins_oos)
precision  = precision_score(test_labels_ins_oos, test_pred_ins_oos)
f1score = f1_score(test_labels_ins_oos, test_pred_ins_oos)
print('Recall = ', recall, 'Precision =', precision, 'Fscore=', f1score)


Recall =  0.653 Precision = 0.7403628117913832 Fscore= 0.69394261424017
