In [30]:
import json

# Intent count; not referenced by the code visible in this cell.
NUM_INTENTS = 10

# Names of the NLU systems iterated over by the reporting loop below.
NLU_NAMES = ['watson', 'luis', 'snips', 'rasa-sklearn', 'rasa-diet']

# Ten colour names; not referenced in this cell (presumably used for
# plotting elsewhere in the notebook -- TODO confirm).
COLORS = [
    'lightcoral',
    'yellowgreen',
    'deepskyblue',
    'purple',
    'mediumturquoise',
    'mediumorchid',
    'khaki',
    'salmon',
    'darkturquoise',
    'gold',
]

# Results version: default `version` of get_most_ambiguous and part of the
# result file path.
VERSION = 8

# Number of splits; get_most_ambiguous reads splits 1..NUM_SPLITS.
NUM_SPLITS = 10

def load_json(path):
    """Parse the JSON document stored at `path` and return the result.

    The file is opened in binary mode on purpose: `json.load` then detects
    the encoding itself (UTF-8, UTF-16 or UTF-32, with or without a UTF-8
    BOM) instead of decoding with the platform's default text encoding,
    which is not UTF-8 everywhere and would garble or reject non-ASCII
    text.

    Raises whatever `open` raises for an unreadable path and
    `json.JSONDecodeError` for malformed content.
    """
    with open(path, 'rb') as json_file:
        return json.load(json_file)

def get_nlu_result_file(nlu, version, split_num):
    """Return the relative path of one NLU result file.

    The path has the form
    ../results/<nlu>/v<version>/<nlu>_split_<split_num>_results_v<version>.json
    where `version` and `split_num` are converted with `str`.
    """
    version_tag = 'v' + str(version)
    file_name = '_'.join([nlu, 'split', str(split_num), 'results', version_tag]) + '.json'
    return '/'.join(['../results', nlu, version_tag, file_name])

def get_confidence_score(test, rank=0, nlu="watson", level="rank"):
    """
    Read confidence values from the 'intent_ranking' list of one test result.

    With level == "rank", return the 'confidence' of the single entry at
    position `rank` of the ranking.
    With any other level (intended: "model"), return a list holding the
    'confidence' of every entry of the ranking, in ranking order.
    `nlu` is accepted but not used.
    """
    ranking = test['intent_ranking']
    if level != "rank":
        # model level: one confidence per ranked intent
        return [entry['confidence'] for entry in ranking]
    return ranking[rank]['confidence']
    
def remove_empty_preds(data):
    """Return a new list with the entries of `data` whose 'text' is not ''.

    The input is not modified and the order of the kept entries is preserved.
    """
    return [entry for entry in data if entry['text'] != '']


def get_ambiguity_data(nlu, sample):
    """Summarise how close the two top-ranked intents of `sample` are.

    Returns a dict with the sample's 'text', the names of the rank-0 and
    rank-1 intents ('intents'), their confidences ('scores') and
    'ambiguity', the rank-1 confidence divided by the rank-0 confidence.
    """
    top_two = (0, 1)
    scores = [get_confidence_score(sample, position, nlu) for position in top_two]
    intents = [sample['intent_ranking'][position]['name'] for position in top_two]
    # ratio of runner-up confidence to winner confidence
    ratio = scores[1] / scores[0]
    return {
        'text': sample['text'],
        'ambiguity': ratio,
        'intents': intents,
        'scores': scores
    }

def get_most_ambiguous(nlu, version=VERSION, n=10):
    """Return the `n` entries with the highest ambiguity for one NLU.

    Loads the result file of every split from 1 to NUM_SPLITS, drops the
    entries with empty text, computes the ambiguity data of each remaining
    sample and returns the first `n` entries after sorting by descending
    'ambiguity'.
    """
    collected = []
    for split_num in range(1, NUM_SPLITS + 1):
        result_path = get_nlu_result_file(nlu, version, split_num)
        samples = remove_empty_preds(load_json(result_path))
        collected.extend(get_ambiguity_data(nlu, sample) for sample in samples)

    def negated_ambiguity(entry):
        # ascending sort on the negated value == descending ambiguity
        return -entry['ambiguity']

    return sorted(collected, key=negated_ambiguity)[:n]

# For each NLU: print its name, then the texts of its most ambiguous samples
# as indented JSON, followed by one blank line.
for nlu in NLU_NAMES:
    print(nlu)
    print(
        json.dumps([entry['text'] for entry in get_most_ambiguous(nlu)], indent=2),
        end='\n\n',
    )

watson
[
  "Tell me about ...",
  "Tell me about...",
  "Tell me about...",
  "Tell me about...",
  "Tell me about ...",
  "What does the hyphen represent in the equation?",
  "tell me the lucky color of today",
  "hey olly what has been happening interesting in sports lately",
  "Make blank on july 3 calender",
  "PDA! Tell mom ill be late this evening"
]

luis
[
  "email update",
  "Do you sell any of my information to advertisers?",
  "Please delete the last song that played from the playlist.",
  "what would you recommend for me to read",
  "How many legs does a dog have?",
  "Refresh Email inbox",
  "Schedule my meeting with Mr. John Hopkins tomorrow 1 PM in the afternoon after lunch.",
  "When was (insert) last academy award winning movie?",
  "How would you describe a person that is called a snowflake?",
  "What does a basketball feel like?"
]

snips
[
  "How do i figure women out?",
  "what does obama do in his day to day life",
  "How many new articles have been posted at USA 