## Code

In [39]:
import pandas as pd
import numpy as np
from IPython.display import Audio,display
import os
from IPython.display import clear_output
import re
from IPython.display import HTML as html_print

In [52]:
def contact_rule_judge(file_path,audio_folder_path,startpoint,endpoint,key = 'mRawDataId'):
    '''
        Play the audio of selected utterances and collect a manual judgement of the contact rule.

        Parameters
            file_path         : path of the tab-separated data table; it must contain the id column
                                `key` plus the columns 'mSRLexicalFormRule' and 'mSRBestLexicalForm'
            audio_folder_path : folder holding the audio files, named <id>.wav
            startpoint        : index (0-based) of the first utterance to judge
            endpoint          : index at which to stop; the utterance at this index is NOT judged
            key               : name of the id column in the data table

        Returns
            (answers, ids) where answers[k] is the integer judgement (0, 1, 2, 3 or 9) given for
            ids[k]. Both lists always have the same length: if the user quits early, only the
            utterances that were actually answered are returned. When startpoint/endpoint are
            out of range a message is printed and two empty lists are returned, so callers that
            unpack the result keep working.

        The audio files are processed in sorted file-name order, so the same startpoint/endpoint
        always select the same utterances.
    '''
    valid_choices = (0, 1, 2, 3, 9)
    quit_choice = 9

    # Read in the data file with RawDataId and sort it by the id
    df = pd.read_table(file_path)
    df = df.sort_values([key],kind='mergesort')
    df.reset_index(inplace=True,drop=True)
    # Compare ids as text: the ids taken from the file names are always strings
    id_column = df[key].astype(str)

    # os.listdir returns names in arbitrary order, so sort them to make the indices stable,
    # and ignore anything that is not a .wav file
    audio_names = sorted(name for name in os.listdir(audio_folder_path)
                         if name.lower().endswith('.wav'))
    # splitext removes only the extension; str.strip('.wav') would also eat any leading or
    # trailing '.', 'w', 'a' or 'v' characters of the id itself
    audio_id = [os.path.splitext(name)[0] for name in audio_names]

    # Create the list to store the judgement about the contact rule
    answers = []

    if startpoint <0:
        print('The start index of the utterances should be larger than or equal to 0')
        return [], []

    # endpoint is exclusive, so endpoint == len(audio_names) is still a valid request
    if endpoint >len(audio_names):
        print('The end index of the utterances should be less than or equal to ' + str(len(audio_names)))
        return [], []

    # Loop through the selected utterances and display the audio, ask the user to input the choice
    for i in range(startpoint,endpoint):
        clear_output()
        print ('Case: ' + str(i))
        name_strip = audio_id[i]
        print('The rawID: ' + name_strip)

        # .item() raises ValueError unless exactly one row of the table matches this id
        row = df[id_column == name_strip]
        rule_form = row['mSRLexicalFormRule'].item()
        best_form = row['mSRBestLexicalForm'].item()

        print('')
        display(html_print(cstr(''.join(['The form from the Rule: ',
                                         cstr(rule_form, color='red')]),
                                color='black')))

        display(html_print(cstr(''.join(['The current 1-best form:  ',
                                         cstr(best_form, color='red')]),
                                color='black')))
        print('')
        print ('Here is the audio ' + audio_names[i])
        # os.path.join works whether or not audio_folder_path ends with a separator
        display(Audio(os.path.join(audio_folder_path, audio_names[i])))

        # Ask the user to input the judge whether the contact rule is good
        while True:
            value = input("Is the contact rule good? Answer 0 for Loss, 1 for Tie-Wrong , 2 for Tie-Correct, "
                          "3 for Win, 9 for quit: ")
            try:
                value = int(value)
            except ValueError:
                print ('Invalid number, please: 0 or 1 or 2 or 3 or 9')
                continue
            if value in valid_choices:
                break
            print ('Invalid range, please: 0 or 1 or 2 or 3 or 9')

        # Record the answer only once it has been validated, so that exactly one answer is
        # stored per utterance and answers stay aligned with the ids
        answers.append(value)

        if value == quit_choice:
            break

    print('')
    print ('Yeahhhh! You did the Deep Human Learning!')

    # Return only the ids that actually received an answer
    return answers,audio_id[startpoint:startpoint + len(answers)]

def save_data_with_judge(answers,audio_id,input_path,out_path,key = 'mRawDataId'):
    '''
        Add the column contact_rule_judge to the data table and save it as a tab-separated file.

        Parameters
            answers    : list of integer judgement codes (0, 1, 2, 3 or 9)
            audio_id   : list of ids; audio_id[k] is the utterance judged by answers[k]
            input_path : path of the tab-separated data table to read
            out_path   : path of the tab-separated file to write (utf-8, no index column)
            key        : name of the id column in the data table

        The table is sorted by `key` before it is written. Rows whose id has no judgement get an
        empty string in the new column. audio_id may be longer than answers (for example when
        the judging session was quit early); the ids without an answer are treated as not judged.

        Raises KeyError if a judged row carries a code that is not 0, 1, 2, 3 or 9.
    '''
    int_to_str_dict = {'0':'Loss','1':'Tie-Wrong','2':'Tie-Correct','3':'Win','9':'Quit'}

    df = pd.read_table(input_path)
    df = df.sort_values([key],kind='mergesort')
    df.reset_index(inplace=True,drop=True)

    # zip() stops at the shorter list, so an id without an answer never enters the mapping.
    # Ids are normalised to str on both sides so that a numeric id column still matches.
    judged = dict(zip((str(one_id) for one_id in audio_id), answers))

    contact_rule = []
    for this_id in df[key]:
        this_id = str(this_id)
        # Look the id up in the mapping itself (not in audio_id) so that ids without an
        # answer fall through to the empty label instead of raising KeyError
        if this_id in judged:
            contact_rule.append(int_to_str_dict[str(judged[this_id])])
        else:
            contact_rule.append('')

    df['contact_rule_judge'] = contact_rule
    df.to_csv(out_path,sep='\t',index = False,encoding = 'utf-8')
    
def cstr(s, color='black'):
    '''
        Wrap s in a <text> tag whose style attribute sets the given color and return the markup
    '''
    template = "<text style=color:{}>{}</text>"
    markup = template.format(color, s)
    return markup

## Run the program

In [53]:
# ------------------------------------------------------------------
# Parameters of the judging session
# ------------------------------------------------------------------

# Data table file that includes the rawDataID and the other information
file_path = '../data/cuviewspeakerlogs.en-US.20170301_20170531.ss.tsv.contactnamerule'

# Name of the id column in the data table
key = 'mRawDataId'

# Folder that holds all the audio files
audio_folder_path = "../data/audio_423_data/"

# Utterances to listen to: the loop runs over range(startpoint, endpoint), so startpoint
# is the first index played (0-based) and endpoint is the index to stop before
startpoint, endpoint = 0, 1

In [54]:
# Pass the configured id column name instead of repeating the literal, so that changing
# `key` in the parameter cell takes effect here as well
answers,audio_id = contact_rule_judge(file_path,audio_folder_path,startpoint=startpoint,endpoint=endpoint,key = key)

Case: 0
The rawID: 0059973D2CFBF8A490C32E76A493BEE3




Here is the audio 0059973D2CFBF8A490C32E76A493BEE3.wav


Is the contact rule good? Answer 0 for Loss, 1 for Tie-Wrong , 2 for Tie-Correct, 3 for Win, 9 for quit: 9

Yeahhhh! You did the Deep Human Learning!


## Save the judgements to the data

In [33]:
# Save the data table together with the collected judgements.
# The id column name comes from the `key` parameter rather than a repeated literal.
out_path = '../data/new_data_with_judge.tsv'
save_data_with_judge(answers,audio_id,input_path=file_path,out_path = out_path,key = key)

In [34]:
# Read back the file that was just written; reuse out_path so this cell cannot drift
# from the save cell if the output location is changed
tem = pd.read_table(out_path)

In [35]:
# Show the first five rows of the reloaded table
tem.head(n=5)

Unnamed: 0,mRawDataId,mImpressionGuid,mTrafficType,mSRBestDisplayText,mSRBestLexicalForm,mSRLexicalFormN,mSRDeviceLexicalFormN,mSRDisplayTextN,mSRDeviceDisplayTextN,mLUDomain,...,HomophoneGroupIdN,mCUServiceClientInstDeviceInfoManufacturer,ErrorType,isReranked,1stPassResult,mSRLexicalFormRule,mSRLexicalFormRuleRank,SameResult,HomophoneGroupIdMatch,contact_rule_judge
0,0059973D2CFBF8A490C32E76A493BEE3,2D58C96C781749989883DD4774745CEF,Untagged,hey cortana call skype,hey cortana call skype,hey cortana call skype#TAB#hey cortana call sk...,,hey cortana call skype#TAB#hey cortana call sk...,,,...,0#TAB#0#TAB#0,Harman Kardon,1best_Nbest_single,False,hey cortana call skype,hey cortana call Skype,3,True,True,Tie-Wrong
1,0077755B48912DF49481965318B973FF,4FF188F7773948A594E0BDC6AE37E71F,Untagged,hey cortana call michael,hey cortana call michael,hey cortana call michael#TAB#hey cortana call ...,,hey cortana call michael#TAB#hey cortana call ...,,,...,0#TAB#0#TAB#0#TAB#0#TAB#0,Harman Kardon,1best_Nbest_single,False,hey cortana call michael,hey cortana call Michael,5,True,True,
2,00ADAA5BDC7688B480BA9451692D3E43,0C6C96FE01F04F0DB4E8E3D1D84F02BE,Untagged,hey cortana call AJ,hey cortana call AJ,hey cortana call AJ#TAB#hey cortana call AJ#TA...,,hey cortana call AJ#TAB#hey cortana call AJ#TA...,,,...,0#TAB#0#TAB#0#TAB#0,Harman Kardon,1best_Nbest_single,False,hey cortana call AJ,hey cortana call AJ,3,True,True,
3,0171BC1D5B6AD014AFBE12A9C90A534D,9B687F5DA79C4791BA84FE18C1E00DDC,Untagged,hey cortana call river,hey cortana call river,hey cortana call river#TAB#hey cortana call ri...,,hey cortana call river#TAB#hey cortana call ri...,,,...,0#TAB#0#TAB#0#TAB#1#TAB#2,Harman Kardon,1best_Nbest_single,False,hey cortana call river,hey cortana call river,2,True,True,
4,0197DA6C3F0CBF24ACC2DE26ECBB8A3A,1DB6D2CF768A419AA0673A93658FCB58,Untagged,hey cortana skype echo,hey cortana skype echo,hey cortana skype echo#TAB#hey cortana skype Echo,,hey cortana skype echo#TAB#hey cortana skype Echo,,,...,0#TAB#0,Harman Kardon,1best_Nbest_single,False,hey cortana skype echo,hey cortana skype Echo,2,True,True,
