In [2]:
from collections import defaultdict
import json

In [3]:
import re

In [4]:
# Load the annotation export. JSON text is UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform default
# (which is locale-dependent and can mis-decode non-ASCII dialogue text).
with open('cor.json', encoding='utf-8') as data:
    file = json.load(data)

In [5]:
# Build one (texts, labels, speakers) triple per dialogue.
dialogues = []
for d in file[:2]:  # only the first two entries of the export are used
    result = d['completions'][0]['result']
    texts_without_labels = d['data']['text']

    # Several result entries can share an id (one may carry 'paragraphlabels',
    # another 'choices'); they are merged into a single record per id.
    samples = defaultdict(dict)
    for sample in result:
        value = sample['value']
        start = int(value['start'])
        speaker = texts_without_labels[start]['speaker']
        record = samples[sample['id']]
        record['speaker'] = speaker
        record['text'] = value['text']
        record['start'] = start
        if 'paragraphlabels' in value:
            record['paragraphlabels'] = value['paragraphlabels'][0]
        if 'choices' in value:
            record['choices'] = value['choices'][0]

    # Order records by start index, with the id as tie-breaker.
    sorted_samples = sorted(
        (record['start'], sample_id) for sample_id, record in samples.items()
    )

    texts, labels, speakers = [], [], []
    for _, sample_id in sorted_samples:
        record = samples[sample_id]
        if record['text'] == 'PAUSE':
            continue  # pause markers are not part of the dialogue
        texts.append(str(record['text']).replace('\n', ''))
        speakers.append(record['speaker'])
        # A missing part becomes an empty string, so the joined label may
        # start or end with a dot.
        paragraph_labels = record.get('paragraphlabels', '')
        choices = record.get('choices', '')
        labels.append(paragraph_labels + '.' + choices)
    dialogues.append((texts, labels, speakers))

In [6]:
# Keep only the label sequence (index 1 of each triple): the second dialogue
# is used as "train", the first as "test".
train_labels, test_labels = dialogues[1][1], dialogues[0][1]

In [7]:
def cut_labels(list_of_labels):
    """Collapse raw annotation labels into a reduced label set, in place.

    Every label string is rewritten by an ordered sequence of rules: some
    path components are dropped, some are renamed, and a few labels are
    replaced wholesale. The order matters because later rules match the
    output of earlier ones.

    All matching is literal substring matching (``str.replace``), so a ``.``
    in a rule only ever matches a real dot.

    Args:
        list_of_labels: mutable list of label strings. It is modified in
            place.

    Returns:
        The same list object that was passed in, with every element
        rewritten.
    """
    for i, label in enumerate(list_of_labels):
        # 'Support.' is dropped except for Register / Engage labels.
        if 'Support.' in label and 'Register' not in label and 'Engage' not in label:
            label = label.replace('Support.', '')
        label = label.replace('Confront.', '')
        label = label.replace('Append', 'Prolong')
        label = label.replace('Initiate.', '')
        label = label.replace('Challenge.', '')
        # Any label mentioning 'Answer' is replaced as a whole.
        if 'Answer' in label:
            label = 'React.Rejoinder.Response.Resolve'
        label = label.replace('Open.Opinion', 'Opinion')
        label = label.replace('Open.Fact', 'Fact')
        label = label.replace('Decline', 'Contradict')
        label = label.replace('Accept', 'Affirm')
        label = label.replace('Response.Re-challenge', 'Re-challenge')
        label = label.replace('Response.Refute', 'Counter')
        label = label.replace('Response.Acquiesce', 'Response.Resolve')
        if 'Detach' in label:
            label = 'React.Rejoinder.Rebound'
        # Only in this context is every 'Rejoinder' renamed to 'Respond'.
        if 'Rejoinder.Develop.Elaborate' in label:
            label = label.replace('Rejoinder', 'Respond')
        if 'React.Respond.Disengage' in label:
            label = 'React.Respond.Support.Register'
        if 'Response.Repair' in label:
            label = 'React.Respond.Develop.Extend'
        # Must run after the 'Response.Refute' -> 'Counter' rename above.
        if 'React.Rejoinder.Counter' in label:
            label = 'Rejoinder.Counter'
        label = label.replace('Closed.Fact', 'Fact')
        label = label.replace('Closed.Opinion', 'Opinion')
        list_of_labels[i] = label
    return list_of_labels
            

In [19]:
# Distinct reduced labels across both dialogues (cut_labels also rewrites
# both lists in place).
{*cut_labels(train_labels), *cut_labels(test_labels)}

{'Open.Attend.',
 'Open.Demand.Fact',
 'Open.Demand.Opinion',
 'Open.Give.Fact',
 'Open.Give.Opinion',
 'React.Rejoinder.Re-challenge',
 'React.Rejoinder.Rebound',
 'React.Rejoinder.Response.Resolve',
 'React.Rejoinder.Track.Check',
 'React.Rejoinder.Track.Clarify',
 'React.Rejoinder.Track.Confirm',
 'React.Rejoinder.Track.Probe',
 'React.Respond.Develop.Elaborate',
 'React.Respond.Develop.Enhance',
 'React.Respond.Develop.Extend',
 'React.Respond.Reply.Acknowledge',
 'React.Respond.Reply.Affirm',
 'React.Respond.Reply.Agree',
 'React.Respond.Reply.Contradict',
 'React.Respond.Reply.Disagree',
 'React.Respond.Reply.Disawow',
 'React.Respond.Support.Engage',
 'React.Respond.Support.Register',
 'Rejoinder.Counter',
 'Sustain.Continue.Monitor',
 'Sustain.Continue.Prolong.Elaborate',
 'Sustain.Continue.Prolong.Enhance',
 'Sustain.Continue.Prolong.Extend'}

In [8]:
# Give every distinct reduced label a class index. The labels are sorted so
# the assignment is identical on every run; raw set iteration order for
# strings changes between interpreter sessions (hash randomization), which
# would make matrix indices and tie order irreproducible.
# cut_labels rewrites train_labels / test_labels in place, so later
# class_dict lookups by those lists use the reduced labels.
label_to_name = sorted(set(cut_labels(train_labels) + cut_labels(test_labels)))
class_dict = {name: index for index, name in enumerate(label_to_name)}

In [9]:
# Square matrix of zeros, one row and one column per class; each row is its
# own list object.
num_classes = len(class_dict)
A = [[0] * num_classes for _ in range(num_classes)]

In [10]:
# Count label -> next-label transitions inside each sequence; the two
# sequences are handled separately, so no transition spans both.
for label_sequence in (train_labels, test_labels):
    for current_label, next_label in zip(label_sequence, label_sequence[1:]):
        A[class_dict[current_label]][class_dict[next_label]] += 1

In [11]:
# Turn each row of counts into relative frequencies, in place. A row with no
# counts is divided by 1 and therefore stays all zeros.
for row in A:
    total_count = sum(row)
    denominator = max(total_count, 1)
    row[:] = [count / denominator for count in row]

In [30]:
def print_most_probable_labels(label_name):
    """Return the labels with non-zero weight in ``label_name``'s row of ``A``.

    Reads the module-level ``class_dict``, ``A`` and ``label_to_name``.
    Despite the name, nothing is printed; the labels are returned, ordered
    from the highest to the lowest value in the row. Entries equal to zero
    are left out.

    Args:
        label_name: a key of ``class_dict``. The spelling
            'React.Respond.Response.Resolve.' is accepted as an alias for
            'React.Rejoinder.Response.Resolve'.

    Returns:
        List of label names.

    Raises:
        KeyError: if ``label_name`` is not in ``class_dict``.
    """
    if label_name == 'React.Respond.Response.Resolve.':
        label_name = 'React.Rejoinder.Response.Resolve'
    row = A[class_dict[label_name]]
    # Stable sort: entries with equal values keep their class-index order.
    ranked = sorted(enumerate(row), key=lambda pair: pair[1], reverse=True)
    return [label_to_name[index] for index, weight in ranked if weight != 0.0]

In [31]:
print_most_probable_labels('Open.Give.Fact')

['Sustain.Continue.Prolong.Elaborate',
 'React.Rejoinder.Track.Clarify',
 'React.Respond.Support.Register',
 'Sustain.Continue.Monitor',
 'React.Respond.Reply.Acknowledge',
 'Open.Give.Fact',
 'React.Respond.Reply.Agree',
 'Open.Attend.',
 'Open.Demand.Opinion',
 'React.Respond.Support.Engage']