## Some preliminary steps (installations, imports, etc)


In [None]:
!pip install openai -q

In [None]:

!pip install transformers -q

In [None]:
!pip install python-Levenshtein -q

In [None]:
pip install git+https://github.com/Kpetyxova/DeepPavlov.git@feat/topic_shift

In [None]:
pip install transformers

In [None]:
import openai
import time
from transformers import pipeline
import pandas as pd
import xml.etree.ElementTree as ET
from Levenshtein import distance as lev
import json
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import random
from collections import Counter
import numpy as np
from tqdm.notebook import tqdm
import sys
import os
import re
from deeppavlov import build_model
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Hugging Face pipeline over facebook/bart-large-mnli; preprocess_answer calls it
# with `candidate_labels` to map a free-form reply onto one of the allowed answers.
pipe = pipeline(model="facebook/bart-large-mnli")

In [None]:
# DeepPavlov model built from the 'superglue_topic_shift.json' config. chain_answer
# calls it as model(prev_utt, curr_utt) and compares the first result with '1'.
model = build_model('superglue_topic_shift.json')

# Working with datasets

In [None]:
def make_dialog_dict(filename, sep='\t', filetype='common'):
    """Load a dialog file and group its utterances by dialog id.

    Args:
        filename: Path of the delimited text file, read with ``pd.read_csv``.
        sep: Column separator passed to ``pd.read_csv``.
        filetype: ``'common'`` for files with ``INPUT:dialog_id`` and
            ``INPUT:utterances`` columns, where every utterances cell holds
            ``<span ...>speaker_N</span>: "text"`` fragments; ``'gold'`` for
            files with one utterance per row in the ``dialogue_id``,
            ``speaker`` and ``text`` columns.

    Returns:
        A dict mapping ``str(dialog id)`` to a list of
        ``{"speaker": ..., "utterance": ...}`` dicts in file order. For
        ``'common'`` files the speaker is an ``int``; for ``'gold'`` files it
        is ``'1'`` when the speaker cell contains ``'O'``, ``'2'`` when it
        contains ``'X'``, and the unchanged cell value otherwise.

    Raises:
        ValueError: If ``filetype`` is neither ``'common'`` nor ``'gold'``.
    """
    if filetype not in ('common', 'gold'):
        # Fail early with a clear message instead of an UnboundLocalError
        # on the final return.
        raise ValueError(f"Unknown filetype {filetype!r}; expected 'common' or 'gold'")

    dialogs = pd.read_csv(filename, sep=sep)
    fin_dict = {}

    if filetype == 'common':
        rows = dialogs[['INPUT:dialog_id', 'INPUT:utterances']].values.tolist()
        for dialog_id, raw_utterances in rows:
            # Each fragment is (text inside the <span>, quoted utterance text).
            fragments = re.findall(r'<span.*?>(.*?)</span>:\s*"(.*?)"', raw_utterances)
            # A repeated dialog id overwrites the earlier entry.
            fin_dict[str(dialog_id)] = [
                {
                    # The speaker number is parsed from the "speaker_N" text.
                    "speaker": int(re.search(r'speaker_(\d+)', speaker).group(1)),
                    "utterance": utterance.strip().replace('\\', ''),
                }
                for speaker, utterance in fragments
            ]
    else:
        rows = dialogs[['dialogue_id', 'speaker', 'text']].values.tolist()
        for dialog_id, speaker, text in rows:
            if 'O' in speaker:
                speaker = '1'
            elif 'X' in speaker:
                speaker = '2'
            fin_dict.setdefault(str(dialog_id), []).append(
                {"speaker": speaker, "utterance": text.strip()}
            )

    return fin_dict

## Loading XML trees

In [None]:
# Main draw.io question tree; `root` is searched by the node-lookup helpers
# (extract_node_features, get_features_for_others, chain_answer).
tree = ET.parse('files/questions.drawio.xml')
root = tree.getroot()

In [None]:
# Additional draw.io tree; get_features_for_extra looks up objects in it by
# their `linked_parent_id` attribute.
tree_extra = ET.parse('files/questions_extra.drawio.xml')
root_extra = tree_extra.getroot()

# Loading instructions

In [None]:
# Table of per-node instructions; create_prompt reads its `id` and `instruction` columns.
instructions = pd.read_csv('files/instructions.tsv', sep='\t')

In [None]:
import re  # custom cleaning of HTML tags from the instructions (added by Nika)

# Matches any single HTML tag (non-greedy).
CLEANR = re.compile(r'<.*?>')
# Matches a closing </span> followed by <br> (first alternative) or by one or
# more <p> (second alternative), plus optional trailing span tags. The
# character just before </span> is captured in group 1 or group 5 respectively.
DASH = re.compile(r'(?:([^>])<\/span><br>(<br>)?(<\/span>)*(<span class="text_attention">)*)|(?:([^>])<\/span>(<p>)+(<\/span>)*(<span class="text_attention">)*)')


def cleanhtml(raw_html):
    """Convert an HTML instruction snippet into plain text.

    Newlines are removed, span/line-break boundaries matched by ``DASH`` become
    `` -- ``, remaining ``<br>`` tags become newlines, attention spans become
    blank lines and all other tags are stripped.

    Args:
        raw_html: The HTML text; any non-string value (e.g. NaN) is accepted.

    Returns:
        The cleaned text, or an empty string when ``raw_html`` is not a string.
    """
    if not isinstance(raw_html, str):
        return ""
    text = raw_html.replace('\n', '')
    # Only one of groups 1 and 5 takes part in a match; the other expands to
    # an empty string. Emitting both keeps the captured character for either
    # alternative (referencing group 1 alone dropped it for the <p> form).
    text = DASH.sub(r'\g<1>\g<5> -- ', text)
    text = text.replace('<br>', '\n').replace('<span class="text_attention">', '\n\n')
    return CLEANR.sub('', text)

In [None]:
# Replace the raw HTML of every instruction with its plain-text form.
instructions['instruction'] = instructions['instruction'].apply(cleanhtml)

# Functions for annotation

In [None]:
# Global text log appended to by fill_annotation_log and reset by
# annotate_dialog before each utterance.
annotation_log = ""

In [None]:
def fill_annotation_log(additional_text):
    """Append ``additional_text`` to the global ``annotation_log`` on a new line."""
    global annotation_log
    annotation_log += "\n"
    annotation_log += additional_text

In [None]:
def preprocess_answer(right_answer_text, possible_answers):
    """Return the first label the global ``pipe`` reports for the given text.

    ``pipe`` is called with ``possible_answers`` as ``candidate_labels``; the
    first entry of the ``'labels'`` list in its result is returned.
    """
    ranked_labels = pipe(right_answer_text, candidate_labels=possible_answers)['labels']
    return ranked_labels[0]

In [None]:
def select_answer(right_answer_text, possible_answers, target_nodes):
    """Match a model reply to one of the allowed answers.

    The reply is first compared with every answer by Levenshtein distance; the
    closest answer within distance 2 wins (the earliest one on ties). If none
    is that close, ``preprocess_answer`` picks the answer instead.

    Args:
        right_answer_text: The reply text to match.
        possible_answers: Answer texts, parallel to ``target_nodes``.
        target_nodes: Node ids reached by the corresponding answers.

    Returns:
        A tuple ``(target_node, index)`` for the selected answer.

    Raises:
        ValueError: If no answer could be matched. Previously the function
            returned ``None`` here, which failed later when callers unpacked
            the result.
    """
    if 'label' in right_answer_text:
        # Presumably strips a trailing period from masked answers such as
        # "label_3." -- it removes every dot in the reply.
        right_answer_text = right_answer_text.replace('.', '')

    best = None
    for index, node in enumerate(target_nodes):
        distance = lev(right_answer_text, possible_answers[index])
        # Strict "<" keeps the earliest candidate when distances are equal.
        if distance <= 2 and (best is None or distance < best[0]):
            best = (distance, (node, index))
    if best is not None:
        return best[1]

    processed_right_answer_text = preprocess_answer(right_answer_text, possible_answers)
    for index, node in enumerate(target_nodes):
        if possible_answers[index] == processed_right_answer_text:
            return node, index

    raise ValueError(
        f"Could not match answer {right_answer_text!r} to any of {possible_answers!r}"
    )

In [None]:
def chatGPT_annotate(dialog, question, level_recursion=0):
    """Send one system/user message pair to the chat completion API, retrying on errors.

    Args:
        dialog: Text sent as the ``system`` message.
        question: Text sent as the ``user`` message.
        level_recursion: Number of attempts already used; attempts continue
            until this counter exceeds 10.

    Returns:
        The API response object on success. When every attempt fails, a plain
        explanatory string is returned instead (kept for backward
        compatibility) -- callers that access ``.choices`` must be prepared
        for that.
    """
    attempt = level_recursion
    while attempt <= 10:
        try:
            return openai.ChatCompletion.create(
                model=chatGPT_settings['model'],
                temperature=chatGPT_settings['temperature'],
                messages=[
                    {"role": "system", "content": dialog},
                    {"role": "user", "content": question},
                ],
            )
        except Exception as error:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit still interrupt a long run.
            print("Something went wrong. Let's sleep (15 seconds) and try again")
            print(f"Error was: {error!r}")
            time.sleep(15)
            attempt += 1
    return 'Tried to get response 10 times and failed. Something went really wrong.'

## Masking

In [None]:
# Label components for which make_label_to_mask_dict keeps the last TWO parts
# of the full label as the short name (any full label containing one of these).
ambiguous_labels = ['Enhance', 'Extend', 'Elaborate', 'Fact', 'Opinion', 'Command']
# Full dotted labels; the position in this list becomes the mask number
# ("label_<n>") in make_label_to_mask_dict.
full_labels_list = ['React.Rejoinder.Support.Response.Resolve', 'React.Rejoinder.Confront.Challenge.Detach',
                    'React.Rejoinder.Confront.Response.Refute', 'React.Rejoinder.Confront.Challenge.Counter', 'Open.Attend', 'Open.Command',
                    'Open.Demand.Opinion', 'Open.Demand.Fact', 'Open.Give.Fact', 'Open.Give.Opinion', 'Sustain.Continue.Monitor',
                    'React.Respond.Support.Register', 'React.Respond.Command', 'React.Respond.Support.Engage', 'React.Respond.Support.Reply.Accept',
                    'Sustain.Continue.Command', 'React.Rejoinder.Support.Track.Check', 'React.Rejoinder.Support.Track.Confirm',
                    'React.Rejoinder.Support.Track.Clarify', 'React.Rejoinder.Support.Track.Probe', 'Sustain.Continue.Prolong.Enhance',
                    'Sustain.Continue.Prolong.Extend', 'Sustain.Continue.Prolong.Elaborate', 'React.Rejoinder.Confront.Challenge.Rebound',
                    'React.Rejoinder.Confront.Response.Re-challenge', 'React.Respond.Support.Reply.Acknowledge',
                    'React.Respond.Support.Reply.Affirm', 'React.Respond.Support.Reply.Agree', 'React.Respond.Confront.Reply.Disavow',
                    'React.Respond.Confront.Reply.Disagree', 'React.Respond.Confront.Reply.Contradict', 'React.Respond.Support.Develop.Extend',
                    'React.Respond.Support.Develop.Enhance', 'React.Respond.Support.Develop.Elaborate', 'Detailedanswer']

In [None]:
def make_label_to_mask_dict():
    """Map each short label to its mask and its full label.

    The short label is the last component of the full label, or the last two
    components when the full label contains any entry of ``ambiguous_labels``.
    The mask is ``label_<position in full_labels_list>``.

    Returns:
        dict of ``short label -> {'mask': ..., 'full_label': ...}``.
    """
    mapping = {}
    for position, full_label in enumerate(full_labels_list):
        is_ambiguous = any(marker in full_label for marker in ambiguous_labels)
        kept_parts = 2 if is_ambiguous else 1
        short_label = '.'.join(full_label.split('.')[-kept_parts:])
        mapping[short_label] = {'mask': f'label_{position}', 'full_label': full_label}
    return mapping

In [None]:
# Lookup table used by process_text: short label -> {'mask', 'full_label'}.
label_to_mask = make_label_to_mask_dict()

In [None]:
def process_text(text, function='mask'):
    """
    Rewrite label names inside ``text`` word by word.

    function = mask, unmask, get_full_label
      - 'mask': words matching a short label get their letter/dot runs
        replaced by that label's mask ("label_<n>").
      - 'unmask': words matching a mask get their letter/digit/underscore/dot
        runs replaced by the full label.
      - 'get_full_label': words matching a short label get their letter/dot
        runs replaced by the full label.
    Any other value leaves the words unchanged.

    Returns the words joined with single spaces (a space is inserted before
    every newline by the splitting step).
    """
    global label_to_mask
    # 'Detailed answer' is collapsed into one token so the space split keeps it
    # in a single word; ' \n' makes a newline start a new word.
    text_split = text.replace('Detailed answer', 'Detailedanswer').replace('\n', ' \n').split(' ')
    for index, word in enumerate(text_split):
        if function=='unmask':
            for value in label_to_mask.values():
                # NOTE(review): 'label_1' also matches words such as 'label_10'
                # (a digit satisfies [^a-zA-Z]); because `word` is not updated,
                # a later matching mask overwrites the earlier replacement.
                if re.search(rf"{value['mask']}([^a-zA-Z]|\b)", word):
                    text_split[index] = re.sub(r'[.a-zA-Z_0-9]+', value['full_label'], word)
                    text_split[index] = text_split[index].replace('Detailedanswer', 'Detailed answer')
        else:
            for key in label_to_mask.keys():
                # NOTE(review): the key is interpolated into the pattern
                # unescaped, so '.' in a key matches any character.
                if re.search(rf"{key}([^a-zA-Z]|\b)", word):
                    if function=='mask':
                        text_split[index] = re.sub(r'[.a-zA-Z]+', label_to_mask[key]['mask'], word)
                    elif function=='get_full_label':
                        text_split[index] = re.sub(r'[.a-zA-Z]+', label_to_mask[key]['full_label'], word)
    return ' '.join(text_split)

## Creating a prompt


In [None]:
def create_prompt(dialog_json, current_node_question, possible_answers, current_utterance_num, instruction_id):
    """Build the system and user prompts for one question about one utterance.

    Args:
        dialog_json: List of ``{"speaker", "utterance"}`` dicts for the dialog.
        current_node_question: Question text of the current tree node.
        possible_answers: Allowed answer texts (surrounding whitespace is stripped).
        current_utterance_num: Index of the utterance being annotated.
        instruction_id: Value looked up in the ``id`` column of ``instructions``.

    Returns:
        A tuple ``(prompt_system, prompt_user)``. Both are passed through
        ``process_text(..., function='mask')`` when masking is enabled in
        ``chatGPT_settings``.

    The settings read from ``chatGPT_settings`` are ``context_length``,
    ``instruction``, ``explanation`` and ``masking``.
    """
    utterance_index = int(current_utterance_num)
    current = dialog_json[utterance_index]
    curr_utt = f"speaker_{current['speaker']}: {current['utterance']}"

    # Defined up front so the 'Miscellaneous' handling below never hits an
    # undefined name for the first utterance.
    prev_context = []
    if current_utterance_num == 0:
        start = f"""DIALOG:
(Start the dialog)
CURRENT UTTERANCE: {curr_utt}"""
    else:
        context_start = max(utterance_index - chatGPT_settings['context_length'], 0)
        prev_context = [
            f"speaker_{x['speaker']}: {x['utterance']}"
            for x in dialog_json[context_start:utterance_index]
        ]
        prev_join = '\n'.join(prev_context)
        start = f"""DIALOG
PREVIOUS CONTEXT:
{prev_join}
CURRENT UTTERANCE:
{curr_utt}"""

    prompt_system = """TASK: This is part of the dialog is between 2 speakers. Answer QUESTION about CURRENT UTTERANCE. You must analyze relations between CURRENT UTTERANCE and PREVIOUS CONTEXT, not taking anything before it into account."""

    instruction = ""
    instruction_after_q = ""
    if chatGPT_settings['instruction']:
        row = instructions[instructions['id'] == instruction_id]
        if not row.empty:
            instruction_text = row['instruction'].iloc[0]
            if isinstance(instruction_text, str):
                # Short instructions (< 30 words) are appended after the
                # question; longer ones go in front as a separate section.
                if len(instruction_text.split()) < 30:
                    instruction_after_q = instruction_text
                else:
                    instruction = f"""INSTRUCTION: {instruction_text} """
            if 'Miscellaneous' in instruction:
                # Spell out which sentences the question refers to. There is
                # no previous sentence for the first utterance (or when the
                # context length is 0), so that part is skipped then.
                if prev_context:
                    current_node_question = current_node_question.replace(
                        'previous sentence', f'previous sentence ({prev_context[-1]})')
                current_node_question = current_node_question.replace(
                    'This sentence', f'This sentence ({curr_utt})')

    possible_answers = [x.strip() for x in possible_answers]
    answers_listed = ', '.join(possible_answers)
    answers_choice = ' or '.join(possible_answers)
    if chatGPT_settings['explanation']:
        prompt_user = f"""{instruction}\n{start}\nQUESTION: {current_node_question} {instruction_after_q}
POSSIBLE ANSWERS: {answers_listed}\nProvide an explanation. If you are not sure, choose a most likely option. You must always select an option ({answers_choice}). Structure your answer the following way:
'Reasoning: (your reasoning). Final answer: (your final answer, {answers_choice})"""
    else:
        prompt_user = f"""{instruction}\n{start}\nQUESTION: {current_node_question} {instruction_after_q}
POSSIBLE ANSWERS: {answers_listed}\nYou must always select an option. Provide only one answer without explanation. ANSWER ({answers_choice}):"""

    if chatGPT_settings['masking']:
        prompt_system = process_text(prompt_system, function='mask')
        prompt_user = process_text(prompt_user, function='mask')
    return prompt_system, prompt_user

## Working with the tree

In [None]:
def extract_node_features(current_node_path, current_node_path_object, current_node_path_object_child):
    """Read the text and style of a node from the main tree (``root``).

    The node is looked up as a plain ``mxCell`` first; when no such cell
    exists, its text comes from the ``label`` of the ``object`` element and
    its style from that object's child ``mxCell``.

    Args:
        current_node_path: ElementPath of the node as an ``mxCell``.
        current_node_path_object: ElementPath of the node as an ``object``.
        current_node_path_object_child: ElementPath of the object's ``mxCell``.

    Returns:
        A tuple ``(current_node_question, node_text, style_text)``; the first
        two items are the same text.
    """
    # Unconditional debug output of the object-child path.
    print(current_node_path_object_child)
    cell = root.find(current_node_path)
    # Compare with None explicitly: an Element without children is falsy, so a
    # truthiness test would treat an existing leaf cell as missing.
    if cell is not None:
        node_text = cell.attrib['value']
        style_text = cell.attrib['style']
    else:
        node_text = root.find(current_node_path_object).attrib['label']
        style_text = root.find(current_node_path_object_child).attrib['style']
    return node_text, node_text, style_text

In [None]:
def get_features_for_extra(current_node):
    """Collect the answers of the extra-tree node linked to ``current_node``.

    The node is the ``object`` in ``root_extra`` whose ``linked_parent_id``
    equals ``current_node``; its answers are the ``mxCell`` edges that have
    this object as ``source``. The answer text is the ``value`` of the cell
    whose ``parent`` is the edge, or the edge's own ``value`` when no such
    cell exists.

    Returns:
        A tuple ``(possible_answers, target_nodes)`` of parallel lists: the
        cleaned answer texts and the ``target`` ids of their edges. Both are
        empty when the node has no outgoing edges.
    """
    # Unconditional debug output of the node id.
    print(current_node)
    extra_id = root_extra.find("diagram/mxGraphModel/root/object[@linked_parent_id='"+str(current_node)+"']").attrib['id']
    extra_answers = root_extra.findall("diagram/mxGraphModel/root/mxCell[@source='"+str(extra_id)+"']")

    # Initialised unconditionally so a node without edges yields empty lists
    # instead of an UnboundLocalError.
    possible_answers = []
    target_nodes = []
    for answer in extra_answers:
        label_cell = root_extra.find(f"diagram/mxGraphModel/root/mxCell[@parent='{answer.attrib['id']}']")
        # Explicit None check: an Element without children is falsy.
        value_source = label_cell if label_cell is not None else answer
        possible_answers.append(cleanhtml(value_source.attrib['value']).strip('\n'))
        target_nodes.append(answer.attrib['target'])
    return possible_answers, target_nodes

In [None]:
def get_features_for_others(current_node):
    """Collect the answers (outgoing edges) of a node in the main tree.

    Two kinds of edges are read from ``root``: plain ``mxCell`` edges whose
    ``source`` is ``current_node``, and ``mxCell`` edges wrapped in an
    ``object`` element.

    Returns:
        A tuple ``(possible_answers, target_nodes)`` of parallel lists: the
        cleaned answer texts and the ``target`` ids of their edges, plain
        edges first.
    """
    answers_lines = root.findall("diagram/mxGraphModel/root/mxCell[@source='"+str(current_node)+"']")
    answers_lines_objects = root.findall(".//mxCell[@source='"+str(current_node)+"']...")
    answers_lines_objects_child = root.findall("diagram/mxGraphModel/root/object/mxCell[@source='"+str(current_node)+"']")

    possible_answers = []
    target_nodes = []

    for answer in answers_lines:
        answer_value = root.find(f"diagram/mxGraphModel/root/mxCell[@parent='{answer.attrib['id']}']")
        # Explicit None check: an Element without children is falsy, so a
        # truthiness test would ignore an existing childless label cell.
        value_source = answer_value if answer_value is not None else answer
        possible_answers.append(value_source.attrib['value'])
        target_nodes.append(answer.attrib['target'])

    for position, answer_line in enumerate(answers_lines_objects_child):
        target_nodes.append(answer_line.attrib['target'])
        # NOTE(review): this appends the found Element (or None), not a
        # string, so cleanhtml below turns it into "". It also assumes
        # answers_lines_objects is aligned one-to-one with
        # answers_lines_objects_child -- confirm the intent before changing.
        possible_answers.append(root.find(".//mxCell[@parent='"+str(answers_lines_objects[position].attrib['id'])+"']/*[@label]/*[last()]/*[@value]"))

    possible_answers = [cleanhtml(x).strip('\n') for x in possible_answers]
    return possible_answers, target_nodes

In [None]:
def chain_answer(dialog_json, current_node, current_utterance_num):
  """Process one node of the question tree for one utterance.

  Depending on the node, the answer comes from a hard-coded rule, from the
  topic-shift ``model``, from the remembered ``prev_utterance_type`` or from a
  ChatGPT call.

  Args:
    dialog_json: list of {"speaker", "utterance"} dicts for the dialog.
    current_node: id of the node to process (compared via ``int(...)``).
    current_utterance_num: index of the utterance being annotated.

  Returns:
    A tuple ``(answer_text, next_node)``; ``next_node`` is 0 once a final
    label has been appended to the global ``final_answers``. Returns None
    implicitly when the node is found neither as mxCell nor as object.
  """
  global final_answers, logs_real_time, prev_utterance_type
  # Verbose output goes to stdout, otherwise to a null sink.
  # NOTE(review): the os.devnull handle opened here is never closed.
  if logs_real_time:
      stdout_meaning = sys.stdout
  else:
      stdout_meaning = open(os.devnull, 'w')
  # Fill colours that mark final nodes and nodes continued in the extra tree.
  LAST_NODE_COLOR = '#dae8fc'
  EXTRA_NODE_COLOR = '#e1d5e7'
  current_node_path = "diagram/mxGraphModel/root/mxCell[@id='"+str(current_node)+"']"
  current_node_path_object = "diagram/mxGraphModel/root/object[@id='"+str(current_node)+"']"
  current_node_path_object_child = "diagram/mxGraphModel/root/object[@id='"+str(current_node)+"']/mxCell"
  current_node_question, node_text, style_text = extract_node_features(current_node_path, current_node_path_object, current_node_path_object_child)
  instruction_id = '0_'+str(current_node)
  # Up to two preceding utterances, used as input for the topic-shift model.
  # NOTE(review): for utterance 0 the index -1 wraps around to the last utterance.
  prev_utt = ""
  if current_utterance_num-2 >= 0:
    prev_utt =  f"speaker_{dialog_json[current_utterance_num-2]['speaker']}: {dialog_json[current_utterance_num-2]['utterance']} "
  prev_utt +=  f"speaker_{dialog_json[current_utterance_num-1]['speaker']}: {dialog_json[current_utterance_num-1]['utterance']}"
  curr_utt = f"speaker_{dialog_json[current_utterance_num]['speaker']}: {dialog_json[current_utterance_num]['utterance']}"
  # NOTE(review): this tests Element truthiness (number of children), not presence.
  if root.find(current_node_path) or root.find(current_node_path_object):
    print(style_text)
    # Final node: its text is the label for this utterance.
    if (f'fillColor={LAST_NODE_COLOR}' in style_text):
       final_answers.append(node_text)
       print("This node is the last one in the scheme", file=stdout_meaning)
       print(final_answers, file=stdout_meaning)
       fill_annotation_log(str(final_answers))
       print(f'answer_text for {curr_utt}: {node_text}')
       return node_text, 0
    # Extra node: ask ChatGPT to choose among the answers from the extra tree;
    # the chosen answer becomes the final label.
    if (f'fillColor={EXTRA_NODE_COLOR}' in style_text):
       possible_answers, target_nodes = get_features_for_extra(current_node)
       if chatGPT_settings['masking']:
         possible_answers = [process_text(answer, function = 'mask') for answer in possible_answers]
       prompt_system, prompt_user = create_prompt(dialog_json, current_node_question, possible_answers, current_utterance_num, instruction_id)
       print(prompt_system, file=stdout_meaning)
       print(prompt_user, file=stdout_meaning)
       fill_annotation_log(prompt_system)
       fill_annotation_log(prompt_user)
       answer_text = chatGPT_annotate(prompt_system, prompt_user).choices[0]['message']['content']
       print(answer_text, file=stdout_meaning)
       fill_annotation_log(answer_text)
       if chatGPT_settings['explanation']:
         answer_text = answer_text.split('Final answer: ')[-1]
       next_node, right_answer_id = select_answer(answer_text, possible_answers, target_nodes)
       node_text = process_text(possible_answers[right_answer_id], function = 'unmask')
       final_answers.append(node_text)
       print('This node was the last one in the extra file', file=stdout_meaning)
       print(node_text, file=stdout_meaning)
       fill_annotation_log(node_text)
       print(f'answer_text for {curr_utt}: {node_text}')
       return node_text, 0
    else:
      # Node 2: the same speaker continuing right after an 'Open.Attend'
      # utterance is answered "Yes" without calling any model.
      if int(current_node) == 2 and len(final_answers)!=0 and len(final_answers) > 0:
        if final_answers[current_utterance_num-1] == 'Open.Attend':
          if dialog_json[current_utterance_num]['speaker'] == dialog_json[current_utterance_num-1]['speaker']:
            print('The next node is also OPEN', file=stdout_meaning)
            return "Yes", 3
      # Node 2: the first utterance is always answered "Yes"; otherwise the
      # topic-shift model decides between nodes 3 and 36.
      if int(current_node) == 2 and current_utterance_num == 0:
        print('OPEN node', file=stdout_meaning)
        return "Yes", 3
      elif int(current_node) == 2:
        print(f"topic shift model working: {prev_utt} {curr_utt}", file=stdout_meaning)
        res = model(prev_utt, curr_utt)
        if res[0] == '1':
            return "Yes", 3
        else:
            return "No", 36
      # Node 36: answered by comparing the speakers of this and the previous utterance.
      if int(current_node) == 36 and current_utterance_num != 0:
        if dialog_json[current_utterance_num]['speaker'] != dialog_json[current_utterance_num-1]['speaker']:
          print('Speaker changes', file=stdout_meaning)
          return "Yes", 39
        else:
          print('Speaker is the same', file=stdout_meaning)
          return "No", 173
      # Nodes 39/173: reuse the remembered utterance type as the answer unless
      # the last final label contains 'Open'.
      if int(current_node) in [39, 173] and final_answers:
        if 'Open' not in final_answers[-1]:
          answer_text = prev_utterance_type
          possible_answers, target_nodes = get_features_for_others(current_node)
          next_node, right_answer_id = select_answer(answer_text, possible_answers, target_nodes)
          return answer_text, next_node
      # Nodes 184/271: same speaker as the previous utterance -> "No", node 253.
      if int(current_node) in [184, 271] and dialog_json[current_utterance_num-1]['speaker'] == dialog_json[current_utterance_num]['speaker']:
          return "No", 253
      # Every other node: ask ChatGPT and follow the edge of the chosen answer.
      possible_answers, target_nodes = get_features_for_others(current_node)
      prompt_system, prompt_user = create_prompt(dialog_json, current_node_question, possible_answers, current_utterance_num, instruction_id)
      print(prompt_system, file=stdout_meaning)
      print(prompt_user, file=stdout_meaning)
      fill_annotation_log(prompt_user)
      answer_text = chatGPT_annotate(prompt_system, prompt_user).choices[0]['message']['content']
      print(f'answer_text: {answer_text}', file=stdout_meaning)
      fill_annotation_log(f'answer_text: {answer_text}')
      if chatGPT_settings['explanation']:
          answer_text = answer_text.split('Final answer: ')[-1]
      # The answers are masked only after the prompt was built, so that they
      # can be compared with the (masked) reply.
      if chatGPT_settings['masking']:
         possible_answers = [process_text(answer, function = 'mask') for answer in possible_answers]
      next_node, right_answer_id = select_answer(answer_text, possible_answers, target_nodes)
      node_text = possible_answers[right_answer_id]
      if int(current_node) == 112 and 'Other' in node_text:
          return "Other", 271
      # Remember the answer given at these nodes for reuse at nodes 39/173.
      if int(current_node) in [41, 176, 264, 145, 112]:
          prev_utterance_type = node_text
          print(f'just wrote prev_utterance_type: {prev_utterance_type}', file=stdout_meaning)
      return node_text, next_node

In [None]:
def chain(dialog_json, cur_utterance):
    """Walk the question tree for one utterance until a final node is reached.

    Starts at node 2 and repeatedly calls ``chain_answer`` until it returns 0
    as the next node. Each step is printed when the global ``logs_real_time``
    is true.

    Args:
        dialog_json: List of ``{"speaker", "utterance"}`` dicts for the dialog.
        cur_utterance: Index of the utterance being annotated.
    """
    # Read the flag once; printing only when it is set avoids opening (and
    # never closing) an os.devnull handle on every call.
    verbose = logs_real_time
    cur_node = 2
    while cur_node != 0:
        answer, cur_node = chain_answer(dialog_json, cur_node, cur_utterance)
        if verbose:
            print(f'{answer} {cur_node}')

In [None]:
# This is the main function that drives the whole annotation of the dataset.
# I do not call it, because ChatGPT periodically drops the API connection and
# the annotation then breaks in automatic mode.
def annotate_dialog(dialog_id, dialog_json):
    """Annotate every utterance of one dialog.

    Resets the global ``final_answers`` for the dialog and the global
    ``annotation_log`` before each utterance, then runs ``chain`` on it.

    Returns:
        A DataFrame with one row per utterance and the columns ``dialog_id``,
        ``utterance_id``, ``speaker``, ``utterance``, ``annotation`` (the last
        entry of ``final_answers``) and ``logs``.
    """
    global final_answers, annotation_log
    final_answers = []  # annotation answers collected for a single dialog
    collected_rows = []
    for position in tqdm(range(len(dialog_json))):
        annotation_log = ""
        chain(dialog_json, position)
        turn = dialog_json[position]
        collected_rows.append({
            'dialog_id': dialog_id,
            'utterance_id': position,
            'speaker': turn['speaker'],
            'utterance': turn['utterance'],
            'annotation': final_answers[-1],
            "logs": annotation_log,
        })
    return pd.DataFrame(collected_rows)


In [None]:
def annotate_all_dialogs(list_dialogs): # 'reannot_toloka (1).tsv'
    """Annotate every dialog and stack the results into one DataFrame.

    Args:
        list_dialogs: dict of ``dialog id -> list of utterance dicts``, as
            produced by ``make_dialog_dict``.

    Returns:
        A DataFrame with the columns ``dialog_id``, ``utterance_id``,
        ``speaker``, ``utterance``, ``annotation`` and ``logs``; it is empty
        (with these columns) when ``list_dialogs`` is empty. Each dialog keeps
        its own row index.
    """
    # Typed empty frame so the result has the expected columns even when
    # there is nothing to annotate.
    df_dummy = pd.DataFrame({'dialog_id': pd.Series(dtype='int'),
                            'utterance_id': pd.Series(dtype='int'),
                            'speaker': pd.Series(dtype='str'),
                            'utterance': pd.Series(dtype='str'),
                            'annotation': pd.Series(dtype='str'),
                            'logs': pd.Series(dtype='str')})
    # Collect the per-dialog frames and concatenate once instead of re-copying
    # the growing result on every iteration.
    frames = [df_dummy]
    for dialog_id, dialog in tqdm(list_dialogs.items()):
        frames.append(annotate_dialog(dialog_id, dialog))
    return pd.concat(frames)

In [None]:
def annotate_file(filename): # 'reannot_toloka (1).tsv'
    """Read a comma-separated 'gold' dialog file and annotate all its dialogs."""
    return annotate_all_dialogs(make_dialog_dict(filename, filetype='gold', sep=','))

# Functions for calculating metrics

In [None]:
def _drop_last_component(label):
    """Return the dotted ``label`` without its last component."""
    return '.'.join(label.split('.')[:-1])


def cut_labels(golden_cut):
    """Shorten full labels to their coarser form, one output per input.

    Rules, applied in this order:
      - labels containing ``Open``: ``Initiate.`` is removed, then a
        three-part label loses its last part;
      - labels containing ``Prolong``, ``Develop`` or ``Track`` lose their
        last part;
      - labels containing ``Reply`` lose their last part unless they contain
        ``Accept``;
      - labels containing ``Challenge`` or ``Confront.Response`` lose their
        last part;
      - everything else is kept unchanged.

    NOTE(review): ``Open`` labels with ``Initiate`` used to produce two output
    entries (the label without ``Initiate.`` and the untouched label), which
    broke the alignment with the input. They are now normalised and then cut
    like any other ``Open`` label -- confirm this is the intended form.

    Args:
        golden_cut: Iterable of label strings (a list or a pandas Series).

    Returns:
        A list of shortened labels with the same length as the input.
    """
    short_labels = []
    for label in golden_cut:
        if 'Open' in label:
            if 'Initiate' in label:
                label = label.replace('Initiate.', '')
            short = _drop_last_component(label) if len(label.split('.')) == 3 else label
        elif 'Prolong' in label or 'Develop' in label or 'Track' in label:
            short = _drop_last_component(label)
        elif 'Reply' in label:
            short = label if 'Accept' in label else _drop_last_component(label)
        elif 'Challenge' in label or 'Confront.Response' in label:
            short = _drop_last_component(label)
        else:
            short = label
        short_labels.append(short)
    return short_labels

In [None]:
def cut_labels_list(list_of_lists):
    """Apply ``cut_labels`` to every inner list, in place.

    The entries of ``list_of_lists`` are replaced by their shortened versions
    and the same (mutated) outer list is returned.
    """
    for position, labels in enumerate(list_of_lists):
        list_of_lists[position] = cut_labels(labels)
    return list_of_lists

In [None]:
def make_annotation_list(annotation_results, gold_dialogs, type_ann='toloka', type_label='full'):
    """
    Build per-dialog label lists from the annotation results.

    type_ann = toloka, good, excellent
      - 'toloka': the annotated labels themselves.
      - otherwise: for every utterance, the annotated label if it is among the
        gold options ``gold_dialogs[str(dialog_id)][type_ann][utterance_id]``,
        else the first gold option.
    type_label = 'cut' shortens annotated labels with cut_labels and gold
    options with cut_labels_list (which modifies ``gold_dialogs`` in place).

    Returns a dict: dialog id -> list of labels, one per utterance row.
    """
    use_cut = type_label == 'cut'
    per_dialog = {}
    for dialog_id in annotation_results['dialog_id'].unique():
        rows = annotation_results[annotation_results['dialog_id'] == dialog_id].reset_index()
        if use_cut:
            rows['annotation'] = cut_labels(rows['annotation'])
        predicted = rows['annotation'].tolist()
        if type_ann == 'toloka':
            per_dialog[dialog_id] = predicted
            continue
        gold_annot = gold_dialogs[str(dialog_id)][type_ann]
        if use_cut:
            gold_annot = cut_labels_list(gold_annot)
        positions = rows['utterance_id'].tolist()
        per_dialog[dialog_id] = [
            label if label in gold_annot[position] else gold_annot[position][0]
            for label, position in zip(predicted, positions)
        ]
    return per_dialog

In [None]:
def _print_metric_report(gold, predicted, scope, details):
    """Print accuracy, macro/micro F1 and weighted recall/precision, rounded to 2 digits."""
    print(f"Accuracy for {scope} ({details}):")
    print(round(accuracy_score(gold, predicted), 2))

    print(f"macro f1 for {scope} ({details})")
    print(round(f1_score(gold, predicted, average="macro"), 2))

    print(f"micro f1 for {scope} ({details}):")
    print(round(f1_score(gold, predicted, average="micro"), 2))

    print(f"weighted recall for {scope} ({details}):")
    print(round(recall_score(gold, predicted, average="weighted"), 2))

    print(f"weighted precision for {scope} ({details}):")
    print(round(precision_score(gold, predicted, average="weighted"), 2))


def return_metrics(annotation_results, gold_dialogs_filepath, type_gold='good', type_label='full', how='by_one_dialog'): #"gold_dialogs_train (1).json"
    """
    Print classification metrics of the annotation against the gold labels.

    type_gold = good, excellent
    type_label = full, cut
    how = by_one_dialog, by_all

    Args:
        annotation_results: DataFrame with ``dialog_id``, ``utterance_id`` and
            ``annotation`` columns.
        gold_dialogs_filepath: Path of the JSON file with the gold labels.
        type_gold: Key of the gold label set inside each gold dialog.
        type_label: 'cut' compares shortened labels, 'full' the labels as they are.
        how: 'by_one_dialog' prints one report per dialog, 'by_all' a single
            report over all dialogs together.

    Raises:
        ValueError: If ``how`` is not one of the two supported values.
    """
    if how not in ('by_one_dialog', 'by_all'):
        # An unknown mode used to print nothing at all.
        raise ValueError(f"Unknown how {how!r}; expected 'by_one_dialog' or 'by_all'")

    with open(gold_dialogs_filepath, "r", encoding="utf-8") as f:
        gold_dialogs = json.load(f)
    toloka_annotation = make_annotation_list(annotation_results, gold_dialogs, type_ann='toloka', type_label=type_label)
    gold_annotation = make_annotation_list(annotation_results, gold_dialogs, type_ann=type_gold, type_label=type_label)
    details = f"{type_gold}, {type_label} labels, no voting"

    if how == 'by_one_dialog':
        for key in toloka_annotation:
            _print_metric_report(gold_annotation[key], toloka_annotation[key], f"dialog {key}", details)
    else:
        # Flatten the per-dialog lists in dialog order.
        all_predicted = [label for labels in toloka_annotation.values() for label in labels]
        all_gold = [label for labels in gold_annotation.values() for label in labels]
        _print_metric_report(all_gold, all_predicted, "all dialogs", details)

# TO EXPERIMENT WITH EVERYTHING BY YOURSELF, GO HERE

In [None]:
# Replace the placeholder with your own OpenAI API key before running the
# annotation; avoid committing a real key together with the notebook.
openai.api_key = 'YOUR_KEY' # you have to provide your own key here

In [None]:
# Run counter; the run cell increments it and uses it as the suffix of the
# output file name.
count = 0

In [None]:
# Settings read by the annotation functions:
#   model, temperature - passed to the chat completion call (chatGPT_annotate)
#   context_length     - number of preceding utterances put into the prompt
#   instruction        - whether instruction texts are added to the prompt
#   masking            - whether label names are replaced by masks (process_text)
#   explanation        - whether the model is asked for reasoning before the final answer
chatGPT_settings = {
    'model': "gpt-3.5-turbo",
    'temperature': 0.9,
    'context_length': 1,
    'instruction': True,
    'masking': False,
    'explanation': False
}

In [None]:
# One annotation run: annotate the first 12 gold dialogs, save the result and
# print the metrics for all gold/label-type combinations.
count += 1
logs_real_time = False #True if you want to see lots of logs real-time
# Globals read and updated by chain_answer / annotate_dialog.
final_answers = []
prev_utterance_type = ''
# keys = ['1'] #ids of dialogs that you want to annotate
dialogs = make_dialog_dict('gold_standard.csv', sep=',', filetype='gold')
dialogs_shorter = dict(list(dialogs.items())[:12]) #12 dialogs only
# dialogs_shorter = {key: dialogs[key] for key in keys} #uncomment if you want to use ids
annotation_results = annotate_all_dialogs(dialogs_shorter)
# Expand short labels to full labels and trim surrounding whitespace.
annotation_results['annotation'] = annotation_results['annotation'].apply(process_text, function='get_full_label')
annotation_results['annotation'] = annotation_results['annotation'].apply(lambda x: x.strip())
# The output path is on the mounted Google Drive; `count` distinguishes runs.
annotation_results.to_csv(f'/content/drive/MyDrive/annotation_SIGDIAL/sf_annotation_{count}.tsv', sep='\t')
return_metrics(annotation_results, "gold_standard.json", type_gold='good', type_label='full', how='by_all')
return_metrics(annotation_results, "gold_standard.json", type_gold='good', type_label='cut', how='by_all')
return_metrics(annotation_results, "gold_standard.json", type_gold='excellent', type_label='full', how='by_all')
return_metrics(annotation_results, "gold_standard.json", type_gold='excellent', type_label='cut', how='by_all')