## Загружаем датасет

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaTokenizer,
    GenerationConfig
)
import huggingface_hub

# Minimum answer length requested from the model; interpolated verbatim into every prompt template
# ("Your answer should be at least {answer_length} long.").
answer_length = '7 words'

In [None]:
# Load the train and test splits from CSV files located in the current working directory.
# NOTE(review): `test_half.csv` is presumably a pre-sampled half of the test split — confirm its origin.
train = pd.read_csv('train.csv')
test = pd.read_csv('test_half.csv')

In [None]:
import os
import tarfile
import urllib.request

# Define the URL of the dataset
url = "https://dl.fbaipublicfiles.com/parlai/empatheticdialogues/empatheticdialogues.tar.gz"
dataset_name = "empatheticdialogues.tar.gz"
dataset_dir = "empatheticdialogues"

# Download the dataset only once, so re-running the notebook does not hit the network again.
# The archive is fetched under a temporary name first: an interrupted download never leaves
# a truncated file behind that a later run would mistake for a complete archive.
if not os.path.exists(dataset_name):
    partial_name = dataset_name + ".part"
    urllib.request.urlretrieve(url, partial_name)
    os.replace(partial_name, dataset_name)

# Extract the dataset. The archive comes from the network, so use the "data" extraction filter
# (rejects absolute paths and path traversal) whenever this Python version provides it.
with tarfile.open(dataset_name, "r:gz") as tar:
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=dataset_dir, filter="data")
    else:
        tar.extractall(path=dataset_dir)

print("Dataset downloaded and extracted.")

Dataset downloaded and extracted.


In [None]:
# Replace the literal "_comma_" placeholder with a real comma in the text columns.
# The vectorised `.str` accessor leaves missing values untouched, whereas calling
# `.replace` on every element would raise on a NaN cell.
for column in ['utterance', 'prompt']:
  train[column] = train[column].str.replace('_comma_', ',', regex=False)
  test[column] = test[column].str.replace('_comma_', ',', regex=False)

Проверим, есть ли у нас в датасете такие случаи, что две разные реплики подряд внутри одного разговора принадлежат одному и тому же человеку. Оказывается, что такое действительно встречается единожды.

In [None]:
# Print the position of every row whose successor belongs to the same conversation
# AND to the same speaker, i.e. places where the turn alternation is broken.
speakers = list(test['speaker_idx'])
conversations = list(test['conv_id'])
for position in range(len(speakers) - 1):
  same_speaker = speakers[position] == speakers[position + 1]
  same_conversation = conversations[position] == conversations[position + 1]
  if same_speaker and same_conversation:
    print(position)

2051


Посмотрев полностью на этот разговор, можно понять, что, скорее всего, в диалоге не хватает одной реплики между репликами "What was brutal?" и "Really? No shower for few days?". Добавим руками дополнительную строчку в датасете.

In [None]:
# Show the situation description ("prompt" column) of the row flagged by the check above.
test.iloc[2051]['prompt']

'i didnt shower for a few days and had an important meeting. I could tell that they could smell me. I became a tomato'

In [None]:
# Show the rows around position 2051 to see where the conversation skips a turn.
test.iloc[2049:2055]

Unnamed: 0,conv_id,utterance_idx,context,prompt,speaker_idx,utterance,selfeval,tags
2049,hit:2700_conv:5401,4,ashamed,I wish I never did it now,408,Was it liquid ?,5|5|5_3|4|4,
2050,hit:2710_conv:5421,1,embarrassed,i didnt shower for a few days and had an impor...,70,it was brutal,3|4|5_5|5|5,
2051,hit:2710_conv:5421,2,embarrassed,i didnt shower for a few days and had an impor...,329,What was brutal?,3|4|5_5|5|5,
2052,hit:2710_conv:5421,4,embarrassed,i didnt shower for a few days and had an impor...,329,Really? No shower for few days?,3|4|5_5|5|5,
2053,hit:2710_conv:5421,5,embarrassed,i didnt shower for a few days and had an impor...,70,yes. homeless for a bit,3|4|5_5|5|5,
2054,hit:2710_conv:5421,6,embarrassed,i didnt shower for a few days and had an impor...,329,ok. I understand,3|4|5_5|5|5,


In [None]:
# Conversation hit:2710_conv:5421 has no row with utterance_idx 3, so two consecutive rows belong
# to speaker 329. Insert a reconstructed turn of speaker 70 between them.
new_line = pd.DataFrame(
    [['hit:2710_conv:5421', 3, 'embarrassed', 'i didnt shower for a few days and had an important meeting. I could tell that they could smell me. I became a tomato', 70, 'i didnt shower for a few days and had an important meeting', '3|4|5_5|5|5', None]],
    columns=test.columns)
# Guard against inserting the row a second time when the cell is re-run.
already_inserted = ((test['conv_id'] == 'hit:2710_conv:5421') & (test['utterance_idx'] == 3)).any()
if not already_inserted:
  # ignore_index=True renumbers the rows; otherwise the inserted row would carry
  # index label 0 and the frame would end up with a duplicated index.
  test = pd.concat([test.iloc[:2052], new_line, test.iloc[2052:]], ignore_index=True)

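В некоторых экспериментах нам понадобятся метки эмоций. Сразу подготовим эти данные. Для этого используем уже готовый классификатор `j-hartmann/emotion-english-distilroberta-base`, обученный в том числе на датасете GoEmotions и различающий 7 классов: anger, disgust, fear, joy, neutral, sadness, surprise.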


На тестовом датасете метки нам понадобятся для того, чтобы передавать их в модель для генерации ответа, соответствующего текущей эмоции. На обучающем датасете эти метки понадобятся для передачи примеров ответов на разные эмоции пользователя.

In [None]:
def get_top3_emotions(data, min_proba=0.15):
  """Classify every row of ``data['utterance']`` and keep up to three emotion labels.

  The first entry returned by the global ``emo_clf`` is always kept. The second entry is
  kept only if its score, rounded to two decimals, is at least ``min_proba``; the third
  one additionally requires that the second one was kept. Missing slots are filled with
  an empty label and a score of 0.

  Returns six lists aligned with the rows of ``data``:
  first, second and third label, followed by first, second and third score.
  """
  labels = ([], [], [])
  scores = ([], [], [])
  for step, text in enumerate(data['utterance']):
    if step % 1000 == 0:
      print(f'Iteration {step} done.')
    ranked = emo_clf(text)[0]
    ranked_labels = [entry['label'] for entry in ranked]
    ranked_scores = [entry['score'] for entry in ranked]
    # Number of leading entries that pass the selection rules.
    kept = 0
    if len(ranked) > 0:
      kept = 1
      while kept < 3 and len(ranked) > kept and round(ranked_scores[kept], 2) >= min_proba:
        kept += 1
    for slot in range(3):
      if slot < kept:
        labels[slot].append(ranked_labels[slot])
        scores[slot].append(ranked_scores[slot])
      else:
        labels[slot].append('')
        scores[slot].append(0)
  return labels[0], labels[1], labels[2], scores[0], scores[1], scores[2]

In [None]:
def get_top3_dialog_emotions(data, min_proba=0.15):
  """Label the turns of each conversation's first speaker using the dialogue history.

  For every conversation (``conv_id``) the speaker of its first row is treated as the
  user. Each user turn is classified with the global ``emo_clf`` on the concatenation of
  all user turns of that conversation so far (current one included), so earlier turns
  influence the predicted emotion. Rows of any other speaker get empty labels and
  zero scores.

  The selection rules match ``get_top3_emotions``: the first entry is always kept, the
  second one only if its score rounded to two decimals is at least ``min_proba``, and the
  third one only if the second was kept and it passes the same threshold.

  Returns six lists aligned with the rows of ``data``:
  first, second and third label, followed by first, second and third score.
  An empty ``data`` yields six empty lists.
  """
  labels = ([], [], [])
  scores = ([], [], [])
  prev_conv_id = None
  speaker_id = None
  cur_context = ''
  for i in range(data.shape[0]):
    if i % 1000 == 0:
      print(f'Iteration {i} done.')
    row = data.iloc[i]
    if i == 0 or row['conv_id'] != prev_conv_id:
      # A new conversation starts: remember who opened it and reset the history.
      prev_conv_id = row['conv_id']
      speaker_id = row['speaker_idx']
      cur_context = ''
    if row['speaker_idx'] != speaker_id:
      for slot in range(3):
        labels[slot].append('')
        scores[slot].append(0)
      continue
    cur_context = f'{cur_context} {row["utterance"]}'.strip()
    # Classify the accumulated history, not just the current utterance; otherwise the
    # result would be identical to the per-utterance labels.
    # NOTE(review): if the classifier truncates long inputs from the right, the newest
    # turn is what gets cut off in very long dialogues — confirm this is acceptable.
    ranked = emo_clf(cur_context)[0]
    kept = 0
    if len(ranked) > 0:
      kept = 1
      while kept < 3 and len(ranked) > kept and round(ranked[kept]['score'], 2) >= min_proba:
        kept += 1
    for slot in range(3):
      if slot < kept:
        labels[slot].append(ranked[slot]['label'])
        scores[slot].append(ranked[slot]['score'])
      else:
        labels[slot].append('')
        scores[slot].append(0)
  return labels[0], labels[1], labels[2], scores[0], scores[1], scores[2]

In [None]:
from transformers import pipeline
import huggingface_hub

# Interactive Hugging Face Hub login (renders the widget shown in the cell output).
huggingface_hub.login()
# Emotion classifier used by get_top3_emotions / get_top3_dialog_emotions.
# top_k=None is set so that each call yields a list of label/score entries rather than a single
# best label (the helper functions read up to three entries); inputs are truncated to 512 tokens.
emo_clf = pipeline("text-classification",
                   model="j-hartmann/emotion-english-distilroberta-base",
                   top_k=None,
                   max_length=512,
                   truncation=True,
                   device_map="auto")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Per-utterance emotion labels and scores for the training split.
first_emo, second_emo, third_emo, first_emo_score, second_emo_score, third_emo_score = get_top3_emotions(train)
train_utter_columns = {
    'utter_emo_1': first_emo,
    'utter_emo_2': second_emo,
    'utter_emo_3': third_emo,
    'utter_emo_1_score': first_emo_score,
    'utter_emo_2_score': second_emo_score,
    'utter_emo_3_score': third_emo_score,
}
for column_name, column_values in train_utter_columns.items():
  train[column_name] = column_values

Iteration 0 done.
Iteration 1000 done.
Iteration 2000 done.
Iteration 3000 done.
Iteration 4000 done.
Iteration 5000 done.
Iteration 6000 done.
Iteration 7000 done.
Iteration 8000 done.
Iteration 9000 done.
Iteration 10000 done.
Iteration 11000 done.
Iteration 12000 done.
Iteration 13000 done.
Iteration 14000 done.
Iteration 15000 done.
Iteration 16000 done.
Iteration 17000 done.
Iteration 18000 done.
Iteration 19000 done.
Iteration 20000 done.
Iteration 21000 done.
Iteration 22000 done.
Iteration 23000 done.
Iteration 24000 done.
Iteration 25000 done.
Iteration 26000 done.
Iteration 27000 done.
Iteration 28000 done.
Iteration 29000 done.
Iteration 30000 done.
Iteration 31000 done.
Iteration 32000 done.
Iteration 33000 done.
Iteration 34000 done.
Iteration 35000 done.
Iteration 36000 done.
Iteration 37000 done.
Iteration 38000 done.
Iteration 39000 done.
Iteration 40000 done.
Iteration 41000 done.
Iteration 42000 done.
Iteration 43000 done.
Iteration 44000 done.
Iteration 45000 done.
I

In [None]:
# Per-utterance emotion labels and scores for the test split.
first_emo, second_emo, third_emo, first_emo_score, second_emo_score, third_emo_score = get_top3_emotions(test)
test_utter_columns = {
    'utter_emo_1': first_emo,
    'utter_emo_2': second_emo,
    'utter_emo_3': third_emo,
    'utter_emo_1_score': first_emo_score,
    'utter_emo_2_score': second_emo_score,
    'utter_emo_3_score': third_emo_score,
}
for column_name, column_values in test_utter_columns.items():
  test[column_name] = column_values

Iteration 0 done.
Iteration 1000 done.
Iteration 2000 done.


Также учтём, что в некоторых случаях контекст может влиять на эмоцию, которую испытывает пользователь в конкретной фразе. Попробуем также добавить определение эмоций на основе не только последней реплики пользователя, но и с учётом предыдущей истории диалога.

In [None]:
# Dialogue-level emotion labels and scores for the training split.
first_emo, second_emo, third_emo, first_emo_score, second_emo_score, third_emo_score = get_top3_dialog_emotions(train)
train_dialog_columns = {
    'dialog_emo_1': first_emo,
    'dialog_emo_2': second_emo,
    'dialog_emo_3': third_emo,
    'dialog_emo_1_score': first_emo_score,
    'dialog_emo_2_score': second_emo_score,
    'dialog_emo_3_score': third_emo_score,
}
for column_name, column_values in train_dialog_columns.items():
  train[column_name] = column_values

In [None]:
# Dialogue-level emotion labels and scores for the test split.
first_emo, second_emo, third_emo, first_emo_score, second_emo_score, third_emo_score = get_top3_dialog_emotions(test)
test_dialog_columns = {
    'dialog_emo_1': first_emo,
    'dialog_emo_2': second_emo,
    'dialog_emo_3': third_emo,
    'dialog_emo_1_score': first_emo_score,
    'dialog_emo_2_score': second_emo_score,
    'dialog_emo_3_score': third_emo_score,
}
for column_name, column_values in test_dialog_columns.items():
  test[column_name] = column_values

In [None]:
from google.colab import drive
# Mount Google Drive (Colab only) and store both annotated splits there without the index column.
# NOTE(review): the paths are hard-coded to the root of "MyDrive"; the test split is written as
# `test.csv` although it was read from `test_half.csv` — confirm this naming is intended.
drive.mount('/content/drive')
train_save_path = "/content/drive/MyDrive/train.csv"
test_save_path = "/content/drive/MyDrive/test.csv"
train.to_csv(train_save_path, index=False)
test.to_csv(test_save_path, index=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import random

# A dedicated, seeded generator makes the sampled subset identical on every run
# without touching the global `random` state.
sampling_rng = random.Random(42)

# One row per conversation is enough to know its emotion label ("context" column).
remove_duplicates = test.drop_duplicates(subset="conv_id", keep="first")
chosen_convs = []
for emotion in remove_duplicates['context'].unique():
  cur_emotion_data = list(remove_duplicates[remove_duplicates['context'] == emotion]['conv_id'].unique())
  # Keep half of the conversations of every emotion label, but never fewer than one.
  chosen_convs.extend(sampling_rng.sample(cur_emotion_data, max(1, len(cur_emotion_data) // 2)))
final_test_data = test[test['conv_id'].isin(chosen_convs)]

## Функции получения ответа от Mistral

In [None]:
# NOTE(review): the recorded output of this cell shows that the upgrade installs sympy 1.14.0 while
# torch 2.6.0+cu124 requires sympy==1.13.1 — confirm the upgrade is really needed, and pin the version.
!pip install --upgrade sympy

Collecting sympy
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Using cached sympy-1.14.0-py3-none-any.whl (6.3 MB)
Installing collected packages: sympy
  Attempting uninstall: sympy
    Found existing installation: sympy 1.13.1
    Uninstalling sympy-1.13.1:
      Successfully uninstalled sympy-1.13.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.6.0+cu124 requires sympy==1.13.1; python_version >= "3.9", but you have sympy 1.14.0 which is incompatible.[0m[31m
[0mSuccessfully installed sympy-1.14.0


In [None]:
# Use the GPU when there is one; fall back to CPU so the cell still runs (slowly) without it.
device = "cuda" if torch.cuda.is_available() else "cpu"
# `torch_dtype` only applies to model weights, so it is not passed to the tokenizer.
mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
mistral = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3",
                                               torch_dtype="auto")
mistral.to(device)
# The tokenizer has no dedicated padding token, so the end-of-sequence token is reused.
mistral_tokenizer.pad_token = mistral_tokenizer.eos_token
# Batched generation with a decoder-only model needs the padding on the left, so that every
# prompt ends directly before the first generated token.
mistral_tokenizer.padding_side = "left"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
def run_prompt_mistral(prompt):
    """Send a single user prompt to the global ``mistral`` model and return its reply.

    The prompt is wrapped into a one-message chat, rendered with the tokenizer's chat
    template and moved to ``device``. Up to 250 new tokens are sampled; only the newly
    generated part is decoded, with special tokens dropped and outer whitespace stripped.
    """
    chat = [{"role": "user", "content": prompt}]
    prompt_ids = mistral_tokenizer.apply_chat_template(chat, return_tensors='pt').to(device)
    sampling_options = dict(
        max_new_tokens=250,
        no_repeat_ngram_size=3,
        temperature=0.7,
        top_k=5,
        do_sample=True,
        pad_token_id=mistral_tokenizer.eos_token_id,
    )
    full_ids = mistral.generate(prompt_ids, **sampling_options)
    # The output starts with the prompt itself; skip it to keep only the reply.
    prompt_length = prompt_ids.shape[1]
    reply_ids = full_ids[0, prompt_length:]
    return mistral_tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

In [None]:
def run_prompt_mistral_list(messages):
    """Generate replies of the global ``mistral`` model for a batch of user prompts.

    Every prompt becomes its own one-message chat; the chats are rendered with the
    tokenizer's chat template, padded to a common length and generated together.

    Args:
        messages: sequence of prompt strings.

    Returns:
        A list with one stripped reply per prompt, in input order. An empty input
        yields an empty list without calling the model.
    """
    if len(messages) == 0:
        return []
    conversations = [[{"role": "user", "content": prompt}] for prompt in messages]
    # return_dict=True also yields the attention mask. Without it, `generate` cannot tell
    # padding from content here, because the pad token is the same as the eos token.
    encoded = mistral_tokenizer.apply_chat_template(
        conversations,
        padding=True,
        return_tensors='pt',
        return_dict=True
    ).to(device)
    output_ids = mistral.generate(
        **encoded,
        max_new_tokens=250,
        no_repeat_ngram_size=3,
        temperature=0.7,
        top_k=5,
        do_sample=True,
        pad_token_id=mistral_tokenizer.eos_token_id
    )
    # Every row starts with the (padded) prompt; drop it to keep only the replies.
    # NOTE(review): this slicing assumes the tokenizer pads on the left — confirm padding_side.
    gen_only = output_ids[:, encoded['input_ids'].shape[1]:]
    decoded = mistral_tokenizer.batch_decode(gen_only, skip_special_tokens=True)
    # Release the batch tensors right away to keep GPU memory flat across many batches.
    del encoded, output_ids, gen_only
    torch.cuda.empty_cache()
    return [response.strip() for response in decoded]

In [None]:
def form_pairs_dataset(data):
  """Turn a table of utterances into a table of (user utterance, answer) pairs.

  Within every conversation (``conv_id``) the rows are taken two at a time: the row at
  an even position is the user utterance and the following row is the answer. A trailing
  row without a successor is dropped. The situation label (``context``) and all emotion
  columns are copied from the user row.

  Args:
    data: DataFrame with the columns ``conv_id``, ``utterance``, ``context`` and the
      twelve ``utter_emo_*`` / ``dialog_emo_*`` label and score columns.

  Returns:
    DataFrame with one row per pair and the columns ``conv_id``, ``user_utterance``,
    ``answer``, ``context`` plus the emotion columns. Empty input gives an empty frame
    with the same columns.
  """
  emotion_columns = ['utter_emo_1', 'utter_emo_2', 'utter_emo_3', 'utter_emo_1_score', 'utter_emo_2_score',
                     'utter_emo_3_score', 'dialog_emo_1', 'dialog_emo_2', 'dialog_emo_3', 'dialog_emo_1_score',
                     'dialog_emo_2_score', 'dialog_emo_3_score']
  pair_columns = ['conv_id', 'user_utterance', 'answer', 'context'] + emotion_columns
  dialog_pairs = []
  # sort=False keeps the conversations in order of first appearance.
  for conv_id, utterances in data.groupby('conv_id', sort=False):
    for i in range(0, utterances.shape[0] - 1, 2):
      user_row = utterances.iloc[i]
      answer_row = utterances.iloc[i + 1]
      dialog_pairs.append((conv_id, user_row['utterance'], answer_row['utterance'], user_row['context'],
                           *(user_row[column] for column in emotion_columns)))
  # Passing the column names to the constructor also works when no pair was produced.
  return pd.DataFrame(dialog_pairs, columns=pair_columns)

In [None]:
# Build the (user utterance, reference answer) pairs of the test split; all experiments below use them.
dialog_pairs = form_pairs_dataset(test)

## Mistral

### Эксперимент 1. Baseline

Для бейзлайна просто прогоним модели на тестовом датасете с промптом, в котором попросим модель быть эмпатичной.

In [None]:
def base_prompt(prompt):
  """Return the baseline instruction text for answering the user message ``prompt``.

  The text asks for an empathetic, relevant and fluent answer of at least the globally
  configured ``answer_length`` and ends with the user message itself.
  """
  instruction = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}
  '''
  return instruction

In [None]:
# Experiment 1: answer every user utterance with the baseline prompt, 16 prompts per model call.
batch_size = 16
generated_answers = []
for batch_start in range(0, dialog_pairs.shape[0], batch_size):
  # batch_start grows in steps of batch_size, so progress is reported once per 128 rows.
  if batch_start % 128 == 0:
    print(f'Iteration {batch_start} finished.')
  batch = dialog_pairs.iloc[batch_start:batch_start + batch_size]
  utterances = [base_prompt(text) for text in batch['user_utterance']]
  generated_answers.extend(run_prompt_mistral_list(utterances))
dialog_pairs['mistral_base'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


### Эксперимент 2. Передача контекста вместе с очередной репликой.

In [None]:
def dialog_context_prompt(prompt, context):
  """Return the instruction text for ``prompt``, optionally mentioning earlier user messages.

  With a non-empty ``context`` the text ends with a sentence that quotes the previous
  messages of the user; with an empty ``context`` only the baseline instruction is produced.
  """
  if context != '':
    return f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}
  Also consider that this utterance is not the first in the dialog. The previous messages of the user look like this: {context}
  '''
  # First utterance of a dialogue: nothing to add to the baseline instruction.
  return f'''
    Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
    Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
    empathetic: does the responses show understanding of the feelings of the person talking about their experience?
    relevant: does the responses seem appropriate to the conversation? Are they on-topic?
    fluent: is it possible to understand the responses? Does the language seem accurate?
    User"s message: {prompt}
    '''

In [None]:
def generate_context_answers(generate):
  """Answer every row of the global ``dialog_pairs`` using the dialogue-context prompt.

  For each user utterance the history consists of the earlier user utterances of the same
  conversation, each preceded by a newline, cut to its last 200 characters. The prompts are
  passed to ``generate`` (a callable taking a list of prompts and returning a list of
  answers) in batches of 16.

  Returns the list of generated answers, one per row of ``dialog_pairs``.
  """
  batch_size = 16
  # Pass 1: collect the history that precedes every user utterance.
  histories = []
  for conv_id in dialog_pairs['conv_id'].unique():
    history = ''
    for utterance in dialog_pairs.loc[dialog_pairs['conv_id'] == conv_id, 'user_utterance']:
      histories.append(history[-200:])
      history = f'{history}\n{utterance}'
  # Pass 2: build the prompts batch by batch and generate the answers.
  answers = []
  total = dialog_pairs.shape[0]
  for start in range(0, total, batch_size):
    if start % 128 == 0:
      print(f'Iteration {start} done.')
    stop = min(total, start + batch_size)
    batch_utterances = list(dialog_pairs.iloc[start:stop]['user_utterance'])
    prompts = [dialog_context_prompt(text, past)
               for text, past in zip(batch_utterances, histories[start:stop])]
    answers.extend(generate(prompts))
  return answers

In [None]:
# Experiment 2: batched generation with the dialogue-context prompt; results go to `mistral_context`.
generated_answers = generate_context_answers(run_prompt_mistral_list)
dialog_pairs['mistral_context'] = generated_answers

Iteration 0 done.
Iteration 128 done.
Iteration 256 done.
Iteration 384 done.
Iteration 512 done.
Iteration 640 done.
Iteration 768 done.
Iteration 896 done.
Iteration 1024 done.
Iteration 1152 done.
Iteration 1280 done.
Iteration 1408 done.
Iteration 1536 done.
Iteration 1664 done.
Iteration 1792 done.
Iteration 1920 done.
Iteration 2048 done.
Iteration 2176 done.
Iteration 2304 done.
Iteration 2432 done.
Iteration 2560 done.


### Эксперимент 3. Передача эмоции вместе с репликой.

#### Передача одной эмоции

In [None]:
def top1_emotion_prompt(emotion, prompt):
  """Return the instruction text for ``prompt``, optionally naming the user's emotion.

  Args:
    emotion: emotion label of the user. An empty string or any non-string value
      (for example the NaN that an empty label turns into after a CSV round trip)
      means "no emotion known" and produces the plain instruction.
    prompt: the user message to answer.

  Returns:
    The prompt text for the model.
  """
  # Only a non-empty string is a usable label; NaN/None must never be rendered into the text.
  if not isinstance(emotion, str) or emotion == '':
    return f'''
    Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
    Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
    empathetic: does the responses show understanding of the feelings of the person talking about their experience?
    relevant: does the responses seem appropriate to the conversation? Are they on-topic?
    fluent: is it possible to understand the responses? Does the language seem accurate?
    User"s message: {prompt}
    '''
  return f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}
  You also should consider that the user is experiencing this emotion: {emotion}. Keep that in mind while writing the answer.
  '''

In [None]:
def generate_top1_emotion_answers(generate):
  """Answer every row of the global ``dialog_pairs`` with the single-emotion prompt.

  Conversations are visited in order of first appearance; for each user utterance the
  prompt is built from the utterance and its ``utter_emo_1`` label and passed to
  ``generate`` (a callable taking one prompt and returning one answer).

  Returns the list of generated answers in visiting order.
  """
  answers = []
  done = 0
  for conv_id in dialog_pairs['conv_id'].unique():
    conversation = dialog_pairs[dialog_pairs['conv_id'] == conv_id]
    for text, label in zip(conversation['user_utterance'], conversation['utter_emo_1']):
      if done % 250 == 0:
        print(f'Iteration {done} finished.')
      answers.append(generate(top1_emotion_prompt(label, text)))
      done += 1
  return answers

In [None]:
# `dialog_pairs_copy` is not created in any visible earlier cell, so on a fresh kernel this
# cell failed with a NameError. Create it from `dialog_pairs` when it does not exist yet;
# the check runs before the long generation so that no work is lost.
if 'dialog_pairs_copy' not in globals():
  dialog_pairs_copy = dialog_pairs.copy()
generated_answers = generate_top1_emotion_answers(run_prompt_mistral)
dialog_pairs_copy['mistral_emo1'] = generated_answers

Iteration 0 finished.
Iteration 250 finished.
Iteration 500 finished.
Iteration 750 finished.
Iteration 1000 finished.
Iteration 1250 finished.
Iteration 1500 finished.
Iteration 1750 finished.
Iteration 2000 finished.
Iteration 2250 finished.
Iteration 2500 finished.


#### Передача трёх эмоций (при наличии)

In [None]:
def top3_emotion_prompt(emotions, prompt):
  """Return the instruction text for ``prompt``, optionally listing the user's emotions.

  ``emotions`` is a sequence of labels ordered from the strongest to the weakest one;
  it is joined with ", " into the text. An empty sequence produces the plain instruction.
  """
  if len(emotions) != 0:
    return f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}
  You also should consider that the user is experiencing these emotions: {', '.join(emotions)}. Keep that in mind while writing the answer.
  The emotions are stated from the strongest one in the message to the least strong.
  '''
  # No emotion label available: fall back to the plain instruction.
  return f'''
    Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
    Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
    empathetic: does the responses show understanding of the feelings of the person talking about their experience?
    relevant: does the responses seem appropriate to the conversation? Are they on-topic?
    fluent: is it possible to understand the responses? Does the language seem accurate?
    User"s message: {prompt}
    '''

In [None]:
def generate_top3_emotion_answers(generate):
  """Answer every row of the global ``dialog_pairs`` with the multi-emotion prompt.

  Conversations are visited in order of first appearance. For each user utterance the
  non-empty string values among ``utter_emo_1`` .. ``utter_emo_3`` are passed, in that
  order, to ``top3_emotion_prompt``; the prompt goes to ``generate`` (a callable taking
  one prompt and returning one answer).

  Returns the list of generated answers in visiting order.
  """
  answers = []
  done = 0
  for conv_id in dialog_pairs['conv_id'].unique():
    conversation = dialog_pairs[dialog_pairs['conv_id'] == conv_id]
    rows = zip(conversation['user_utterance'], conversation['utter_emo_1'],
               conversation['utter_emo_2'], conversation['utter_emo_3'])
    for text, *candidates in rows:
      # Missing labels may be empty strings or non-string values (e.g. NaN); drop both.
      emotions = [label for label in candidates if type(label) is str and len(label) != 0]
      if done % 250 == 0:
        print(f'Iteration {done} finished.')
      answers.append(generate(top3_emotion_prompt(emotions, text)))
      done += 1
  return answers

In [None]:
# `dialog_pairs_copy` is not created in any visible earlier cell, so on a fresh kernel this
# cell failed with a NameError. Create it from `dialog_pairs` when it does not exist yet;
# the check runs before the long generation so that no work is lost.
if 'dialog_pairs_copy' not in globals():
  dialog_pairs_copy = dialog_pairs.copy()
generated_answers = generate_top3_emotion_answers(run_prompt_mistral)
dialog_pairs_copy['mistral_emo3'] = generated_answers

Iteration 0 finished.
Iteration 250 finished.
Iteration 500 finished.
Iteration 750 finished.
Iteration 1000 finished.
Iteration 1250 finished.
Iteration 1500 finished.
Iteration 1750 finished.
Iteration 2000 finished.
Iteration 2250 finished.
Iteration 2500 finished.


#### Передача эмоции не только последней реплики, но и всего контекста диалога

In [None]:
def generate_dialog_top1_emotion_answers(generate):
  """Answer every row of the global ``dialog_pairs`` using the dialogue-level top emotion.

  Same procedure as the per-utterance variant, but the label is read from ``dialog_emo_1``.
  ``generate`` is a callable taking one prompt and returning one answer.

  Returns the list of generated answers, conversations in order of first appearance.
  """
  answers = []
  done = 0
  for conv_id in dialog_pairs['conv_id'].unique():
    conversation = dialog_pairs[dialog_pairs['conv_id'] == conv_id]
    for text, label in zip(conversation['user_utterance'], conversation['dialog_emo_1']):
      if done % 250 == 0:
        print(f'Iteration {done} finished.')
      answers.append(generate(top1_emotion_prompt(label, text)))
      done += 1
  return answers

In [None]:
# `dialog_pairs_copy` is not created in any visible earlier cell, so on a fresh kernel this
# cell failed with a NameError. Create it from `dialog_pairs` when it does not exist yet;
# the check runs before the long generation so that no work is lost.
if 'dialog_pairs_copy' not in globals():
  dialog_pairs_copy = dialog_pairs.copy()
generated_answers = generate_dialog_top1_emotion_answers(run_prompt_mistral)
dialog_pairs_copy['mistral_dialog_emo1'] = generated_answers

Iteration 0 finished.
Iteration 250 finished.
Iteration 500 finished.
Iteration 750 finished.
Iteration 1000 finished.
Iteration 1250 finished.
Iteration 1500 finished.
Iteration 1750 finished.
Iteration 2000 finished.
Iteration 2250 finished.
Iteration 2500 finished.


#### Передача 3 эмоций всего контекста диалога

In [None]:
def generate_dialog_top3_emotion_answers(generate):
  """Answer every row of the global ``dialog_pairs`` using up to three dialogue-level emotions.

  Same procedure as the per-utterance variant, but the labels are read from
  ``dialog_emo_1`` .. ``dialog_emo_3``. ``generate`` is a callable taking one prompt and
  returning one answer.

  Returns the list of generated answers, conversations in order of first appearance.
  """
  answers = []
  done = 0
  for conv_id in dialog_pairs['conv_id'].unique():
    conversation = dialog_pairs[dialog_pairs['conv_id'] == conv_id]
    rows = zip(conversation['user_utterance'], conversation['dialog_emo_1'],
               conversation['dialog_emo_2'], conversation['dialog_emo_3'])
    for text, *candidates in rows:
      if done % 250 == 0:
        print(f'Iteration {done} finished.')
      # Missing labels may be empty strings or non-string values (e.g. NaN); drop both.
      emotions = [label for label in candidates if type(label) is str and len(label) != 0]
      answers.append(generate(top3_emotion_prompt(emotions, text)))
      done += 1
  return answers

In [None]:
# `dialog_pairs_copy` is not created in any visible earlier cell, so on a fresh kernel this
# cell failed with a NameError. Create it from `dialog_pairs` when it does not exist yet;
# the check runs before the long generation so that no work is lost.
if 'dialog_pairs_copy' not in globals():
  dialog_pairs_copy = dialog_pairs.copy()
generated_answers = generate_dialog_top3_emotion_answers(run_prompt_mistral)
dialog_pairs_copy['mistral_dialog_emo3'] = generated_answers

Iteration 0 finished.
Iteration 250 finished.
Iteration 500 finished.
Iteration 750 finished.
Iteration 1000 finished.
Iteration 1250 finished.
Iteration 1500 finished.
Iteration 1750 finished.
Iteration 2000 finished.
Iteration 2250 finished.
Iteration 2500 finished.


### Эксперимент 4. Передача эмоции и примеров ответа на эту эмоцию вместе с репликой.

In [None]:
# For every first-rank emotion: one entry per training row, holding the row's score if the row
# carries this emotion AND is followed by another row of the same conversation (i.e. it has a
# reply), and the sentinel -10 otherwise.
train_score_per_emotion1 = dict()
# Vectorised "next row belongs to the same conversation" test; always False for the last row.
has_reply = (train['conv_id'] == train['conv_id'].shift(-1)).to_numpy()
emo1_scores = train['utter_emo_1_score'].to_numpy()
for emotion in train['utter_emo_1'].unique():
  print(emotion)
  usable = (train['utter_emo_1'] == emotion).to_numpy() & has_reply
  train_score_per_emotion1[emotion] = np.where(usable, emo1_scores, -10).tolist()

joy
neutral
sadness
surprise
disgust
anger
fear


In [None]:
# For every second-rank emotion: one entry per training row, holding the row's score if the row
# carries this emotion AND is followed by another row of the same conversation (i.e. it has a
# reply), and the sentinel -10 otherwise.
train_score_per_emotion2 = dict()
# Vectorised "next row belongs to the same conversation" test; always False for the last row.
has_reply = (train['conv_id'] == train['conv_id'].shift(-1)).to_numpy()
emo2_scores = train['utter_emo_2_score'].to_numpy()
for emotion in train['utter_emo_2'].unique():
  print(emotion)
  usable = (train['utter_emo_2'] == emotion).to_numpy() & has_reply
  train_score_per_emotion2[emotion] = np.where(usable, emo2_scores, -10).tolist()

nan
surprise
disgust
fear
neutral
sadness
anger
joy


In [None]:
# For every third-rank emotion: one entry per training row, holding the row's score if the row
# carries this emotion AND is followed by another row of the same conversation (i.e. it has a
# reply), and the sentinel -10 otherwise.
train_score_per_emotion3 = dict()
# Vectorised "next row belongs to the same conversation" test; always False for the last row.
has_reply = (train['conv_id'] == train['conv_id'].shift(-1)).to_numpy()
emo3_scores = train['utter_emo_3_score'].to_numpy()
for emotion in train['utter_emo_3'].unique():
  print(emotion)
  usable = (train['utter_emo_3'] == emotion).to_numpy() & has_reply
  train_score_per_emotion3[emotion] = np.where(usable, emo3_scores, -10).tolist()

nan
anger
sadness
joy
neutral
fear
surprise
disgust


#### Передача одной эмоции

In [None]:
def top_emotion_few_shot_prompt(emotion, examples, prompt):
  """Return the single-emotion instruction text extended with example answers.

  Falls back to ``base_prompt`` when ``emotion`` is an empty string and to
  ``top1_emotion_prompt`` when ``examples`` is empty. Otherwise the examples are listed,
  one per line, at the end of the text.
  """
  if emotion == '':
    result = base_prompt(prompt)
  elif len(examples) == 0:
    result = top1_emotion_prompt(emotion, prompt)
  else:
    examples_joined = '\n'.join(examples)
    result = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}
  You also should consider that the user is experiencing this emotion: {emotion}. Keep that in mind while writing the answer.
  I will also provide you with some examples of the answers to the users that were experiencing the same emotion. You can make use of the structure and emotional vocabulary of these examples, however, the content of your answer must corresponding to the user"s message I mentioned earlier.
  Here are the examples:
  {examples_joined}
  '''
  return result

In [None]:
def generate_top1_emotion_few_shot_answers(generate):
  """Answer every row of the global ``dialog_pairs`` with the few-shot single-emotion prompt.

  For each user utterance, up to five training rows with the same first-rank emotion and
  the closest classifier score are selected from ``train_score_per_emotion1``; the rows
  that directly follow them in ``train`` serve as example answers. Rows marked with the
  sentinel score -10 (different emotion or no following row in the same conversation)
  are never selected, so fewer than five — or no — examples are used when the training
  data does not offer enough candidates.

  Args:
    generate: callable taking a list of prompts and returning a list of answers.

  Returns:
    The list of generated answers, one per row of ``dialog_pairs``.
  """
  generated_answers = []
  batch_size = 16
  n_examples = 5
  score_arrays = {}  # emotion -> scores as an array, converted once and reused
  for ind in range(0, dialog_pairs.shape[0], batch_size):
    if ind % 128 == 0:
      print(f'Iteration {ind} finished.')
    prompts = []
    batch = dialog_pairs.iloc[ind:ind + batch_size]
    for utterance, emotion, score in zip(batch['user_utterance'], batch['utter_emo_1'],
                                         batch['utter_emo_1_score']):
      if emotion not in score_arrays:
        # An emotion that never occurs in the training data simply has no candidates.
        score_arrays[emotion] = np.asarray(train_score_per_emotion1.get(emotion, []), dtype=float)
      scores = score_arrays[emotion]
      candidates = np.flatnonzero(scores != -10)
      if candidates.size > n_examples:
        # argpartition needs more than n_examples elements, hence the guard above.
        nearest = np.argpartition(np.abs(scores[candidates] - score), n_examples)[:n_examples]
        candidates = candidates[nearest]
      # The example answer is the row right after the matching utterance.
      examples = list(train.iloc[candidates + 1]['utterance'])
      prompts.append(top_emotion_few_shot_prompt(emotion, examples, utterance))
    generated_answers.extend(generate(prompts))
  return generated_answers

In [None]:
# Experiment 4 (single emotion): batched generation with the few-shot prompt; results go to
# the `mistral_few_shot_emo1` column.
generated_answers = generate_top1_emotion_few_shot_answers(run_prompt_mistral_list)
dialog_pairs['mistral_few_shot_emo1'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


#### Передача трёх эмоций (при наличии)

In [None]:
def top3_emotion_prompt(emotions, examples, prompt):
  """Build the answering prompt that lists the user's emotions plus few-shot examples.

  Args:
    emotions: list of emotion names; when empty, `base_prompt(prompt)` is returned.
    examples: list of example replies; when empty, the examples section is
      left out of the prompt.
    prompt: the user's message.

  Returns:
    The prompt string to send to the model.
  """
  if len(emotions) == 0:
    return base_prompt(prompt)
  if len(examples) == 0:
    # The previous code recursed into this function with two arguments, which
    # raised TypeError. Build the emotion-only prompt directly instead.
    return f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}
  You also should consider that the user is experiencing these emotions: {', '.join(emotions)}. Keep that in mind while writing the answer.
  '''
  joined_examples = '\n'.join(examples)
  return f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}
  You also should consider that the user is experiencing these emotions: {', '.join(emotions)}. Keep that in mind while writing the answer.
  I will also provide you with some examples of the answers to the users that were experiencing the same emotion. You can make use of the structure and emotional vocabulary of these examples, however, the content of your answer must corresponding to the user"s message I mentioned earlier.
  Here are the examples:
  {joined_examples}
  '''

In [None]:
def generate_top3_few_shot_emotion_answers(generate):
  """Generate answers with few-shot examples matched on up to three utterance emotions.

  For every row of `dialog_pairs` the absolute score differences against the
  per-emotion train tables are summed over the row's emotions (stopping at
  the first missing one). The five train positions with the smallest sum are
  taken, and the `train` utterances one row after them become the examples
  passed to `top3_emotion_prompt`.

  Args:
    generate: callable that takes a list of prompts and returns a list of answers.

  Returns:
    Flat list of generated answers in `dialog_pairs` row order.
  """
  answers = []
  processed = 0
  chunk = 16
  score_tables = [train_score_per_emotion1, train_score_per_emotion2, train_score_per_emotion3]
  total_rows = dialog_pairs.shape[0]
  for start in range(0, total_rows, chunk):
    batch_prompts = []
    if processed % 128 == 0:
      print(f'Iteration {processed} finished.')
    for row_pos in range(start, min(start + chunk, total_rows)):
      row = dialog_pairs.iloc[row_pos]
      user_text = row['user_utterance']
      row_emotions = [row[f'utter_emo_{rank}'] for rank in (1, 2, 3)]
      row_scores = [row[f'utter_emo_{rank}_score'] for rank in (1, 2, 3)]
      # Missing emotions are non-string values (e.g. NaN) or empty strings.
      present_emotions = [emo for emo in row_emotions if type(emo) == type('') and emo != '']
      distances = [0 for _ in range(train.shape[0])]
      for table, emo, emo_score in zip(score_tables, row_emotions, row_scores):
        if type(emo) != type('') or emo == '':
          break
        candidates = table[emo]
        distances = [distances[k] + abs(emo_score - candidates[k]) for k in range(len(candidates))]
      nearest = np.argpartition(np.array(distances), 5)[:5]
      # Shift by one row: the example is the utterance that follows the matched one.
      example_rows = [pos + 1 for pos in nearest]
      examples = list(train.iloc[example_rows]['utterance'])
      batch_prompts.append(top3_emotion_prompt(present_emotions, examples, user_text))
    answers.extend(generate(batch_prompts))
    processed += chunk
  return answers

In [None]:
# Generate answers through run_prompt_mistral_list and store them as a new dialog_pairs column.
generated_answers = generate_top3_few_shot_emotion_answers(run_prompt_mistral_list)
dialog_pairs['mistral_few_shot_emo3'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


#### Передача эмоции не только последней реплики, но и всего контекста диалога

In [None]:
# Score table for the top-1 dialog emotion. For each emotion there is one entry
# per `train` row: the row's dialog_emo_1_score when the row carries that
# emotion and the next row belongs to the same conversation, otherwise -10.
# Computed with vectorized masks instead of a per-row `train.iloc[k]` loop.
train_score_per_emotion1_dialog = dict()
# True where the following row has the same conv_id; the last row compares
# against a shifted-in NaN and is therefore always False.
has_next_in_conv = (train['conv_id'] == train['conv_id'].shift(-1)).to_numpy()
for emotion in train['dialog_emo_1'].unique():
  print(emotion)
  matches = (train['dialog_emo_1'] == emotion).to_numpy() & has_next_in_conv
  scores = np.where(matches, train['dialog_emo_1_score'].to_numpy(), -10).tolist()
  train_score_per_emotion1_dialog[emotion] = scores

joy
nan
sadness
neutral
surprise
disgust
fear
anger


In [None]:
# Score table for the second dialog emotion. For each emotion there is one entry
# per `train` row: the row's dialog_emo_2_score when the row carries that
# emotion and the next row belongs to the same conversation, otherwise -10.
# Computed with vectorized masks instead of a per-row `train.iloc[k]` loop.
train_score_per_emotion2_dialog = dict()
# True where the following row has the same conv_id; the last row compares
# against a shifted-in NaN and is therefore always False.
has_next_in_conv = (train['conv_id'] == train['conv_id'].shift(-1)).to_numpy()
for emotion in train['dialog_emo_2'].unique():
  print(emotion)
  matches = (train['dialog_emo_2'] == emotion).to_numpy() & has_next_in_conv
  scores = np.where(matches, train['dialog_emo_2_score'].to_numpy(), -10).tolist()
  train_score_per_emotion2_dialog[emotion] = scores

nan
disgust
fear
neutral
surprise
sadness
anger
joy


In [None]:
# Score table for the third dialog emotion. For each emotion there is one entry
# per `train` row: the row's dialog_emo_3_score when the row carries that
# emotion and the next row belongs to the same conversation, otherwise -10.
# Computed with vectorized masks instead of a per-row `train.iloc[k]` loop.
train_score_per_emotion3_dialog = dict()
# True where the following row has the same conv_id; the last row compares
# against a shifted-in NaN and is therefore always False.
has_next_in_conv = (train['conv_id'] == train['conv_id'].shift(-1)).to_numpy()
for emotion in train['dialog_emo_3'].unique():
  print(emotion)
  matches = (train['dialog_emo_3'] == emotion).to_numpy() & has_next_in_conv
  scores = np.where(matches, train['dialog_emo_3_score'].to_numpy(), -10).tolist()
  train_score_per_emotion3_dialog[emotion] = scores

nan
sadness
neutral
fear
surprise
anger
disgust
joy


In [None]:
def generate_dialog_top1_few_shot_emotion_answers(generate):
  """Generate answers with few-shot examples matched on the top dialog-level emotion.

  For every row of `dialog_pairs`, the five entries of
  `train_score_per_emotion1_dialog[emotion]` closest to the row's
  `dialog_emo_1_score` are located; the `train` utterances one row after them
  are passed to `top_emotion_few_shot_prompt` as examples.

  Args:
    generate: callable that takes a list of prompts and returns a list of answers.

  Returns:
    Flat list of generated answers in `dialog_pairs` row order.
  """
  answers = []
  processed = 0
  chunk = 16
  total_rows = dialog_pairs.shape[0]
  for start in range(0, total_rows, chunk):
    batch_prompts = []
    if processed % 128 == 0:
      print(f'Iteration {processed} finished.')
    for row_pos in range(start, min(start + chunk, total_rows)):
      row = dialog_pairs.iloc[row_pos]
      user_text = row['user_utterance']
      dialog_emotion = row['dialog_emo_1']
      dialog_score = row['dialog_emo_1_score']
      distances = np.array([abs(dialog_score - candidate) for candidate in train_score_per_emotion1_dialog[dialog_emotion]])
      nearest = np.argpartition(distances, 5)[:5]
      # Shift by one row: the example is the utterance that follows the matched one.
      example_rows = [pos + 1 for pos in nearest]
      examples = list(train.iloc[example_rows]['utterance'])
      batch_prompts.append(top_emotion_few_shot_prompt(dialog_emotion, examples, user_text))
    answers.extend(generate(batch_prompts))
    processed += chunk
  return answers

In [None]:
# Generate answers through run_prompt_mistral_list and store them as a new dialog_pairs column.
generated_answers = generate_dialog_top1_few_shot_emotion_answers(run_prompt_mistral_list)
dialog_pairs['mistral_few_shot_emo1_dialog'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


#### Передача 3 эмоций всего контекста диалога

In [None]:
def generate_dialog_top3_few_shot_emotion_answers(generate):
  """Generate answers with few-shot examples matched on up to three dialog-level emotions.

  For every row of `dialog_pairs` the absolute score differences against the
  per-emotion dialog train tables are summed over the row's emotions
  (stopping at the first missing one). The five train positions with the
  smallest sum are taken, and the `train` utterances one row after them
  become the examples passed to `top3_emotion_prompt`.

  Args:
    generate: callable that takes a list of prompts and returns a list of answers.

  Returns:
    Flat list of generated answers in `dialog_pairs` row order.
  """
  answers = []
  processed = 0
  chunk = 16
  score_tables = [train_score_per_emotion1_dialog, train_score_per_emotion2_dialog, train_score_per_emotion3_dialog]
  total_rows = dialog_pairs.shape[0]
  for start in range(0, total_rows, chunk):
    batch_prompts = []
    if processed % 128 == 0:
      print(f'Iteration {processed} finished.')
    for row_pos in range(start, min(start + chunk, total_rows)):
      row = dialog_pairs.iloc[row_pos]
      user_text = row['user_utterance']
      row_emotions = [row[f'dialog_emo_{rank}'] for rank in (1, 2, 3)]
      row_scores = [row[f'dialog_emo_{rank}_score'] for rank in (1, 2, 3)]
      # Missing emotions are non-string values (e.g. NaN) or empty strings.
      present_emotions = [emo for emo in row_emotions if type(emo) == type('') and emo != '']
      distances = [0 for _ in range(train.shape[0])]
      for table, emo, emo_score in zip(score_tables, row_emotions, row_scores):
        if type(emo) != type('') or emo == '':
          break
        candidates = table[emo]
        distances = [distances[k] + abs(emo_score - candidates[k]) for k in range(len(candidates))]
      nearest = np.argpartition(np.array(distances), 5)[:5]
      # Shift by one row: the example is the utterance that follows the matched one.
      example_rows = [pos + 1 for pos in nearest]
      examples = list(train.iloc[example_rows]['utterance'])
      batch_prompts.append(top3_emotion_prompt(present_emotions, examples, user_text))
    answers.extend(generate(batch_prompts))
    processed += chunk
  return answers

In [None]:
# Generate answers through run_prompt_mistral_list and store them as a new dialog_pairs column.
generated_answers = generate_dialog_top3_few_shot_emotion_answers(run_prompt_mistral_list)
dialog_pairs['mistral_few_shot_emo3_dialog'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


### Эксперимент 5. Передача словаря эмоциональных слов, подходящих для текущей эмоции пользователя.

In [None]:
# Collect every distinct label found in the six emotion columns of `test`
# (utterance-level and dialog-level) and display them as a set.
emotions = []
for column in ['utter_emo_1', 'utter_emo_2', 'utter_emo_3', 'dialog_emo_1', 'dialog_emo_2', 'dialog_emo_3']:
  emotions.extend(test[column].unique())
set(emotions)

{'anger', 'disgust', 'fear', 'joy', nan, 'neutral', 'sadness', 'surprise'}

In [None]:
# Hand-written response phrases per emotion label (15 phrases each).
# The keys are the seven string labels shown in the cell output above; there is no entry for NaN.
emo_dict = {
"joy": [
        "awesome!", "that’s fantastic!", "wonderful", "so happy for you",
        "congratulations", "you must be thrilled", "that’s great news",
        "how exciting!", "love it", "celebrating with you", "brilliant",
        "that’s amazing", "what a delight", "you did it!", "woo-hoo!"
    ],
    "anger": [
        "I can see you’re frustrated", "that sounds infuriating",
        "I’d be upset too", "totally understand your anger",
        "that’s really annoying", "no wonder you’re mad",
        "I hear your frustration", "that’s unacceptable",
        "you have every right to feel that way", "I’m sorry that happened",
        "let’s figure this out", "we’ll sort this", "that must be enraging",
        "I get why you’re furious", "take a moment to breathe"
    ],
    "disgust": [
        "that’s gross", "how unpleasant", "yikes", "that’s nasty",
        "I’m sorry you had to see that", "that must feel revolting",
        "ugh, that’s awful", "that’s really off-putting", "I totally get the ick",
        "that sounds sickening", "so unpleasant", "no one should endure that",
        "eww", "that’s disturbing", "definitely unsettling"
    ],
    "fear": [
        "that sounds scary", "I can understand your worry",
        "it’s okay to feel afraid", "you’re not alone",
        "let’s take this one step at a time", "we’ll get through this",
        "I’m here with you", "your feelings are valid",
        "breathing helps—deep breaths", "stay safe", "we’ll figure it out",
        "it’s natural to be anxious", "you’ve got support",
        "I hear your concern", "let’s talk about what’s next"
    ],
    "neutral": [
        "I see", "got it", "makes sense", "understood",
        "thanks for sharing", "appreciate the info",
        "noted", "okay", "sure thing", "let me know",
        "sounds good", "I’ll keep that in mind", "right away",
        "all right", "absolutely"
    ],
    "sadness": [
        "I’m sorry you’re going through this", "that sounds really hard",
        "I’m here for you", "sending you kindness",
        "it’s okay to feel sad", "I understand your pain",
        "take your time", "you’re not alone", "that must hurt",
        "let’s talk about it", "your feelings matter",
        "I wish things were easier", "lean on me if you need",
        "I hear you", "holding space for you"
    ],
    "surprise": [
        "wow!", "no way!", "that’s unexpected", "oh my!",
        "really?", "that caught me off-guard", "that’s surprising",
        "I didn’t see that coming", "amazing twist", "what a shock",
        "that’s unbelievable", "whoa", "incredible!", "gosh!",
        "that’s quite something"
    ]
}

In [None]:
def get_emotional_words_prompt(phrases, prompt, emotion):
  """Build the answering prompt that suggests phrases suited to the user's emotion.

  Args:
    phrases: list of suggested phrases; when empty, `base_prompt(prompt)` is returned.
    prompt: the user's message.
    emotion: emotion label mentioned in the prompt.

  Returns:
    The prompt string to send to the model.
  """
  if len(phrases) > 0:
    # Phrases are joined with a bare comma (no space), as the template expects.
    phrase_list = ','.join(phrases)
    return f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}

  The user is experiencing the following emotion: {emotion}.
  Typically when answering this emotion you might use some of the following phrases, you may find them useful: {phrase_list}.
  Do not write any additional comments, return only the generated empathetic, relevant and fluent answer.
  '''
  return base_prompt(prompt)

In [None]:
def generate_emotional_phrases_answers(generate):
  """Generate answers with prompts that suggest phrases for the top utterance emotion.

  Rows of `dialog_pairs` are processed in batches of 16. For each row the
  phrase list for `utter_emo_1` is taken from `emo_dict` and handed to
  `get_emotional_words_prompt`.

  Args:
    generate: callable that takes a list of prompts and returns a list of answers.

  Returns:
    Flat list of generated answers in `dialog_pairs` row order.
  """
  generated_answers = []
  counter = 0
  batch_size = 16
  for i in range(0, dialog_pairs.shape[0], batch_size):
    if counter % 128 == 0:
      print(f'Iteration {counter} finished.')
    batch = dialog_pairs.iloc[i:i + batch_size]
    utterances = batch['user_utterance'].tolist()
    emotions = batch['utter_emo_1'].tolist()
    # A missing or unknown emotion (e.g. NaN) has no entry in emo_dict; use an
    # empty phrase list so get_emotional_words_prompt falls back to the base
    # prompt instead of this lookup raising KeyError.
    prompts = [
        get_emotional_words_prompt(emo_dict.get(emotion, []), utterance, emotion)
        for utterance, emotion in zip(utterances, emotions)
    ]
    generated_answers.extend(generate(prompts))
    counter += batch_size
  return generated_answers

In [None]:
# Generate answers through run_prompt_mistral_list and store them as a new dialog_pairs column.
generated_answers = generate_emotional_phrases_answers(run_prompt_mistral_list)
dialog_pairs['mistral_emotional_words'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


### Эксперимент 6. Использование COMET.

In [None]:
def get_comet_prompt(statements, prompt):
  """Build the answering prompt enriched with statements about the user's message.

  Args:
    statements: list of statement strings; when empty, `base_prompt(prompt)` is returned.
    prompt: the user's message.

  Returns:
    The prompt string to send to the model.
  """
  if len(statements) > 0:
    statement_block = '\n'.join(statements)
    return f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  User"s message: {prompt}

  You also might find the following statements about the user"s message useful. Use them only in case you think it"ll be appropriate and helpful. Make sure that the answer is not too long and that it is natural.
  Remember that your priority is to make the answer as empathetic, fluent and relevant (as mentioned above) as possible.
  The statements:
  {statement_block}
  '''
  return base_prompt(prompt)

In [None]:
# Text2text pipeline around the mismayil/comet-bart-ai2 checkpoint, used below to generate the COMET endings.
# NOTE(review): `pipeline` is not among the imports visible at the top of the notebook — confirm it is imported (e.g. from transformers) before this cell runs.
pipe = pipeline("text2text-generation", model="mismayil/comet-bart-ai2", device_map='auto')

config.json:   0%|          | 0.00/1.37k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
def generate_comet_answers(generate, generated_endings):
  """Generate answers with prompts enriched by pre-computed COMET endings.

  `generated_endings` is consumed three entries at a time, one per prefix in
  `cause_of_ending`. Rows are visited grouped by `conv_id` in order of first
  appearance, so the endings must have been produced in the same order.

  Args:
    generate: callable that takes a single prompt and returns one answer.
    generated_endings: sequence of dicts with a 'generated_text' key.

  Returns:
    List of generated answers in the grouped visiting order.
  """
  generated_answers = []
  counter = 0
  # NOTE(review): the endings are requested as xWant / xEffect / xReact, while the
  # second and third prefixes read like "reaction" and "cause" — confirm the pairing.
  cause_of_ending = ['User wants to', 'User might feel', 'User might feel it because they might be']
  ending_counter = 0
  for conv_id in dialog_pairs['conv_id'].unique():
    dialog = dialog_pairs[dialog_pairs['conv_id'] == conv_id]
    for utterance in dialog['user_utterance']:
      statements = []
      for i, cause in enumerate(cause_of_ending):
        generated_ending = generated_endings[ending_counter + i]['generated_text'].strip().lower()
        if generated_ending in ('', 'none'):
          continue
        # Endings such as "to go to the concert" already start with "to";
        # drop it so the statement does not read "User wants to to go ...".
        if cause.endswith(' to') and generated_ending.startswith('to '):
          generated_ending = generated_ending[len('to '):]
        statements.append(f'{cause} {generated_ending}.')
      ending_counter += len(cause_of_ending)
      if counter % 100 == 0:
        print(f'Iteration {counter} finished.')
      generated_answers.append(generate(get_comet_prompt(statements, utterance)))
      counter += 1
  return generated_answers

In [None]:
# Build three COMET queries per user utterance (one per relation in `endings`),
# visiting rows grouped by conv_id in order of first appearance, then run them
# through `pipe` in a single call.
prompts = []
# cause_of_ending is not used in this cell; generate_comet_answers defines its own copy.
cause_of_ending = ['User wants to', 'User might feel', 'User might feel it because they might be']
endings = ['xWant', 'xEffect', 'xReact']
for conv_id in dialog_pairs['conv_id'].unique():
    dialog = dialog_pairs[dialog_pairs['conv_id'] == conv_id]
    for utterance in dialog['user_utterance']:
      prompts.extend([f"{utterance} {ending}" for ending in endings])
generated_endings = pipe(
            prompts,
            max_new_tokens=32,
            num_return_sequences=1,
            num_beams=5
  )
# Prints the complete list of results (three entries per utterance).
print(generated_endings)

[{'generated_text': ' to make sure they are ok'}, {'generated_text': ' none'}, {'generated_text': ' scared'}, {'generated_text': ' to apologize to the person who hit me'}, {'generated_text': ' none'}, {'generated_text': ' remorseful'}, {'generated_text': ' to talk to someone else'}, {'generated_text': ' none'}, {'generated_text': ' happy'}, {'generated_text': ' to get out of the car'}, {'generated_text': ' none'}, {'generated_text': ' scared'}, {'generated_text': ' to find a new girlfriend'}, {'generated_text': ' none'}, {'generated_text': ' sad'}, {'generated_text': ' to get away from the world'}, {'generated_text': ' none'}, {'generated_text': ' happy'}, {'generated_text': ' to go to the concert'}, {'generated_text': ' go to the concert'}, {'generated_text': ' excited'}, {'generated_text': ' to go to the concert'}, {'generated_text': ' go to the concert'}, {'generated_text': ' excited'}, {'generated_text': ' to go to the doctor'}, {'generated_text': ' none'}, {'generated_text': ' sad

In [None]:
# generate_comet_answers calls the generator with one prompt at a time, hence run_prompt_mistral here.
# NOTE(review): answers come back in conv_id-grouped order — confirm dialog_pairs rows are contiguous per conversation so the column lines up.
generated_answers = generate_comet_answers(run_prompt_mistral, generated_endings)
dialog_pairs['mistral_comet'] = generated_answers

Iteration 0 finished.
Iteration 100 finished.
Iteration 200 finished.
Iteration 300 finished.
Iteration 400 finished.
Iteration 500 finished.
Iteration 600 finished.
Iteration 700 finished.
Iteration 800 finished.
Iteration 900 finished.
Iteration 1000 finished.
Iteration 1100 finished.
Iteration 1200 finished.
Iteration 1300 finished.
Iteration 1400 finished.
Iteration 1500 finished.
Iteration 1600 finished.
Iteration 1700 finished.
Iteration 1800 finished.
Iteration 1900 finished.
Iteration 2000 finished.
Iteration 2100 finished.
Iteration 2200 finished.
Iteration 2300 finished.
Iteration 2400 finished.
Iteration 2500 finished.
Iteration 2600 finished.


### Эксперимент 7. Переформулирование первичного промпта с учётом эмоции, которую испытывает пользователь.

In [None]:
def reformulate_prompt(emotion, prompt):
  """Build the prompt asking the model to rewrite a message so its emotion is more explicit.

  Args:
    emotion: emotion label; for an empty string `base_prompt(prompt)` is
      returned instead of a reformulation request.
    prompt: the user's message.

  Returns:
    The prompt string to send to the model.
  """
  if emotion != '':
    return f'''
  Your task is to reformulate the user's message so that the emotion they are experiencing is more clearly expressed in the text.
  Make it more expressive. In your answer write just the reformulated text.
  The emotion: {emotion}.
  The message: {prompt}
  '''
  return base_prompt(prompt)

In [None]:
def generate_reformulated_emotion_answers(generate):
  """Answer each utterance after first rewriting it to stress the user's emotion.

  Rows of `dialog_pairs` are processed in batches of 16 in two passes: rows
  with a usable `utter_emo_1` label are rewritten via `reformulate_prompt`,
  then every (possibly rewritten) message is answered via `base_prompt`.

  Rows without a usable emotion (NaN or an empty string) skip the rewriting
  pass and are answered from the original utterance. Previously such rows
  either asked the model to stress the emotion "nan" or had an already
  generated answer answered a second time.

  Args:
    generate: callable that takes a list of prompts and returns a list of answers.

  Returns:
    Flat list of generated answers in `dialog_pairs` row order.
  """
  generated_answers = []
  counter = 0
  batch_size = 16
  for i in range(0, dialog_pairs.shape[0], batch_size):
    if counter % 128 == 0:
      print(f'Iteration {counter} finished.')
    batch = dialog_pairs.iloc[i:i + batch_size]
    utterances = batch['user_utterance'].tolist()
    emotions = batch['utter_emo_1'].tolist()
    # Positions inside the batch that carry a real emotion label.
    to_rewrite = [pos for pos, emotion in enumerate(emotions) if isinstance(emotion, str) and emotion != '']
    messages = list(utterances)
    if to_rewrite:
      reformulated = generate([reformulate_prompt(emotions[pos], utterances[pos]) for pos in to_rewrite])
      for pos, text in zip(to_rewrite, reformulated):
        messages[pos] = text
    generated_answers.extend(generate([base_prompt(message) for message in messages]))
    counter += batch_size
  return generated_answers

In [None]:
# Generate answers through run_prompt_mistral_list and store them as a new dialog_pairs column.
generated_answers = generate_reformulated_emotion_answers(run_prompt_mistral_list)
dialog_pairs['mistral_reformulate'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


### Эксперимент 8. Концентрация внимания на эмоционально окрашенных словах и словах, важных для понимания контекста.

In [None]:
# Load the NRC VAD lexicon and derive a per-term emotional-intensity score.
vad = pd.read_csv("NRC-VAD-Lexicon-v2.1.txt", sep="\t")
# Rescale the three dimensions from [min_val, max_val] to [0, 1].
min_val = -1
max_val = 1
diff = max_val - min_val
columns = ['valence', 'arousal', 'dominance']
for column in columns:
  vad[column] = (vad[column] - min_val) / diff
# Squared distance of valence from its midpoint plus squared half-arousal,
# computed column-wise instead of one `vad.iloc[i]` lookup per row.
vad['emo_score'] = (vad['valence'] - 0.5) ** 2 + (vad['arousal'] / 2) ** 2
# Min-max normalise the score to [0, 1].
min_emo = vad['emo_score'].min()
max_emo = vad['emo_score'].max()
diff = max_emo - min_emo
vad['emo_score'] = (vad['emo_score'] - min_emo) / diff
min(vad['emo_score']), max(vad['emo_score'])

In [None]:
import nltk
from nltk.stem import WordNetLemmatizer
# Download the WordNet corpus and create a lemmatizer instance.
# NOTE(review): `lemmatizer` is not referenced in the cells visible below — confirm it is still needed.
nltk.download("wordnet")
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
def emotional_words_prompt(prompt):
  """Build the answering prompt for a message whose emotional words carry [BOLD] tags.

  Args:
    prompt: the user's message, already annotated with [BOLD]...[/BOLD] markers.

  Returns:
    The prompt string to send to the model.
  """
  instructions = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  Some words are marked with [BOLD] [/BOLD] tags. Pay special attention to them as they empathesize the emotional state of the user.
  User"s message: {prompt}
  '''
  return instructions

In [None]:
def _mark_emotional_words(utterance, emo_scores, threshold):
  """Wrap space-separated tokens whose leading word scores >= threshold in [BOLD] tags."""
  letters = set('abcdefghijklmnopqrstuvwxyz')
  tokens = utterance.split(' ')
  if tokens[-1] == '':
    tokens.pop()  # a trailing space (or an empty utterance) does not start a new token
  pieces = []
  for token in tokens:
    # The lexicon key is the leading run of letters, lower-cased, so that
    # "Happy" and "sad." are looked up as "happy" and "sad".
    end = 0
    while end < len(token) and token[end].lower() in letters:
      end += 1
    score = emo_scores.get(token[:end].lower(), 0)
    if score >= threshold:
      pieces.append(f'[BOLD]{token}[/BOLD] ')
    else:
      pieces.append(f'{token} ')
  return ''.join(pieces)


def generate_emotional_words_answers(generate, min_emo=0.7):
  """Generate answers for utterances whose emotional words are wrapped in [BOLD] tags.

  Each utterance is split on single spaces; a token is tagged when the
  `emo_score` of its leading word in the `vad` lexicon is at least `min_emo`.
  Every token is followed by one space, so annotated utterances end with a space.

  Changes from the previous version:
    * the last word is no longer dropped when the utterance ends with a letter;
    * `min_emo` is honoured; its default is 0.7, the threshold that was
      previously hard-coded (the old 0.6 default was never read);
    * the lexicon lookup is case-insensitive and uses a dict built once per call
      (first occurrence wins for duplicate terms).

  Args:
    generate: callable that takes a list of prompts and returns a list of answers.
    min_emo: minimum `emo_score` for a word to be tagged.

  Returns:
    Flat list of generated answers in `dialog_pairs` row order.
  """
  generated_answers = []
  counter = 0
  batch_size = 16
  lexicon = vad.drop_duplicates(subset='term')
  emo_scores = dict(zip(lexicon['term'], lexicon['emo_score']))
  for k in range(0, dialog_pairs.shape[0], batch_size):
    if counter % 128 == 0:
      print(f'Iteration {counter} finished.')
    prompts = []
    for ind in range(k, min(dialog_pairs.shape[0], k + batch_size)):
      utterance = dialog_pairs.iloc[ind]['user_utterance']
      prompts.append(emotional_words_prompt(_mark_emotional_words(utterance, emo_scores, min_emo)))
    generated_answers.extend(generate(prompts))
    counter += batch_size
  return generated_answers

In [None]:
# Generate answers through run_prompt_mistral_list and store them as a new dialog_pairs column.
generated_answers = generate_emotional_words_answers(run_prompt_mistral_list)
dialog_pairs['mistral_emo_words'] = generated_answers

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


### Эксперимент 9. Повторное улучшение полученного первичного ответа самой же моделью.

In [None]:
def improve_asnwer_prompt(answer, message):
  """Build the prompt asking the model to improve an existing answer.

  Args:
    answer: the answer to be improved.
    message: the user's message the answer responds to.

  Returns:
    The prompt string to send to the model.
  """
  instructions = f'''
  Your task now is to improve the following answer so that it is more expressive, empathetic, fluent and relevant to the user"s message.
  Here is a more detailed description of each characteristic:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?

  User"s message: {message}.
  The answer to improve, according to the above-mentioned characteristics: {answer}.

  Do not provide any additional comments: return just the improved answer.
  '''
  return instructions

In [None]:
def generate_improved_emotion_answers(generate):
  """Generate baseline answers and let the model improve them twice.

  Rows of `dialog_pairs` are processed in batches of 16: a first answer is
  produced from `base_prompt`, then two consecutive improvement passes are
  run with `improve_asnwer_prompt`, each working on the previous pass' output.

  Args:
    generate: callable that takes a list of prompts and returns a list of answers.

  Returns:
    A list of two lists: the answers after the first and after the second
    improvement pass, each in `dialog_pairs` row order.
  """
  improved_rounds = [[], []]
  processed = 0
  chunk = 16
  total_rows = dialog_pairs.shape[0]
  for start in range(0, total_rows, chunk):
    if processed % 128 == 0:
      print(f'Iteration {processed} finished.')
    batch_utterances = [
        dialog_pairs.iloc[pos]['user_utterance']
        for pos in range(start, min(total_rows, start + chunk))
    ]
    current = generate([base_prompt(text) for text in batch_utterances])
    for round_answers in improved_rounds:
      current = generate([
          improve_asnwer_prompt(current[j], batch_utterances[j])
          for j in range(len(batch_utterances))
      ])
      round_answers.extend(current)
    processed += chunk
  return improved_rounds

In [None]:
# generate_improved_emotion_answers returns two lists: answers after the first and after the second improvement pass.
generated_answers = generate_improved_emotion_answers(run_prompt_mistral_list)
dialog_pairs['mistral_self_improved1'] = generated_answers[0]
dialog_pairs['mistral_self_improved2'] = generated_answers[1]

Iteration 0 finished.
Iteration 128 finished.
Iteration 256 finished.
Iteration 384 finished.
Iteration 512 finished.
Iteration 640 finished.
Iteration 768 finished.
Iteration 896 finished.
Iteration 1024 finished.
Iteration 1152 finished.
Iteration 1280 finished.
Iteration 1408 finished.
Iteration 1536 finished.
Iteration 1664 finished.
Iteration 1792 finished.
Iteration 1920 finished.
Iteration 2048 finished.
Iteration 2176 finished.
Iteration 2304 finished.
Iteration 2432 finished.
Iteration 2560 finished.


## GPT

### Настройка

In [None]:
!pip install openai
import json
from openai import OpenAI
model_gpt = "gpt-4.1-nano-2025-04-14"
client = OpenAI(
  api_key=""
)

### Эксперимент 1. Baseline

Для бейзлайна просто прогоним модели на тестовом датасете с промптом, в котором попросим модель быть эмпатичной.

In [None]:
# System prompt for the GPT baseline. This has to be an f-string: without the
# prefix the literal text "{answer_length}" was sent to the model.
gpt_base_prompt = f'''
Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
empathetic: does the responses show understanding of the feelings of the person talking about their experience?
relevant: does the responses seem appropriate to the conversation? Are they on-topic?
fluent: is it possible to understand the responses? Does the language seem accurate?
Do not give any additional comments, provide only the answer
'''

In [None]:
def get_new_batch_id(file_name):
  """Upload a JSONL request file and start a batch job for it.

  Args:
    file_name: path of the JSONL file with one chat-completion request per line.

  Returns:
    The id of the created batch.
  """
  # Context manager so the file handle is closed once the upload call returns.
  with open(file_name, "rb") as request_file:
    batch_input_file = client.files.create(
      file=request_file,
      purpose="batch"
    )
  return client.batches.create(
      input_file_id=batch_input_file.id,
      endpoint="/v1/chat/completions",
      completion_window="24h"
  ).id

In [None]:
def add_column(prefix, batch_id, column_name):
  """Fetch the results of a finished batch and store them as a `dialog_pairs` column.

  Each output line is matched to its row through `custom_id`, which is
  expected to be `prefix` followed by the row position. Rows without a
  usable response keep an empty string.

  Args:
    prefix: the custom_id prefix used when the batch file was written.
    batch_id: id of the batch to read.
    column_name: name of the `dialog_pairs` column to create or overwrite.

  Raises:
    RuntimeError: if the batch has no output file yet (it has not completed).
  """
  batch = client.batches.retrieve(batch_id)
  output_file_id = batch.output_file_id
  if output_file_id is None:
    raise RuntimeError(
        f"Batch {batch_id} has no output file yet (status: {getattr(batch, 'status', 'unknown')})."
    )
  file_response = client.files.content(output_file_id).text
  responses = ['' for _ in range(dialog_pairs.shape[0])]

  for line in file_response.split('\n'):
    if not line.strip():
      continue  # blank line, e.g. the one after the final newline
    try:
      response = json.loads(line)
    except json.JSONDecodeError:
      # Show the malformed line and skip it rather than reusing the previous response.
      print(line)
      continue
    line_index = int(response['custom_id'][len(prefix):])
    try:
      content = response['response']['body']['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
      # A failed request has no choices; leave its row empty.
      print(f"No completion for {response['custom_id']}")
      continue
    responses[line_index] = content
  dialog_pairs[column_name] = responses

In [None]:
def generate_base_answers(file_name):
  """Write the baseline batch request file: one chat-completion request per row.

  Every row of `dialog_pairs` becomes a JSON line with `gpt_base_prompt` as
  the system message and the row's `user_utterance` as the user message.

  Args:
    file_name: path of the JSONL file to (over)write.

  Returns:
    The custom_id prefix ('base-') used for the requests.
  """
  request_prefix = 'base-'
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_pos in range(dialog_pairs.shape[0]):
      messages = [
          {"role": "system", "content": gpt_base_prompt},
          {"role": "user", "content": dialog_pairs.iloc[row_pos]["user_utterance"]},
      ]
      request = {
          "custom_id": f"{request_prefix}{row_pos}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return request_prefix

In [None]:
# Write the baseline requests, submit them as a batch and collect the answers.
# NOTE(review): add_column reads the batch output right after submission — confirm
# the batch has completed (or re-run that line later) before relying on the column.
file_name = 'gpt_base.jsonl'
# The function is defined as generate_base_answers; the previous call used the
# misspelled name generate_base_answer and raised NameError.
prefix = generate_base_answers(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_base')

### Эксперимент 2. Передача контекста вместе с очередной репликой.

In [None]:
def dialog_context_prompt(context):
  """Build the system prompt that includes the user's earlier messages.

  Args:
    context: text with the user's previous messages in the dialog.

  Returns:
    The prompt string to send to the model.
  """
  instructions = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  Also consider that this utterance is not the first in the dialog. The previous messages of the user look like this: {context}
  '''
  return instructions

In [None]:
def experiment_context(file_name):
  """Write the batch request file for the dialog-context experiment.

  Every row of `dialog_pairs` becomes a JSON line. The system message comes
  from `dialog_context_prompt` and carries the user's earlier utterances of
  the same conversation, joined with '. '; the user message is the row's
  `user_utterance`. The context restarts whenever `conv_id` changes.

  The context no longer starts with a stray ". " separator, and an empty
  `dialog_pairs` produces an empty file instead of an IndexError.

  Args:
    file_name: path of the JSONL file to (over)write.

  Returns:
    The custom_id prefix ('context-') used for the requests.
  """
  prev_conv_id = None
  previous_utterances = []
  prefix = 'context-'
  with open(file_name, "w", encoding="utf-8") as f:
    for i in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[i]
      if row['conv_id'] != prev_conv_id:
        # New conversation: forget the utterances of the previous one.
        previous_utterances = []
        prev_conv_id = row['conv_id']
      sys_prompt = dialog_context_prompt('. '.join(previous_utterances))
      user_prompt = row["user_utterance"]
      previous_utterances.append(user_prompt)
      batch_obj = {
          "custom_id": f"{prefix}{i}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user",   "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Write the context-experiment requests, submit them as a batch and collect the answers.
# NOTE(review): add_column reads the batch output right after submission — confirm
# the batch has completed (or re-run that line later) before relying on the column.
file_name = "prompts_context.jsonl"
prefix = experiment_context(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_context')

### Эксперимент 3. Передача эмоции вместе с репликой.

#### Передача одной эмоции реплики

In [None]:
def gpt_emo1_prompt(emotion):
  """Build the system prompt that tells the model which single emotion the user feels.

  The prompt embeds the module-level `answer_length` and the given `emotion`.
  """
  system_prompt = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  You also should consider that the user is experiencing this emotion: {emotion}. Do not give any additional comments, provide only the answer
  '''
  return system_prompt

In [None]:
def generate_emo1_batch(file_name):
  """Write one batch request per dialog pair using the utterance's top emotion.

  Each JSONL line carries a system prompt built from `utter_emo_1` and the
  user's utterance. Returns the custom_id prefix ('emo1-').
  """
  prefix = 'emo1-'
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": gpt_emo1_prompt(row['utter_emo_1'])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      request = {
          "custom_id": f"{prefix}{row_idx}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 3 (one utterance emotion): write the prompts to JSONL and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_emo1' column — confirm.
file_name = "prompts_emo1.jsonl"
prefix = generate_emo1_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_emo1')

#### Передача трёх эмоций фразы

In [None]:
def gpt_emo3_prompt(emotions):
  """Build the system prompt that lists several user emotions.

  `emotions` is an iterable of strings; they are joined with ', ' in the given
  order, which the prompt describes as strongest to least strong.
  """
  emotion_list = ', '.join(emotions)
  system_prompt = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  You also should consider that the user is experiencing these emotions: {emotion_list}. Keep that in mind while writing the answer.
  The emotions are stated from the strongest one in the message to the least strong.
  '''
  return system_prompt

In [None]:
def generate_emo3_batch(file_name):
  """Write one batch request per dialog pair using up to three utterance emotions.

  Emotion cells that are not non-empty strings (e.g. NaN) are skipped before
  the prompt is built. Returns the custom_id prefix ('emo3-').
  """
  prefix = 'emo3-'
  emotion_columns = ('utter_emo_1', 'utter_emo_2', 'utter_emo_3')
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      present = []
      for column in emotion_columns:
        value = row[column]
        if isinstance(value, str) and value != '':
          present.append(value)
      messages = [
          {"role": "system", "content": gpt_emo3_prompt(present)},
          {"role": "user", "content": row["user_utterance"]},
      ]
      request = {
          "custom_id": f"{prefix}{row_idx}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return prefix

file-XwVe5DsHGdUmKAkM8oHtFa


In [None]:
# Experiment 3 (three utterance emotions): write the prompts to JSONL and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_emo3' column — confirm.
file_name = "prompts_emo3.jsonl"
prefix = generate_emo3_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_emo3')




#### Передача одной эмоции всего диалога

In [None]:
def generate_dialog_emo1_batch(file_name):
  """Write one batch request per dialog pair using the dialog-level top emotion.

  Same request layout as the utterance-level variant, but the system prompt is
  built from `dialog_emo_1`. Returns the custom_id prefix ('dialog-emo1-').
  """
  prefix = 'dialog-emo1-'
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": gpt_emo1_prompt(row["dialog_emo_1"])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      request = {
          "custom_id": f"{prefix}{row_idx}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return prefix

file-AnRUtDs2ZPJ3txh9YyX1Mi


In [None]:
# Experiment 3 (one dialog emotion): write the prompts to JSONL and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_dialog_emo1' column — confirm.
file_name = "prompts_dialog_emo1.jsonl"
prefix = generate_dialog_emo1_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_dialog_emo1')

#### Передача трёх эмоций всего диалога

In [None]:
def generate_dialog_emo3_batch(file_name):
  """Write one batch request per dialog pair using up to three dialog-level emotions.

  Emotion cells that are not non-empty strings (e.g. NaN) are skipped before
  the prompt is built. Returns the custom_id prefix ('dialog-emo3-').
  """
  prefix = 'dialog-emo3-'
  emotion_columns = ('dialog_emo_1', 'dialog_emo_2', 'dialog_emo_3')
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      present = []
      for column in emotion_columns:
        value = row[column]
        if isinstance(value, str) and value != '':
          present.append(value)
      messages = [
          {"role": "system", "content": gpt_emo3_prompt(present)},
          {"role": "user", "content": row["user_utterance"]},
      ]
      request = {
          "custom_id": f"{prefix}{row_idx}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return prefix

file-2NtwsvvfEwZFLbJY35UMDH


In [None]:
# Experiment 3 (three dialog emotions): write the prompts to JSONL and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_dialog_emo3' column — confirm.
file_name = "prompts_dialog_emo3.jsonl"
prefix = generate_dialog_emo3_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_dialog_emo3')

### Эксперимент 4. Передача эмоции и примеров ответа на эту эмоцию вместе с репликой.

#### Настройка

In [None]:
# For every top utterance emotion, build a list aligned with the rows of `train`:
# the row's `utter_emo_1_score` when the row has that emotion AND the next row
# belongs to the same conversation (so a reply exists at row + 1), else the
# sentinel -10. Column-wise masks replace the original per-row `train.iloc[k]`
# lookups, which were repeated for every emotion and were very slow.
train_score_per_emotion1 = dict()
# shift(-1) leaves NaN in the last row, so the last row never counts as having a reply.
next_row_same_conv = train['conv_id'].eq(train['conv_id'].shift(-1)).to_numpy()
for emotion in train['utter_emo_1'].unique():
  print(emotion)
  emotion_matches = train['utter_emo_1'].eq(emotion).to_numpy()
  scores = np.where(emotion_matches & next_row_same_conv, train['utter_emo_1_score'].to_numpy(), -10).tolist()
  train_score_per_emotion1[emotion] = scores

joy
neutral
sadness
surprise
disgust
anger
fear


In [None]:
# For every second utterance emotion, build a list aligned with the rows of
# `train`: the row's `utter_emo_2_score` when the row has that emotion AND the
# next row belongs to the same conversation (so a reply exists at row + 1), else
# the sentinel -10. Column-wise masks replace the original per-row
# `train.iloc[k]` lookups, which were repeated for every emotion.
train_score_per_emotion2 = dict()
# shift(-1) leaves NaN in the last row, so the last row never counts as having a reply.
next_row_same_conv = train['conv_id'].eq(train['conv_id'].shift(-1)).to_numpy()
for emotion in train['utter_emo_2'].unique():
  print(emotion)
  emotion_matches = train['utter_emo_2'].eq(emotion).to_numpy()
  scores = np.where(emotion_matches & next_row_same_conv, train['utter_emo_2_score'].to_numpy(), -10).tolist()
  train_score_per_emotion2[emotion] = scores

nan
surprise
disgust
fear
neutral
sadness
anger
joy


In [None]:
# For every third utterance emotion, build a list aligned with the rows of
# `train`: the row's `utter_emo_3_score` when the row has that emotion AND the
# next row belongs to the same conversation (so a reply exists at row + 1), else
# the sentinel -10. Column-wise masks replace the original per-row
# `train.iloc[k]` lookups, which were repeated for every emotion.
train_score_per_emotion3 = dict()
# shift(-1) leaves NaN in the last row, so the last row never counts as having a reply.
next_row_same_conv = train['conv_id'].eq(train['conv_id'].shift(-1)).to_numpy()
for emotion in train['utter_emo_3'].unique():
  print(emotion)
  emotion_matches = train['utter_emo_3'].eq(emotion).to_numpy()
  scores = np.where(emotion_matches & next_row_same_conv, train['utter_emo_3_score'].to_numpy(), -10).tolist()
  train_score_per_emotion3[emotion] = scores

nan
anger
sadness
joy
neutral
fear
surprise
disgust


In [None]:
# For every top dialog emotion, build a list aligned with the rows of `train`:
# the row's `dialog_emo_1_score` when the row has that emotion AND the next row
# belongs to the same conversation (so a reply exists at row + 1), else the
# sentinel -10. Column-wise masks replace the original per-row `train.iloc[k]`
# lookups, which were repeated for every emotion.
train_score_per_emotion1_dialog = dict()
# shift(-1) leaves NaN in the last row, so the last row never counts as having a reply.
next_row_same_conv = train['conv_id'].eq(train['conv_id'].shift(-1)).to_numpy()
for emotion in train['dialog_emo_1'].unique():
  print(emotion)
  emotion_matches = train['dialog_emo_1'].eq(emotion).to_numpy()
  scores = np.where(emotion_matches & next_row_same_conv, train['dialog_emo_1_score'].to_numpy(), -10).tolist()
  train_score_per_emotion1_dialog[emotion] = scores

joy
nan
sadness
neutral
surprise
disgust
fear
anger


In [None]:
# For every second dialog emotion, build a list aligned with the rows of `train`:
# the row's `dialog_emo_2_score` when the row has that emotion AND the next row
# belongs to the same conversation (so a reply exists at row + 1), else the
# sentinel -10. Column-wise masks replace the original per-row `train.iloc[k]`
# lookups, which were repeated for every emotion.
train_score_per_emotion2_dialog = dict()
# shift(-1) leaves NaN in the last row, so the last row never counts as having a reply.
next_row_same_conv = train['conv_id'].eq(train['conv_id'].shift(-1)).to_numpy()
for emotion in train['dialog_emo_2'].unique():
  print(emotion)
  emotion_matches = train['dialog_emo_2'].eq(emotion).to_numpy()
  scores = np.where(emotion_matches & next_row_same_conv, train['dialog_emo_2_score'].to_numpy(), -10).tolist()
  train_score_per_emotion2_dialog[emotion] = scores

nan
disgust
fear
neutral
surprise
sadness
anger
joy


In [None]:
# For every third dialog emotion, build a list aligned with the rows of `train`:
# the row's `dialog_emo_3_score` when the row has that emotion AND the next row
# belongs to the same conversation (so a reply exists at row + 1), else the
# sentinel -10. Column-wise masks replace the original per-row `train.iloc[k]`
# lookups, which were repeated for every emotion.
train_score_per_emotion3_dialog = dict()
# shift(-1) leaves NaN in the last row, so the last row never counts as having a reply.
next_row_same_conv = train['conv_id'].eq(train['conv_id'].shift(-1)).to_numpy()
for emotion in train['dialog_emo_3'].unique():
  print(emotion)
  emotion_matches = train['dialog_emo_3'].eq(emotion).to_numpy()
  scores = np.where(emotion_matches & next_row_same_conv, train['dialog_emo_3_score'].to_numpy(), -10).tolist()
  train_score_per_emotion3_dialog[emotion] = scores

nan
sadness
neutral
fear
surprise
anger
disgust
joy


#### Передача одной эмоции и примеров

In [None]:
def top_emotion_few_shot_prompt(emotion, examples):
  """Build the few-shot system prompt for a single user emotion.

  `examples` is an iterable of example replies (strings); they are joined with
  newlines and appended after the instructions. The prompt also embeds the
  module-level `answer_length`.
  """
  examples_block = '\n'.join(examples)
  system_prompt = f'''Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  You also should consider that the user is experiencing this emotion: {emotion}. Keep that in mind while writing the answer.
  I will also provide you with some examples of the answers to the users that were experiencing the same emotion. You can make use of the structure and emotional vocabulary of these examples, however, the content of your answer must corresponding to the user"s message I mentioned earlier.
  Here are the examples: {examples_block}
  '''
  return system_prompt

In [None]:
def top_emotions_few_shot_prompt(emotions, examples):
  """Build the few-shot system prompt for several user emotions.

  `emotions` is an iterable of strings joined with ',' (no space) in the given
  order; `examples` is an iterable of example replies joined with newlines.
  The prompt also embeds the module-level `answer_length`.
  """
  examples_block = '\n'.join(examples)
  emotion_list = ','.join(emotions)
  system_prompt = f'''Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  You also should consider that the user is experiencing this emotion: {emotion_list}. Keep that in mind while writing the answer. The emotions are stated from the strongest one in the message to the least strong.
  I will also provide you with some examples of the answers to the users that were experiencing the same emotion. You can make use of the structure and emotional vocabulary of these examples, however, the content of your answer must corresponding to the user"s message I mentioned earlier.
  Here are the examples: {examples_block}
  '''
  return system_prompt

In [None]:
# Experiment 4.1: pass the user's top emotion together with the request and example replies.
def generate_few_show_emo1_batch(file_name):
  """Write one few-shot batch request per dialog pair, keyed on the top utterance emotion.

  For each row, the five training rows whose `utter_emo_1` score for the same
  emotion is closest to the row's own score are selected from
  `train_score_per_emotion1`; the utterances that follow them in `train`
  (index + 1) are passed to the model as example replies.

  The distance computation is vectorized with NumPy and each per-emotion score
  list is converted to an array only once; the selected examples are the same
  as with the original pure-Python loop.

  NOTE(review): rows holding the -10 sentinel can only be selected when fewer
  than five real candidates exist; in that case index + 1 may not be a reply
  from the same conversation (and may run past the end of `train`).

  Returns the custom_id prefix ('few-shot-emo1-').
  """
  prefix = 'few-shot-emo1-'
  train_utterances = train['utterance']
  score_arrays = {}  # emotion -> float array, built on first use
  with open(file_name, "w", encoding="utf-8") as f:
    for i in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[i]
      emotion = row['utter_emo_1']
      if emotion not in score_arrays:
        score_arrays[emotion] = np.asarray(train_score_per_emotion1[emotion], dtype=float)
      diffs = np.abs(row['utter_emo_1_score'] - score_arrays[emotion])
      # argpartition returns the five smallest distances in no particular order.
      min5_indices = np.argpartition(diffs, 5)[:5]
      # The reply to training row k is stored in row k + 1.
      examples = train_utterances.iloc[min5_indices + 1].tolist()
      sys_prompt = top_emotion_few_shot_prompt(emotion, examples)
      user_prompt = row["user_utterance"]

      batch_obj = {
          "custom_id": f"{prefix}{i}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user",   "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 4.1: write the few-shot (one utterance emotion) prompts and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_few_shot_emo1' column — confirm.
file_name = "prompts_few_shot_emo1.jsonl"
prefix = generate_few_show_emo1_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_few_shot_emo1')

#### Передача трёх эмоций и примеров

In [None]:
# Experiment 4.2: pass the user's three strongest emotions together with example replies.
# Kept for backward compatibility. The function below no longer reads this
# global, because a later cell rebinds the same name to the dialog-level tables
# and a re-run would then silently use the wrong scores.
train_score_emotions = [train_score_per_emotion1, train_score_per_emotion2, train_score_per_emotion3]
def generate_few_show_emo3_batch(file_name, score_tables=None):
  """Write one few-shot batch request per dialog pair, keyed on up to three utterance emotions.

  For each row the distance to every training row is the sum, over the row's
  emotions (in rank order, stopping at the first missing one), of the absolute
  difference between the row's score and the training score for that emotion
  at the same rank. The utterances following the five closest training rows
  (index + 1) are passed to the model as example replies.

  Args:
    file_name: path of the JSONL file to write.
    score_tables: optional sequence of three dicts (emotion -> per-train-row
      score list), one per emotion rank. Defaults to the utterance-level
      tables `train_score_per_emotion1..3`.

  Returns the custom_id prefix ('few-shot-emo3-').
  """
  if score_tables is None:
    score_tables = (train_score_per_emotion1, train_score_per_emotion2, train_score_per_emotion3)
  prefix = 'few-shot-emo3-'
  train_utterances = train['utterance']
  n_train = train.shape[0]
  array_cache = {}  # (rank, emotion) -> float array, built on first use
  with open(file_name, "w", encoding="utf-8") as f:
    for i in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[i]
      emos = [row['utter_emo_1'], row['utter_emo_2'], row['utter_emo_3']]
      scores = [row['utter_emo_1_score'], row['utter_emo_2_score'], row['utter_emo_3_score']]
      cur_emotions = [emo for emo in emos if isinstance(emo, str) and emo != '']
      diffs = np.zeros(n_train)
      for emo_ind, (emo, score) in enumerate(zip(emos, scores)):
        if not isinstance(emo, str) or emo == '':
          # Missing emotion (e.g. NaN): lower ranks are ignored as well.
          break
        cache_key = (emo_ind, emo)
        if cache_key not in array_cache:
          array_cache[cache_key] = np.asarray(score_tables[emo_ind][emo], dtype=float)
        diffs = diffs + np.abs(score - array_cache[cache_key])
      # argpartition returns the five smallest distances in no particular order.
      min5_indices = np.argpartition(diffs, 5)[:5]
      # The reply to training row k is stored in row k + 1.
      examples = train_utterances.iloc[min5_indices + 1].tolist()
      sys_prompt = top_emotions_few_shot_prompt(cur_emotions, examples)
      user_prompt = row["user_utterance"]

      batch_obj = {
          "custom_id": f"{prefix}{i}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user",   "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 4.2: write the few-shot (three utterance emotions) prompts and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_few_shot_emo3' column — confirm.
file_name = "prompts_few_shot_emo3.jsonl"
prefix = generate_few_show_emo3_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_few_shot_emo3')

#### Передача одной эмоции всего диалога и примеров

In [None]:
def generate_few_shot_dialog_emo1_batch(file_name):
  """Write one few-shot batch request per dialog pair, keyed on the top dialog emotion.

  For each row, the five training rows whose `dialog_emo_1` score for the same
  emotion is closest to the row's own score are selected from
  `train_score_per_emotion1_dialog`; the utterances that follow them in `train`
  (index + 1) are passed to the model as example replies.

  The distance computation is vectorized with NumPy and each per-emotion score
  list is converted to an array only once; the selected examples are the same
  as with the original pure-Python loop.

  NOTE(review): rows holding the -10 sentinel can only be selected when fewer
  than five real candidates exist; in that case index + 1 may not be a reply
  from the same conversation (and may run past the end of `train`).

  Returns the custom_id prefix ('few-shot-dialog-emo1-').
  """
  prefix = 'few-shot-dialog-emo1-'
  train_utterances = train['utterance']
  score_arrays = {}  # emotion -> float array, built on first use
  with open(file_name, "w", encoding="utf-8") as f:
    for i in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[i]
      emotion = row['dialog_emo_1']
      if emotion not in score_arrays:
        score_arrays[emotion] = np.asarray(train_score_per_emotion1_dialog[emotion], dtype=float)
      diffs = np.abs(row['dialog_emo_1_score'] - score_arrays[emotion])
      # argpartition returns the five smallest distances in no particular order.
      min5_indices = np.argpartition(diffs, 5)[:5]
      # The reply to training row k is stored in row k + 1.
      examples = train_utterances.iloc[min5_indices + 1].tolist()
      sys_prompt = top_emotion_few_shot_prompt(emotion, examples)
      user_prompt = row["user_utterance"]

      batch_obj = {
          "custom_id": f"{prefix}{i}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user", "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 4: write the few-shot (one dialog emotion) prompts and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_few_shot_dialog_emo1' column — confirm.
file_name = "prompts_few_shot_dialog_emo1.jsonl"
prefix = generate_few_shot_dialog_emo1_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_few_shot_dialog_emo1')

#### Передача трёх эмоций всего диалога и примеров

In [None]:
# Kept for backward compatibility. This rebinds a name that an earlier cell set
# to the utterance-level tables, so the function below takes its tables
# explicitly instead of reading this global at call time.
train_score_emotions = [train_score_per_emotion1_dialog, train_score_per_emotion2_dialog, train_score_per_emotion3_dialog]
def generate_few_shot_dialog_emo3_batch(file_name, score_tables=None):
  """Write one few-shot batch request per dialog pair, keyed on up to three dialog emotions.

  For each row the distance to every training row is the sum, over the row's
  dialog emotions (in rank order, stopping at the first missing one), of the
  absolute difference between the row's score and the training score for that
  emotion at the same rank. The utterances following the five closest training
  rows (index + 1) are passed to the model as example replies.

  Args:
    file_name: path of the JSONL file to write.
    score_tables: optional sequence of three dicts (emotion -> per-train-row
      score list), one per emotion rank. Defaults to the dialog-level tables
      `train_score_per_emotion1_dialog..3_dialog`.

  Returns the custom_id prefix ('few-shot-dialog-emo3-').
  """
  if score_tables is None:
    score_tables = (train_score_per_emotion1_dialog, train_score_per_emotion2_dialog, train_score_per_emotion3_dialog)
  prefix = 'few-shot-dialog-emo3-'
  train_utterances = train['utterance']
  n_train = train.shape[0]
  array_cache = {}  # (rank, emotion) -> float array, built on first use
  with open(file_name, "w", encoding="utf-8") as f:
    for i in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[i]
      emos = [row['dialog_emo_1'], row['dialog_emo_2'], row['dialog_emo_3']]
      scores = [row['dialog_emo_1_score'], row['dialog_emo_2_score'], row['dialog_emo_3_score']]
      cur_emotions = [emo for emo in emos if isinstance(emo, str) and emo != '']
      diffs = np.zeros(n_train)
      for emo_ind, (emo, score) in enumerate(zip(emos, scores)):
        if not isinstance(emo, str) or emo == '':
          # Missing emotion (e.g. NaN): lower ranks are ignored as well.
          break
        cache_key = (emo_ind, emo)
        if cache_key not in array_cache:
          array_cache[cache_key] = np.asarray(score_tables[emo_ind][emo], dtype=float)
        diffs = diffs + np.abs(score - array_cache[cache_key])
      # argpartition returns the five smallest distances in no particular order.
      min5_indices = np.argpartition(diffs, 5)[:5]
      # The reply to training row k is stored in row k + 1.
      examples = train_utterances.iloc[min5_indices + 1].tolist()
      sys_prompt = top_emotions_few_shot_prompt(cur_emotions, examples)
      user_prompt = row["user_utterance"]

      batch_obj = {
          "custom_id": f"{prefix}{i}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user",   "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 4: write the few-shot (three dialog emotions) prompts and run them.
# Fixed: this cell previously called generate_few_shot_dialog_emo1_batch, so the
# 'gpt_few_shot_dialog_emo3' column was filled from the one-emotion experiment.
# NOTE(review): get_new_batch_id and add_column are defined outside this section.
file_name = "prompts_dialog_emo3_few.jsonl"
prefix = generate_few_shot_dialog_emo3_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_few_shot_dialog_emo3')

'batch_68275d2eb7d4819091d482319172c24a'

### Эксперимент 5. Передача словаря эмоциональных слов, подходящих для текущей эмоции пользователя.

In [None]:
def get_emotional_words_prompt(phrases, emotion):
  """Build the system prompt that suggests phrases suited to the user's emotion.

  `phrases` is an iterable of strings joined with ',' (no space); `emotion` is
  inserted as is. The prompt also embeds the module-level `answer_length`.
  """
  phrase_list = ','.join(phrases)
  system_prompt = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  The user is experiencing the following emotion: {emotion}.
  Typically when answering this emotion you might use some of the following phrases, you may find them useful: {phrase_list}.
  Do not write any additional comments, return only the generated empathetic, relevant and fluent answer.
  '''
  return system_prompt

In [None]:
def generate_emo_phrases_batch(file_name):
  """Write one batch request per dialog pair with phrases matching the top emotion.

  The phrases are looked up in `emo_dict` by the row's `utter_emo_1`.
  Returns the custom_id prefix ('phrases-emo-').
  """
  prefix = 'phrases-emo-'
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      top_emotion = row['utter_emo_1']
      messages = [
          {"role": "system", "content": get_emotional_words_prompt(emo_dict[top_emotion], top_emotion)},
          {"role": "user", "content": row["user_utterance"]},
      ]
      request = {
          "custom_id": f"{prefix}{row_idx}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 5: write the emotional-phrases prompts to JSONL and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_emo_phrases' column — confirm.
file_name = "prompts_emo_phrases.jsonl"
prefix = generate_emo_phrases_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_emo_phrases')

### Эксперимент 6. Использование COMET.

In [None]:
def generate_comet_batch(file_name):
  """Write one batch request per dialog pair, enriched with COMET-generated endings.

  For every row, one generated ending per entry of `cause_of_ending` is read
  from `generated_endings`; endings equal to 'none' (after strip/lower) are
  dropped, the rest are phrased as '<cause> <ending>.' and passed to
  `get_comet_prompt`.

  Fixed: the original read `generated_endings[endings_counter + ind]` for every
  row without ever advancing the offset, so all rows received the same
  endings. The offset now moves forward by `len(cause_of_ending)` per row,
  starting from the module-level `endings_counter`.

  NOTE(review): this assumes `generated_endings` stores
  `len(cause_of_ending)` consecutive endings per `dialog_pairs` row, in row
  order, and that `endings_counter` points at the first row's group — confirm
  against the cell that fills `generated_endings`.

  Returns the custom_id prefix ('comet-').
  """
  prefix = 'comet-'
  endings_per_row = len(cause_of_ending)
  with open(file_name, "w", encoding="utf-8") as f:
    for i in range(dialog_pairs.shape[0]):
      row_start = endings_counter + i * endings_per_row
      non_blank_endings = []
      for ind in range(endings_per_row):
        generated_ending = generated_endings[row_start + ind].strip().lower()
        if generated_ending == 'none':
          continue
        non_blank_endings.append(f'{cause_of_ending[ind]} {generated_ending}.')
      sys_prompt  = get_comet_prompt(non_blank_endings)
      user_prompt = dialog_pairs.iloc[i]["user_utterance"]

      batch_obj = {
          "custom_id": f"{prefix}{i}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user",   "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 6: write the COMET-enriched prompts to JSONL and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_comet' column — confirm.
file_name = "prompts_comet.jsonl"
prefix = generate_comet_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_comet')

### Эксперимент 7. Переформулирование первичного промпта с учётом эмоции, которую испытывает пользователь.

In [None]:
def reformulate_prompt(emotion):
  """Build the system prompt asking the model to rewrite the user's message.

  The rewrite should express `emotion` more clearly; the emotion is inserted
  at the end of the instructions.
  """
  system_prompt = f'''
  Your task is to reformulate the user's message so that the emotion they are experiencing is more clearly expressed in the text.
  Make it more expressive. In your answer write just the reformulated text, no additional comments.
  The emotion: {emotion}
  '''
  return system_prompt

In [None]:
def generate_reformulate_batch(file_name):
  """Write one batch request per dialog pair asking for an emotion-aware rewrite.

  The system prompt is built from the row's `utter_emo_1`; the user message is
  the original utterance. Returns the custom_id prefix ('reformulate-').
  """
  prefix = 'reformulate-'
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": reformulate_prompt(row['utter_emo_1'])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      request = {
          "custom_id": f"{prefix}{row_idx}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 7, step 1: ask the model to reformulate each user utterance.
# NOTE(review): get_new_batch_id and add_column are defined outside this section.
# The result is stored under 'gpt_reform', while generate_reform_result_batch
# reads the column "reformulate" — confirm that column is created elsewhere.
file_name = "prompts_reformulate.jsonl"
prefix = generate_reformulate_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_reform')

In [None]:
def generate_reform_result_batch(file_name):
  """Write one batch request per dialog pair that answers the reformulated utterance.

  The system prompt is the module-level `gpt_base_prompt`; the user message is
  taken from the "reformulate" column of `dialog_pairs`.

  NOTE(review): the reformulation cell stores its output under 'gpt_reform';
  confirm that a "reformulate" column exists when this function runs.

  Returns the custom_id prefix ('reformulate-res-').
  """
  prefix = 'reformulate-res-'
  with open(file_name, "w", encoding="utf-8") as out_file:
    for row_idx in range(dialog_pairs.shape[0]):
      messages = [
          {"role": "system", "content": gpt_base_prompt},
          {"role": "user", "content": dialog_pairs.iloc[row_idx]["reformulate"]},
      ]
      request = {
          "custom_id": f"{prefix}{row_idx}",
          "method": "POST",
          "url": "/v1/chat/completions",
          "body": {"model": model_gpt, "messages": messages, "max_tokens": 200},
      }
      out_file.write(json.dumps(request, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 7, step 2: answer the reformulated utterances with the base prompt.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_reform_result' column — confirm.
file_name = "prompts_reform_result.jsonl"
prefix = generate_reform_result_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_reform_result')

'batch_68276b362e408190872c9794e8a1db59'

### Эксперимент 8. Концентрация внимания на эмоционально окрашенных словах и словах, важных для понимания контекста.

In [None]:
# System prompt for experiment 8: the user message arrives with emotionally
# loaded words wrapped in [BOLD] ... [/BOLD] tags. Embeds `answer_length`.
# NOTE(review): "empathesize" looks like a typo for "emphasize"; it is left
# unchanged here because the text is sent to the model as is.
emo_highlight_prompt = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  Some words are marked with [BOLD] [/BOLD] tags. Pay special attention to them as they empathesize the emotional state of the user.
'''

In [None]:
def _highlight_emotional_words(utterance, emo_scores, threshold=0.7):
  """Wrap emotionally loaded, space-separated tokens of `utterance` in [BOLD] tags.

  A token's lookup key is its leading run of ASCII letters, lower-cased (so
  "Happy," is looked up as "happy"). Tokens whose key has a score of at least
  `threshold` in `emo_scores` are wrapped; all other text is kept as is.
  """
  alpha = 'qwertyuiopasdfghjklzxcvbnm'
  marked_tokens = []
  for token in utterance.split(' '):
    lowered = token.lower()
    end = 0
    while end < len(lowered) and lowered[end] in alpha:
      end += 1
    if emo_scores.get(lowered[:end], 0) >= threshold:
      marked_tokens.append(f'[BOLD]{token}[/BOLD]')
    else:
      marked_tokens.append(token)
  return ' '.join(marked_tokens)


def generate_emo_highlight_batch(file_name):
  """Write one batch request per dialog pair with emotional words highlighted.

  Words whose `emo_score` in the `vad` table is at least 0.7 are wrapped in
  [BOLD] ... [/BOLD] in the user message; the system prompt is
  `emo_highlight_prompt`.

  Fixes over the original character loop:
  - the last word of an utterance ending in a letter is no longer dropped;
  - words containing uppercase letters are looked up by their lower-cased
    letters (the original looked up an empty or truncated key for them);
  - the `vad` table is turned into a dict once instead of being filtered for
    every word (for duplicated terms the first row wins, as before).
  The message no longer ends with an extra trailing space.

  Returns the custom_id prefix ('emo-highlight-').
  """
  prefix = 'emo-highlight-'
  emo_scores = vad.drop_duplicates(subset='term').set_index('term')['emo_score'].to_dict()
  with open(file_name, "w", encoding="utf-8") as f:
    for ind in range(dialog_pairs.shape[0]):
      utterance = dialog_pairs.iloc[ind]['user_utterance']
      sys_prompt = emo_highlight_prompt
      user_prompt = _highlight_emotional_words(utterance, emo_scores)

      batch_obj = {
          "custom_id": f"{prefix}{ind}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user",   "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 8: write the highlighted-words prompts to JSONL and run them.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_emo_highlight' column — confirm.
file_name = "prompts_emo_highlight.jsonl"
prefix = generate_emo_highlight_batch(file_name)
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_emo_highlight')

'batch_682771de79748190b493f400a2d0a0de'

### Эксперимент 9. Повторное улучшение полученного первичного ответа самой же моделью.

In [None]:
def improve_answer_prompt(answer):
  """Build the system prompt asking the model to improve a previous answer.

  `answer` (the reply to improve) is inserted into the instructions; the prompt
  states that the user's message follows right after it.
  """
  system_prompt = f'''
  Your task now is to improve the following answer so that it is more expressive, empathetic, fluent and relevant to the user"s message, that you will get right after.
  Here is a more detailed description of each characteristic:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  The answer to improve, according to the above-mentioned characteristics: {answer}.
  Do not provide any additional comments: return just the improved answer.
  '''
  return system_prompt

In [None]:
def generate_impr_answer_batch(file_name, column_to_improve):
  """Write one batch request per dialog pair asking the model to improve an earlier answer.

  Args:
    file_name: path of the JSONL file to write.
    column_to_improve: name of the `dialog_pairs` column holding the answers
      to improve.

  Fixed: the user's utterance was read but never sent, although the system
  prompt tells the model that the user's message follows right after. It is
  now included as the user message.

  Returns the custom_id prefix ('impr-').
  """
  prefix = 'impr-'
  with open(file_name, "w", encoding="utf-8") as f:
    for i in range(dialog_pairs.shape[0]):
      sys_prompt  = improve_answer_prompt(dialog_pairs.iloc[i][column_to_improve])
      user_prompt = dialog_pairs.iloc[i]["user_utterance"]

      batch_obj = {
          "custom_id": f"{prefix}{i}",
          "method": "POST",
          "url":    "/v1/chat/completions",
          "body": {
              "model": model_gpt,
              "messages": [
                  {"role": "system", "content": sys_prompt},
                  {"role": "user",   "content": user_prompt}
                  ],
              "max_tokens": 200
              }
          }
      f.write(json.dumps(batch_obj, ensure_ascii=False) + "\n")
  return prefix

In [None]:
# Experiment 9, first pass: improve the answers stored in the 'gpt_base' column.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_impr1' column — confirm.
file_name = "prompts_impr1.jsonl"
prefix = generate_impr_answer_batch(file_name, 'gpt_base')
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_impr1')

'batch_68277832f0788190acfa85737d4c8cb8'

In [None]:
# Experiment 9, second pass: improve the first-pass answers from 'gpt_impr1'.
# NOTE(review): get_new_batch_id and add_column are defined outside this section;
# presumably the replies end up in the 'gpt_impr2' column — confirm.
file_name = "prompts_impr2.jsonl"
prefix = generate_impr_answer_batch(file_name, 'gpt_impr1')
batch_id = get_new_batch_id(file_name)
add_column(prefix, batch_id, 'gpt_impr2')

Batch(id='batch_68277832f0788190acfa85737d4c8cb8', completion_window='24h', created_at=1747417138, endpoint='/v1/chat/completions', input_file_id='file-7eQNRS73UUyjEfGSPSBrB3', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1747418476, error_file_id=None, errors=None, expired_at=None, expires_at=1747503538, failed_at=None, finalizing_at=1747418281, in_progress_at=1747417203, metadata=None, output_file_id='file-VzJNzVMo8rgB78VgeMcp3W', request_counts=BatchRequestCounts(completed=2602, failed=0, total=2602))

## Deepseek

In [None]:
# DeepSeek setup: install the client libraries (versions are not pinned) and
# create the shared HTTP settings used by the async request helpers.
!pip install openai
!pip install uvloop
import json
from openai import OpenAI
import asyncio, aiohttp, os, json, uvloop
# Chat-completions endpoint that get_response posts to.
BASE_URL = "https://api.deepseek.com/v1/chat/completions"
model_ds = 'deepseek-chat'
# NOTE(review): API_KEY is not defined in this cell; presumably it is set
# earlier in the notebook — make sure it is not hard-coded.
deepseek_client = OpenAI(api_key=API_KEY, base_url="https://api.deepseek.com")
headers = {"Authorization": f"Bearer {API_KEY}"}
# Use uvloop's event loop implementation for asyncio.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
# Shared semaphore acquired in get_response: at most 20 requests in flight.
sem = asyncio.Semaphore(20)

Collecting uvloop
  Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.0/4.0 MB[0m [31m68.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvloop
Successfully installed uvloop-0.21.0


### Эксперимент 1

In [None]:
async def get_response(session, body, max_retries=None, base_delay=0.2, max_delay=10.0):
  """POST one chat-completion request and return the reply text, retrying on failure.

  Args:
    session: aiohttp client session used for the request.
    body: JSON-serializable request payload.
    max_retries: maximum number of retries after the first attempt; None (the
      default) retries without limit, as the original did.
    base_delay: delay in seconds before the first retry; doubles on every
      further retry.
    max_delay: upper bound in seconds for the delay between retries.

  Returns:
    The content of the first choice's message.

  Raises:
    The last exception when the response status is a non-retryable client
    error (4xx other than 408 and 429), or when `max_retries` is exhausted.
    The original retried every failure forever, so a permanent error such as
    an invalid API key made the notebook hang with no output.
  """
  attempt = 0
  while True:
    try:
      # The semaphore bounds the number of concurrent requests.
      async with sem:
        async with session.post(BASE_URL, json=body, headers=headers) as r:
          r.raise_for_status()
          j = await r.json()
          return j["choices"][0]["message"]["content"]
    except Exception as exc:
      # HTTP errors raised by raise_for_status carry the status code.
      status = getattr(exc, "status", None)
      is_permanent = isinstance(status, int) and 400 <= status < 500 and status not in (408, 429)
      attempt += 1
      if is_permanent or (max_retries is not None and attempt > max_retries):
        raise
      # Back off outside the semaphore so waiting requests do not hold a slot.
      await asyncio.sleep(min(max_delay, base_delay * 2 ** min(attempt - 1, 10)))

In [None]:
from tqdm.asyncio import tqdm_asyncio

async def get_all_base_calls():
  """Send the base prompt with every user utterance and gather all replies.

  One task per `dialog_pairs` row is created on a shared aiohttp session; the
  loop pauses for 0.1 s after every tenth row (row 0 included). Returns the
  replies in row order.
  """
  session_timeout = aiohttp.ClientTimeout(total=120)
  tcp_connector = aiohttp.TCPConnector(
      limit=20,
      limit_per_host=20,
      keepalive_timeout=30,
    )
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=session_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      messages = [
          {"role": "system", "content": base_prompt},
          {"role": "user", "content": dialog_pairs.iloc[row_idx]["user_utterance"]},
      ]
      request_body = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, request_body)))
      if row_idx % 10 == 0:
        # Brief pause so the tasks are not all created at once.
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Experiment 1 (DeepSeek): run all base-prompt requests and store the replies
# in the 'ds_base' column, one per dialog_pairs row.
responses = await get_all_base_calls()
dialog_pairs['ds_base'] = responses

Processing requests: 100%|██████████| 2602/2602 [37:25<00:00,  1.16it/s]


### Эксперимент 2

In [None]:
def dialog_context_prompt(context):
  """Build the experiment-2 system prompt that includes earlier user messages.

  Args:
    context: text describing the user's previous messages in the dialog.

  Returns:
    The prompt text, with ``answer_length`` and ``context`` interpolated.
  """
  prompt_lines = [
      '',
      f'Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.',
      'Your answer also should be empathetic, relevant and fluent. Here is the description what it means:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f'Also consider that this utterance is not the first in the dialog. The previous messages of the user look like this: {context}',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
# For every dialog pair, collect the user's earlier messages from the same
# conversation. Rows that share a conv_id are assumed to be consecutive and in
# turn order, as the previous-row comparison below relies on.
contexts = []
prev_conv_id = None
context = ''
for i in range(dialog_pairs.shape[0]):
  row = dialog_pairs.iloc[i]
  # Reset *before* recording, so the first turn of a new conversation does not
  # inherit the history of the previous one.
  if row['conv_id'] != prev_conv_id:
    context = ''
    prev_conv_id = row['conv_id']
  contexts.append(context)
  # Extend the history with the current user message for the following turns;
  # without this step every context stays empty.
  utterance = row['user_utterance']
  context = f'{utterance}' if context == '' else f'{context}\n{utterance}'

In [None]:
# Attach the per-row context strings; get_all_context_calls reads this column.
dialog_pairs['dialog_context'] = contexts

In [None]:
# Experiment 2
async def get_all_context_calls():
  """Query the model for every dialog pair with a context-aware system prompt.

  The system message is ``dialog_context_prompt`` applied to the row's
  ``dialog_context``; the user message is the row's ``user_utterance``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": dialog_context_prompt(row['dialog_context'])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run experiment 2 and store the replies in the `ds_context` column.
responses = await get_all_context_calls()
dialog_pairs['ds_context'] = responses

Processing requests: 100%|██████████| 2602/2602 [11:58<00:00,  3.62it/s]


### Эксперимент 3

#### Передача одной эмоции реплики

In [None]:
def emo1_prompt(emotion):
  """Build the system prompt that tells the model one emotion the user feels.

  Args:
    emotion: emotion label interpolated into the prompt.

  Returns:
    The prompt text, with ``answer_length`` and ``emotion`` interpolated.
  """
  prompt_lines = [
      '',
      f'Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.',
      'Your answer also should be empathetic, relevant and fluent. Here is the description what it means:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f'You also should consider that the user is experiencing this emotion: {emotion}. Do not give any additional comments, provide only the answer',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
async def get_all_emo1_calls():
  """Query the model for every dialog pair, passing the utterance's top emotion.

  The system message is ``emo1_prompt`` applied to the row's ``utter_emo_1``;
  the user message is the row's ``user_utterance``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": emo1_prompt(row['utter_emo_1'])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run the single-utterance-emotion variant and store the replies in `ds_emo1`.
responses = await get_all_emo1_calls()
dialog_pairs['ds_emo1'] = responses

Processing requests: 100%|██████████| 2602/2602 [11:16<00:00,  3.85it/s]


#### Передача трёх эмоций реплики

In [None]:
def emo3_prompt(emotions):
  """Build the system prompt that lists several emotions the user feels.

  Args:
    emotions: iterable of emotion labels; they are joined with ``', '``.

  Returns:
    The prompt text, with ``answer_length`` and the joined emotions interpolated.
  """
  emotion_list = ', '.join(emotions)
  prompt_lines = [
      '',
      f'Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.',
      'Your answer also should be empathetic, relevant and fluent. Here is the description what it means:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f'You also should consider that the user is experiencing these emotions: {emotion_list}. Keep that in mind while writing the answer.',
      'The emotions are stated from the strongest one in the message to the least strong.',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
async def get_all_emo3_calls():
  """Query the model for every dialog pair, passing up to three utterance emotions.

  The row's ``utter_emo_1``..``utter_emo_3`` values are kept only when they are
  non-empty strings, then handed to ``emo3_prompt`` for the system message.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  emotion_columns = ('utter_emo_1', 'utter_emo_2', 'utter_emo_3')
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      candidates = [row[column] for column in emotion_columns]
      # Missing slots (non-string values or empty strings) are left out.
      present = [emo for emo in candidates if isinstance(emo, str) and len(emo) != 0]
      messages = [
          {"role": "system", "content": emo3_prompt(present)},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run the three-utterance-emotions variant and store the replies in `ds_emo3`.
responses = await get_all_emo3_calls()
dialog_pairs['ds_emo3'] = responses

Processing requests: 100%|██████████| 2602/2602 [13:07<00:00,  3.30it/s]


#### Передача одной эмоции диалога

In [None]:
async def get_all_dialog_emo1_calls():
  """Query the model for every dialog pair, passing the dialog's top emotion.

  The system message is ``emo1_prompt`` applied to the row's ``dialog_emo_1``;
  the user message is the row's ``user_utterance``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": emo1_prompt(row['dialog_emo_1'])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run the single-dialog-emotion variant and store the replies in `ds_dialog_emo1`.
responses = await get_all_dialog_emo1_calls()
dialog_pairs['ds_dialog_emo1'] = responses

Processing requests: 100%|██████████| 2602/2602 [11:38<00:00,  3.72it/s]


#### Передача трёх эмоций диалога

In [None]:
async def get_all_dialog_emo3_calls():
  """Query the model for every dialog pair, passing up to three dialog emotions.

  The row's ``dialog_emo_1``..``dialog_emo_3`` values are kept only when they
  are non-empty strings, then handed to ``emo3_prompt`` for the system message.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  emotion_columns = ('dialog_emo_1', 'dialog_emo_2', 'dialog_emo_3')
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      candidates = [row[column] for column in emotion_columns]
      # Missing slots (non-string values or empty strings) are left out.
      present = [emo for emo in candidates if isinstance(emo, str) and len(emo) != 0]
      messages = [
          {"role": "system", "content": emo3_prompt(present)},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run the three-dialog-emotions variant and store the replies in `ds_dialog_emo3`.
responses = await get_all_dialog_emo3_calls()
dialog_pairs['ds_dialog_emo3'] = responses

Processing requests: 100%|██████████| 2602/2602 [13:00<00:00,  3.33it/s]


### Эксперимент 4

In [None]:
def top_emotion_few_shot_prompt(emotion, examples):
  """Build the few-shot system prompt for a single emotion.

  Args:
    emotion: emotion label interpolated into the prompt.
    examples: iterable of example answers; they are joined with newlines.

  Returns:
    The prompt text, with ``answer_length``, ``emotion`` and the examples interpolated.
  """
  example_block = '\n'.join(examples)
  prompt_lines = [
      f'Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.',
      'Your answer also should be empathetic, relevant and fluent. Here is the description what it means:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f'You also should consider that the user is experiencing this emotion: {emotion}. Keep that in mind while writing the answer.',
      'I will also provide you with some examples of the answers to the users that were experiencing the same emotion. You can make use of the structure and emotional vocabulary of these examples, however, the content of your answer must corresponding to the user"s message I mentioned earlier.',
      f'Here are the examples: {example_block}',
      '',
  ]
  # No leading newline here; lines are separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
def top_emotions_few_shot_prompt(emotions, examples):
  """Build the few-shot system prompt for several emotions.

  Args:
    emotions: iterable of emotion labels; they are joined with ``','``.
    examples: iterable of example answers; they are joined with newlines.

  Returns:
    The prompt text, with ``answer_length``, the emotions and the examples interpolated.
  """
  example_block = '\n'.join(examples)
  emotion_list = ','.join(emotions)
  prompt_lines = [
      f'Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.',
      'Your answer also should be empathetic, relevant and fluent. Here is the description what it means:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f'You also should consider that the user is experiencing this emotion: {emotion_list}. Keep that in mind while writing the answer. The emotions are stated from the strongest one in the message to the least strong.',
      'I will also provide you with some examples of the answers to the users that were experiencing the same emotion. You can make use of the structure and emotional vocabulary of these examples, however, the content of your answer must corresponding to the user"s message I mentioned earlier.',
      f'Here are the examples: {example_block}',
      '',
  ]
  # No leading newline here; lines are separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

#### Передача одной эмоции реплики и примеров

In [None]:
# Experiment 4.1
async def get_all_emo1_few_shot_calls():
  """Query the model with the utterance's top emotion plus five train examples.

  For each row, the five entries of ``train_score_per_emotion1[emotion]`` whose
  score is closest to the row's ``utter_emo_1_score`` are located, and the
  ``train`` rows one position after them supply the example utterances.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      top_emotion = row['utter_emo_1']
      top_score = row['utter_emo_1_score']
      train_scores = train_score_per_emotion1[top_emotion]
      distances = np.array([abs(top_score - train_score) for train_score in train_scores])
      # argpartition puts the five smallest distances (in no particular order) first.
      nearest = np.argpartition(distances, 5)[:5]
      example_rows = [position + 1 for position in nearest]
      examples = list(train.iloc[example_rows]['utterance'])
      messages = [
          {"role": "system", "content": top_emotion_few_shot_prompt(top_emotion, examples)},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run experiment 4.1 and store the replies in `ds_emo1_few_shot`.
responses = await get_all_emo1_few_shot_calls()
dialog_pairs['ds_emo1_few_shot'] = responses

Processing requests: 100%|██████████| 2602/2602 [10:45<00:00,  4.03it/s]


#### Передача трёх эмоций реплики и примеров

In [None]:
async def get_all_emo3_few_shot_calls():
  """Query the model with up to three utterance emotions plus five train examples.

  For each row of ``dialog_pairs`` the per-emotion score differences against the
  train data are summed, the five smallest totals are selected, and the ``train``
  rows one position after them supply the example utterances for
  ``top_emotions_few_shot_prompt``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  # One lookup per emotion slot (1st, 2nd, 3rd).
  # Presumably each maps an emotion label to per-train-row scores — TODO confirm.
  train_score_emotions = [train_score_per_emotion1, train_score_per_emotion2, train_score_per_emotion3]
  connector = aiohttp.TCPConnector(
      limit=20,
      limit_per_host=20,
      keepalive_timeout=30,
    )
  timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
    jobs = []
    for i in range(dialog_pairs.shape[0]):
      emo1 = dialog_pairs.iloc[i]['utter_emo_1']
      emo2 = dialog_pairs.iloc[i]['utter_emo_2']
      emo3 = dialog_pairs.iloc[i]['utter_emo_3']
      emo1_score = dialog_pairs.iloc[i]['utter_emo_1_score']
      emo2_score = dialog_pairs.iloc[i]['utter_emo_2_score']
      emo3_score = dialog_pairs.iloc[i]['utter_emo_3_score']
      emos = [emo1, emo2, emo3]
      scores = [emo1_score, emo2_score, emo3_score]
      # Labels shown in the prompt: every slot that holds a non-empty str.
      cur_emotions = list(filter(lambda x: type(x) == type('') and x != '', emos))
      diffs = [0 for _ in range(train.shape[0])]
      for emo_ind in range(3):
        # Stop at the first missing slot. Unlike the filter above, later slots
        # are then ignored even if they are filled.
        if type(emos[emo_ind]) != type('') or emos[emo_ind] == '':
          break
        cur_scores = train_score_emotions[emo_ind][emos[emo_ind]]
        # Add this slot's absolute score difference; the list is rebuilt with
        # len(cur_scores) entries.
        diffs = [diffs[k] + abs(scores[emo_ind] - cur_scores[k]) for k in range(len(cur_scores))]
      # Indices of the five smallest totals (argpartition does not order them).
      min5_indices = np.argpartition(np.array(diffs), 5)[:5]
      # Shift by one row: presumably the reply following the matched utterance — TODO confirm.
      # NOTE(review): index + 1 is out of range if the last train row is selected.
      final_indices = [index + 1 for index in min5_indices]
      sys_prompt  = top_emotions_few_shot_prompt(cur_emotions, list(train.iloc[final_indices]['utterance']))
      user_prompt = dialog_pairs.iloc[i]["user_utterance"]
      body = {
          "model": model_ds,
          "messages": [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt}
          ],
          "max_tokens": 200
      }
      jobs.append(asyncio.create_task(get_response(session, body)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if i % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*jobs, desc="Processing requests")

In [None]:
# Run the three-utterance-emotions few-shot variant and store the replies in `ds_emo3_few_shot`.
responses = await get_all_emo3_few_shot_calls()
dialog_pairs['ds_emo3_few_shot'] = responses

Processing requests: 100%|██████████| 2602/2602 [08:49<00:00,  4.91it/s]


#### Передача одной эмоции диалога и примеров

In [None]:
async def get_all_dialog_emo1_few_shot_calls():
  """Query the model with the dialog's top emotion plus five train examples.

  For each row, the five entries of ``train_score_per_emotion1_dialog[emotion]``
  whose score is closest to the row's ``dialog_emo_1_score`` are located, and
  the ``train`` rows one position after them supply the example utterances.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      top_emotion = row['dialog_emo_1']
      top_score = row['dialog_emo_1_score']
      train_scores = train_score_per_emotion1_dialog[top_emotion]
      distances = np.array([abs(top_score - train_score) for train_score in train_scores])
      # argpartition puts the five smallest distances (in no particular order) first.
      nearest = np.argpartition(distances, 5)[:5]
      example_rows = [position + 1 for position in nearest]
      examples = list(train.iloc[example_rows]['utterance'])
      messages = [
          {"role": "system", "content": top_emotion_few_shot_prompt(top_emotion, examples)},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run the single-dialog-emotion few-shot variant and store the replies in `ds_dialog_emo1_few_shot`.
responses = await get_all_dialog_emo1_few_shot_calls()
dialog_pairs['ds_dialog_emo1_few_shot'] = responses

Processing requests: 100%|██████████| 2602/2602 [11:32<00:00,  3.76it/s]


#### Передача трёх эмоций диалога и примеров

In [None]:
async def get_all_dialog_emo3_few_shot_calls():
  """Query the model with up to three dialog emotions plus five train examples.

  For each row of ``dialog_pairs`` the per-emotion score differences against the
  train data are summed, the five smallest totals are selected, and the ``train``
  rows one position after them supply the example utterances for
  ``top_emotions_few_shot_prompt``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  # One lookup per emotion slot (1st, 2nd, 3rd).
  # Presumably each maps an emotion label to per-train-row scores — TODO confirm.
  train_score_emotions = [train_score_per_emotion1_dialog, train_score_per_emotion2_dialog, train_score_per_emotion3_dialog]
  connector = aiohttp.TCPConnector(
      limit=20,
      limit_per_host=20,
      keepalive_timeout=30,
    )
  timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
    jobs = []
    for i in range(dialog_pairs.shape[0]):
      emo1 = dialog_pairs.iloc[i]['dialog_emo_1']
      emo2 = dialog_pairs.iloc[i]['dialog_emo_2']
      emo3 = dialog_pairs.iloc[i]['dialog_emo_3']
      emo1_score = dialog_pairs.iloc[i]['dialog_emo_1_score']
      emo2_score = dialog_pairs.iloc[i]['dialog_emo_2_score']
      emo3_score = dialog_pairs.iloc[i]['dialog_emo_3_score']
      emos = [emo1, emo2, emo3]
      scores = [emo1_score, emo2_score, emo3_score]
      # Labels shown in the prompt: every slot that holds a non-empty str.
      cur_emotions = list(filter(lambda x: type(x) == type('') and x != '', emos))
      diffs = [0 for _ in range(train.shape[0])]
      for emo_ind in range(3):
        # Stop at the first missing slot. Unlike the filter above, later slots
        # are then ignored even if they are filled.
        if type(emos[emo_ind]) != type('') or emos[emo_ind] == '':
          break
        cur_scores = train_score_emotions[emo_ind][emos[emo_ind]]
        # Add this slot's absolute score difference; the list is rebuilt with
        # len(cur_scores) entries.
        diffs = [diffs[k] + abs(scores[emo_ind] - cur_scores[k]) for k in range(len(cur_scores))]
      # Indices of the five smallest totals (argpartition does not order them).
      min5_indices = np.argpartition(np.array(diffs), 5)[:5]
      # Shift by one row: presumably the reply following the matched utterance — TODO confirm.
      # NOTE(review): index + 1 is out of range if the last train row is selected.
      final_indices = [index + 1 for index in min5_indices]
      sys_prompt  = top_emotions_few_shot_prompt(cur_emotions, list(train.iloc[final_indices]['utterance']))
      user_prompt = dialog_pairs.iloc[i]["user_utterance"]
      body = {
          "model": model_ds,
          "messages": [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt}
          ],
          "max_tokens": 200
      }
      jobs.append(asyncio.create_task(get_response(session, body)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if i % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*jobs, desc="Processing requests")

In [None]:
# Run the three-dialog-emotions few-shot variant and store the replies in `ds_dialog_emo3_few_shot`.
responses = await get_all_dialog_emo3_few_shot_calls()
dialog_pairs['ds_dialog_emo3_few_shot'] = responses

Processing requests: 100%|██████████| 2602/2602 [10:10<00:00,  4.27it/s]


### Эксперимент 5

In [None]:
def get_emotional_words_prompt(phrases, emotion):
  """Build the system prompt that suggests phrases typical for an emotion.

  Args:
    phrases: iterable of phrase strings; they are joined with ``','``.
    emotion: emotion label interpolated into the prompt.

  Returns:
    The prompt text, with ``answer_length``, ``emotion`` and the phrases interpolated.
  """
  phrase_list = ','.join(phrases)
  prompt_lines = [
      '',
      f'Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.',
      'Your answer also should be empathetic, relevant and fluent. Here is the description what it means:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f'The user is experiencing the following emotion: {emotion}.',
      f'Typically when answering this emotion you might use some of the following phrases, you may find them useful: {phrase_list}.',
      'Do not write any additional comments, return only the generated empathetic, relevant and fluent answer.',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
async def get_all_emo_phrases_calls():
  """Query the model with the utterance's top emotion and its phrase list.

  The system message is ``get_emotional_words_prompt`` applied to
  ``emo_dict[emotion]`` and the row's ``utter_emo_1``; the user message is the
  row's ``user_utterance``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      top_emotion = row['utter_emo_1']
      system_text = get_emotional_words_prompt(emo_dict[top_emotion], top_emotion)
      messages = [
          {"role": "system", "content": system_text},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Run experiment 5 and store the replies in `ds_emo_phrases`.
responses = await get_all_emo_phrases_calls()
dialog_pairs['ds_emo_phrases'] = responses

Processing requests: 100%|██████████| 2602/2602 [10:40<00:00,  4.06it/s]


### Эксперимент 6

In [None]:
async def get_all_comet_calls():
  """Query the model for every dialog pair with a prompt built from generated endings.

  For each row a window of ``generated_endings`` is read, entries equal to
  ``'none'`` are dropped, each remaining one is prefixed with the matching
  ``cause_of_ending`` text, and the list is passed to ``get_comet_prompt``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  connector = aiohttp.TCPConnector(
      limit=20,
      limit_per_host=20,
      keepalive_timeout=30,
    )
  timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
    jobs = []
    # Offset of the current row's first entry in the flat generated_endings list.
    endings_counter = 0
    for i in range(dialog_pairs.shape[0]):
      non_blank_endings = []
      for ind in range(len(cause_of_ending)):
        generated_ending = generated_endings[endings_counter + ind].strip().lower()
        # Skip placeholders that carry no content.
        if generated_ending == 'none':
          continue
        non_blank_endings.append(f'{cause_of_ending[ind]} {generated_ending}.')
      sys_prompt  = get_comet_prompt(non_blank_endings)
      user_prompt = dialog_pairs.iloc[i]["user_utterance"]
      body = {
          "model": model_ds,
          "messages": [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt}
          ],
          "max_tokens": 200
      }
      jobs.append(asyncio.create_task(get_response(session, body)))
      # NOTE(review): the window above spans len(cause_of_ending) entries, but the
      # offset advances by len(endings); the rows stay aligned only if the two
      # lengths are equal — confirm.
      endings_counter += len(endings)
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if i % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*jobs, desc="Processing requests")

In [None]:
# Run experiment 6 and store the replies in `ds_comet`.
responses = await get_all_comet_calls()
dialog_pairs['ds_comet'] = responses

Processing requests: 100%|██████████| 2602/2602 [17:29<00:00,  2.48it/s]


### Эксперимент 7

In [None]:
def reformulate_prompt(emotion):
  """Build the system prompt that asks the model to rewrite a message more expressively.

  Args:
    emotion: emotion label interpolated into the prompt.

  Returns:
    The prompt text with ``emotion`` interpolated.
  """
  prompt_lines = [
      '',
      "Your task is to reformulate the user's message so that the emotion they are experiencing is more clearly expressed in the text.",
      'Make it more expressive. In your answer write just the reformulated text, no additional comments.',
      f'The emotion: {emotion}',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
async def get_all_reformulate_calls():
  """Ask the model to rewrite every user utterance so its top emotion is clearer.

  The system message is ``reformulate_prompt`` applied to the row's
  ``utter_emo_1``; the user message is the row's ``user_utterance``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": reformulate_prompt(row['utter_emo_1'])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
prompts_reformulated = await get_all_emo_phrases_calls()
dialog_pairs['ds_utter_reformulate'] = responses

Processing requests: 100%|██████████| 2602/2602 [10:47<00:00,  4.02it/s]


In [None]:
async def get_all_calls():
  """Answer every reformulated utterance using the shared base prompt.

  The system message is ``base_prompt``; the user message is the row's
  ``ds_utter_reformulate`` value.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": base_prompt},
          {"role": "user", "content": row["ds_utter_reformulate"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Answer the reformulated utterances with the base prompt and store the replies in `ds_reform_res`.
responses = await get_all_calls()
dialog_pairs['ds_reform_res'] = responses

Processing requests: 100%|██████████| 2602/2602 [17:56<00:00,  2.42it/s]


### Эксперимент 8

In [None]:
import pandas as pd
# Load the tab-separated NRC VAD lexicon; the code below reads its
# `valence`, `arousal` and `dominance` columns.
vad = pd.read_csv("NRC-VAD-Lexicon-v2.1.txt", sep="\t")
# Map the three dimensions from [min_val, max_val] onto [0, 1].
min_val = -1
max_val = 1
diff = max_val - min_val
columns = ['valence', 'arousal', 'dominance']
for column in columns:
  vad[column] = (vad[column] - min_val) / diff
# Emotional intensity: squared distance of valence from its midpoint plus the
# squared half-arousal. Computed on whole columns instead of one .iloc lookup
# per row, which is far slower on a lexicon-sized frame.
vad['emo_score'] = (vad['valence'] - 0.5) ** 2 + (vad['arousal'] / 2) ** 2
# Min-max scale the score to [0, 1].
min_emo = min(vad['emo_score'])
max_emo = max(vad['emo_score'])
diff = max_emo - min_emo
vad['emo_score'] = (vad['emo_score'] - min_emo) / diff

In [None]:
import nltk
from nltk.stem import WordNetLemmatizer
# Download the WordNet corpus used by WordNetLemmatizer.
nltk.download("wordnet")
# NOTE(review): `lemmatizer` is not referenced by the nearby cells — confirm it is still needed.
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
# System prompt for experiment 8: the user message will contain words wrapped in
# [BOLD] [/BOLD] tags. `answer_length` is interpolated once, when this cell runs.
emo_highlight_prompt = f'''
  Imagine that you are an empathetic listener that needs to answer to the user"s message. Your answer should be at least {answer_length} long.
  Your answer also should be empathetic, relevant and fluent. Here is the description what it means:
  empathetic: does the responses show understanding of the feelings of the person talking about their experience?
  relevant: does the responses seem appropriate to the conversation? Are they on-topic?
  fluent: is it possible to understand the responses? Does the language seem accurate?
  Some words are marked with [BOLD] [/BOLD] tags. Pay special attention to them as they empathesize the emotional state of the user.
'''

In [None]:
def _highlight_emotional_words(utterance, emo_scores, threshold=0.7):
  """Wrap emotionally strong words of ``utterance`` in [BOLD] [/BOLD] tags.

  The utterance is split on single spaces. For each token the lookup key is its
  leading run of lowercase ASCII letters (so ``sad.`` is looked up as ``sad``);
  the token is emitted unchanged, or wrapped in tags when the key's score is at
  least ``threshold``. Unknown keys score 0.

  Args:
    utterance: text of the user message.
    emo_scores: mapping from lexicon term to its emotional-intensity score.
    threshold: minimum score for a token to be highlighted.

  Returns:
    The utterance with highlighted tokens, joined by single spaces.
  """
  alpha = 'qwertyuiopasdfghjklzxcvbnm'
  marked_tokens = []
  for token in utterance.split(' '):
    prefix_len = 0
    while prefix_len < len(token) and token[prefix_len] in alpha:
      prefix_len += 1
    if emo_scores.get(token[:prefix_len], 0) >= threshold:
      marked_tokens.append(f'[BOLD]{token}[/BOLD]')
    else:
      marked_tokens.append(token)
  return ' '.join(marked_tokens)


# Build the term -> score lookup once instead of scanning the whole lexicon for
# every word. setdefault keeps the first row for a repeated term, matching the
# previous `.iloc[0]` behaviour.
emo_score_by_term = {}
for term, term_score in zip(vad['term'], vad['emo_score']):
  emo_score_by_term.setdefault(term, term_score)

# The character-by-character loop used before only flushed a word when it met a
# non-letter, so a final word made purely of lowercase letters was silently
# dropped. It also left a trailing space after every utterance. Both are gone.
bold_utterances = []
for utterance in dialog_pairs['user_utterance']:
  new_utterance = _highlight_emotional_words(utterance, emo_score_by_term)
  bold_utterances.append(new_utterance)

In [None]:
async def get_all_highlighted_words_calls():
  """Experiment 8: query the model with each utterance's highlighted version.

  The system message is ``emo_highlight_prompt``; the user message is the entry
  of ``bold_utterances`` at the same position as the ``dialog_pairs`` row.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  connector = aiohttp.TCPConnector(
      limit=20,
      limit_per_host=20,
      keepalive_timeout=30,
    )
  timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
    jobs = []
    for ind in range(dialog_pairs.shape[0]):
      sys_prompt = emo_highlight_prompt
      # Send this row's own highlighted text. The global `new_utterance` used
      # before held only the last utterance, so every request got the same message.
      user_prompt = bold_utterances[ind]
      body = {
          "model": model_ds,
          "messages": [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt}
          ],
          "max_tokens": 200
      }
      jobs.append(asyncio.create_task(get_response(session, body)))
      # Throttle on this loop's own counter; `i` was a leftover global from another cell.
      if ind % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*jobs, desc="Processing requests")

In [None]:
# Run experiment 8 and store the replies in `ds_highlighted_words`.
responses = await get_all_highlighted_words_calls()
dialog_pairs['ds_highlighted_words'] = responses

Processing requests: 100%|██████████| 2602/2602 [17:46<00:00,  2.44it/s]


### Эксперимент 9

In [None]:
def improve_answer_prompt(answer):
  """Build the system prompt that asks the model to improve an existing answer.

  Args:
    answer: the answer text to be improved, interpolated into the prompt.

  Returns:
    The prompt text with ``answer`` interpolated.
  """
  prompt_lines = [
      '',
      'Your task now is to improve the following answer so that it is more expressive, empathetic, fluent and relevant to the user"s message, that you will get right after.',
      'Here is a more detailed description of each characteristic:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f'The answer to improve, according to the above-mentioned characteristics: {answer}.',
      'Do not provide any additional comments: return just the improved answer.',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
async def get_all_impr_calls(column_to_improve):
  """Ask the model to improve the answers stored in one column of ``dialog_pairs``.

  Args:
    column_to_improve: name of the ``dialog_pairs`` column holding the answers
      passed to ``improve_answer_prompt``.

  Returns:
    The gathered results of all scheduled ``get_response`` tasks.
  """
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  request_timeout = aiohttp.ClientTimeout(total=120)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=request_timeout) as session:
    pending = []
    for row_idx in range(dialog_pairs.shape[0]):
      row = dialog_pairs.iloc[row_idx]
      messages = [
          {"role": "system", "content": improve_answer_prompt(row[column_to_improve])},
          {"role": "user", "content": row["user_utterance"]},
      ]
      payload = {"model": model_ds, "messages": messages, "max_tokens": 200}
      pending.append(asyncio.create_task(get_response(session, payload)))
      # Pause briefly after rows 0, 10, 20, ... to pace task creation.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*pending, desc="Processing requests")

In [None]:
# Experiment 9, first pass: improve the base answers.
responses = await get_all_impr_calls('ds_base')
dialog_pairs['ds_impr1'] = responses
# Second pass: improve the already-improved answers once more.
responses = await get_all_impr_calls('ds_impr1')
dialog_pairs['ds_impr2'] = responses

Processing requests: 100%|██████████| 2602/2602 [20:29<00:00,  2.12it/s]


## Combination

In [None]:
def create_improve_prompt(user_message, gpt_answer):
  """Build the prompt asking for a better answer based on a first draft.

  Args:
    user_message: the friend's message being answered.
    gpt_answer: the first version of the answer.

  Returns:
    The prompt text with both values interpolated.
  """
  prompt_lines = [
      '',
      "Imagine that you are the person responding to the friend's message. You will get the friend's message and the first version of the asnwer to this message.",
      "Your task is to create an empathetic, relevant and fluent answer. When creating this answer, you should base it on the first version of the answer that I've provided you with. You can use some words and phrases from it that you consider appropriate.",
      'However, remember that your primary task is to create an empathetic, relevant and fluent answer',
      'Here is what I mean behind empathetic, relevant and fluent:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f"User's message: {user_message}.",
      f'First version of the answer: {gpt_answer}.',
      'Do not provide any additional comments: return just the improved answer.',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
def create_combine_answers_prompt(user_message, answ1, answ2):
  """Build the prompt asking the model to merge two candidate answers.

  Args:
    user_message: the friend's message being answered.
    answ1: first candidate answer.
    answ2: second candidate answer.

  Returns:
    The prompt text with all three values interpolated.
  """
  prompt_lines = [
      '',
      "Imagine that you are the person responding to the close friend's message. You will get the friend's message and two answers to this message.",
      'Your task is to to combine them to create an empathetic, relevant and fluent answer. You can use some words and phrases from the answers that you consider appropriate. Your key aim is to make empathy as high as possible.',
      'However, remember that your primary task is to create an empathetic, relevant and fluent answer',
      'Here is what I mean behind empathetic, relevant and fluent:',
      'empathetic: does the responses show understanding of the feelings of the person talking about their experience?',
      'relevant: does the responses seem appropriate to the conversation? Are they on-topic?',
      'fluent: is it possible to understand the responses? Does the language seem accurate?',
      f"User's message: {user_message}.",
      f'First answers: {answ1}.',
      f'Second answers: {answ2}.',
      'Do not provide any additional comments: return just the final answer.',
      '',
  ]
  # Every line of the prompt is separated by a newline plus a two-space indent.
  return '\n  '.join(prompt_lines)

In [None]:
# gpt_client = OpenAI(api_key=API_KEY, base_url="https://api.openai.com/v1/chat/completions")
import os

# Chat-completions endpoint used for the raw aiohttp requests to OpenAI.
BASE_URL_GPT = "https://api.openai.com/v1/chat/completions"
# The header used to be a bare "Bearer" with no token, which cannot authenticate.
# Read the key from the environment so it never lives in the notebook. When
# OPENAI_API_KEY is unset the value is still "Bearer", exactly as before.
_openai_api_key = os.environ.get("OPENAI_API_KEY", "")
headers_gpt = {
    "Authorization": f"Bearer {_openai_api_key}".strip(),
    "Content-Type":  "application/json",
    }
model_gpt = 'gpt-4.1-nano-2025-04-14'

In [None]:
async def get_response_gpt(session, body, max_retries=None):
  """POST one chat-completions request and return the text of the first choice.

  Args:
    session: an open aiohttp client session used to send the request.
    body: JSON-serialisable request payload (model, messages, max_tokens, ...).
    max_retries: maximum number of retries after a failed attempt. ``None`` (the default)
      keeps the original behaviour of retrying indefinitely.

  Returns:
    The ``choices[0].message.content`` field of the JSON response.

  Raises:
    The last caught exception once ``max_retries`` is exhausted (never when it is ``None``).
  """
  failed_attempts = 0
  while True:
    try:
      # `sem` is defined elsewhere in the notebook — presumably an asyncio.Semaphore that
      # caps the number of concurrent requests; TODO confirm.
      async with sem:
        # Post to the endpoint configured in BASE_URL_GPT (the old `gpt_client` name only
        # survives in a commented-out line).
        async with session.post(BASE_URL_GPT, json=body, headers=headers_gpt) as r:
          r.raise_for_status()
          j = await r.json()
          return j["choices"][0]["message"]["content"]
    except Exception:
      # Any failure (HTTP error, timeout, malformed JSON) is retried after a short pause.
      # NOTE: with max_retries=None a permanent error such as a missing API key loops forever.
      failed_attempts += 1
      if max_retries is not None and failed_attempts > max_retries:
        raise
      await asyncio.sleep(0.2)

In [None]:
async def get_all_combine_calls():
  """Request a combined answer for every row of `dialog_pairs` using `model_ds`.

  For each row, the prompt from `create_combine_answers_prompt` (user utterance plus the
  `deepseek_exp1` and `gpt_exp2` answers) is submitted through `get_response`.

  Returns:
    The result of `tqdm_asyncio.gather` over one task per row.
  """
  session_timeout = aiohttp.ClientTimeout(total=120)
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=session_timeout) as session:
    tasks = []
    for row_idx in range(len(dialog_pairs)):
      row = dialog_pairs.iloc[row_idx]
      prompt = create_combine_answers_prompt(
          row["user_utterance"], row["deepseek_exp1"], row["gpt_exp2"]
      )
      payload = {
          "model": model_ds,
          "messages": [{"role": "system", "content": prompt}],
          "max_tokens": 200,
      }
      tasks.append(asyncio.create_task(get_response(session, payload)))
      # Short pause after rows 0, 10, 20, ... so tasks are not all scheduled at once.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*tasks, desc="Processing requests")

In [None]:
# Run the combine requests for all rows; top-level `await` relies on the notebook's event loop.
responses = await get_all_combine_calls()

Processing requests: 100%|██████████| 2602/2602 [17:36<00:00,  2.46it/s]


In [None]:
# Store the replies from the previous cell as a new column.
# Assumes `responses` is still in row order and was not overwritten by another cell — TODO confirm.
dialog_pairs['combine_by_ds'] = responses

In [None]:
async def get_all_gpt_improve_calls():
  """Ask `model_ds` to improve the `gpt_exp2` answer of every row in `dialog_pairs`.

  For each row, the prompt from `create_improve_prompt` (user utterance plus the `gpt_exp2`
  answer) is submitted through `get_response`.

  Returns:
    The result of `tqdm_asyncio.gather` over one task per row.
  """
  session_timeout = aiohttp.ClientTimeout(total=120)
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=session_timeout) as session:
    tasks = []
    for row_idx in range(len(dialog_pairs)):
      row = dialog_pairs.iloc[row_idx]
      prompt = create_improve_prompt(row["user_utterance"], row["gpt_exp2"])
      payload = {
          "model": model_ds,
          "messages": [{"role": "system", "content": prompt}],
          "max_tokens": 200,
      }
      tasks.append(asyncio.create_task(get_response(session, payload)))
      # Short pause after rows 0, 10, 20, ... so tasks are not all scheduled at once.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*tasks, desc="Processing requests")

In [None]:
# Run the improve requests for all rows; note that this rebinds the shared `responses` name.
responses = await get_all_gpt_improve_calls()

Processing requests: 100%|██████████| 2602/2602 [14:43<00:00,  2.94it/s]


In [None]:
# Store the replies from the previous cell as a new column.
# Assumes `responses` is still in row order and was not overwritten by another cell — TODO confirm.
dialog_pairs['ds_additional_2'] = responses

In [None]:
async def get_all_combine_calls_gpt():
  """Request a combined answer for every row of `dialog_pairs` using `model_gpt`.

  For each row, the prompt from `create_combine_answers_prompt` (user utterance plus the
  `deepseek_exp1` and `gpt_exp2` answers) is submitted through `get_response_gpt`.

  Returns:
    The result of `tqdm_asyncio.gather` over one task per row.
  """
  session_timeout = aiohttp.ClientTimeout(total=120)
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=session_timeout) as session:
    tasks = []
    for row_idx in range(len(dialog_pairs)):
      row = dialog_pairs.iloc[row_idx]
      prompt = create_combine_answers_prompt(
          row["user_utterance"], row["deepseek_exp1"], row["gpt_exp2"]
      )
      payload = {
          "model": model_gpt,
          "messages": [{"role": "system", "content": prompt}],
          "max_tokens": 200,
      }
      tasks.append(asyncio.create_task(get_response_gpt(session, payload)))
      # Short pause after rows 0, 10, 20, ... so tasks are not all scheduled at once.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*tasks, desc="Processing requests")

In [None]:
# Run the GPT combine requests for all rows; note that this rebinds the shared `responses` name.
responses = await get_all_combine_calls_gpt()

Processing requests: 100%|██████████| 2602/2602 [11:55<00:00,  3.64it/s]


In [None]:
# Store the replies from the previous cell as a new column.
# Assumes `responses` is still in row order and was not overwritten by another cell — TODO confirm.
dialog_pairs['combine_by_gpt'] = responses

In [None]:
async def get_all_improve_calls_deepseek_by_gpt():
  """Ask `model_gpt` to improve the `deepseek_exp1` answer of every row in `dialog_pairs`.

  For each row, the prompt from `create_improve_prompt` (user utterance plus the
  `deepseek_exp1` answer) is submitted through `get_response_gpt`.

  Returns:
    The result of `tqdm_asyncio.gather` over one task per row.
  """
  session_timeout = aiohttp.ClientTimeout(total=120)
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=session_timeout) as session:
    tasks = []
    for row_idx in range(len(dialog_pairs)):
      row = dialog_pairs.iloc[row_idx]
      prompt = create_improve_prompt(row["user_utterance"], row['deepseek_exp1'])
      payload = {
          "model": model_gpt,
          "messages": [{"role": "system", "content": prompt}],
          "max_tokens": 200,
      }
      tasks.append(asyncio.create_task(get_response_gpt(session, payload)))
      # Short pause after rows 0, 10, 20, ... so tasks are not all scheduled at once.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*tasks, desc="Processing requests")

In [None]:
# Run the GPT improve requests for all rows; note that this rebinds the shared `responses` name.
responses = await get_all_improve_calls_deepseek_by_gpt()

Processing requests: 100%|██████████| 2602/2602 [06:55<00:00,  6.26it/s]


In [None]:
# Store the replies from the previous cell as a new column.
# Assumes `responses` is still in row order and was not overwritten by another cell — TODO confirm.
dialog_pairs['improve_ds_by_gpt'] = responses

## Метрики

In [None]:
!pip install evaluate
!pip install sacrebleu
!pip install bert_score

from evaluate import load

bleu = load("sacrebleu")
bertscore = load('bertscore')

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3
Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

In [None]:
# NOTE(review): the recorded output of this cell shows pip reporting that torch 2.6.0+cu124
# requires sympy==1.13.1, so this upgrade leaves the environment with a declared conflict.
# The reason for the upgrade is not stated in the notebook — TODO confirm it is still needed.
!pip install --upgrade sympy

Collecting sympy
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m119.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sympy
  Attempting uninstall: sympy
    Found existing installation: sympy 1.13.1
    Uninstalling sympy-1.13.1:
      Successfully uninstalled sympy-1.13.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.6.0+cu124 requires sympy==1.13.1; python_version >= "3.9", but you have sympy 1.14.0 which is incompatible.[0m[31m
[0mSuccessfully installed sympy-1.14.0


In [None]:
from collections import Counter
from itertools import islice

def distinct_n(sentences, n=1):
    """
    Corpus-level distinct-n: number of unique n-grams divided by total n-grams.

    sentences : iterable[str] | iterable[list[str]]
        Either raw strings (tokenised with .split()) or pre-tokenised lists/tuples
        (used as-is). Any other value is converted with str() and then split.
    n : int
        n-gram order (1, 2, ...)
    Returns float in [0,1]; 0.0 when there are no sentences or no n-grams.
    """
    # Materialise first so that generators, pandas Series and arrays are all accepted
    # (truth-testing a Series directly would raise).
    sentences = list(sentences)
    if not sentences:
        return 0.0

    total_ngrams = 0
    uniq_ngrams = set()

    for sentence in sentences:
        # Tokenise only raw text; pre-tokenised input must not go through str(),
        # which would turn the list repr (brackets, quotes, commas) into tokens.
        if isinstance(sentence, (list, tuple)):
            tokens = list(sentence)
        else:
            tokens = str(sentence).split()

        ngram_count = len(tokens) - n + 1
        if ngram_count <= 0:
            # Sentence is shorter than n tokens: contributes nothing.
            continue
        total_ngrams += ngram_count
        uniq_ngrams.update(tuple(tokens[i:i + n]) for i in range(ngram_count))

    return len(uniq_ngrams) / total_ngrams if total_ngrams else 0.0

In [None]:
def format_judge_prompt(question, answer):
  """Build the LLM-as-judge prompt for scoring one answer.

  Args:
    question: the message the answer was written for (interpolated after "Message:").
    answer: the answer to be scored (interpolated after "Answer:").

  Returns:
    The formatted prompt, which asks for three comma-separated scores from 1 to 5
    (empathy, relevance, fluency).
  """
  judge_prompt = f'''
  Your task now is to evaluate the following answer by 3 parameters: empathy, relevance and fluency.
  You will be also provided with the message, which the answer was written for. Use it for evaluating empathy and relevance parameters.
  This parameters are characterised as follows:
  empathy: did the responses show understanding of the feelings of the person talking about their experience?
  relevance: did the responses seem appropriate to the conversation? Were they on-topic?
  fluency: could you understand the responses? Did the language seem accurate?
  Evaluate each of these parameter by providing the number from 1 to 5 inclusive. The format of your answer should be the following: 3 numbers (empathy, relevance and fluency correspondingly), separated by commas without spaces.
  Message: {question}.
  Answer: {answer}.
  '''
  return judge_prompt

In [None]:
def get_metrics(test_answers, generated_answers, user_prompts):
  """Compute the automatic text metrics for one set of generated answers.

  Args:
    test_answers: reference answers, one per example.
    generated_answers: model answers, aligned with `test_answers`.
    user_prompts: unused; kept so existing callers do not break.

  Returns:
    A 4-tuple ``(bleu_score, bert_f1, distinct_1, distinct_2)``: the raw result of
    `bleu.compute`, the per-example ``"f1"`` values from `bertscore.compute`, and the
    distinct-1 / distinct-2 ratios of the generated answers.
  """
  # Normalise both inputs to plain lists so every metric sees the same sequence type.
  reference_texts = list(test_answers)
  predictions = list(generated_answers)

  # sacreBLEU takes a list of references per prediction.
  bleu_score = bleu.compute(predictions=predictions,
                            references=[[reference] for reference in reference_texts])

  # Use the GPU when one is present instead of failing on CPU-only runtimes.
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
  bert_score = bertscore.compute(predictions=predictions,
                                 references=reference_texts,
                                 lang="en",
                                 device=device)

  distinct_1 = distinct_n(predictions, 1)
  distinct_2 = distinct_n(predictions, 2)
  return bleu_score, bert_score["f1"], distinct_1, distinct_2

In [None]:
# Endpoint, request headers and model name used for the OpenAI chat-completions calls.
BASE_URL_GPT = "https://api.openai.com/v1/chat/completions"
# The API key is read from the OPENAI_API_KEY environment variable so that it is never
# stored in the notebook. When the variable is unset the header degrades to a bare "Bearer".
headers_gpt = {
    "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY', '')}".strip(),
    "Content-Type":  "application/json",
    }
model_gpt = 'gpt-4.1-nano-2025-04-14'

In [None]:
async def get_response_gpt(session, body, max_retries=None):
  """POST one chat-completions request and return the text of the first choice.

  Args:
    session: an open aiohttp client session used to send the request.
    body: JSON-serialisable request payload (model, messages, max_tokens, ...).
    max_retries: maximum number of retries after a failed attempt. ``None`` (the default)
      keeps the original behaviour of retrying indefinitely.

  Returns:
    The ``choices[0].message.content`` field of the JSON response.

  Raises:
    The last caught exception once ``max_retries`` is exhausted (never when it is ``None``).
  """
  failed_attempts = 0
  while True:
    try:
      # `sem` is defined elsewhere in the notebook — presumably an asyncio.Semaphore that
      # caps the number of concurrent requests; TODO confirm.
      async with sem:
        # Post to the endpoint configured in BASE_URL_GPT (the old `gpt_client` name only
        # survives in a commented-out line).
        async with session.post(BASE_URL_GPT, json=body, headers=headers_gpt) as r:
          r.raise_for_status()
          j = await r.json()
          return j["choices"][0]["message"]["content"]
    except Exception:
      # Any failure (HTTP error, timeout, malformed JSON) is retried after a short pause.
      # NOTE: with max_retries=None a permanent error such as a missing API key loops forever.
      failed_attempts += 1
      if max_retries is not None and failed_attempts > max_retries:
        raise
      await asyncio.sleep(0.2)

In [None]:
async def get_all_eval_calls(column_name):
  """Ask `model_gpt` to judge the answers stored in one column of `dialog_pairs`.

  Args:
    column_name: name of the `dialog_pairs` column holding the answers to score.

  Returns:
    The result of `tqdm_asyncio.gather` over one `get_response_gpt` task per row;
    each prompt is built by `format_judge_prompt` from the user utterance and the answer.
  """
  session_timeout = aiohttp.ClientTimeout(total=120)
  tcp_connector = aiohttp.TCPConnector(limit=20, limit_per_host=20, keepalive_timeout=30)
  async with aiohttp.ClientSession(connector=tcp_connector, timeout=session_timeout) as session:
    tasks = []
    for row_idx in range(len(dialog_pairs)):
      row = dialog_pairs.iloc[row_idx]
      prompt = format_judge_prompt(row['user_utterance'], row[column_name])
      payload = {
          "model": model_gpt,
          "messages": [{"role": "system", "content": prompt}],
          "max_tokens": 100,
      }
      tasks.append(asyncio.create_task(get_response_gpt(session, payload)))
      # Short pause after rows 0, 10, 20, ... so tasks are not all scheduled at once.
      if row_idx % 10 == 0:
        await asyncio.sleep(0.1)
    return await tqdm_asyncio.gather(*tasks, desc="Processing requests")

In [None]:
# Answer columns of `dialog_pairs` to evaluate: the four post-processing columns first,
# then every experiment column for each model, in the same order as before.
# Built with a comprehension so that no loop variable (previously `model` and `column`)
# leaks into the notebook namespace, where it could overwrite an existing `model` object.
_EXPERIMENT_SUFFIXES = ['exp1', 'exp2', 'exp3.1', 'exp3.2', 'exp3.3', 'exp3.4', 'exp4.1', 'exp4.2',
                        'exp4.3', 'exp4.4', 'exp5', 'exp6', 'exp7', 'exp8', 'exp9.1', 'exp9.2']
columns = ['improve_ds_by_gpt', 'combine_by_gpt', 'ds_additional_2', 'combine_by_ds']
columns += [
    f'{_model_name}_{_suffix}'
    for _model_name in ['mistral', 'gpt', 'deepseek']
    for _suffix in _EXPERIMENT_SUFFIXES
]

In [None]:
for column in columns:
    responses = await get_all_eval_calls(column)
    empathies = []
    relevances = []
    fluencies = []
    for response in responses:
      empathy, relevance, fluency = map(int, response.split(','))
      empathies.append(empathy)
      relevances.append(relevance)
      fluencies.append(fluency)

    dialog_pairs[f'{column}_emp'] = empathies
    dialog_pairs[f'{column}_rel'] = relevances
    dialog_pairs[f'{column}_flu'] = fluencies

In [None]:
# For every evaluated column, collect the automatic metrics followed by the mean judge scores.
metrics = {}
for column in columns:
    text_scores = get_metrics(dialog_pairs['answer'], dialog_pairs[column], dialog_pairs['user_utterance'])
    judge_means = [dialog_pairs[f'{column}_{suffix}'].mean() for suffix in ('emp', 'rel', 'flu')]
    # Order: BLEU result, BERTScore f1 values, distinct-1, distinct-2, empathy, relevance, fluency.
    metrics[column] = [*text_scores, *judge_means]

In [None]:
# Reduce the raw metric outputs to scalars: BLEU -> its 'score' field, BERTScore -> mean F1.
# The local is named `f1_scores` so the global `bertscore` metric object is not overwritten
# (rebinding it to a list would break any later call to `get_metrics`).
for key, metric_values in metrics.items():
  # The isinstance guards make the cell safe to re-run after the values are already scalars.
  if isinstance(metric_values[0], dict):
    metric_values[0] = metric_values[0]['score']
  f1_scores = metric_values[1]
  if isinstance(f1_scores, (list, tuple)):
    metric_values[1] = sum(f1_scores) / len(f1_scores) if f1_scores else float('nan')

In [None]:
# One row per evaluated column (kept as the index), one named column per metric.
metric_names = ['BLEU', 'BERTScore', 'Distinct1', 'Distinct2', 'Empathy', 'Relevance', 'Fluency']
metrics = pd.DataFrame(metrics).T
metrics.columns = metric_names
metrics

Unnamed: 0,BLEU,BERTScore,Distinct1,Distinct2,Empathy,Relevance,Fluency
mistral_exp1,0.526482,0.850189,0.075071,0.329157,4.204842,4.521522,4.911606
mistral_exp2,0.475544,0.847856,0.072339,0.32869,4.094927,4.241353,4.823213
mistral_exp3.1,0.445116,0.852638,0.090985,0.364351,4.205995,4.505765,4.892775
mistral_exp3.2,0.471316,0.851873,0.085553,0.352552,4.222521,4.526134,4.907763
mistral_exp3.3,0.489192,0.852463,0.090909,0.364549,4.213682,4.509224,4.904689
mistral_exp3.4,0.445363,0.851931,0.085986,0.353452,4.225211,4.536126,4.902767
mistral_exp4.1,0.402039,0.846355,0.072072,0.34363,4.086856,4.207148,4.783628
mistral_exp4.2,0.378528,0.845104,0.070877,0.340356,4.098386,4.21445,4.781706
mistral_exp4.3,0.398472,0.848441,0.076133,0.350206,4.131822,4.27671,4.821676
mistral_exp4.4,0.424694,0.847764,0.074532,0.345519,4.129131,4.278248,4.814758


In [None]:
from google.colab import drive
# Mount Google Drive (Colab only) so that the path under /content/drive below is available.
drive.mount('/content/drive')
test_save_path = "/content/drive/MyDrive/models_responses_eval.csv"
# Write `dialog_pairs` (answers plus judge scores) to CSV without the index column.
dialog_pairs.to_csv(test_save_path, index=False)

Mounted at /content/drive


In [None]:
# Kept so the cell also works on its own; it does nothing when Drive is already mounted.
drive.mount('/content/drive')
test_save_path = "/content/drive/MyDrive/metrics.csv"
# The experiment names live only in the index of `metrics`, so the index must be written;
# with index=False the saved rows could no longer be attributed to an experiment.
metrics.to_csv(test_save_path, index=True, index_label='experiment')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
