# Importing needed modules

In [85]:
from datasets import list_datasets, load_dataset
import openai
from IPython.display import display, Markdown
import pandas as pd
import re
from tqdm.auto import tqdm
tqdm.pandas()
import backoff

#from electra_classifier import *
%matplotlib inline 
%config InlineBackend.figure_format='retina'

In [91]:
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

# Data preparation

## Importing data

In [2]:
empathetic_dialogue_dataset = load_dataset('empathetic_dialogues')  

Found cached dataset empathetic_dialogues (/home/ahmed_b/.cache/huggingface/datasets/empathetic_dialogues/default/0.1.0/09bbeed3882a67db98c73952fb3c1c9a85af83dc78f81454c2454382fd03f6cf)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
train_df = empathetic_dialogue_dataset["train"].to_pandas()
val_df = empathetic_dialogue_dataset["validation"].to_pandas()
test_df = empathetic_dialogue_dataset["test"].to_pandas()
all_df = pd.concat([train_df, val_df, test_df])
sample_df = all_df.iloc[:100]

In [4]:
print(f"We have {len(train_df)} rows ({int(100*len(train_df)/len(all_df))}%) for training, {len(val_df)} rows ({int(100*len(val_df)/len(all_df))}%) for validation and {len(test_df)} rows ({int(100*len(test_df)/len(all_df))}%) for testing.")

We have 76673 rows (76%) for training, 12030 rows (12%) for validation and 10943 rows (10%) for testing.


In [5]:
df = train_df
#df = all_df
#df = sample_df
df.head()

Unnamed: 0,conv_id,utterance_idx,context,prompt,speaker_idx,utterance,selfeval,tags
0,hit:0_conv:1,1,sentimental,I remember going to the fireworks with my best...,1,I remember going to see the fireworks with my ...,5|5|5_2|2|5,
1,hit:0_conv:1,2,sentimental,I remember going to the fireworks with my best...,0,Was this a friend you were in love with_comma_...,5|5|5_2|2|5,
2,hit:0_conv:1,3,sentimental,I remember going to the fireworks with my best...,1,This was a best friend. I miss her.,5|5|5_2|2|5,
3,hit:0_conv:1,4,sentimental,I remember going to the fireworks with my best...,0,Where has she gone?,5|5|5_2|2|5,
4,hit:0_conv:1,5,sentimental,I remember going to the fireworks with my best...,1,We no longer talk.,5|5|5_2|2|5,


## Data Cleaning

Some rows contain utterances that are mixed or duplicated

In [6]:
full_text = ""
indexes_to_drop = []
for i in tqdm(range(len(df))):
    utterance = df.iloc[i].utterance
    if "|" in utterance[:-1]: # excluding the sentenses finishing with the character "|" 
        text_starting_from_2nd_row = utterance[utterance.index('\n')+1:]
        first_row = ','.join(df.loc[i].astype(str).tolist())
        full_text = full_text + "\n" + first_row + "\n"+ text_starting_from_2nd_row
        indexes_to_drop.append(i)
full_text = full_text[1: ] # Removing the initial "\n"
df = df.drop(index=indexes_to_drop)

  0%|          | 0/76673 [00:00<?, ?it/s]

In [7]:
print(f"We dropped {len(indexes_to_drop)} bad rows with bad utterance formatting. We now have {len(df)} clean rows.")

We dropped 51 bad rows with bad utterance formatting. We now have 76622 clean rows.


In [8]:
grouped_by_df = df.groupby(["conv_id", "context"])["utterance"].apply(list).to_frame().reset_index()
grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo..."
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,..."
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...


In [9]:
indexes_with_only_1_utterance = grouped_by_df[grouped_by_df.utterance.apply(lambda x: len(x))<2].index
print(f"We have {len(indexes_with_only_1_utterance)} conversations with only 1 utterance. We're gonna drop those")

We have 13 conversations with only 1 utterance. We're gonna drop those


In [10]:
grouped_by_df = grouped_by_df.drop(indexes_with_only_1_utterance, axis=0)

We will remove the last utterance when there is an odd number of utterances in a conversation

In [11]:
def remove_last_odd_utternace(utterance):
    if len(utterance)%2:
        return utterance[:-1]
    else: 
        return utterance

In [12]:
grouped_by_df["utterance"] = grouped_by_df.utterance.apply(lambda x: remove_last_odd_utternace(x))

In [13]:
grouped_by_df

Unnamed: 0,conv_id,context,utterance
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo..."
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,..."
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...
...,...,...,...
17788,hit:9999_conv:19999,apprehensive,[So I went skydiving for the first time the ot...
17789,hit:999_conv:1998,confident,[I believe I did rather well on my Law School ...
17790,hit:999_conv:1999,devastated,[I was shocked when Lebron left the cavs again...
17791,hit:99_conv:198,ashamed,"[I cheated on a test. I am very ashamed., oh n..."


In [14]:
def count_sentences(text):
    # Split the text into sentences using regular expressions
    sentences = re.split('[.!?]+', text)
    
    # Count the number of sentences
    num_sentences = len(sentences)
    
    # Return the result
    return num_sentences

import matplotlib.pyplot as plt
number_of_sentences_list = df.utterance.apply(lambda x: count_sentences(x)).to_list()
less_than_2_sentences_percentage = int(100*len([x for x in number_of_sentences_list if x <= 2]) /len(number_of_sentences_list))
print(f"More than {less_than_2_sentences_percentage}% of the uternaces contain 2 sentences or less.") 

number_of_sentences_less_than_10 = [x for x in number_of_sentences_list if x <= 10]
#plt.hist(number_of_sentences_less_than_10, bins=50)

More than 58% of the uternaces contain 2 sentences or less.


Most utterances are 2 sentences or less (58%).

In [15]:
def repair_sentence(sentence):
    # Replace _comma_ with ,
    sentence = re.sub(r'_comma_', ',', sentence)
    # Replace \' with '
    sentence = re.sub(r"\\'", "'", sentence)
    # Remove double quotes
    sentence = sentence.replace('"', '')
    return sentence

In [16]:
print(f"We have {len(grouped_by_df)} conversations in total")

We have 17780 conversations in total


# PAID ChatGPT API !!

But First, let's import the ELECTRA classifier

In [17]:
print("Preparing Electra Classifier...")
from electra_classifier import *

best_model_path = "electra_cls/lightning_logs/version_18/checkpoints/epoch=9-step=3629.ckpt"
emotion_categories = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring',
       'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval',
       'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
       'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
       'relief', 'remorse', 'sadness', 'surprise', 'neutral']

MODEL_NAME = "google/electra-base-discriminator"
tokenizer = ElectraTokenizer.from_pretrained(MODEL_NAME)

trained_model = EmotionClassifier.load_from_checkpoint(
    # trainer.checkpoint_callback.best_model_path,
    best_model_path,
    n_classes=len(emotion_categories)
)

def predict_emotion_and_probability(text):
    encoding = tokenizer(
          text,
          max_length=64, 
          truncation=True,
          padding="max_length",
          add_special_tokens=True,
          return_token_type_ids=False,
          return_attention_mask=True,
          return_tensors="pt"
      )
    outputs = trained_model(**encoding)
    probabilities = list(torch.softmax(outputs, dim=-1).detach().numpy().flatten())
    emotion_idx = torch.argmax(outputs, dim=-1).item() 
    predicted_emotion = emotion_categories[emotion_idx]

    emotion_categories_list = list(emotion_categories)
    probabilities, emotion_categories_list = zip(*sorted(zip(probabilities, emotion_categories_list)))
    probabilities = probabilities[::-1]
    emotion_categories_list = emotion_categories_list[::-1]
    emotion_probability = round(100*probabilities[0], 2)#:.2f

    return predicted_emotion, emotion_probability

print("Electra Classifier is ready!")

Preparing Electra Classifier...


Global seed set to 42
Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraClassifier: ['discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraClassifier from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraClassifier from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraClassifier were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias'

Electra Classifier is ready!


In [18]:
sample_text = "I do! I was so happy when I opened the box and that fat mofo jumped out!"
predict_emotion_and_probability(sample_text)

('joy', 98.35)

In [19]:
def show_formatted_choice(unformatted_text):
  display(Markdown(unformatted_text["choices"][0]["message"]["content"]))

## Calling the API

In [20]:
API_KEY = "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"
openai.api_key = API_KEY

## Testing with only 1 conversation

### Non-emotional version

In [108]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def call_chatgpt_no_emotion(utterances):
    # Prepare the messages to be sent to ChatGPT
    messages = []
    for utt_numb, utterance in enumerate(utterances[:-1]):
        if not utt_numb%2: 
            messages.append({"role": "user", "content": repair_sentence(utterance)})
        else:
            messages.append({"role": "assistant", "content": repair_sentence(utterance)})  
    messages.append({"role": "assistant", "content": "I'll reply with less than 2 sentences: "})
            
    # Calling ChatGPT to complete the conversation
    completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo", 
      messages=messages
    )
    return completion

In [111]:
def complete_with_non_emotional_chatgpt(utterances):
    return call_chatgpt_no_emotion(utterances)["choices"][0]["message"]["content"]

In [22]:
### EXAMPLE CONVERSATION
conversation = grouped_by_df.utterance.iloc[7]

In [23]:
display(conversation)

['My little cousin gifted me for my birthday',
 'Very sweet of him. Did he surprise you?',
 'Yeah definitely. it was very pleasant one at that. he earned that money while doing chores at home',
 'Nice! Hes a sweet kid. I bet that brought you joy as well.']

In [24]:
chatgpt_response_no_emotion = call_chatgpt_no_emotion(conversation)

In [25]:
show_formatted_choice(chatgpt_response_no_emotion)

That's so heartwarming to hear! It shows his dedication and thoughtfulness towards you.

In [26]:
non_emotional_emotion, non_emotional_probability = predict_emotion_and_probability(chatgpt_response_no_emotion["choices"][0]["message"]["content"])

In [27]:
print(f'The non-emotional version gives a response with the "{non_emotional_emotion}" ({non_emotional_probability}% confidence)')

The non-emotional version gives a response with the "admiration" (99.5% confidence)


### Emotional version

In [96]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def call_emotional_chatgpt(utterances):
    # Get emotion from last utterance
    emotion, probability = predict_emotion_and_probability(utterances[-2])
    
    # Prepare the messages to be sent to ChatGPT
    messages = []
    for utt_numb, utterance in enumerate(utterances[:-1]):
        if not utt_numb%2: 
            messages.append({"role": "user", "content": repair_sentence(utterance)})
        else:
            messages.append({"role": "assistant", "content": repair_sentence(utterance)})
    
    # Push ChatGPT to be brief and use the predicted emotion
    messages.append({"role": "assistant", "content": f"Looks like you're feeling {emotion}. I'll reply with less than 2 sentences: "})
            
    # Calling ChatGPT to complete the conversation
    completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo", 
      messages=messages
    )
    return emotion, probability, completion

In [87]:
def complete_with_emotional_chatgpt(utterances):
    emotion, probability, completion = call_emotional_chatgpt(utterances)
    
    return emotion, probability, completion["choices"][0]["message"]["content"]

In [29]:
user_emotion, user_probability, chatgpt_emotional_response = call_emotional_chatgpt(conversation)

In [30]:
display(conversation)

['My little cousin gifted me for my birthday',
 'Very sweet of him. Did he surprise you?',
 'Yeah definitely. it was very pleasant one at that. he earned that money while doing chores at home',
 'Nice! Hes a sweet kid. I bet that brought you joy as well.']

In [31]:
show_formatted_choice(chatgpt_emotional_response)

That's really heartwarming to hear. It shows how much effort and thought he put into getting you a special gift.

In [32]:
emotional_emotion, emotional_probability = predict_emotion_and_probability(chatgpt_emotional_response["choices"][0]["message"]["content"])

In [34]:
print(f'The emotional version detects the "{user_emotion}" emotion from the last utterance and gives a response with the "{emotional_emotion}" ({emotional_probability}% confidence)')

The emotional version detects the "approval" emotion from the last utterance and gives a response with the "caring" (94.41% confidence)


## Testing with only the first 10 conversations:

### Non-emotional version

In [81]:
sample_grouped_by_df = grouped_by_df[:10]
sample_grouped_by_df["chatgpt_response_no_emotion"] = sample_grouped_by_df["utterance"].progress_apply(lambda x: complete_with_non_emotional_chatgpt(x))

  0%|          | 0/10 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [82]:
sample_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,chatgpt_response_no_emotion
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,I'm sorry to hear that. Perhaps you can reach ...
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...",That's awesome to hear! Taking care of pet toa...
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,...",That sounds like a cheerful and bright combina...
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...,I'm sorry to hear that. Perhaps next time you ...
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...,It's okay to make mistakes. You can always mak...


Now analyzing the result..

In [84]:
sample_grouped_by_df["chatgpt_response_detail_no_emotion"] = sample_grouped_by_df.chatgpt_response_no_emotion.progress_apply(lambda x: predict_emotion_and_probability(x))

  0%|          | 0/10 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [85]:
sample_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,chatgpt_response_no_emotion,chatgpt_response_detail_no_emotion
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,I'm sorry to hear that. Perhaps you can reach ...,"(caring, 65.56)"
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...",That's awesome to hear! Taking care of pet toa...,"(admiration, 58.81)"
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,...",That sounds like a cheerful and bright combina...,"(joy, 99.35)"
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...,I'm sorry to hear that. Perhaps next time you ...,"(caring, 50.28)"
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...,It's okay to make mistakes. You can always mak...,"(approval, 48.57)"


In [88]:
sample_grouped_by_df.utterance.iloc[4]

["I was walking on the road. I saw a beggar and i didn't help him.",
 "Wow_comma_ that's kinda mean",
 'yeah i know. i was in a hurry and i am ashamed of myself!!',
 'You think he will be there next time so you can help him?']

In [89]:
sample_grouped_by_df.chatgpt_response_no_emotion.iloc[4]

"It's okay to make mistakes. You can always make up for it next time by showing kindness and helping others in need."

### Emotional version

In [95]:
#sample_grouped_by_df = grouped_by_df[:2]
sample_grouped_by_df["chatgpt_emotional_response"] = sample_grouped_by_df["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/2 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [96]:
sample_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,chatgpt_emotional_response
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,"(neutral, 99.86, I'm sorry to hear that you no..."
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...","(joy, 98.35, That's great to hear! Toads can m..."


In [99]:
sample_grouped_by_df.iloc[1].utterance

['My girlfriend got me a pet toad today!',
 'Do you like toads?',
 'I do! I was so happy when I opened the box and that fat mofo jumped out!',
 'That was nice of your girlfriend_comma_ do you love her?']

In [100]:
sample_grouped_by_df.iloc[1].chatgpt_emotional_response

('joy',
 98.35,
 "That's great to hear! Toads can make great pets as long as you take good care of them. Enjoy your new pet!")

Now analyzing the result..

In [102]:
sample_grouped_by_df["chatgpt_emotional_response_detail"] = sample_grouped_by_df.chatgpt_emotional_response.progress_apply(lambda x: predict_emotion_and_probability(x[2]))

  0%|          | 0/2 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [103]:
sample_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,chatgpt_emotional_response,chatgpt_emotional_response_detail
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,"(neutral, 99.86, I'm sorry to hear that you no...","(sadness, 53.93)"
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...","(joy, 98.35, That's great to hear! Toads can m...","(admiration, 98.22)"


## Testing with 10% - 90% split

### Splitting the data

In [36]:
num_rows = grouped_by_df.shape[0]
# calculate the index of the row to split on
split_idx = int(0.1 * num_rows)
# select the first 10% of rows using iloc
first_10_percent_grouped_by_df = grouped_by_df.iloc[:split_idx]
print(f"The first 10% contains {len(first_10_percent_grouped_by_df)} conversations.")
# select the remaining 90% of rows using iloc
last_90_precent_grouped_by_df = grouped_by_df.iloc[split_idx:]
print(f"The last 90% contains {len(last_90_precent_grouped_by_df)} conversations.")

The first 10% contains 1778 conversations.
The last 90% contains 16002 conversations.


#### Experiment: remove utterances finishing with "neutral" emotion: 

In [37]:
first_10_percent_grouped_by_df["utterance_2_emotion"] = first_10_percent_grouped_by_df["utterance"].progress_apply(lambda x: predict_emotion_and_probability(x[-2]))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [38]:
first_10_percent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,"(neutral, 99.86)"
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...","(joy, 98.35)"
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,...","(neutral, 91.12)"
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...,"(disappointment, 31.49)"
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...,"(embarrassment, 99.49)"


In [39]:
first_10_percent_grouped_by_df[['utterance_minus_2_emotion', 'utterance_minus_2_emotion_conf']] = first_10_percent_grouped_by_df['utterance_2_emotion'].apply(lambda x: pd.Series(x))
first_10_percent_grouped_by_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,"(neutral, 99.86)",neutral,99.86
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...","(joy, 98.35)",joy,98.35
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,...","(neutral, 91.12)",neutral,91.12
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...,"(disappointment, 31.49)",disappointment,31.49
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...,"(embarrassment, 99.49)",embarrassment,99.49
...,...,...,...,...,...,...
1774,hit:11424_conv:22848,jealous,[When i was younger my parents could only affo...,"(neutral, 99.86)",neutral,99.86
1775,hit:11424_conv:22849,content,[After I bought my house I felt happy about my...,"(curiosity, 99.04)",curiosity,99.04
1776,hit:11425_conv:22850,disappointed,[They promoted Sally at my job. Im happy for h...,"(neutral, 99.69)",neutral,99.69
1777,hit:11425_conv:22851,ashamed,[I promised my boss I would go into work on my...,"(approval, 86.57)",approval,86.57


In [41]:
first_10_percent_grouped_by_df[first_10_percent_grouped_by_df.utterance_minus_2_emotion == "neutral"].utterance_minus_2_emotion_conf.describe()

count    417.000000
mean      87.731607
std       17.501684
min       22.990000
25%       83.140000
50%       97.030000
75%       99.520000
max       99.960000
Name: utterance_minus_2_emotion_conf, dtype: float64

In [42]:
first_10_percent_grouped_by_df.utterance_minus_2_emotion.value_counts()

neutral           417
approval          211
admiration        165
joy                95
gratitude          91
sadness            77
excitement         63
disappointment     61
love               56
disapproval        54
amusement          51
optimism           51
fear               46
realization        42
desire             37
curiosity          32
caring             32
surprise           31
nervousness        28
annoyance          24
anger              22
confusion          21
embarrassment      20
remorse            15
disgust            13
relief             12
pride               7
grief               4
Name: utterance_minus_2_emotion, dtype: int64

In [76]:
#first_10_percent_grouped_by_df.to_pickle("ChatEPT/first_10_percent_grouped_by_df")

### Working first on the first 10% of the data

In [59]:
first_10_percent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,"(neutral, 99.86)",neutral,99.86
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...","(joy, 98.35)",joy,98.35
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,...","(neutral, 91.12)",neutral,91.12
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...,"(disappointment, 31.49)",disappointment,31.49
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...,"(embarrassment, 99.49)",embarrassment,99.49


#### Non-emotional version

In [70]:
utterances = first_10_percent_grouped_by_df.utterance.iloc[18]
utterances

['I know I will get my bonus this quarter.',
 "That's exciting! What do you plan to do with it?",
 'I plan to go on a vacation.',
 'Wonderful! Where are you going to go?']

In [71]:
complete_with_non_emotional_chatgpt(utterances)

'Great! Where do you plan to go?'

In [72]:
first_10_percent_grouped_by_df["chatgpt_response_no_emotion"] = first_10_percent_grouped_by_df["utterance"].progress_apply(lambda x: complete_with_non_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [73]:
first_10_percent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_response_no_emotion
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,"(neutral, 99.86)",neutral,99.86,I'm sorry to hear that. It's tough losing a go...
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...","(joy, 98.35)",joy,98.35,That's great to hear! Toads can make wonderful...
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,...","(neutral, 91.12)",neutral,91.12,That sounds like a beautiful combination of co...
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...,"(disappointment, 31.49)",disappointment,31.49,I'm sorry to hear that. It's tough to feel lef...
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...,"(embarrassment, 99.49)",embarrassment,99.49,It's never too late to help someone in need. N...


#### Emotional version

In [79]:
first_10_percent_grouped_by_df["chatgpt_emotional_response"] = first_10_percent_grouped_by_df["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [81]:
first_10_percent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_response_no_emotion,chatgpt_emotional_response
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...,"(neutral, 99.86)",neutral,99.86,I'm sorry to hear that. It's tough losing a go...,"(neutral, 99.86, I'm sorry to hear that. Losin..."
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo...","(joy, 98.35)",joy,98.35,That's great to hear! Toads can make wonderful...,"(joy, 98.35, That's great to hear! Toads can m..."
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,...","(neutral, 91.12)",neutral,91.12,That sounds like a beautiful combination of co...,"(neutral, 91.12, Blue and yellow is a lovely c..."
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...,"(disappointment, 31.49)",disappointment,31.49,I'm sorry to hear that. It's tough to feel lef...,"(disappointment, 31.49, I'm sorry to hear that..."
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...,"(embarrassment, 99.49)",embarrassment,99.49,It's never too late to help someone in need. N...,"(embarrassment, 99.49, That's completely under..."


In [82]:
first_10_percent_grouped_by_df.to_pickle("ChatEPT/first_10_percent_grouped_by_df")

### Working on the last 90% of the data

#### Experiment: remove utterances finishing with "neutral" emotion: 

In [43]:
last_90_precent_grouped_by_df["utterance_2_emotion"] = last_90_precent_grouped_by_df["utterance"].progress_apply(lambda x: predict_emotion_and_probability(x[-2]))

  0%|          | 0/16002 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [44]:
last_90_precent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion
1779,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)"
1780,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)"
1781,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)"
1782,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)"
1783,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)"


In [45]:
last_90_precent_grouped_by_df[['utterance_minus_2_emotion', 'utterance_minus_2_emotion_conf']] = last_90_precent_grouped_by_df['utterance_2_emotion'].apply(lambda x: pd.Series(x))
last_90_precent_grouped_by_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf
1779,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)",neutral,99.85
1780,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)",annoyance,94.36
1781,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)",disappointment,71.76
1782,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)",amusement,92.57
1783,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)",approval,27.76


In [46]:
last_90_precent_grouped_by_df[last_90_precent_grouped_by_df.utterance_minus_2_emotion == "neutral"].utterance_minus_2_emotion_conf.describe()

count    4102.000000
mean       88.093774
std        17.041761
min        20.330000
25%        81.932500
50%        97.470000
75%        99.670000
max        99.970000
Name: utterance_minus_2_emotion_conf, dtype: float64

In [47]:
last_90_precent_grouped_by_df.utterance_minus_2_emotion.value_counts()

neutral           4102
approval          1791
admiration        1322
sadness            729
disappointment     715
joy                668
gratitude          663
excitement         587
disapproval        517
optimism           509
amusement          505
fear               465
love               389
annoyance          382
desire             353
curiosity          287
anger              278
confusion          261
surprise           253
realization        226
nervousness        219
caring             197
embarrassment      174
disgust            137
remorse            101
relief              87
grief               48
pride               37
Name: utterance_minus_2_emotion, dtype: int64

In [76]:
#first_10_percent_grouped_by_df.to_pickle("ChatEPT/first_10_percent_grouped_by_df")

In [49]:
last_90_precent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf
1779,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)",neutral,99.85
1780,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)",annoyance,94.36
1781,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)",disappointment,71.76
1782,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)",amusement,92.57
1783,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)",approval,27.76


#### Splitting the last 90% into 9 batches of 10% each

In [95]:
batch_1 = last_90_precent_grouped_by_df.iloc[:split_idx]
batch_2 = last_90_precent_grouped_by_df.iloc[split_idx:2*split_idx]
batch_3 = last_90_precent_grouped_by_df.iloc[2*split_idx:3*split_idx]
batch_4 = last_90_precent_grouped_by_df.iloc[3*split_idx:4*split_idx]
batch_5 = last_90_precent_grouped_by_df.iloc[4*split_idx:5*split_idx]
batch_6 = last_90_precent_grouped_by_df.iloc[5*split_idx:6*split_idx]
batch_7 = last_90_precent_grouped_by_df.iloc[6*split_idx:7*split_idx]
batch_8 = last_90_precent_grouped_by_df.iloc[7*split_idx:8*split_idx]
batch_9 = last_90_precent_grouped_by_df.iloc[8*split_idx:]

#### Non-emotional version

In [109]:
utterances = last_90_precent_grouped_by_df.utterance.iloc[18]
utterances

['i just stepped on a hairball',
 "Like the kind a cat vomits or came off of a brush? I'm hoping for you it came from a brush",
 'nope it squeezed betwixt my toes',
 'Oh my goodness for a lack of better terms! I would be no good after that']

In [112]:
complete_with_non_emotional_chatgpt(utterances)

"That's not a pleasant experience, but try cleaning the affected area thoroughly to avoid any discomfort or infection."

In [113]:
last_90_precent_grouped_by_df["chatgpt_response_no_emotion"] = last_90_precent_grouped_by_df["utterance"].progress_apply(lambda x: complete_with_non_emotional_chatgpt(x))

  0%|          | 0/16002 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [114]:
last_90_precent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_response_no_emotion
1779,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)",neutral,99.85,That was a responsible way to handle the situa...
1780,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)",annoyance,94.36,"That's okay, you have the right to decline. Yo..."
1781,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)",disappointment,71.76,"That's frustrating. Hopefully, you were able t..."
1782,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)",amusement,92.57,I'm sorry to hear that. It can be embarrassing...
1783,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)",approval,27.76,That's still impressive! Congrats on your achi...


In [115]:
last_90_precent_grouped_by_df.to_pickle("ChatEPT/last_90_precent_grouped_by_df")

#### Emotional version

In [None]:
#last_90_precent_grouped_by_df["chatgpt_emotional_response"] = last_90_precent_grouped_by_df["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

##### Dispatching into 10 batches

In [97]:
batch_1["chatgpt_emotional_response"] = batch_1["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [98]:
batch_2["chatgpt_emotional_response"] = batch_2["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [99]:
batch_3["chatgpt_emotional_response"] = batch_3["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [100]:
batch_4["chatgpt_emotional_response"] = batch_4["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [101]:
batch_5["chatgpt_emotional_response"] = batch_5["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [102]:
batch_6["chatgpt_emotional_response"] = batch_6["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [103]:
batch_7["chatgpt_emotional_response"] = batch_7["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [104]:
batch_8["chatgpt_emotional_response"] = batch_8["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [105]:
batch_9["chatgpt_emotional_response"] = batch_9["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/1778 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [106]:
batch_1.to_pickle("ChatEPT/batch_1")
batch_1.to_pickle("ChatEPT/batch_1")
batch_1.to_pickle("ChatEPT/batch_1")

In [107]:
batch_2.to_pickle("ChatEPT/batch_2")
batch_3.to_pickle("ChatEPT/batch_3")
batch_4.to_pickle("ChatEPT/batch_4")
batch_5.to_pickle("ChatEPT/batch_5")
batch_6.to_pickle("ChatEPT/batch_6")
batch_7.to_pickle("ChatEPT/batch_7")
batch_8.to_pickle("ChatEPT/batch_8")
batch_9.to_pickle("ChatEPT/batch_9")

##### Merging into one dataframe

In [116]:
batch_1.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_emotional_response
1779,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)",neutral,99.85,"(neutral, 99.85, That was responsible of you t..."
1780,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)",annoyance,94.36,"(annoyance, 94.36, That's fine. It's important..."
1781,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)",disappointment,71.76,"(disappointment, 71.76, That's understandable...."
1782,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)",amusement,92.57,"(amusement, 92.57, I'm sorry to hear that. It ..."
1783,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)",approval,27.76,"(approval, 27.76, That's awesome! Second place..."


In [117]:
batch_2.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_emotional_response
3558,hit:1341_conv:2682,anxious,[I am so ready for the first Monday Night Foot...,"(love, 97.52)",love,97.52,"(love, 97.52, That's great! Who are you rootin..."
3559,hit:1341_conv:2683,devastated,[I drew an art piece once and it got torn afte...,"(neutral, 99.24)",neutral,99.24,"(neutral, 99.24, I'm sorry to hear that. It ca..."
3560,hit:1342_conv:2685,jealous,"[I wish I was a musician., I sadly have no mus...","(disapproval, 94.5)",disapproval,94.5,"(disapproval, 94.5, That's okay. It's never to..."
3561,hit:1343_conv:2686,anxious,[I often feel a lot of anxiety about making en...,"(approval, 53.42)",approval,53.42,"(approval, 53.42, That sounds like a good mott..."
3562,hit:1343_conv:2687,angry,[My former 'friend' took my car for a day or t...,"(surprise, 97.0)",surprise,97.0,"(surprise, 97.0, I can understand how shocked ..."


In [None]:
merged_last_90_percent_emotional_df = pd.concat([batch_1, batch_2, batch_3, batch_4, batch_5, batch_6, batch_7, batch_8, batch_9])

In [127]:
merged_last_90_percent_emotional_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_emotional_response
1779,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)",neutral,99.85,"(neutral, 99.85, That was responsible of you t..."
1780,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)",annoyance,94.36,"(annoyance, 94.36, That's fine. It's important..."
1781,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)",disappointment,71.76,"(disappointment, 71.76, That's understandable...."
1782,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)",amusement,92.57,"(amusement, 92.57, I'm sorry to hear that. It ..."
1783,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)",approval,27.76,"(approval, 27.76, That's awesome! Second place..."


In [128]:
last_90_precent_grouped_by_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_response_no_emotion
1779,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)",neutral,99.85,That was a responsible way to handle the situa...
1780,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)",annoyance,94.36,"That's okay, you have the right to decline. Yo..."
1781,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)",disappointment,71.76,"That's frustrating. Hopefully, you were able t..."
1782,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)",amusement,92.57,I'm sorry to hear that. It can be embarrassing...
1783,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)",approval,27.76,That's still impressive! Congrats on your achi...


In [139]:
merged_last_90_percent_all_df = pd.merge(merged_last_90_percent_emotional_df, last_90_precent_grouped_by_df[["conv_id", "chatgpt_response_no_emotion"]], on="conv_id")

In [140]:
merged_last_90_percent_all_df.head()

Unnamed: 0,conv_id,context,utterance,utterance_2_emotion,utterance_minus_2_emotion,utterance_minus_2_emotion_conf,chatgpt_emotional_response,chatgpt_response_no_emotion
0,hit:11426_conv:22853,disgusted,[I went into my shed to clean it and i found a...,"(neutral, 99.85)",neutral,99.85,"(neutral, 99.85, That was responsible of you t...",That was a responsible way to handle the situa...
1,hit:11427_conv:22854,apprehensive,[My sister asked me to go out for drinks with ...,"(annoyance, 94.36)",annoyance,94.36,"(annoyance, 94.36, That's fine. It's important...","That's okay, you have the right to decline. Yo..."
2,hit:11427_conv:22855,furious,[I missed my train yesterday. I was so mad at ...,"(disappointment, 71.76)",disappointment,71.76,"(disappointment, 71.76, That's understandable....","That's frustrating. Hopefully, you were able t..."
3,hit:11428_conv:22856,embarrassed,[Yesterday When I was coming from my office. I...,"(amusement, 92.57)",amusement,92.57,"(amusement, 92.57, I'm sorry to hear that. It ...",I'm sorry to hear that. It can be embarrassing...
4,hit:11428_conv:22857,confident,"[I recently was in a Arcade game tournament. ,...","(approval, 27.76)",approval,27.76,"(approval, 27.76, That's awesome! Second place...",That's still impressive! Congrats on your achi...


In [141]:
merged_last_90_percent_all_df.to_pickle("ChatEPT/merged_last_90_percent_all_df")

In [88]:
last_90_precent_grouped_by_df["chatgpt_emotional_response"] = last_90_precent_grouped_by_df["utterance"].progress_apply(lambda x: complete_with_emotional_chatgpt(x))

  0%|          | 0/16002 [00:00<?, ?it/s]

APIError: Gateway timeout. {"error":{"code":524,"message":"Gateway timeout.","param":null,"type":"cf_gateway_timeout"}} 524 {'error': {'code': 524, 'message': 'Gateway timeout.', 'param': None, 'type': 'cf_gateway_timeout'}} {'Date': 'Mon, 10 Apr 2023 22:09:08 GMT', 'Content-Type': 'application/json', 'Content-Length': '92', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7b5e415a0ba9ca4f-YUL'}

##### Debugging 

It stopped in 

In [68]:
last_90_precent_grouped_by_df.utterance.iloc[2023]

['This woman approached me on Facebook Messenger about how she is seeing my boyfriend behind my back. They work together which is how they know each other. I approached my boyfriend about this but I believe him.',
 'What did he say? ',
 "That he didn't do anything like that.",
 'Always good to trust your spouse_comma_ but I would be on the fence a bit and have my eye out too. ']

In [None]:
last_90_precent_grouped_by_df.head()

In [82]:
last_90_precent_grouped_by_df.to_pickle("ChatEPT/last_10_percent_grouped_by_df")

In [None]:
ddf = pd.read_pickle("")

In [63]:
#utterances = grouped_by_df.utterance.iloc[15691]
utterances = grouped_by_df.utterance.iloc[10]
display(utterances)

['I am not able to pay for my mortgage. I think i will lose my home',
 "Oh wow_comma_ that's not good at all",
 'I know. i lost my job due to automation in the company.',
 'Yeah_comma_ that sounds no good_comma_ what will you do now?']

In [88]:
grouped_by_df

Unnamed: 0,conv_id,context,utterance
0,hit:0_conv:1,sentimental,[I remember going to see the fireworks with my...
1,hit:10000_conv:20000,surprised,"[My girlfriend got me a pet toad today!, Do yo..."
2,hit:10000_conv:20001,impressed,"[I really like the new paint job on my house.,..."
3,hit:10001_conv:20002,lonely,[I went to the skating rink all by myself toda...
4,hit:10002_conv:20004,ashamed,[I was walking on the road. I saw a beggar and...
...,...,...,...
17839,hit:9999_conv:19999,apprehensive,[So I went skydiving for the first time the ot...
17840,hit:999_conv:1998,confident,[I believe I did rather well on my Law School ...
17841,hit:999_conv:1999,devastated,[I was shocked when Lebron left the cavs again...
17842,hit:99_conv:198,ashamed,"[I cheated on a test. I am very ashamed., oh n..."


In [72]:
emotion

'sadness'

In [73]:
probability

85.77

In [54]:
predict_emotion_and_probability(ground_truth)

('admiration', 97.72)

In [56]:
predict_emotion_and_probability(chatgpt_response_string)

('admiration', 66.71)

In [125]:
predict_emotion_from_utterance("It's amazing how much we can learn from those who have experienced different walks of life than our own. I'm glad you had this eye-opening experience.")

'admiration'

In [126]:
predict_emotion_from_utterance("That's great to hear. Sometimes the people we set out to help end up teaching us more than we could have imagined.")

'admiration'

In [118]:
def find_row(sentence):
    for i in range(len(grouped_by_df)):
        utterances = grouped_by_df.utterance.iloc[i]
        if utterances[0].startswith(sentence):
            break
    print(i)

In [119]:
find_row("I visited an orphan")

6575


In [97]:
grouped_by_df.utterance.iloc[15691]

['Why does McDonalds always get my order wrong! I said no lettuce!',
 'Report to their management',
 "Sometimes I think the management don't even care!",
 'Just give it a trial']

# Preparing the Electra classifier

# Preparing the emotion engine

In [7]:
dd_emotion_choice_df = pd.read_pickle("dd_emotion_choice_df")
dd_emotion_choice_no_neutral_df = pd.read_pickle("dd_emotion_choice_no_neutral_df")

In [8]:
dd_emotion_choice = dd_emotion_choice_df.set_index('Initial emotion')["Mostly followed by"].to_dict()
dd_emotion_choice_no_neutral = dd_emotion_choice_no_neutral_df.set_index('Initial emotion')["Mostly followed by"].to_dict()
#display("Correspondance: ", dd_emotion_choice)
#print()
#display("If we exculde the neutral emotions, our chatbot becomes more emotional:", dd_emotion_choice_no_neutral)

In [58]:
def choose_response_emotion(utterance, emotion_dict):
    predicted_emotion = predict_emotion_from_utterance(utterance)
    respone_emotion = emotion_dict[predicted_emotion]
    return response_emotion

In [70]:
def prepare_beginning_of_response(utterance, emotion_dict):
    response_emotion = choose_response_emotion(utterance, emotion_dict)
    if response_emotion != "neutral":
        return f"My response's gonna be brief and sound full of {response_emotion}:"
    else: 
        return f"My response's gonna be brief and sound {response_emotion}:"

In [82]:
def test_emotion_choice(sample_utterance):
    predicted_emotion = predict_emotion_from_utterance(sample_utterance)
    response_emotion = dd_emotion_choice[predicted_emotion]
    print(f'The predicted emotion is: "{predicted_emotion}"\nand the response should have the "{response_emotion}" emotion')

In [72]:
sample_utterance = "this is disgusting"

In [73]:
prepare_beginning_of_response(sample_utterance, dd_emotion_choice)

"My response's gonna be brief and sound full of curiosity:"

# Preparing the ChatGPT API

In [99]:
API_KEY = "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"
openai.api_key = API_KEY

In [16]:
def show_formatted_choice(unformatted_text):
  display(Markdown(unformatted_text["choices"][0]["message"]["content"]))

In [76]:
def call_chatept(utterance):
    beginning_of_response = prepare_beginning_of_response(utterance, dd_emotion_choice)
    completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo", 
      messages=[
          {
              "role": "user",
              "content": utterance
          },
          {
              "role": "assistant",
              "content": beginning_of_response
          }
      ]
    )
    return completion

In [80]:
utterance = "I am hearing weird noise in the house"
chatept_response = call_chatept(utterance)

In [81]:
show_formatted_choice(chatept_response)

What kind of noise are you hearing? Can you describe it? Is it a consistent sound or intermittent? It's important to identify the source of the sound to ensure it's not a serious issue.

In [85]:
test_emotion_choice(utterance)

The predicted emotion is: "nervousness"
and the response should have the "neutral" emotion


# Testing on DailyDialogue Dataset

In [86]:
from datasets import list_datasets, load_dataset
daily_dialogue_dataset = load_dataset('daily_dialog')  

Found cached dataset daily_dialog (/home/ahmed_b/.cache/huggingface/datasets/daily_dialog/default/1.0.0/1d0a58c7f2a4dab5ed9d01dbde8e55e0058e589ab81fce5c2df929ea810eabcd)


  0%|          | 0/3 [00:00<?, ?it/s]

In [87]:
train_df = daily_dialogue_dataset["train"].to_pandas()
val_df = daily_dialogue_dataset["validation"].to_pandas()
test_df = daily_dialogue_dataset["test"].to_pandas()
all_df = pd.concat([train_df, val_df, test_df])
sample_df = train_df.iloc[:100]

In [95]:
list_of_sample_utterances = sample_df.iloc[0].dialog.tolist()
list_of_sample_utterances

['Say , Jim , how about going for a few beers after dinner ? ',
 ' You know that is tempting but is really not good for our fitness . ',
 ' What do you mean ? It will help us to relax . ',
 " Do you really think so ? I don't . It will just make us fat and act silly . Remember last time ? ",
 " I guess you are right.But what shall we do ? I don't feel like sitting at home . ",
 ' I suggest a walk over to the gym where we can play singsong and meet some of our friends . ',
 " That's a good idea . I hear Mary and Sally often go there to play pingpong.Perhaps we can make a foursome with them . ",
 ' Sounds great to me ! If they are willing , we could ask them to go dancing with us.That is excellent exercise and fun , too . ',
 " Good.Let ' s go now . ",
 ' All right . ']

In [94]:
sample_df.iloc[0].emotion.tolist()

[0, 0, 0, 0, 0, 0, 4, 4, 4, 4]

In [96]:
list_of_electra_emotions = []
for utterance in list_of_sample_utterances:
    list_of_electra_emotions.append(predict_emotion_from_utterance(utterance))
list_of_electra_emotions

['neutral',
 'disapproval',
 'neutral',
 'annoyance',
 'approval',
 'approval',
 'admiration',
 'joy',
 'gratitude',
 'approval']

In [100]:
dict(list(zip(list_of_sample_utterances,list_of_electra_emotions)))

{'Say , Jim , how about going for a few beers after dinner ? ': 'neutral',
 ' You know that is tempting but is really not good for our fitness . ': 'disapproval',
 ' What do you mean ? It will help us to relax . ': 'neutral',
 " Do you really think so ? I don't . It will just make us fat and act silly . Remember last time ? ": 'annoyance',
 " I guess you are right.But what shall we do ? I don't feel like sitting at home . ": 'approval',
 ' I suggest a walk over to the gym where we can play singsong and meet some of our friends . ': 'approval',
 " That's a good idea . I hear Mary and Sally often go there to play pingpong.Perhaps we can make a foursome with them . ": 'admiration',
 ' Sounds great to me ! If they are willing , we could ask them to go dancing with us.That is excellent exercise and fun , too . ': 'joy',
 " Good.Let ' s go now . ": 'gratitude',
 ' All right . ': 'approval'}

In [5]:
user_request = "I am hearing weird noise in the house"
beginning_of_response = "My response is going to be brief and sound scared: "

In [19]:
user_request = "I have a problem"
beginning_of_response = "My response is going to be brief and sound curious: "

In [6]:
chatgpt_response = call_chatgpt(user_request, beginning_of_response)

In [7]:
chatgpt_response

<OpenAIObject chat.completion id=chatcmpl-6wvX1tarEzsUIdYAvy1Wp9tHmA6F4 at 0x7fae4a3d69b0> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "Oh no, that sounds scary! Can you try to identify where the noise is coming from and see if it's something that can be easily explained? If not, it might be a good idea to call someone for help or to investigate further.",
        "role": "assistant"
      }
    }
  ],
  "created": 1679502195,
  "id": "chatcmpl-6wvX1tarEzsUIdYAvy1Wp9tHmA6F4",
  "model": "gpt-3.5-turbo-0301",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 50,
    "prompt_tokens": 32,
    "total_tokens": 82
  }
}

In [8]:
show_formatted_choice(chatgpt_response)

Oh no, that sounds scary! Can you try to identify where the noise is coming from and see if it's something that can be easily explained? If not, it might be a good idea to call someone for help or to investigate further.

In [6]:
completion = openai.ChatCompletion.create(
  model="gpt-3.5-turbo", 
  messages=[
      {
          "role": "user",
          "content": "Tell the world about the ChatGPT API in the style of a pirate."
      }
  ]
)

In [17]:
completion["choices"][0]["message"]["content"]

"\n\nAhoy mateys! Gather 'round and listen closely, for I'm about to tell ye about a treasure hidden in the depths of the digital seas - the ChatGPT API!\n\nThis API be a marvel, a tool that allows ye to integrate natural language processing into yer applications and websites with ease. It be like having yer very own parrot that understands human speech and can do yer bidding.\n\nWith the ChatGPT API, ye can create chatbots that talk like real sailors, answering questions and keeping yer customers happy. Or ye can use it to analyze text and gain insights that ye never thought possible. It be powerful, yet easy to use, like a trusty cutlass in the hands of a seasoned pirate.\n\nAnd the best part, ye ask? It be available for all ye scallywags out there to use! So hoist the Jolly Roger, set sail for the ChatGPT API, and uncover the riches that lie ahead. Yarrr!"

In [19]:
show_formatted_choice(completion)



Ahoy mateys! Gather 'round and listen closely, for I'm about to tell ye about a treasure hidden in the depths of the digital seas - the ChatGPT API!

This API be a marvel, a tool that allows ye to integrate natural language processing into yer applications and websites with ease. It be like having yer very own parrot that understands human speech and can do yer bidding.

With the ChatGPT API, ye can create chatbots that talk like real sailors, answering questions and keeping yer customers happy. Or ye can use it to analyze text and gain insights that ye never thought possible. It be powerful, yet easy to use, like a trusty cutlass in the hands of a seasoned pirate.

And the best part, ye ask? It be available for all ye scallywags out there to use! So hoist the Jolly Roger, set sail for the ChatGPT API, and uncover the riches that lie ahead. Yarrr!