Step 1. Install and import packages

In [272]:
import textwrap
import csv
import json
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import time
import google.generativeai as genai
import re 

from IPython.display import display
from IPython.display import Markdown
import ast 
import pandas as pd
from collections import Counter
import numpy as np
from scipy.spatial import distance
import os
from pathlib import Path

def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

# Or use `os.getenv('GOOGLE_API_KEY')` to fetch an environment variable.
GOOGLE_API_KEY= 'AIzaSyDjbwmKkCKl_f-durMYdtpbHasZRpuUHAs'

genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-1.5-flash')

Step 2. Get the dialog order for all sessions

In [168]:
model.generate_content("hello").text

I0000 00:00:1725945624.422761 20282584 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


'Hello! How can I help you today? \n'

In [273]:
msp = json.load(open('./data/msp_ambigous.json'))
msp[0]

{'id': 'MSP-PODCAST_0001_0008',
 'need_prediction': 'no',
 'emotion': ['Neutral', 'Neutral', 'Sad', 'Other-Inquisitive', 'Neutral'],
 'groundtruth': ["it's it follows. that's the movie. and i know it came out january 27th in the uk. did it come out later there, or...?"],
 'audio': 'Audio/MSP-PODCAST_0001_0008.wav',
 'speaker': '0001'}

In [274]:
index = []
for i in range(len(msp)):
    if msp[i]['need_prediction'] == 'yes':
        index.append(i)
len(index)

4114

In [275]:
def get_label_prob(data):

    emo_labels = ['neu', 'hap', 'ang', 'sad']
    emotion_code_dict = {"Neutral":"neu", "Happy":"hap", "Angry":"ang", "Sad":"sad", "Frustration":"others", "Contempt":"others", "Excitement":"others", "Surprise":"others", "Disgust":"others", "Fear":"others", "Other": "others"}
    num_out_labels = 0
    for item in data:
        amb_labels = []
        if item['need_prediction'] == 'yes':
            for emo in item['emotion']:
                amb_labels.append(emotion_code_dict[emo])

            filtered_labels = [label for label in amb_labels if label in emo_labels]
            for label in amb_labels:
                if label not in emo_labels:
                    num_out_labels += 1


            item['amb_emotion'] = filtered_labels

            emotion_counts = Counter(filtered_labels)
            total_count = sum(emotion_counts.values())
            
            probs = {emo: round(emotion_counts[emo]/total_count,2) for emo in emo_labels}
            item['emotion_probs'] = [probs[emo] for emo in emo_labels]

    return data, num_out_labels

msp, num_out_labels = get_label_prob(msp)
print("Number of instances that their emotions are outside four labels: ", num_out_labels)
msp[5]

Number of instances that their emotions are outside four labels:  0


{'id': 'MSP-PODCAST_0001_0019',
 'need_prediction': 'yes',
 'emotion': ['Happy', 'Happy', 'Neutral', 'Neutral', 'Happy'],
 'groundtruth': ["can't. i had someone in my media studies department, my degree, which was associated with the film production."],
 'audio': 'Audio/MSP-PODCAST_0001_0019.wav',
 'speaker': '0001',
 'amb_emotion': ['hap', 'hap', 'neu', 'neu', 'hap'],
 'emotion_probs': [0.4, 0.6, 0.0, 0.0]}

In [276]:
def get_label_dict(data):
    for i in range(len(data)):
        item = data[i]
        if "emotion" in item.keys():
            emotion_counts = Counter(item['emotion'])
            total_count = sum(emotion_counts.values())
            probs = {emo: round(emotion_counts[emo]/total_count,2) for emo in emotion_counts}
            item['emotion_dict'] = probs
        else:
            item['emotion_dict'] = None
    return data

msp = get_label_dict(msp)

In [277]:
started_sessions = []
organ_msp = {}
for i in range(len(msp)):
    podcast_ID = "_".join(msp[i]['id'].split("_")[0:2])
    if podcast_ID not in started_sessions:
        started_sessions.append("_".join(msp[i]['id'].split("_")[0:2]))
        organ_msp[podcast_ID] = []
    organ_msp[podcast_ID].append(msp[i])
print("total number of podcast:", len(organ_msp.keys()))

total number of podcast: 1855


In [278]:
print("An example of sentence of no need prediction")
organ_msp['MSP-PODCAST_0001'][0]

An example of sentence of no need prediction


{'id': 'MSP-PODCAST_0001_0008',
 'need_prediction': 'no',
 'emotion': ['Neutral', 'Neutral', 'Sad', 'Other-Inquisitive', 'Neutral'],
 'groundtruth': ["it's it follows. that's the movie. and i know it came out january 27th in the uk. did it come out later there, or...?"],
 'audio': 'Audio/MSP-PODCAST_0001_0008.wav',
 'speaker': '0001',
 'emotion_dict': {'Neutral': 0.6, 'Sad': 0.2, 'Other-Inquisitive': 0.2}}

In [279]:
print("An example of sentence of need prediction")
organ_msp['MSP-PODCAST_0001'][5]

An example of sentence of need prediction


{'id': 'MSP-PODCAST_0001_0019',
 'need_prediction': 'yes',
 'emotion': ['Happy', 'Happy', 'Neutral', 'Neutral', 'Happy'],
 'groundtruth': ["can't. i had someone in my media studies department, my degree, which was associated with the film production."],
 'audio': 'Audio/MSP-PODCAST_0001_0019.wav',
 'speaker': '0001',
 'amb_emotion': ['hap', 'hap', 'neu', 'neu', 'hap'],
 'emotion_probs': [0.4, 0.6, 0.0, 0.0],
 'emotion_dict': {'Happy': 0.6, 'Neutral': 0.4}}

In [280]:
need_podcast_id = []

for ID, podcast in organ_msp.items():
    for i in range(len(podcast)):
        if podcast[i]["need_prediction"] == "yes" and ID not in need_podcast_id:
            need_podcast_id.append(ID)
len(need_podcast_id)

301

In [281]:
print("Average number of sentences in each podcast:")
avg_sen = []
for ID in need_podcast_id:
    avg_sen.append(len(organ_msp[ID]))
print(sum(avg_sen)/len(avg_sen))

Average number of sentences in each podcast:
84.75083056478405


In [282]:
def Gemini_emotion_predictor(context, cur_sentence, number_of_contexts, firstflag):
    """
    Predicts the emotional state of a speaker based on the current input sentence and the conversational context.

    Parameters:
    context (list of dict): A list of dictionaries, each representing a previous conversational turn. Each dictionary
                            should contain at least the keys 'speaker' and 'sentence' indicating who the speaker was
                            and what they said, respectively.
    cur_input (dict): A dictionary representing the current sentence to be analyzed. It should contain at least the keys
                        'speaker' and 'sentence', similar to the dictionaries in `context`.
    number_of_contexts (int, optional): The number of contextual entries to consider for emotion prediction. Defaults to 3. The more context, the more expensive.

    Returns:
    str: The predicted emotion for the current sentence, from a set of predefined emotions such as 'happy', 'sad',
            'neutral', or 'angry'.
    """

    # for simplicity, we just use whisper-tiny's transcription, feel free to use any transcription we provide, and you can combine them
    background = 'Here is the conversation of a podcast cut:\n'
    if firstflag == False:
        context_string = '\n'.join(f"{item['groundtruth'][0]}" for item in context[-number_of_contexts:]) + '\n'
    else:
        context_string = '\n'.join(f"{item['groundtruth'][0]}" for item in context) + '\n'
    new_sentence = f"'{cur_sentence['groundtruth'][0]}'. \n"
    task = f"Predict the probability of the emotion of the last sentence '{cur_sentence['groundtruth'][0]}' from the options [neutral, happy, angry, sad], consider the conversation context. Output statisfies the following rules.\n"
    example = "Examples: \n" + "\n".join(
    f"Sentence {i+1}: {gt['groundtruth'][0]} Emotion probabilities: {context[i]['emotion_dict']}"
    for i, gt in enumerate(context)
) + '\n'
    task_req1 = "Rule 1: Ignore the format of answers from examples, they contains useless emotion labels that you don't need to care. Generate a dictionary of emotion probabilities in format of {'neutral': 0.1, 'happy':0.0, 'angry':0.1, 'sad':0.8}. If you think there is only one emotion in the sentence, then give the probability to 1. \n "
    task_req2 = "Rule 2: Ensure the sum of probability equal to 1.\n"
    task_amb2 = "Rule 3: Do not explain, only the dictionary.\n \n"
    task_final = "Please check again whether your output follows the three rules. "
    prompt = background + context_string + new_sentence + task + example+  task_req1 + task_req2 + task_amb2 + task_final
    # print(prompt)
    response = model.generate_content(prompt)
    return response, prompt

In [283]:
index_sentence = 5
number_of_context = 5
response, prompt = Gemini_emotion_predictor(organ_msp['MSP-PODCAST_0001'][index_sentence-number_of_context:index_sentence], organ_msp['MSP-PODCAST_0001'][5], 5, False)
response.text

I0000 00:00:1725962618.260634 20282584 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


"{'neutral': 1.0, 'happy': 0.0, 'angry': 0.0, 'sad': 0.0} \n"

In [284]:
prompt

"Here is the conversation of a podcast cut:\nit's it follows. that's the movie. and i know it came out january 27th in the uk. did it come out later there, or...?\noh, i can't. i mean, i don't know the festival circuit and all that, if it did all that, but-\ni think march 27th, cause a lot of my friends and i were going to go see that movie on friday, march 27th.\n20th. but then it had that wide release a week later.\nyeah. so david robert mitchells, it follows, he did the myth of the american sleepover?\n'can't. i had someone in my media studies department, my degree, which was associated with the film production.'. \nPredict the probability of the emotion of the last sentence 'can't. i had someone in my media studies department, my degree, which was associated with the film production.' from the options [neutral, happy, angry, sad], consider the conversation context. Output statisfies the following rules.\nExamples: \nSentence 1: it's it follows. that's the movie. and i know it came 

In [285]:
# transfer response from ```json\n{'neutral': 0.0, 'happy': 0.0, 'angry': 0.0, 'sad': 1.0}\n```
def identify_format(text):
    match = re.search(r"\{.*\}", text)
    if match:
        text = match.group(0)
    result_dict = ast.literal_eval(text)
    result_dict = {k.lower(): v for k, v in result_dict.items()}

    return result_dict
def dictToList(dict, emo_labels):
    prob_list = [dict[emo] for emo in emo_labels]
    return prob_list

In [286]:
log = {}
def predict_sentence(index_sentence, number_of_context, data):
    error = False
    emo_labels = ['neutral', 'happy', 'angry', 'sad']
    cur_sentence = data[index_sentence]
    cur_label = cur_sentence['emotion_probs']
    if index_sentence >= number_of_context:
        firstflag = False
        cur_context = data[index_sentence-number_of_context:index_sentence]
    else:
        firstflag = True
        cur_context = data[:index_sentence]

    try:
        time.sleep(0.05)
        response,prompt = Gemini_emotion_predictor(cur_context, cur_sentence, number_of_context, firstflag)
        response = response.text.strip()

        # input both context and the current sentence to the emotion predictor
        try:
            clear_response = identify_format(response)
            cur_pred = dictToList(clear_response, emo_labels)
            log[cur_sentence["id"]] = [prompt, response]
        except:
            # if there is an error, fill a neutral to keep the output of same dimension
            print('Gemini response is not in the right format: ', response, cur_sentence['id'])
            cur_pred = [1.0,0.0,0.0,0.0]
            error = True
            log[cur_sentence["id"]] = ["Response not in the right format", prompt, response]
    except:
        try: 
            time.sleep(5)
            response,prompt = Gemini_emotion_predictor(cur_sentence)
            response = response.text.strip()

            # input both context and the current sentence to the emotion predictor
            try:
                clear_response = identify_format(response)
                cur_pred = dictToList(clear_response, emo_labels)
                log[cur_sentence["id"]] = [prompt, response]
            except:
                # if there is an error, fill a neutral to keep the output of same dimension
                print('Gemini response is not in the right format: ', response, cur_sentence['id'])
                cur_pred = [1.0,0.0,0.0,0.0]
                error = True
                log[cur_sentence["id"]] = ["Response not in the right format", prompt, response]

        except:
            print('Gemini api has an error.: ', cur_sentence)
            cur_pred = [1.0,0.0,0.0,0.0]
            error = True
            log[cur_sentence["id"]] = ["Gemini api has an error."]

    return cur_label, cur_pred, error 

In [287]:
log = {}
def make_predictions(data, number_of_context):
    number_errors, number_success = 0, 0
    started_sessions = []
    all_ground_truth, all_pred = [], []
    
    for podcast_ID in need_podcast_id:
        podcast = data[podcast_ID]
        if podcast_ID not in started_sessions:
            started_sessions.append(podcast_ID)
            print("Podcast ", podcast_ID)
        
        for i in range(len(podcast)):
            if podcast[i]["need_prediction"] == "yes":
                
                label, prediction, error = predict_sentence(i,number_of_context, podcast)

                all_ground_truth.append(label)
                all_pred.append(prediction)
                if error == True:
                    number_errors += 1
                else: 
                    number_success += 1

        print('Number of error counts:', number_errors, "; Number of predictions:", number_success)
        number_errors, number_success = 0, 0
        print('------------------------')
        # for testing
        # if i > 50:
        #     break
    print("Total predictions: ", len(all_pred), "Total ground truth:", len(all_ground_truth))

    return all_pred, all_ground_truth

In [288]:
all_pred, all_ground_truth = make_predictions(organ_msp,15)

Podcast  MSP-PODCAST_0001
Number of error counts: 0 ; Number of predictions: 16
------------------------
Podcast  MSP-PODCAST_0003
Gemini response is not in the right format:  {'sad': 1.0} MSP-PODCAST_0003_0532
Number of error counts: 1 ; Number of predictions: 10
------------------------
Podcast  MSP-PODCAST_0038
Number of error counts: 0 ; Number of predictions: 2
------------------------
Podcast  MSP-PODCAST_0046
Number of error counts: 0 ; Number of predictions: 11
------------------------
Podcast  MSP-PODCAST_0047
Gemini response is not in the right format:  {'neutral': 1.0} MSP-PODCAST_0047_0237
Gemini response is not in the right format:  {'sad': 1.0} MSP-PODCAST_0047_0261
Gemini response is not in the right format:  {'sad': 1.0} MSP-PODCAST_0047_0262
Gemini response is not in the right format:  {'sad': 1.0} MSP-PODCAST_0047_0280
Gemini response is not in the right format:  {'angry': 1.0} MSP-PODCAST_0047_0292
Gemini response is not in the right format:  {'neutral': 1.0} MSP-POD

In [235]:
all_pred, all_ground_truth = make_predictions(organ_msp,10)

Podcast  MSP-PODCAST_0001
Number of error counts: 0 ; Number of predictions: 16
------------------------
Podcast  MSP-PODCAST_0003
Gemini response is not in the right format:  {'sad': 1.0} MSP-PODCAST_0003_0149
Number of error counts: 1 ; Number of predictions: 10
------------------------
Podcast  MSP-PODCAST_0038
Number of error counts: 0 ; Number of predictions: 2
------------------------
Podcast  MSP-PODCAST_0046
Number of error counts: 0 ; Number of predictions: 11
------------------------
Podcast  MSP-PODCAST_0047
Gemini response is not in the right format:  {'sad': 1.0} MSP-PODCAST_0047_0260
Gemini response is not in the right format:  {'sad': 1.0} MSP-PODCAST_0047_0280
Gemini response is not in the right format:  {'neutral': 1.0} MSP-PODCAST_0047_0321
Number of error counts: 3 ; Number of predictions: 51
------------------------
Podcast  MSP-PODCAST_0049
Number of error counts: 0 ; Number of predictions: 4
------------------------
Podcast  MSP-PODCAST_0051
Gemini response is not

In [289]:
def save_result(folder_path):

    print("Total predictions: ", len(all_pred), "Total ground truth:", len(all_ground_truth))
    # Write to a CSV file using a context manager
    with open(f'./msp_prediction/{folder_path}/pred.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(all_pred)

    with open(f'./msp_prediction/{folder_path}/truth.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(all_ground_truth)

    json.dump(log, open(f'./msp_prediction/{folder_path}/log.json', 'w'), indent=4)  

In [290]:
save_result("fs_15con")

Total predictions:  4114 Total ground truth: 4114


In [209]:
all_pred, all_ground_truth = make_predictions(organ_msp,5)

Podcast  MSP-PODCAST_0001
Gemini api has an error.:  {'id': 'MSP-PODCAST_0001_0161', 'need_prediction': 'yes', 'emotion': ['Neutral', 'Neutral', 'Neutral', 'Neutral', 'Neutral'], 'groundtruth': ["the second point is obviously the sex plays a big part in the movie. we haven't talked about it much beyond your intro."], 'audio': 'Audio/MSP-PODCAST_0001_0161.wav', 'speaker': '0001', 'amb_emotion': ['neu', 'neu', 'neu', 'neu', 'neu'], 'emotion_probs': [1.0, 0.0, 0.0, 0.0], 'emotion_dict': {'Neutral': 1.0}}
Number of error counts: 1 ; Number of predictions: 15
------------------------
Podcast  MSP-PODCAST_0003
Number of error counts: 0 ; Number of predictions: 11
------------------------
Podcast  MSP-PODCAST_0038
Number of error counts: 0 ; Number of predictions: 2
------------------------
Podcast  MSP-PODCAST_0046
Number of error counts: 0 ; Number of predictions: 11
------------------------
Podcast  MSP-PODCAST_0047
Gemini response is not in the right format:  {'neutral': 1.0} MSP-PODCAST_

In [211]:
save_result("fs_5con")

Total predictions:  4114 Total ground truth: 4114


In [212]:
import eval_metrics as em
from sklearn.metrics import recall_score, balanced_accuracy_score
from sklearn.metrics import f1_score, accuracy_score

In [238]:
KL_values = []
for i in range(len(all_pred)):
    KL_values.append(em.KL(all_ground_truth[i],all_pred[i]))
round(np.mean(KL_values),2)

  KL_value = np.sum(np.where(a != 0, a * np.log(a / b), 0))
  KL_value = np.sum(np.where(a != 0, a * np.log(a / b), 0))


10.76

In [239]:
BC_values = []
for i in range(len(all_pred)):
    BC_dic, BC_co = em.BC(all_ground_truth[i],all_pred[i])
    BC_values.append(BC_co)
np.mean(BC_values)

0.5514178658240155

In [240]:
R_square = []
for i in range(len(all_pred)):
    R_square.append(em.R(all_ground_truth[i],all_pred[i]))
np.mean(R_square)

0.5349854399611085

In [241]:
em.ECE(all_pred, all_ground_truth)

array([0.46781721])

In [242]:
truth_labels = []
pred_labels = []
for i in range(len(all_pred)):
    truth_labels.append(np.argmax(all_ground_truth[i]))
    pred_labels.append(np.argmax(all_pred[i]))

In [243]:
print("Accuracy: ", accuracy_score(truth_labels, pred_labels))
print("UAR ", balanced_accuracy_score(truth_labels, pred_labels))
print("Weighted-F1 score: ", f1_score(truth_labels, pred_labels, average='weighted'))

Accuracy:  0.512396694214876
UAR  0.44077592755299366
Weighted-F1 score:  0.5317478685444739


In [269]:
log['MSP-PODCAST_2353_0879'][2]

"{'angry': 1.0}"

In [271]:
response = model.generate_content("Could you transfer the dictionary {'angry': 1.0} into a distribution of emotions with probabilities for each emotion. [neutral, happy, angry, sad]")
response.text

'Here\'s how we can approach this, along with the reasoning:\n\n**Understanding the Challenge**\n\nYou have a dictionary representing a single emotion ("angry") with a certainty level (1.0).  We need to create a probability distribution for four emotions (neutral, happy, angry, sad).\n\n**Assumptions and Considerations**\n\n* **Lack of Information:** We have no information about the likelihood of the other emotions.  \n* **Default Distribution:**  We need to make assumptions to fill in the missing probabilities.  \n* **Equal Distribution (Simple Approach):** A simple way is to assume that the remaining emotions are equally likely.\n\n**Solution**\n\n1. **Start with the given emotion:**  "angry" has a probability of 1.0.\n\n2. **Allocate the remaining probability:** The remaining probability is 1.0 - 1.0 = 0.0.  We need to distribute this equally among the other three emotions.\n\n3. **Calculate probability for each remaining emotion:** 0.0 / 3 = 0.0.\n\n**Resulting Distribution**\n\n* 

In [266]:
prob_IDs = []
for i, podcast_ID in enumerate(log):
    item = log[podcast_ID]
    if len(item) == 3:
        if item[0] == 'Response not in the right format':
            
            prob_IDs.append(i)
len(prob_IDs)

490

In [256]:
log['MSP-PODCAST_2353_0879']

['Response not in the right format',
 "Here is the conversation of a podcast cut:\n... called the patio, but when you got a pad like that-\nstrolling down main street in st. louis and these people just came out onto their lawn and started pointing guns at these innocent protest-\nreason 97,342 why the mainstream media needs to be destroyed.\na 100% but how many people saw that and cheered for those guy?-\nyeah. so there are people-\nand to use them. and so i'm really worried that this is going to escalate to where we are really going to start seeing-\nthink if trump wins again, he can nego-\n... it should. we're to the point, like we've been talking about-\noh, in michigan, you've got detroit.\nright?because in countries come and go, all these lines change like there's consequence-\n'... to goes but mexico. we've kind of moved lines a little bit, that-'. \nPredict the probability of the emotion of the last sentence '... to goes but mexico. we've kind of moved lines a little bit, that-'