In [1]:
import pandas as pd
import ast
import matplotlib.pyplot as plt
import numpy as np
import sys
import importlib
from PIL import Image
from IPython.display import display, HTML
import random
sys.path.append('../')

if 'playscript_utils' in sys.modules:
    importlib.reload(sys.modules['playscript_utils'])
else:
    import playscript_utils
from playscript_utils import model_name_dict, emotions, filter_csv_by_columns

In [2]:
def standout_dialogues(emotion_metrics_path, emotions, extract_k, filter_criteria):
    filtered_df = filter_csv_by_columns(emotion_metrics_path, filter_criteria)
    print(f"Length of filtered_df with criteria {filter_criteria}: {len(filtered_df)}")
    all_scores = {emotion: [] for emotion in emotions}
    all_dialogues = []
    for index, row in filtered_df.iterrows():
        # Convert string literal to list of dialogues
        dialogues = ast.literal_eval(row['dialogues'])
        all_dialogues.extend(dialogues)
        # Convert string literal to dictionary {emotion: list of scores, ...}
        emotion_scores = ast.literal_eval(row['emotion_scores'])

        for emotion in emotions:
            all_scores[emotion].extend(emotion_scores[emotion])
    
    emotion_standouts = {emotion: {"top_k": [], "bottom_k": [], "random_k": []} for emotion in emotions}
    for emotion in emotions:
        # Sort the dialogues based on the current emotion                        
        paired = list(zip(all_scores[emotion], all_dialogues))
        sorted_pairs = sorted(paired, key=lambda x: x[0])

        # Get the top and bottom k
        top_k = sorted_pairs[-extract_k:]
        bottom_k = sorted_pairs[:extract_k]

        emotion_standouts[emotion]["top_k"] = top_k
        emotion_standouts[emotion]["bottom_k"] = bottom_k
        emotion_standouts[emotion]["random_k"] = random.sample(paired, extract_k)
    return emotion_standouts
    

In [3]:
def emotion_scores_all_dialogues_histogram(emotion_metrics_path, emotions, filter_criteria, filter_emotions):
    filtered_df = filter_csv_by_columns(emotion_metrics_path, filter_criteria)
    print(f"Length of filtered_df with criteria {filter_criteria}: {len(filtered_df)}")
    fig, axes = plt.subplots(3, 2, figsize=(8, 8))  # Create a 3x2 grid for the plots
    fig.suptitle(f"Histogram of emotion scores of all dialogues: {filter_criteria}, filter_emotions={filter_emotions}", wrap=True)  # Set the title of the grid
    axes = axes.flatten()  # Flatten the 2D array of axes for easy iteration

    for idx, emotion in enumerate(emotions):
        alice_scores = []
        bob_scores = []

        for index, row in filtered_df.iterrows():
            # Convert string literal to dictionary of {emotion: list of scores}
            emotion_scores = ast.literal_eval(row['emotion_scores'])
            if filter_emotions:
                if row['alice_emotion'] == emotion:            
                    alice_scores.extend(emotion_scores[emotion][::2])
                if row['bob_emotion'] == emotion:
                    bob_scores.extend(emotion_scores[emotion][1::2])
            else:
                alice_scores.extend(emotion_scores[emotion][::2])
                bob_scores.extend(emotion_scores[emotion][1::2])

        bins = 20  # Adjusted bin size for better distribution capture
        axes[idx].hist(alice_scores, bins=bins, alpha=0.3, color='red', label='Alice')  # Increased transparency
        axes[idx].hist(bob_scores, bins=bins, alpha=0.3, color='blue', label='Bob')  # Increased transparency

        # Calculate mean
        alice_mean = np.mean(alice_scores)
        bob_mean = np.mean(bob_scores)

        # Add mean lines
        axes[idx].axvline(alice_mean, color='red', linestyle='solid', linewidth=1)
        axes[idx].axvline(bob_mean, color='blue', linestyle='solid', linewidth=1)

        # Update legend to include mean values
        axes[idx].legend([f'Alice', f'Bob', f'Alice Mean: {alice_mean:.2f}', f'Bob Mean: {bob_mean:.2f}'], loc='upper right')

        axes[idx].set_title(f'{emotion} Comparison')  # Updated title
        axes[idx].set_xlabel(f'{emotion} score')  # Updated X-axis label
        axes[idx].set_ylabel('Frequency')

    plt.tight_layout()  # Adjust subplots to fit into figure area.
    plt.close(fig)  # Close the figure to prevent it from displaying
    return fig

In [4]:
def average_emotion_score_over_time_chart(emotion_metrics_path, filter_criteria):
    filtered_df = filter_csv_by_columns(emotion_metrics_path, filter_criteria)
    print(f"Length of filtered_df with criteria {filter_criteria}: {len(filtered_df)}")
        
    alice_scores_all_premises = []
    bob_scores_all_premises = []

    for index, row in filtered_df.iterrows():
        alice_emotion = row['alice_emotion']
        bob_emotion = row['bob_emotion']
        
        # Skip if alice_emotion or bob_emotion is "generic"
        if alice_emotion == "generic" or bob_emotion == "generic":
            continue
        
        # Convert string literal to dictionary of {emotion: list of scores}
        emotion_scores = ast.literal_eval(row['emotion_scores'])
        
        # Create the list of scores for the current premise
        alice_scores = emotion_scores[alice_emotion]
        bob_scores = emotion_scores[bob_emotion]
        alice_scores_all_premises.append(alice_scores)
        bob_scores_all_premises.append(bob_scores)
    

    # Convert to numpy array for easier manipulation
    alice_scores_all_premises = np.array(alice_scores_all_premises)
    bob_scores_all_premises = np.array(bob_scores_all_premises)
    
    # Calculate the average score for each of the 6 positions
    alice_average_scores = np.mean(alice_scores_all_premises, axis=0)
    bob_average_scores = np.mean(bob_scores_all_premises, axis=0)
    
    # Calculate the confidence intervals
    alice_confidence_intervals = 1.96 * np.std(alice_scores_all_premises, axis=0) / np.sqrt(alice_scores_all_premises.shape[0])
    bob_confidence_intervals = 1.96 * np.std(bob_scores_all_premises, axis=0) / np.sqrt(bob_scores_all_premises.shape[0])
    
    # Create the line chart
    x_labels = ['A1', 'B1', 'A2', 'B2', 'A3', 'B3']
    
    fig, ax = plt.subplots(figsize=(8, 8))
    
    # Combine Alice and Bob plots
    ax.errorbar(x_labels, alice_average_scores, yerr=alice_confidence_intervals, fmt='-o', capsize=5, label='Alice')
    ax.errorbar(x_labels, bob_average_scores, yerr=bob_confidence_intervals, fmt='-o', capsize=5, label='Bob')
    ax.set_xlabel('Conversation Turn')
    ax.set_ylabel('Average Emotion Score')
    ax.set_title(f"Average Emotion Score Over time: {filter_criteria}", wrap=True)
    ax.legend()
    
    plt.tight_layout()
    return fig

In [24]:
def sweep_standout_dialogues(emotion_metrics_path, extract_k):
    for premise_type in ["neutral"]:
        for probing_model in ["llama2_13b_chat"]:
            for probing_method in ["pca", "logistic_regression"]:
                for stimulis_format in ["rep e"]:
                    for dialogue_concatenate in [False]:
                        filter_criteria = {"premise_type": premise_type, "probing_method": probing_method, "stimulis_format": stimulis_format, "dialogue_concatenate": dialogue_concatenate}
                        emotion_standouts = standout_dialogues(emotion_metrics_path, emotions, extract_k, filter_criteria=filter_criteria)

                        html_content = f"""
                        <html>
                            <head>
                                <title>Emotion Standouts for {filter_criteria}</title>
                            </head>
                            <body>
                                <h2>Emotion Standouts for {filter_criteria}</h2>
                                <table border="1">
                                    <tr>
                                        <th>Emotion</th>
                                                       """
                        for k in range(extract_k):
                            html_content += f"""
                                        <th>Best Score {k+1}</th>
                                        <th>Best Dialogue {k+1}</th>
                            """
                        for k in range(extract_k):
                            html_content += f"""
                                        <th>Worst Score {k+1}</th>
                                        <th>Worst Dialogue {k+1}</th>
                            """
                        # for k in range(extract_k):
                        #     html_content += f"""
                        #                 <th>Random Score {k+1}</th>
                        #                 <th>Random Dialogue {k+1}</th>
                        #     """
                        html_content += """
                                    </tr>
                            """

                        for emotion, scores in emotion_standouts.items():
                            best_scores = [round(score[0], 2) for score in scores['top_k']]
                            worst_scores = [round(score[0], 2) for score in scores['bottom_k']]
                            random_scores = [round(score[0], 2) for score in scores['random_k']]
                            best_dialogues = [score[1] for score in scores['top_k']]
                            worst_dialogues = [score[1] for score in scores['bottom_k']]
                            random_dialogues = [score[1] for score in scores['random_k']]
                            html_content += f"""
                                <tr>
                                    <td>{emotion}</td>
                            """
                            for k in range(extract_k):
                                html_content += f"""
                                    <td>{best_scores[k]}</td>
                                    <td>{best_dialogues[k]}</td>
                                """
                            for k in range(extract_k):
                                html_content += f"""
                                    <td>{worst_scores[k]}</td>
                                    <td>{worst_dialogues[k]}</td>
                                """
                            # for k in range(extract_k):
                            #     html_content += f"""
                            #                 <td>{random_scores[k]}</td>
                            #                 <td>{random_dialogues[k]}</td>
                            #     """
                            html_content += """
                                </tr>
                            """

                        html_content += """
                            </table>
                        </body>
                        </html>
                        """

                        # Save the HTML content to a file
                        html_output_path = f"../results/standout_dialogues/premise_type-{premise_type}-probing_model-{probing_model}-probing_method-{probing_method}-stimulis_format-{stimulis_format}-dialogue_concatenate-{dialogue_concatenate}.html"
                        with open(html_output_path, "w", encoding="utf-8") as html_file:
                            html_file.write(html_content)

                        # Display the HTML file
                        html_output_path = f"../results/standout_dialogues/premise_type-{premise_type}-probing_model-{probing_model}-probing_method-{probing_method}-stimulis_format-{stimulis_format}-dialogue_concatenate-{dialogue_concatenate}.html"
                        with open(html_output_path, "r", encoding="utf-8") as html_file:
                            html_content = html_file.read()
                            display(HTML(html_content))

In [18]:
def sweep_emotion_scores_all_dialogues_histogram(emotion_metrics_path):
    for premise_type in ["neutral"]:
        for probing_model in ["llama2_13b_chat"]:
            if premise_type == "creative":
                filter_emotions = False
            else:
                filter_emotions = True
            for probing_method in ["pca", "logistic_regression"]:
                for stimulis_format in ["simple", "rep e", "conversation"]:
                    for dialogue_concatenate in [False]:
                        filter_criteria = {"premise_type": premise_type, "probing_method": probing_method, "stimulis_format": stimulis_format, "dialogue_concatenate": dialogue_concatenate}
                        fig = emotion_scores_all_dialogues_histogram(emotion_metrics_path, emotions, filter_criteria, filter_emotions)
                        image_path = f"../results/emotion_scores_all_dialogues/premise_type-{premise_type}-filter_emotions-{filter_emotions}-probing_model-{probing_model}-probing_method-{probing_method}-stimulis_format-{stimulis_format}-dialogue_concatenate-{dialogue_concatenate}.png"
                        fig.savefig(image_path)

In [19]:
def sweep_average_emotion_score_over_time(emotion_metrics_path):
    for premise_type in ["neutral"]:
            for probing_model in ["llama2_13b_chat"]:
                for probing_method in ["pca", "logistic_regression"]:
                    images = []
                    for stimulis_format in ["simple", "rep e", "conversation"]:
                        for dialogue_concatenate in [True, False]:
                            filter_criteria = {"premise_type": premise_type, "probing_method": probing_method, "stimulis_format": stimulis_format, "dialogue_concatenate": dialogue_concatenate}
                            fig = average_emotion_score_over_time_chart(emotion_metrics_path, filter_criteria)
                            image_path = f"../results/average_emotion_score_over_time/premise_type-{premise_type}-probing_model-{probing_model}-probing_method-{probing_method}-stimulis_format-{stimulis_format}-dialogue_concatenate-{dialogue_concatenate}.png"
                            fig.savefig(image_path)
                            images.append(Image.open(image_path))
                            plt.close(fig)  # Close the figure to avoid displaying it
                    # Compile the images into a single 2x3 image
                    widths, heights = zip(*(i.size for i in images))

                    # Calculate the total width and height for a 2x3 layout
                    total_width = sum(widths[:2])  # Assuming the first 2 images are in the first row
                    max_height = sum(heights[:3])  # Assuming the first 3 images are the tallest

                    new_im = Image.new('RGB', (total_width, max_height))

                    x_offset = 0
                    y_offset = 0
                    for im in images:
                        new_im.paste(im, (x_offset, y_offset))
                        x_offset += im.size[0]
                        if (images.index(im) + 1) % 2 == 0:  # After every 2 images, move to the next row
                            x_offset = 0
                            y_offset += im.size[1]

                    new_im.save(f'../results/average_emotion_score_over_time/combined-premise_type-{premise_type}-probing_model-{probing_model}-probing_method-{probing_method}.png')
                    display(new_im)


In [25]:
emotion_metrics_path = "../data/permanent/emotion_metrics.csv"
sweep_standout_dialogues(emotion_metrics_path, extract_k=3)
# sweep_emotion_scores_all_dialogues_histogram(emotion_metrics_path)
# # sweep_average_emotion_score_over_time(emotion_metrics_path)


Length of filtered_df with criteria {'premise_type': 'neutral', 'probing_method': 'pca', 'stimulis_format': 'rep e', 'dialogue_concatenate': False}: 370


Emotion,Best Score 1,Best Dialogue 1,Best Score 2,Best Dialogue 2,Best Score 3,Best Dialogue 3,Worst Score 1,Worst Dialogue 1,Worst Score 2,Worst Dialogue 2,Worst Score 3,Worst Dialogue 3
happiness,24.56,But there’s something magical about seeing your imagination come to life! It’s so fulfilling!,24.79,"I just finished my project ahead of schedule, and it turned out even better than I imagined!",25.03,Every train ride is an adventure waiting to unfold; you never know who you might meet!,-25.06,"It's infuriating! You totally deserved it, and the boss just ignores your hard work like it doesn't even matter.",-25.05,"And the smell! It was like a garbage dump in there, I nearly gagged!",-24.11,I can’t believe how they never clean this train! It’s absolutely revolting to sit on a seat covered in god knows what!
sadness,21.0,"I used to love the laughter here, but now it feels like a reminder of what we've lost.",21.16,"It used to be a comfort to ride together, but now it just feels so lonely.",22.9,"Every note seems to echo the sadness inside me, as if it's pulling me back to a time I'd rather forget.",-20.02,"Ugh, I hate that! What if someone brings something really gross on board—like a creepy bug?",-19.64,"Seriously, it's like you're imagining the worst horror movie ever!",-19.42,"Wait, what? I thought this was just a nice picnic, not a campfire horror fest!"
anger,5.67,"It’s like every time I think I’m done, they find something else wrong. It’s exhausting.",5.96,It's easy for you to say when they ignore my suggestions completely! It’s infuriating!,6.81,"I want to be part of the planning, not sidelined like I don’t matter! This is infuriating!",-14.98,I can't believe they organized a tug-of-war! That’s so unexpected for this picnic!,-14.8,I can't believe they have a sack race at the picnic this year! That’s so unexpected!,-14.43,I can't believe they're planning a street fair this weekend! I thought we were just having a potluck.
fear,7.78,"And what if one day, I just can’t handle it anymore? What if I don’t come back?",8.39,What if it's not safe to be around there at all? I'm actually scared to go back.,9.59,What if something happens at one of these events? I keep imagining the worst-case scenarios.,-4.96,"Honestly, why do we even bother with these potlucks? The food is just so bland.",-4.68,"Haunted crafts? Come on, Alice, that sounds ridiculous. I can't believe you're worried about that when there are so many disgusting glitter projects around.",-4.58,Bad energy? Then I'm definitely passing on any of those sludgy-looking crafts over there; they’re just repulsive.
disgust,4.83,"I can’t believe how disgusting the seats were this morning, covered in crumbs and who knows what else!",5.0,Exactly! The last event I attended had donuts that looked like they were fried a month ago. Disgusting!,5.14,But the trash was piled so high; it was disgusting! Who would want to dig through that filth?,-12.26,"I'm really sorry, Alice. I thought you were too busy to be involved this time.",-12.12,"I didn’t notice that today, but it’s sad how the cleanliness keeps slipping.",-11.63,"Yeah, it's great, but I feel a bit out of place. This isn't really my type of music."
surprise,10.44,I was thinking we could organize a community garden! Just imagine all the colorful flowers and veggies!,12.53,I just found out we're hosting a reenactment of the old town market next weekend! Can you believe it?,13.0,I just found out we're hosting a reenactment of the old town meeting next week! Can you believe it?,-9.25,I don’t know how we're going to keep interest alive when events like this fall through. It just makes me feel so hopeless.,-9.03,It feels like they don’t care about our frustrations at all. It's just sad.,-8.84,"It’s tough for them, they don’t always have the resources. I feel they get overwhelmed sometimes."


Length of filtered_df with criteria {'premise_type': 'neutral', 'probing_method': 'logistic_regression', 'stimulis_format': 'rep e', 'dialogue_concatenate': False}: 370


Emotion,Best Score 1,Best Dialogue 1,Best Score 2,Best Dialogue 2,Best Score 3,Best Dialogue 3,Worst Score 1,Worst Dialogue 1,Worst Score 2,Worst Dialogue 2,Worst Score 3,Worst Dialogue 3
happiness,6.11,"Wait, so you're saying we can finally start that community garden project?",7.42,Just think about how nice it will be when you get to your destination and have a warm cup of coffee!,7.43,"I just finished my project ahead of schedule, and it turned out even better than I imagined!",-9.79,"I used to find comfort in this genre, but now it just reminds me of what I've lost.",-9.59,"I used to love the summer fairs when we were kids, but now they just feel empty.",-9.29,I can't believe I used to love this band; now it just feels empty inside.
sadness,6.05,"I know, but it still makes me miss the good times we had. It’s been hard to let go.",6.12,"It’s just sad to see how my taste has changed over time, like I've lost something.",6.2,Me too; it feels like a piece of our memories has faded away.,-8.22,I can't believe they dumped that awful report on my desk at the last minute! It's infuriating!,-7.44,I can't believe they dumped this endless report on my desk at the last minute! It's infuriating!,-7.3,"Wow, I didn't see that coming! We need to sign up for everything!"
anger,0.29,It's easy for you to say when they ignore my suggestions completely! It’s infuriating!,0.37,"I want to be part of the planning, not sidelined like I don’t matter! This is infuriating!",0.73,Overreacting? You didn’t even give me credit! This is exactly why I can't stand working with you!,-9.5,I just hope the food doesn't make me sick. It's so unappetizing!,-8.2,"I just found out that we might not have enough volunteers for the re-enactment this year, and that really worries me.",-8.2,"I heard there might be ghosts in the park during the event, and I really don't want to see one."
fear,1.29,"I just finished my project ahead of schedule, and it turned out even better than I imagined!",1.4,I just found out we're hosting a reenactment of the old town market next weekend! Can you believe it?,1.94,What if something happens at one of these events? I keep imagining the worst-case scenarios.,-7.6,"Ugh, I can't believe they even have a pie-eating contest. Who wants to watch people stuff their faces like pigs?",-7.41,Bad energy? Then I'm definitely passing on any of those sludgy-looking crafts over there; they’re just repulsive.,-7.4,I can't believe they serve that disgusting food at the community picnic. Who wants to eat soggy burgers and overcooked hot dogs?
disgust,-6.77,"Right? I mean, the way they mishandle everything is nauseating.",-6.69,Right? I just can't fathom why anyone would want to touch those grimy bottles and fast-food wrappers. It's revolting!,-6.03,But the trash was piled so high; it was disgusting! Who would want to dig through that filth?,-15.4,I just found out that the exposition on the town's founding has been canceled… It feels like our history is fading away.,-15.39,I just found out that the exposition on the town's founding has been canceled… It feels like we’re losing a piece of our history.,-15.34,I just can't believe we didn't have any games this year. It feels empty without them.
surprise,-4.39,This is wild! I guess we really don’t know each other's tastes at all.,-4.28,Right? How hard is it to find something that doesn't look completely ridiculous? Some of this stuff is just embarrassing.,-3.73,I can't believe you actually enjoy polka music! That seems so out of nowhere!,-9.61,"I know, it’s heartbreaking to think about losing such an important part of our town’s history.",-9.54,Just think about how nice it will be when you get to your destination and have a warm cup of coffee!,-9.36,"Yes! Everything here is locally sourced, and the farmers are so friendly!"
