In [29]:
import openai
import itertools
import time
import os
import pandas as pd
import csv

Read Data

In [8]:
def extractFormattedConvo(index):
    convosPath = "dataset/dialogues_text.txt"
    person = "Person 2: "
    formattedConvo =""    
    with open(convosPath,"r") as convosFile:
        allConvos = convosFile.readlines()
        splitConvo = allConvos[index].replace("__eou__\n","").split("__eou__")
        for message in splitConvo:
            if formattedConvo !="":
                formattedConvo +="\n"
            if(person == "Person 2: "):
                person = "Person 1: "
            else:
                person = "Person 2: "
            formattedConvo+=person+message
    return formattedConvo

In [9]:
def createDataframe():
    emotionsPath = "dataset/dialogues_emotion.txt"
    dataFrame = pd.DataFrame({"index":[], "conversation":[], "emotions":[], "variety":[],"length":[]})
    try:
        with open(emotionsPath, "r") as file:
            #fetch line by line (one line is one convo)
            for index, emotions in enumerate(file):
                procEmotions = emotions.replace("\n","").replace(" ","")
                variety = len(set(procEmotions))
                length = len(procEmotions)
                convo = extractFormattedConvo(index)
                tempDF = pd.DataFrame({'index':[index], 'conversation':[convo], 'emotions': [procEmotions], 'variety': [variety], "length": [length]})
                dataFrame = pd.concat([dataFrame,tempDF], ignore_index=True)
    except Exception as e:
        print(f"The following exception occured: {e}")

    return dataFrame

In [10]:
dataFrame = createDataframe()

In [11]:
dataFrame.head()

Unnamed: 0,index,conversation,emotions,variety,length
0,0.0,Person 1: The kitchen stinks . \nPerson 2: I'...,20,2.0,2.0
1,1.0,"Person 1: So Dick , how about getting some cof...",42010,4.0,5.0
2,2.0,Person 1: Are things still going badly with yo...,100,2.0,4.0
3,3.0,Person 1: Would you mind waiting a while ? \nP...,4,2.0,4.0
4,4.0,Person 1: Are you going to the annual party ? ...,44,2.0,3.0


Data Pre-Processing

In [12]:
def countZeroes(string):
    return string.count('0')

In [13]:
def weighData(dataFrame, varietyWeight, zeroesWeight, lengthWeight):
    tempDataFrame = dataFrame.copy(deep=True)
    #normalize the data first
    min_value = tempDataFrame['variety'].min()
    max_value = tempDataFrame['variety'].max()
    tempDataFrame['variety'] = (tempDataFrame['variety'] - min_value) / (max_value - min_value)

    min_value = tempDataFrame['length'].min()
    max_value = tempDataFrame['length'].max()
    tempDataFrame['length'] = (tempDataFrame['length'] - min_value) / (max_value - min_value)

    tempDataFrame['zeroes'] = tempDataFrame['emotions'].apply(countZeroes)

    min_value = tempDataFrame['zeroes'].min()
    max_value = tempDataFrame['zeroes'].max()
    tempDataFrame['zeroes'] = (tempDataFrame['zeroes'] - min_value) / (max_value - min_value)

    #take weighted sum
    tempDataFrame['score'] = (varietyWeight * tempDataFrame['variety']) + (lengthWeight * tempDataFrame['length']) + (zeroesWeight * tempDataFrame['zeroes'])
    sortedTempDF = tempDataFrame.sort_values(by='score', ascending=False)
    return sortedTempDF[sortedTempDF['emotions'].str.len() > 5]


In [14]:
#dataFrame, varietyWeight, zeroesWeight, lengthWeight
weighedDF = weighData(dataFrame, 0.6,-1.2,0.4)
display(weighedDF.head())

Unnamed: 0,index,conversation,emotions,variety,length,zeroes,score
3769,3769.0,"Person 1: Jenny , you look terrible . What's e...",105056505630,1.0,0.333333,0.15625,0.545833
6666,6666.0,Person 1: What ? How could you forget to reser...,601515,0.75,0.121212,0.03125,0.460985
9023,9023.0,"Person 1: Hello , three-five . \nPerson 2: Is...",262525,0.75,0.181818,0.0625,0.447727
2687,2687.0,"Person 1: Brooke , I have bad news . \nPerson ...",601515101510,0.75,0.333333,0.125,0.433333
5990,5990.0,"Person 1: Tom , you look thoughtful and unhapp...",5650504,0.75,0.181818,0.09375,0.410227


Evaluation

In [34]:
def createCombos(convo,index,contextLength):
    message = convo[index]
    context = ""
    startingIndex = index
    endingIndex = 0
    if((index) - contextLength > 0):
        endingIndex = (index) - contextLength
    for i in range(startingIndex,endingIndex-1,-1):
        context = convo[i] + "\n" + context
    return message,context


In [33]:
import openai
import itertools
import time

def callGPT(messagesBox):
    openai.api_type = "azure"
    openai.api_base = "REMOVED"
    openai.api_key = 'REMOVED'
    openai.api_version = "2023-05-15"
    response = openai.ChatCompletion.create(
        engine="GPT4",
        messages=messagesBox
        
    )
    extrResponse = response['choices'][0]['message']['content']

    if("NEUTRAL" in extrResponse.upper()):
        return "0"
    elif("ANGER" in extrResponse.upper()):
        return "1"
    elif("DISGUST" in extrResponse.upper()):
        return "2"
    elif("FEAR" in extrResponse.upper()):
        return "3"
    elif("HAPPINESS" in extrResponse.upper()):
        return "4"
    elif("SADNESS" in extrResponse.upper()):
        return "5"
    elif("SURPRISE" in extrResponse.upper()):
        return "6"
    else:
        return "7"

In [32]:
def createMB(message, context):

    prompt = "\n\nIdentify the emotion in the following message from the conversation above as either Neutral, Anger, Disgust, Fear, Happiness, Sadness or Surprise:\n\n"
    
    sampleContext1 = "Person 1 : Hey! What is up?\nPerson 2 : Well, I was thinking, how about a trip to Gloucester, Massachusetts this weekend?"
    sampleMessage1 = "Person 2 : Well, I was thinking, how about a trip to Gloucester, Massachusetts this weekend?"
    samplePrompt1 =  sampleContext1 + prompt + sampleMessage1
    sampleAnswer1 = "Neutral"

    sampleContext2 = "Person 1: Let's go there!\nPerson 2: I am not going with you. just shut it!"
    sampleMessage2 = "Person 2: I am not going with you. just shut it!"
    samplePrompt2=  sampleContext2 + prompt + sampleMessage2
    sampleAnswer2 = "Anger"

    realPrompt = context + prompt + message

    messagesBox = [
        { "role": "user", "content": samplePrompt1},
        { "role": "assistant", "content": sampleAnswer1},
        { "role": "user", "content": samplePrompt2},
        { "role": "assistant", "content": sampleAnswer2},
        { "role": "user", "content": realPrompt}
    ]

    return messagesBox

In [39]:
def evaluate(data):
    csv_file_path = 'output.csv'
    with open(csv_file_path, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["ConvoIndex", "MessageIndex", "Context", "Response"])
        #for convo in data['conversation']:
        for index,row in data.iterrows():
            convo = row['conversation']
            ind = row ['index']
            convo = convo.split("\n")
            #print(convo)
            for contextLength in range(5):
                for index,message in enumerate(convo):
                    message, context = createCombos(convo,index,contextLength)
                    messageBox = createMB(message,context)
                    #call GPT
                    try:
                        response = callGPT(messageBox)
                    except Exception as e:
                        if('response was filtered due to the prompt triggering' in str(e)):
                            response = "8"
                        else:
                            print("Sleeping for 8 seconds.")
                            time.sleep(8)
                            try: 
                                response = callGPT(messageBox)
                            except Exception as e:
                                print("Exception:",str(e))
                                response="9"
                    writer.writerow([ind, index, contextLength, response]) 
               


In [40]:
weighedData = weighedDF.head(10).copy(deep=True)
evaluate(weighedData)

KeyboardInterrupt: 