In [22]:
import pandas as pd
import error_prompts as p
import os
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langfuse.callback import CallbackHandler
from langchain_openai import ChatOpenAI
import pandas as pd

# Load the .env file so the credentials checked below are visible in os.environ
load_dotenv()

# Fail fast with a readable message when a credential is absent.
# Writing the result of os.environ.get(...) back into os.environ is a no-op when
# the key exists and raises "TypeError: str expected, not NoneType" when it does
# not, which hides which variable is actually missing.
REQUIRED_ENV_VARS = ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "OPENAI_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if name not in os.environ]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the environment or in the .env file."
    )

# Callback handler shared by every chain run in this notebook.
# NOTE(review): constructed without arguments, so it presumably picks up the
# LANGFUSE_* keys from the environment - confirm against the Langfuse docs.
handler = CallbackHandler()
handler.auth_check()


# Each analysis variant is a (name, prompt template) pair; the templates come
# from the local `error_prompts` module imported as `p`.
prompt_name_structured, prompt_structured = 'structured_analysis', p.structured_analysis
prompt_name_unstructured, prompt_unstructured = 'unstructured_analysis', p.unstructured_analysis

# Base name (no extension) of the evaluation CSV to analyse; it is reused in
# both the input and the output path below.
file = "singleterm_zeroshot_3.5-turbo"

# Input: the evaluation results. Output: the same rows plus the analysis columns.
DATA_PATH = '../../Datasets/Evaluations/Sentiment Analysis/{file}.csv'.format(file=file)
OUTPUT_PATH = "../../Datasets/Evaluations/Sentiment Analysis/Error_Analysis/structured_unstructured_{file}.csv".format(file=file)

# Model name and sampling temperature handed to the LLM client.
MODEL, TEMP = "gpt-4-turbo-preview", 0





In [24]:
# Rows drawn from each group, and the seed that makes the draw repeatable so a
# fresh "Restart & Run All" analyses the same rows.
SAMPLE_SIZE = 50
SAMPLE_SEED = 42

df = pd.read_csv(DATA_PATH)

# Label each row by comparing `polarity` with `polarity_pred`.
# Vectorized equivalent of an element-wise `==`; a missing value on either side
# compares unequal and is therefore labelled 'error'.
df['error'] = (df['polarity'] == df['polarity_pred']).map({True: 'correct', False: 'error'})

print(df['error'].value_counts())

df_correct = df[df['error'] == 'correct']
df_error = df[df['error'] == 'error']

# Draw the same number of rows from each group. `n` is capped at the group size
# because DataFrame.sample raises when asked for more rows than exist
# (without replacement).
df_error_sample = df_error.sample(n=min(SAMPLE_SIZE, len(df_error)), random_state=SAMPLE_SEED)
df_correct_sample = df_correct.sample(n=min(SAMPLE_SIZE, len(df_correct)), random_state=SAMPLE_SEED)

# Error rows first, then correct rows, on a fresh 0..n-1 index; the two analysis
# columns start empty and are filled in by later cells.
df = pd.concat([df_error_sample, df_correct_sample]).reset_index(drop=True)
df['structured_analysis'] = None
df['unstructured_analysis'] = None




error
correct    267
error      184
Name: count, dtype: int64


In [25]:
# Sanity check of the sampled frame: dimensions on the first line, then the
# leading rows.
print(df.shape, df.head(), sep="\n")

(100, 12)
   Unnamed: 0  text_id  term_id  \
0          57       20       57   
1         232       87      232   
2         434      164      434   
3         104       39      104   
4         268      102      268   

                                                text    term  polarity  \
0  There were a lot of scensters who couldnt affo...  dinner  negative   
1  Had a mouthwatering sardinian Stuffed Squid an...   sauce   neutral   
2  As for the food, brunch was average, I would n...  brunch   neutral   
3  We sat at the bar and were constantly bumped b...   rolls  positive   
4  about 10 minutes apart each, so we were all ea...    eggs  negative   

  polarity_pred                    prompt_name  \
0       neutral  prompt_3_zeroshot_single_term   
1      positive  prompt_3_zeroshot_single_term   
2      negative  prompt_3_zeroshot_single_term   
3      negative  prompt_3_zeroshot_single_term   
4       neutral  prompt_3_zeroshot_single_term   

                                 

In [26]:
#Structured Analysis
def execute(df, prompt, start = 0, column = 'structured_analysis'):
    """Run `prompt` through the LLM for every pending row and store the answer.

    A row is pending when its `column` cell is missing (None/NaN) or holds the
    marker string "error" left by a previously failed attempt; every other row
    is skipped, so the function can be re-run to retry only the failures.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'prompt', 'polarity_pred' and `column`.
        Rows are addressed with ``df.loc[i, ...]`` for ``i`` in
        ``range(start, len(df))``, so a default 0..n-1 index is assumed.
        The frame is modified in place.
    prompt :
        Prompt template handed to ``LLMChain``; it is formatted with the
        variables ``user_prompt`` and ``ai_answer``.
    start : int, default 0
        First row position to process; earlier rows are left untouched.
    column : str, default 'structured_analysis'
        Column that receives the model output (or "error" on failure).

    Returns
    -------
    pandas.DataFrame
        The same `df` object that was passed in.
    """
    if MODEL == "gpt-3.5-turbo-instruct":
        llm = OpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, timeout=10)

    # The chain does not depend on the row, so build it once instead of per row.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
    total = len(df)

    for i in range(start, total):
        previous = df.loc[i, column]
        # pd.isna covers both None and NaN (e.g. after a CSV round-trip);
        # a plain `!= None` test would treat NaN as an existing answer.
        if not pd.isna(previous) and previous != "error":
            continue
        print(i)
        user_prompt = df.loc[i, 'prompt']
        ai_answer = str(df.loc[i, 'polarity_pred'])
        try:
            result = chain.run(user_prompt = user_prompt, ai_answer = ai_answer, callbacks=[handler])
        except Exception as e:
            # Best effort: log the failure with the prompt that caused it, mark
            # the row so a later run retries it, and move on to the next row.
            print(e)
            print(chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string())
            # Single .loc write; chained `df[column][i] = ...` is not guaranteed
            # to reach the original frame.
            df.loc[i, column] = 'error'
            continue
        handler.langfuse.flush()

        # Echo the first few exchanges so the prompt/answer format can be eyeballed.
        if i < 5:
            prompt_text = chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string()
            print(prompt_text)
            print("\n")
            print(result)
            print("\n")

        if i % 50 == 0:
            print(str(i) + " of  " + str(total))
        df.loc[i, column] = result
    return df


# Fill in the structured analysis for every row that is still pending.
df = execute(df=df, prompt=prompt_structured)

    



0
System: You are a helpful AI.
Human: There were a lot of scensters who couldnt afford dinner hanging in the waiting area so we got bumped around a lot.

    What is the sentiment on 'dinner'? Only respond with "positive", "negative" or "neutral" as one word.
AI: neutral
Human: Explain your prediction in a
structured format, listing adjectives you used for your decision and how important you deemed them for your decision. Each adjective should be accompanied by a a score between 0 and
1 that shows the importance of the attribute for the decision. All scores in total must add up to 1. Only include adjectives that had an impact on the term's sentiment. 
Return the final result as JSON like in this example:
[{"adjective":"great","importance":"0.50"},
{"adjective":"wonderful","importance":"0.45"},
{"adjective":"bad","importance":"0.05"}]
ONLY return the JSON.


[]


0 of  100
1
System: You are a helpful AI.
Human: Had a mouthwatering sardinian Stuffed Squid and a Catfish fiumarola from Ro

In [27]:
#Unstructured Analysis
# NOTE: this definition replaces the `execute` defined for the structured
# analysis earlier in the notebook; this one targets the
# 'unstructured_analysis' column by default.
def execute(df, prompt, start = 0, column = 'unstructured_analysis'):
    """Run `prompt` through the LLM for every pending row and store the answer.

    A row is pending when its `column` cell is missing (None/NaN) or holds the
    marker string "error" left by a previously failed attempt; every other row
    is skipped, so the function can be re-run to retry only the failures.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'prompt', 'polarity_pred' and `column`.
        Rows are addressed with ``df.loc[i, ...]`` for ``i`` in
        ``range(start, len(df))``, so a default 0..n-1 index is assumed.
        The frame is modified in place.
    prompt :
        Prompt template handed to ``LLMChain``; it is formatted with the
        variables ``user_prompt`` and ``ai_answer``.
    start : int, default 0
        First row position to process; earlier rows are left untouched.
    column : str, default 'unstructured_analysis'
        Column that receives the model output (or "error" on failure).

    Returns
    -------
    pandas.DataFrame
        The same `df` object that was passed in.
    """
    if MODEL == "gpt-3.5-turbo-instruct":
        llm = OpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, timeout=10)

    # The chain does not depend on the row, so build it once instead of per row.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
    total = len(df)

    for i in range(start, total):
        previous = df.loc[i, column]
        # pd.isna covers both None and NaN (e.g. after a CSV round-trip);
        # a plain `!= None` test would treat NaN as an existing answer.
        if not pd.isna(previous) and previous != "error":
            continue
        print(i)
        user_prompt = df.loc[i, 'prompt']
        ai_answer = str(df.loc[i, 'polarity_pred'])
        try:
            result = chain.run(user_prompt = user_prompt, ai_answer = ai_answer, callbacks=[handler])
        except Exception as e:
            # Best effort: log the failure with the prompt that caused it, mark
            # the row so a later run retries it, and move on to the next row.
            print(e)
            print(chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string())
            # Single .loc write; chained `df[column][i] = ...` is not guaranteed
            # to reach the original frame.
            df.loc[i, column] = 'error'
            continue
        handler.langfuse.flush()

        # Echo the first few exchanges so the prompt/answer format can be eyeballed.
        if i < 5:
            prompt_text = chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = ai_answer).to_string()
            print(prompt_text)
            print("\n")
            print(result)
            print("\n")

        if i % 50 == 0:
            print(str(i) + " of  " + str(total))
        df.loc[i, column] = result
    return df

# Fill in the unstructured analysis for every row that is still pending.
df = execute(df=df, prompt=prompt_unstructured)

0
System: You are a helpful AI.
Human: There were a lot of scensters who couldnt afford dinner hanging in the waiting area so we got bumped around a lot.

    What is the sentiment on 'dinner'? Only respond with "positive", "negative" or "neutral" as one word.
AI: neutral
Human: Now explain concisely how you made your prediction and explicitly mention the adjectives that had a high influence on your decision.


The sentiment analysis on the word "dinner" in the provided text is determined to be "neutral" because the context in which "dinner" is mentioned does not inherently carry a positive or negative connotation towards the meal itself. The focus of the sentence is on the situation of people who couldn't afford dinner and the inconvenience caused by being bumped around, rather than the quality or enjoyment of the dinner. There are no adjectives directly modifying "dinner" to suggest a positive or negative sentiment. The sentiment is derived more from the situation (people not affordi

In [5]:
# to_csv does not create missing folders, so make sure the output directory
# exists before writing; otherwise the finished analyses would be lost to an
# OSError at the very last step.
output_dir = os.path.dirname(OUTPUT_PATH)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_csv(OUTPUT_PATH, index=False)