In [1]:
import pandas as pd
import error_prompts as p
import os
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langfuse.callback import CallbackHandler
from langchain_openai import ChatOpenAI
import pandas as pd

# Load the .env file
load_dotenv()

# Access the environment variables
os.environ["LANGFUSE_PUBLIC_KEY"] = os.environ.get("LANGFUSE_PUBLIC_KEY")
os.environ["LANGFUSE_SECRET_KEY"] = os.environ.get("LANGFUSE_SECRET_KEY")
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")

handler = CallbackHandler()
handler.auth_check()


prompt_name_structured = 'structured_analysis'
prompt_structured = p.structured_analysis
prompt_name_unstructured = 'unstructured_analysis'
prompt_unstructured = p.unstructured_analysis

file = "singleterm_dynamic_fewshot_4-turbo-preview"

DATA_PATH = f'../../Datasets/Evaluations/Sentiment Analysis/{file}.csv'
OUTPUT_PATH = f"../../Datasets/Evaluations/Sentiment Analysis/Error_Analysis/structured_unstructured/structured_unstructured_{file}.csv"

MODEL = "gpt-4-turbo-preview"
TEMP = 0





In [2]:
df = pd.read_csv(DATA_PATH)
df['error'] = None
df['error'] = ['correct' if polarity == polarity_pred else 'error' for polarity, polarity_pred in zip(df['polarity'], df['polarity_pred'])]

print(df['error'].value_counts())

df_correct = df[df['error'] == 'correct']
df_error = df[df['error'] == 'error']



df_error_sample = df_error.sample(n=50)
df_correct_sample = df_correct.sample(n=50)
df = pd.concat([df_error_sample, df_correct_sample])
df['structured_analysis'] = None
df['unstructured_analysis'] = None
df = df.reset_index(drop=True)




error
correct    263
error      188
Name: count, dtype: int64


In [3]:
print(df.shape)
print(df.head())

(100, 22)
   Unnamed: 0.1  Unnamed: 0  text_id  term_id  \
0           426         426      161      426   
1           269         269      102      269   
2            30          30       10       30   
3           378         378      144      378   
4            38          38       13       38   

                                                text        term  polarity  \
0  Had a party of 7 people for dinner here on a b...      dinner   neutral   
1  about 10 minutes apart each, so we were all ea...        food   neutral   
2  Ok I got the edamame and something from the su...  sushi chef  positive   
3  Aside from the fact the maitre de claimed the ...      maitre   neutral   
4  We preferred to gaze at our burgers while avoi...     burgers   neutral   

   example1_term_id                                    example1_prompt  \
0              2979  Input: "Went here for a casual Sunday night di...   
1              8605  Input: "The staff were all partying with each ...   
2   

In [4]:
#Structured Analysis
def execute(df, prompt, start = 0):
    if(MODEL == "gpt-3.5-turbo-instruct"):
        llm = OpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    for i in range(len(df)):
        if(df["structured_analysis"][i] != None and df["structured_analysis"][i] != "error"):
            continue
        print(i)
        chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
        user_prompt = df.loc[i, 'prompt']
        ai_answer = df.loc[i, 'polarity_pred']
        try: 
            result = chain.run(user_prompt = user_prompt, ai_answer = str(ai_answer), callbacks=[handler])
        except Exception as e:
            print(e)
            print(chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = str(ai_answer)).to_string())
            df['structured_analysis'][i] = 'error'
            continue
        handler.langfuse.flush()
        prompt_text = chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = str(ai_answer)).to_string()
        if (i < 5):
            print(prompt_text)
            print("\n")
            print(result)
            print("\n")
        
        if (i % 50 == 0):
            print(str(i) + " of  " + str(len(df)))
        df.loc[i, 'structured_analysis']= result
    return df


df = execute(df, prompt_structured)

    



0


  warn_deprecated(
  warn_beta(


System: You are a helpful AI.
Human: Example 1:
Input: "Went here for a casual Sunday night dinner at 7:45pm; dinner was served at 10:15pm!"
Term: Sunday
Output: neutral


Example 2:
Input: "The meal was improperly served."
Term: meal
Output: neutral


Example 3
Input: "The service wasn't good -- dumplings were served after we had almost finished the main courses, drinks had to be asked for three times, etc."
Term: service
Output: negative

    
Task:
Input: Had a party of 7 people for dinner here on a busy night for the restaurant, and our meal was excellent and served with extreme consistency (all appetizers and main courses were served at the right times, with none of the dishes served at the wrong temperature).
Prompt: What is the sentiment in the text towards 'dinner'? Only respond with "positive", "negative" or "neutral" as one word.
AI: positive
Human: Explain your prediction in a
structured format, listing words or word groups you used for your decision and how important you de

In [7]:
#Unstructured Analysis
def execute(df, prompt):
    if(MODEL == "gpt-3.5-turbo-instruct"):
        llm = OpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    else:
        llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, timeout=10)
    for i in range(len(df)):
        if(df["unstructured_analysis"][i] != None and df["unstructured_analysis"][i] != "error"):
            continue
        print(i)
        chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])
        user_prompt = df.loc[i, 'prompt']
        ai_answer = df.loc[i, 'polarity_pred']
        try: 
            result = chain.run(user_prompt = user_prompt, ai_answer = str(ai_answer), callbacks=[handler])
        except Exception as e:
            print(e)
            print(chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = str(ai_answer)).to_string())
            df['unstructured_analysis'][i] = 'error'
            continue
        handler.langfuse.flush()
        prompt_text = chain.prompt.format_prompt(user_prompt = user_prompt, ai_answer = str(ai_answer)).to_string()
        if (i < 5):
            print(prompt_text)
            print("\n")
            print(result)
            print("\n")
        
        if (i % 50 == 0):
            print(str(i) + " of  " + str(len(df)))
        df.loc[i, 'unstructured_analysis']= result
    return df

df = execute(df, prompt_unstructured)

32
33
Request timed out.
System: You are a helpful AI.
Human: Example 1:
Input: "The price is cheap - 5 dumplings for $1."
Term: price
Output: positive


Example 2:
Input: "The menu: HOT DOGS, that's it, nothing else."
Term: menu
Output: negative


Example 3
Input: "And who else has this great wine list to go with pizza?"
Term: wine list
Output: positive

    
Task:
Input: There is nothing else on the menu except for exotic teas and cold beverages, but with a meal this cheap and delightful, who would care?
Prompt: What is the sentiment in the text towards 'exotic teas'? Only respond with "positive", "negative" or "neutral" as one word.
AI: positive
Human: Now explain concisely how you made your prediction and explicitly mention the words or word groups that had a high influence on your decision.
34


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['unstructured_analysis'][i] = 'error'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['unstructured_analy

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
50 of  100
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [5]:
df.to_csv(OUTPUT_PATH, index=False)