In [53]:
import os
from dotenv import load_dotenv
from langfuse.callback import CallbackHandler
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.prompts import PromptTemplate
from langfuse.callback import CallbackHandler
from langchain_openai import ChatOpenAI
import pandas as pd
import prompts as p
import time

# Load the .env file so the credentials checked below are visible in os.environ
load_dotenv()

# Fail fast with a readable message when a credential is missing.
# The previous self-assignment (os.environ[k] = os.environ.get(k)) was a no-op
# for present keys and raised "TypeError: str expected, not NoneType" for
# absent ones, which hid the name of the missing variable.
REQUIRED_ENV_VARS = ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "OPENAI_API_KEY")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in the environment or in the .env file."
    )

# Langfuse callback handler that is attached to every LLM call below.
# NOTE(review): auth_check() presumably validates the Langfuse credentials
# up front - confirm against the installed langfuse version.
handler = CallbackHandler()
handler.auth_check()

# Input dataset (XML) and destination of the evaluation results (CSV)
DATA_PATH = "../Datasets/MAMS-ATSA/test/test.xml"
OUTPUT_PATH = "../Datasets/Evaluations/Sentiment_Analysis_Zero_shot.csv"

# Settings handed to ChatOpenAI in execute(): model name, cap on generated
# tokens, and sampling temperature.
MODEL = "gpt-3.5-turbo"
MAX_TOKENS = 15
TEMP = 0



In [44]:
import xml.etree.ElementTree as ET
import pandas as pd

def parse_xml_to_dataframe(xml_file_path):
    """Read an aspect-term XML file into a sentence-level DataFrame.

    Every ``sentence`` element directly under the root becomes one row. The
    row holds the content of the sentence's ``text`` child and a dict of four
    parallel lists (``from``, ``to``, ``polarity``, ``term``) with one entry
    per ``aspectTerms/aspectTerm`` element; ``from`` and ``to`` are converted
    to ``int``. A sentence without aspect terms gets four empty lists.

    Parameters
    ----------
    xml_file_path :
        Source handed to ``ET.parse``.

    Returns
    -------
    pandas.DataFrame
        Columns ``text`` and ``aspect_terms``.
    """
    document_root = ET.parse(xml_file_path).getroot()

    sentence_texts = []
    sentence_aspects = []

    for sentence_node in document_root.findall('./sentence'):
        sentence_text = sentence_node.find('text').text

        # One (from, to, polarity, term) tuple per aspect term; the attributes
        # are read per node, in this order.
        aspect_rows = [
            (
                int(node.get('from')),
                int(node.get('to')),
                node.get('polarity'),
                node.get('term'),
            )
            for node in sentence_node.findall('./aspectTerms/aspectTerm')
        ]

        sentence_texts.append(sentence_text)
        sentence_aspects.append({
            'from': [row[0] for row in aspect_rows],
            'to': [row[1] for row in aspect_rows],
            'polarity': [row[2] for row in aspect_rows],
            'term': [row[3] for row in aspect_rows],
        })

    return pd.DataFrame({'text': sentence_texts, 'aspect_terms': sentence_aspects})

# Parse the dataset configured in DATA_PATH into a sentence-level DataFrame
xml_file_path = DATA_PATH
df = parse_xml_to_dataframe(xml_file_path)

# Sanity check: show the aspect-term record of the first sentence
print(df["aspect_terms"].iloc[0])





{'from': [18, 68, 81], 'to': [24, 72, 93], 'polarity': ['neutral', 'positive', 'positive'], 'term': ['drinks', 'roll', 'cripsy squid']}


In [45]:
# Transform the DataFrame: one row per (sentence, aspect term) pair

def transformDF(df):
    """Flatten a sentence-level frame into one row per aspect term.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``text`` column and an ``aspect_terms`` column whose values
        are dicts holding parallel ``term`` and ``polarity`` lists. Any index
        is accepted; rows are processed in their stored order.

    Returns
    -------
    pandas.DataFrame
        Columns ``text_id`` (0-based position of the sentence in ``df``),
        ``term_id`` (running 0-based counter over all aspect terms), ``text``,
        ``term`` and ``polarity``. Sentences without aspect terms produce no
        rows.

    Raises
    ------
    IndexError
        If a ``polarity`` list is shorter than its ``term`` list.
    """
    text_id = []
    texts = []
    terms = []
    polarities = []

    # Walk both columns by position. The previous df[col][i] lookups were
    # label-based, so a frame without a default 0..n-1 index (e.g. after
    # filtering) raised KeyError or picked the wrong rows.
    for sentence_pos, (text, aspects) in enumerate(zip(df["text"], df["aspect_terms"])):
        sentence_polarities = aspects["polarity"]
        for j, term in enumerate(aspects["term"]):
            text_id.append(sentence_pos)
            texts.append(text)
            terms.append(term)
            polarities.append(sentence_polarities[j])

    df_transformed = pd.DataFrame({
        'text_id': text_id,
        # term_id is simply the position of the row in the flattened output
        'term_id': list(range(len(terms))),
        'text': texts,
        'term': terms,
        'polarity': polarities,
    })
    return df_transformed

# Flatten to one row per aspect term, then add the still-empty columns that
# the prediction loop fills in.
df = transformDF(df)
for result_column in ('polarity_pred', 'prompt_name', 'prompt'):
    df[result_column] = None


        


In [46]:
# Look the template up by its name so the stored label and the template that
# is actually used cannot drift apart.
prompt_name = "prompt_3_zeroshot_single_term"
prompt = getattr(p, prompt_name)
print(prompt)


input_variables=['entity', 'input_text'] template='{input_text}\n\n    What is the sentiment on \'{entity}\'? Only respond with "positive", "negative" or "neutral" as one word.'


In [64]:
def _polarity_from_response(response):
    """Map a raw model response to 'positive', 'negative', 'neutral' or 'error'.

    Matching is a case-insensitive substring search. When several labels occur
    in the response, the first one in the order positive, negative, neutral
    wins; a response containing none of them yields 'error'.
    """
    lowered = response.lower()
    for label in ("positive", "negative", "neutral"):
        if label in lowered:
            return label
    return 'error'


def execute(df, prompt, prompt_name, start = 0):
    """Predict the polarity of every aspect term in ``df`` with the chat model.

    The function is resumable: rows that already hold a prediction other than
    ``"error"`` are skipped, so it can be called again after a failure.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``text``, ``term``, ``polarity_pred``,
        ``prompt_name`` and ``prompt``. It is modified in place.
    prompt :
        Prompt template given to ``LLMChain``; it is formatted with the
        variables ``input_text`` and ``entity``.
    prompt_name : str
        Label written to the ``prompt_name`` column of every processed row.
    start : int, optional
        Position of the first row to process (default 0).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``polarity_pred``, ``prompt_name`` and
        ``prompt`` filled for every processed row. ``polarity_pred`` is
        ``"error"`` when the call raised or the answer held no known label.
    """
    llm = ChatOpenAI(model_name = MODEL, temperature = TEMP, max_tokens=MAX_TOKENS, timeout=10)
    # The chain does not depend on the row, so build it once instead of once
    # per iteration.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])

    for i in range(start, len(df)):
        # Resolve the position to its index label once and use .loc for every
        # read and write, instead of mixing df[col][i] reads with .loc writes.
        row = df.index[i]

        previous = df.loc[row, "polarity_pred"]
        # pd.notna treats both None and NaN as "not predicted yet". The old
        # `!= None` test let NaN through as if it were a prediction, so rows
        # of a frame reloaded from CSV were silently skipped.
        if pd.notna(previous) and previous != "error":
            continue

        time.sleep(0.1)  # small pause between consecutive API calls
        print(i)

        input_text = df.loc[row, "text"]
        entity = df.loc[row, "term"]
        # Format once; the text is stored on both the success and error path.
        prompt_text = chain.prompt.format_prompt(input_text = input_text, entity = entity).text

        try:
            result = chain.run(input_text = input_text, entity = entity, callbacks=[handler])
        except Exception as e:
            # Deliberate best effort: report the failure, mark the row as
            # "error" so a later call retries it, and keep going.
            print(e)
            y_pred = 'error'
        else:
            handler.langfuse.flush()
            # Echo the first few prompt/response pairs for a visual check
            if (i < 5):
                print(prompt_text)
                print("\n")
                print(result)
                print("\n")

            if (i % 50 == 0):
                print(str(i) + " of  " + str(len(df)))

            y_pred = _polarity_from_response(result)

        df.loc[row, 'polarity_pred'] = y_pred
        df.loc[row, 'prompt_name'] = prompt_name
        df.loc[row, 'prompt'] = prompt_text
    return df




# Run the predictions; rows that already hold a non-"error" prediction are
# skipped, so this cell can be re-run to retry failed rows.
df = execute(df, prompt=prompt, prompt_name=prompt_name)
    



In [63]:
# Display the frame to inspect the gold labels next to the predictions
df

Unnamed: 0,text_id,term_id,text,term,polarity,polarity_pred,prompt_name,prompt
0,0,0,"After a couple of drinks, the apps--I like the...",drinks,neutral,positive,prompt_3_zeroshot_single_term,"After a couple of drinks, the apps--I like the..."
1,0,1,"After a couple of drinks, the apps--I like the...",roll,positive,positive,prompt_3_zeroshot_single_term,"After a couple of drinks, the apps--I like the..."
2,0,2,"After a couple of drinks, the apps--I like the...",cripsy squid,positive,positive,prompt_3_zeroshot_single_term,"After a couple of drinks, the apps--I like the..."
3,1,3,The basil pepper mojito was a little daunting ...,basil pepper mojito,negative,positive,prompt_3_zeroshot_single_term,The basil pepper mojito was a little daunting ...
4,1,4,The basil pepper mojito was a little daunting ...,flavor,positive,positive,prompt_3_zeroshot_single_term,The basil pepper mojito was a little daunting ...
...,...,...,...,...,...,...,...,...
1327,498,1327,The Food The menu is better suited to the snac...,bar,neutral,neutral,prompt_3_zeroshot_single_term,The Food The menu is better suited to the snac...
1328,499,1328,Another favorite is the hanger steak which is ...,hanger steak,positive,positive,prompt_3_zeroshot_single_term,Another favorite is the hanger steak which is ...
1329,499,1329,Another favorite is the hanger steak which is ...,amount of arugula,positive,positive,prompt_3_zeroshot_single_term,Another favorite is the hanger steak which is ...
1330,499,1330,Another favorite is the hanger steak which is ...,a balsamic vinegar reduction served,neutral,positive,prompt_3_zeroshot_single_term,Another favorite is the hanger steak which is ...


In [65]:
# Make sure the destination folder exists so the save cannot fail on a missing
# directory after the long prediction run.
output_dir = os.path.dirname(OUTPUT_PATH)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Persist the results; the row index is written as the first CSV column.
df.to_csv(OUTPUT_PATH, index = True)