In [0]:
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import transformers
from dotenv import load_dotenv
# from langchain.agents import create_csv_agent
from langchain.llms import HuggingFacePipeline
import torch
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
import os
import pandas as pd

# --- Load the raw review files ----------------------------------------------
# Every file in the directory is read as plain text. `autodetect_encoding`
# is forwarded to TextLoader so files that are not in the default encoding
# can still be read (see the TextLoader documentation for exact semantics).
text_loader_kwargs = {'autodetect_encoding': True}
loader = DirectoryLoader('/Volumes/feedback/default/test',
    loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)

docs = loader.load()

# --- Split each file into one chunk per line --------------------------------
# The constant length function together with chunk_size=1 is a deliberate
# trick: every piece "measures" 1, so the splitter is not expected to merge
# two lines into the same chunk.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1,
    chunk_overlap=0,
    length_function=lambda x: 1,  # usually len is used
    is_separator_regex=False,
)
split_docs = text_splitter.split_documents(docs)

# --- Break every line into its tab-separated fields -------------------------
# The text is read straight from `Document.page_content`. The previous
# version wrapped the documents in a pandas DataFrame and picked the text out
# of the first (field_name, value) tuple, which silently depends on the field
# order of the Document class.
modified_docs = []
for doc in split_docs:
    stripped_fields = (field.strip() for field in doc.page_content.split('\t'))
    # Drop empty / whitespace-only fields; keep one list per line.
    modified_docs.append([field for field in stripped_fields if field])

# Columns are positional (0, 1, 2, ...). Lines with fewer fields than the
# longest line are padded with the literal 'unknown'.
data = pd.DataFrame(modified_docs).fillna('unknown')

# The review text is fields 1 and 2 concatenated.
# NOTE(review): the two fields are joined with no separator, so the end of
# field 1 runs directly into the start of field 2. Presumably these are the
# review title and body - confirm, and consider joining with "\n".
data["review"] = data[1] + data[2]

# --- Persist the reviews as a Delta table -----------------------------------
# createDataFrame needs a pandas DataFrame here: a bare Series is iterated as
# plain strings, for which Spark cannot infer a schema. `data[["review"]]`
# is the one-column DataFrame.
spark_df = spark.createDataFrame(data[["review"]])
spark_df.write.mode("overwrite").format("delta").saveAsTable("hive_metastore.chatbot.review")

In [0]:
# Few-shot examples shown to the model before the review to analyse.
# NOTE(review): these examples only list aspect names ("Relevant Aspects are
# ..."), whereas the instruction prefix below asks for
# (aspect, sentiment label, evidence) results in dict format. The examples
# therefore do not demonstrate the requested output shape - consider
# rewriting them to do so.
examples = [
    {
        "review": '''Efficient and Affordable Chatbot Solution
Tried out this chatbot for a project, and it exceeded expectations. The platform is a bit remote in terms of user interface, but if you're willing to invest a bit of time to navigate, it's an excellent value for money. No need to worry about additional costs – the service is budget-friendly, and the chatbot's responses are modern, clear, and efficient. The interface is user-friendly, making it a great choice for those who don't want to splurge on a high-end AI service but still seek a reliable and functional chatbot solution. Additionally, the platform offers free integration with other tools, a definite plus for users looking to streamline their processes.''',
        "aspects": '''Relevant Aspects are user interface, affordability, value for money, efficiency, user-friendliness, integration options, target audience''',
    },
    {
        "review": '''Accessible and Budget-Friendly Language Learning Chatbot
Recently used this language learning chatbot and was pleasantly surprised. While the platform may seem a bit remote in terms of the app store, it's an unbeatable option for those on a budget. The chatbot provides a cost-effective way to practice and improve language skills without shelling out for expensive courses. The lessons are modern, interactive, and the user interface, although not as flashy as premium options, is intuitive and straightforward. Plus, the chatbot offers free additional resources, making it an excellent choice for language enthusiasts looking for quality without breaking the bank.''',
        "aspects": '''Relevant Aspects are app store accessibility, budget-friendliness, value for money, lesson quality, user interface, free resources, target audience''',
    },
]

# Layout used to render each example: the review followed by its aspects.
prompt_template = '''
Review:{review}
{aspects}
'''
example_prompt = PromptTemplate(
    input_variables=['review', 'aspects'],
    template=prompt_template,
)

# Full prompt = instruction prefix + rendered examples + the new review.
# The prefix is split across adjacent string literals purely for
# readability; apart from the "chotbot" -> "chatbot" typo fix, the
# concatenated text is unchanged.
final_prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=(
        "Analyze the provided chatbot review and identify the following aspects: "
        "User interface, App store accessibility, Affordability, Value for money, "
        "Efficiency, User-friendliness, and Integration options. "
        "Perform sentiment analysis on each aspect, assigning sentiment labels "
        "['positive', 'negative', 'neutral'] based on the reviewer's opinion. "
        "Next please show the evidence from the review for sentiment labels. "
        "The final output should consist of pairs, associating each mentioned aspect "
        "with its corresponding sentiment label and evidence, presented as "
        "(aspect, sentiment label, evidence). "
        "For example, if the review mentions that the User interface is 'convenient', "
        "the output should include (User interface, positive, 'convenient'). "
        "Just return result in dict format. "
        "Aspects not explicitly mentioned in the review should be excluded from the output."
    ),
    suffix="Review: {review}\n",
    input_variables=["review"],
)

# Alternative backend (disabled): run Dolly locally through a transformers
# text-generation pipeline instead of calling a hosted model.
# model_local_path = "databricks/dolly-v2-12b"
#
# tokenizer = transformers.AutoTokenizer.from_pretrained(model_local_path)
#
# pipeline = transformers.pipeline("text-generation",
#                                   model=model_local_path,
#                                   tokenizer=tokenizer,
#                                   torch_dtype=torch.bfloat16,
#                                   trust_remote_code=True,
#                                   device_map="auto",
#                                   max_new_tokens=1000)
#
# local_llm = HuggingFacePipeline(pipeline=pipeline)

# Connection settings are read from the environment (optionally populated
# from a local .env file) so the cell runs on a fresh kernel and no secret is
# stored in the notebook. Previously `api_key`, `model_name` and
# `api_version` were used here without being defined in any cell.
# The variable names below are this notebook's convention - adjust them to
# match your deployment.
load_dotenv()
api_key = os.environ["OPENAI_API_KEY"]          # required
api_version = os.environ["OPENAI_API_VERSION"]  # required
# Falls back to the same identifier that is passed as `engine` below.
model_name = os.getenv("OPENAI_MODEL_NAME", "gpt-4")

# Initiate a connection to the LLM via LangChain.
# NOTE(review): the original comment says this targets Azure OpenAI Service,
# but the generic `OpenAI` wrapper is used; LangChain also ships dedicated
# Azure wrappers. Confirm which one matches the deployment.
llm = OpenAI(
    openai_api_key=api_key,
    model_name=model_name,
    openai_api_version=api_version,
    engine="gpt-4"
)

# Chain that fills the few-shot prompt with a review and returns the model's
# answer under the 'aspects' output key.
aspects_extraction_chain = LLMChain(llm=llm, prompt=final_prompt, output_key='aspects')

In [0]:
# Work on the first ten rows of the `review` column, kept as a one-column frame.
reviews_df = data['review'].to_frame().head(10)

# Run the extraction chain once per review, pairing each review with the
# model's raw answer.
predictions = []
for review_text in reviews_df['review']:
    predictions.append({
        'review': review_text,
        'aspect,sentiment,evidence': aspects_extraction_chain.predict(review=review_text),
    })

# One row per review: the input text and the chain output.
predictions_df = pd.DataFrame(predictions)