# Comparing the original product descriptions with the ones generated by the model

## Getting started
To get started, simply run the cell below to get set up. All the work will be done further down in this notebook.

In [32]:
import openai 
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import AzureChatOpenAI
from langchain.callbacks import get_openai_callback

import pandas as pd
import numpy as np
import json
import time

from IPython.display import display, Markdown, HTML


class product_description_agent:
    """Pair a two-message chat prompt with an Azure OpenAI chat deployment.

    The resulting chain (``prompt | model``) is stored on ``self.chain`` and
    is invoked by ``evaluate_inputs`` with the ``reference_text`` and
    ``text_to_compare`` variables.
    """

    def __init__(self, input_base_url, input_api_key, deployment, system_context, human_message, temperature=0):
        """Build the prompt template and the Azure chat model, then chain them.

        Args:
            input_base_url: Azure OpenAI endpoint, passed as ``openai_api_base``.
            input_api_key: API key, passed as ``openai_api_key``.
            deployment: Azure deployment name, passed as ``deployment_name``.
            system_context: Text of the "system" message of the prompt.
            human_message: Text of the "human" message of the prompt.
            temperature: Model temperature (defaults to 0).
        """
        messages = [
            ("system", system_context),
            ("human", human_message),
        ]
        template = ChatPromptTemplate.from_messages(messages)

        llm = AzureChatOpenAI(
            temperature=temperature,
            openai_api_base=input_base_url,
            openai_api_version="2023-07-01-preview",
            deployment_name=deployment,
            openai_api_key=input_api_key,
            openai_api_type="azure",
        )

        self.chain = template | llm

    def evaluate_inputs(self, texts):
        """Run the chain on the first two entries of ``texts``.

        Args:
            texts: Indexable collection whose items 0 and 1 are mappings with
                a ``"LONGDESCRIPTION"`` key; item 0 is sent as the reference
                text and item 1 as the text to compare.

        Returns:
            The ``content`` attribute of the chain's response.
        """
        reference = texts[0]["LONGDESCRIPTION"]
        candidate = texts[1]["LONGDESCRIPTION"]
        payload = {
            "reference_text": reference,
            "text_to_compare": candidate,
        }
        response = self.chain.invoke(payload)
        return response.content


def process_output(output):
    """Display every evaluation entry in the notebook.

    For each value of ``output`` (a mapping with ``"title"`` and ``"result"``
    keys) the title is shown as a Markdown level-4 heading, the result is
    shown as HTML, and a dashed separator line is printed.
    """
    separator = "\n ----------------------------------------------------------\n"
    for entry in output.values():
        heading = Markdown(f"#### {entry['title']}")
        display(heading)
        display(HTML(entry["result"]))
        print(separator)


def run(inputs,
        baseurl,
        apikey,
        deployment,
        system_message,
        human_message,
        temperature):
    """Evaluate one pair of product descriptions and display the result.

    Args:
        inputs: Mapping whose values are row dicts (each with ``"TITLE"`` and
            ``"LONGDESCRIPTION"`` keys). Iteration order matters: the first
            value is used as the reference text and the second as the text to
            compare.
        baseurl: Azure OpenAI endpoint.
        apikey: Azure OpenAI API key.
        deployment: Azure deployment name.
        system_message: System prompt text.
        human_message: Human prompt template.
        temperature: Model temperature.

    Returns:
        A one-entry dict keyed by the last key of ``inputs``, holding the
        ``"title"`` of the last input and the model's ``"result"`` text.

    Raises:
        ValueError: If ``inputs`` holds fewer than two entries.
    """
    texts = list(inputs.values())
    # Fail early with a clear message instead of an IndexError/NameError
    # raised only after the model has been constructed.
    if len(texts) < 2:
        raise ValueError(
            "run() needs at least two inputs (reference text and text to "
            f"compare), got {len(texts)}"
        )

    # The output entry is labelled with the last input's key and title.
    last_key = next(reversed(inputs))
    title = texts[-1]["TITLE"]

    model = product_description_agent(baseurl, apikey, deployment, system_message, human_message, temperature)

    # The model call stays inside the OpenAI callback context as before.
    with get_openai_callback():
        result = model.evaluate_inputs(texts)

    output = {
        last_key: {
            "title": title,
            "result": result,
        }
    }

    process_output(output)

    return output

def convert_dfrow_to_json(dfrow):
    """Return a single-row DataFrame as a plain dict.

    The frame is serialised as JSON Lines (one record per line) and parsed
    back with ``json.loads``.
    """
    serialized = dfrow.to_json(orient='records', lines=True)
    return json.loads(serialized)

def generate_input_files(file_names, row_number):
    """Read row ``row_number`` from each CSV file and return it as a dict.

    Args:
        file_names: Iterable of CSV file paths.
        row_number: Positional index of the row to extract from every file.

    Returns:
        A dict mapping each file path to the converted row.
    """

    def _load_row(path):
        # Turn the selected row (a Series) back into a one-row DataFrame.
        frame = pd.read_csv(path, index_col=False)
        row_frame = pd.DataFrame(frame.iloc[row_number]).T
        return convert_dfrow_to_json(row_frame)

    return {path: _load_row(path) for path in file_names}


In [33]:
import os
from dotenv import load_dotenv

# Read the Azure OpenAI endpoint and key from the local .env file.
load_dotenv("../azureopenaiapikey.env")
BASE_URL = os.environ.get("OPENAI_API_BASE")
API_KEY = os.environ.get("OPENAI_API_KEY")

# Deployment (model) to use:
#  - GPT 4:   "gpt-4"
#  - GPT 3.5: "gpt-35-turbo-16k"
deployment = "gpt-35-turbo-16k"

# Model temperature controls how "creative" the model may be (a number between 0 and 1):
#  - 0 keeps the model as factual as possible
#  - 1 lets the model be fully creative
temperature = 0.1

# This is the context passed to the model that "sets the scene".
# Typically a chatbot has a system context similar to "you are a helpful AI assistant".
# This text contains no {curly bracket} placeholders: the two texts to compare are
# injected through the human message template, not here. It is deliberately a plain
# string (not an f-string) so that any placeholder added later is left for the prompt
# template to fill instead of being evaluated immediately by Python.
# NOTE(review): the wording below (including its spelling) is sent to the model as-is
# and defines the expected answer format, so it is kept unchanged.
system_context = """
    You are an advanced AI that specializes in understanding, scoring and comparing the content of two pieces of text.
    These two texts are product descriptions for a business product.The first text is the original product description,
    and the second text is the product description that has been generated by a large language model.You have three tasks to do:
    
    Task1: Score each of the texts based on their selling sentiment,grammer and structure,information quality and formatting.
    Use the following definitions and scales to evaluate each criterion:
    
    Selling sentiments:Written in a way that evokes emotional responses that enhance likelihood of sales conversion, leveraging persuasive language and relatable messaging
    1.Neutral Explanation:  The text is neutral and does not evoke any emotional response.
    2.Slightly Informative: The text is slightly informative and evokes a slight emotional response.
    3.Moderately Descriptive:  The text is moderately descriptive and evokes a moderate emotional response.
    4.Fairly Detailed:  The text is fairly detailed and evokes a fairly strong emotional response.
    5.Highly Expressive:  The text is highly expressive and evokes a strong emotional response.
    
    Grammer and structure: Clarity of expression and an effective communication style, conveying information in a compelling and engaging manner.
    1. Poor: The text is poorly written with lots of gramatical, spelling and punctuation mistakes and is difficult to understand.
    2. Fair: The text is fairly written with some gramatical, spelling and punctuation mistakes and is somewhat difficult to understand.
    3. Good: The text is well written with few gramatical, spelling and punctuation mistakes and is easy to understand.
    4. Very Good: The text is very well written with no gramatical, spelling and punctuation mistakes and is very easy to understand.
    5. Excellent: The text is excellently written with no gramatical, spelling and punctuation mistakes and is very easy to understand.
    
    Information quality: Provide valuable knowledge and insights, offering the potential customers information that improves their understanding of the given product.
    1. Poor: The text does not provide any valuable knowledge and insights about the product.
    2. Fair: The text provides some valuable knowledge and insights about the product.
    3. Good: The text provides good valuable knowledge and insights about the product.
    4. Very Good: The text provides very good valuable knowledge and insights about the product.
    5. Excellent: The text provides excellent valuable knowledge and insights about the product.
    
    Formatting: Employing clear headings, structural elements to enhance readability and comprehension for the potential customers.
    1. Poor: The text does not employ any formatting.
    2. Fair: The text employs some  formatting.
    3. Good: The text employs good  formatting.
    4. Very Good: The text employs very good formatting.
    5. Excellent: The text employs excellent formatting.
    
    Task2: Add two more scores to the second text based on the following criteria:formality and hallucinations.
    Use the following definitions and scales to evaluate each criterion:
    
    Foramlity: Maintain a professional and formal tone by avoiding the use of emojis
    0. No: The text is informal and uses emojis.
    1. Yes: The text is formal and does not use emojis.
    
    Hallucinations: Ensuring that there are no information or attributes that are not accurate or true compared to the original product description.
    score between 0 and 100%, where 0 means that the text is completely accurate and does not have extra incorrect info. compared to the original text.
    and 100% means that the text has high hunallucinations.
    
    Task 3: Provide your justification after scoring all the criteria per text.
    The answer MUST be according to the predefined metrics as in the following example and DO NOT change the format:
    
    Reference Text: 
    Selling sentiments: 4
    Grammer and structure: 4
    Information quality: 5
    Formatting: 4
    Justification: The refernce text has good grammer and structure, high information quality and good formatting.
    
    Model output Text: 
    Selling sentiments: 4
    Grammer and structure: 3
    Information quality: 3
    Formatting: 4
    Formality: 1
    Hallucinations: 10 percent (this is about it has not used one of the key features in the original text.)
    Justification: The model output text has good selling sentiments and good formatting. It is formal as it does not use emojis,However, it has some grammer and structure mistakes and some hallucinations.
"""


# Human-turn prompt template. The {reference_text} and {text_to_compare}
# placeholders are filled in by product_description_agent.evaluate_inputs
# with the "LONGDESCRIPTION" of the first and second input respectively.
human_message = """Please evaluate these two texts:(Reference Text and Model output Text) 

and provide your scores in terms of selling sentiment,
grammer and structure,information quality and formatting.
Also add two more scores to the Model output Text based on the following criteria:formality and hallucinations.
Wrtie your justification after scoring all the criteria for each text.


```
Reference Text: {reference_text}
Model output Text: {text_to_compare}
```
"""



In [34]:
# Adjust the file paths below to match the location of your files.

In [35]:
# Reference data: the original product descriptions.
# (The two path values were previously swapped, so the model output was
# being scored as the reference text.)
original_file_path = "../test/full_testset/testset_product_medium_desc_original.csv"
# Model-generated descriptions to score against the reference;
# we can change the output file path to test different outputs
output_file_path = "../test/full_testset/test_results/ouput_testset_med_desc_gpt3.5.csv"

# Order matters: the first file supplies the reference text and the second
# one the text to compare.
input_file_names = [original_file_path, output_file_path]
# n is the number of products to evaluate (rows are taken from the start of
# the input files, so n must not exceed the number of products in them)
n = 3
for row_number in range(n):
    inputs = generate_input_files(input_file_names, row_number)
    result = run(inputs, BASE_URL, API_KEY, deployment, system_context, human_message, temperature)

#### DELTACO e-Charge, cable type 2 - type 2, 1 phase, 32A, 10M


 ----------------------------------------------------------



#### Tado Smart Radiator Thermostat StarterKit V3+


 ----------------------------------------------------------



#### SteelSeries Aerox 3 gaming mouse 2022 Edition (Onyx)


 ----------------------------------------------------------

