# In [None]:
#Final 95% accuracy code.

import time
from tqdm import tqdm
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from sklearn.metrics import accuracy_score
import pandas as pd

# Record the wall-clock start; the elapsed run time is reported at the end of the script.
start_time = time.time()

# gemma2 served through Ollama, temperature 0, with JSON-formatted replies requested.
# NOTE(review): base_url is an empty string here -- presumably a placeholder for
# the Ollama server address; confirm before running.
chat = ChatOllama(
    model='gemma2',
    temperature=0.0,
    format="json",
    base_url='',
)

# The two fields the model must return: the verdict and a short justification.
response_schemas = [
    ResponseSchema(
        name="Decision",
        description="reject or accept the review based on the rules.",
    ),
    ResponseSchema(
        name="Reason",
        description="Brief reason for the decision (e.g., product mismatch, generic product term, etc.).",
    ),
]
# Keep the individual schema names available alongside the list.
review_decision, review_reason = response_schemas

# Parser for the model reply, plus the format instructions that are injected
# into the prompt through the {format_instructions} placeholder.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Prompt text; placeholders: {product_name}, {review}, {format_instructions}.
prompt_temp = """
Your task is to evaluate customer reviews and determine if they match the correct product.

**Product Name:** {product_name}
**Review Text:** {review}

### Instructions:

1. **reject** the review if it explicitly refers to a completely different product, even if the feedback is positive or neutral (e.g., "Hand chopper" when the product is "Stainless Steel Gas Lighter").
2. **reject** the review if it uses a generic product category or brand to describe a different product (e.g., using "Maggie" for any noodles or "Maza" for all drinks).
3. **reject** the review if it is completely unrelated to the product or discusses something irrelevant (e.g., the review talks about another category or doesn't make sense).
4. **accept** the review if it contains any valid **product-related complaint**, even if the specific product name is not mentioned, as long as the complaint matches the context (e.g., wrong product received, wrong size, damaged product, missing product).
5. **accept** the review if it discusses issues related to **delivery, service, wrong product received, returns, refunds, shipping issues**, or dissatisfaction with **JioMart**, even if the original product name is not mentioned explicitly.
6. **accept** the review even if there are minor spelling errors, provided that the complaint or feedback context matches the correct product.

The review sentiment should be evaluated based on these guidelines. Below is the customer review delimited by triple backticks: ```{review}```.

Provide your decision strictly in JSON format with the following structure:
{format_instructions}
"""

# Compile the raw text into a reusable chat prompt template.
prompt_template = ChatPromptTemplate.from_template(prompt_temp)

# Step 5: Function to check each review and return decision and reason
def check_review_vs_description(review, product_name):
    """Ask the chat model whether *review* matches *product_name*.

    The module-level prompt template is filled with the product name, the
    review text and the parser's format instructions, then sent to the
    module-level chat model.

    Returns:
        A ``(decision, reason)`` tuple. On success these are the "Decision"
        and "Reason" fields of the parsed reply. If the model returns no
        content the tuple is ``("invalid", "No response from model")``. If
        the reply cannot be parsed, the error is printed and the tuple is
        ``("invalid", "Parsing error")``.
    """
    # NOTE(review): the formatted prompt is passed to chat.invoke as-is;
    # confirm `format` (rather than `format_messages`) is the intended call.
    filled_prompt = prompt_template.format(
        product_name=product_name,
        review=review,
        format_instructions=format_instructions,
    )
    reply = chat.invoke(filled_prompt)

    # Guard clause: nothing usable came back from the model.
    if not (reply and reply.content):
        return "invalid", "No response from model"

    try:
        fields = output_parser.parse(reply.content)
        verdict = fields.get('Decision', 'invalid')
        explanation = fields.get('Reason', 'No reason provided')
    except Exception as e:
        # Any failure while parsing or reading the fields marks the row
        # as invalid instead of aborting the caller's loop.
        print(f"Error parsing response: {e}")
        return "invalid", "Parsing error"

    return verdict, explanation

# Load the dataset.
# NOTE(review): the path below is a placeholder. The code in this section reads
# the 'review', 'product_name' and 'manual' columns from the loaded frame.
data = pd.read_csv("your dataset file location.csv")

# Create the output columns up front so every row has a value.
data['model_prediction'] = ''
data['reason'] = ''

# Classify each review and store the model's decision and reason on its row.
for index, row in tqdm(data.iterrows(), total=len(data)):
    decision, reason = check_review_vs_description(row['review'], row['product_name'])
    data.at[index, 'model_prediction'] = decision
    data.at[index, 'reason'] = reason

# Save BEFORE scoring: if the accuracy computation below raises (for example
# because the 'manual' column is missing or contains unlabeled rows), the
# predictions from the whole model run are already on disk instead of lost.
output_file = "output.csv"
data.to_csv(output_file, index=False)
print(f"File saved: {output_file}")

# Exact-match accuracy of the model decisions against the 'manual' labels.
accuracy = accuracy_score(data['manual'], data['model_prediction'])
accuracy_percentage = accuracy * 100
print(f'Manual Accuracy Score: {accuracy_percentage:.2f}%')

# End timing
end_time = time.time()

# Report the elapsed time in "X hrs Y min Z sec" format.
elapsed_time = end_time - start_time
hours, remainder = divmod(elapsed_time, 3600)
minutes, seconds = divmod(remainder, 60)
print(f"Elapsed Time: {int(hours)} hrs {int(minutes)} min {int(seconds)} sec")
