In [1]:
import pandas as pd
from groq import Groq
import json
import os
import re
from langchain_core.prompts import PromptTemplate
import random
import logging

# Timestamped INFO-level logging so HTTP calls made by the client show up in
# the cell output.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Load environment variables (from a local .env file when present) so that
# credentials and the model name are never hard-coded in the notebook.
from dotenv import load_dotenv

load_dotenv()
# os.environ[...] raises KeyError immediately when a variable is missing,
# which is preferable to failing later on the first API call.
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
CHAT_MODEL = os.environ["CHAT_MODEL"]
# Pass the key explicitly: the value validated above is the one actually used,
# instead of relying on the client to re-read the environment on its own.
client = Groq(api_key=GROQ_API_KEY)

In [48]:
# Load the raw feedback export for one region; only the three columns used
# below are read from the spreadsheet.
region = "SG"
region_path = f"../data/official_data/feedback_{region}.xls"
columns_to_read = ["Feedback id", "Feedback 1", "Feedback 2"]
df = pd.read_excel(region_path, usecols=columns_to_read)

# Keep rows where both feedback fields are present and the JSON payload in
# 'Feedback 2' is not the empty-description placeholder.
# .copy() makes df_filtered an independent frame, so the column assignments
# below do not write into a slice of `df` (avoids SettingWithCopyWarning).
df_filtered = df[
    (df['Feedback 1'].notna()) &
    (df['Feedback 2'].notna()) &
    (df['Feedback 2'] != '{"description":""}')
].copy()

# 'Feedback 2' holds a JSON string; replace it with its "description" text.
df_filtered['Feedback 2'] = df_filtered['Feedback 2'].apply(lambda x: json.loads(x)['description'])

# Handle problematic 'Feedback id' values: anything non-numeric becomes NaN
# (errors='coerce') and those rows are then dropped.
df_filtered['Feedback id'] = pd.to_numeric(df_filtered['Feedback id'], errors='coerce')
df_filtered = df_filtered.dropna(subset=['Feedback id'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Feedback 2'] = df_filtered['Feedback 2'].apply(lambda x: json.loads(x)['description'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Feedback id'] = pd.to_numeric(df_filtered['Feedback id'], errors='coerce')


In [53]:
# Pair each feedback id with its description text as a single-entry dict
# ({id: text}), preserving row order; the resulting list is the cell output.
feedback_id = list(df_filtered['Feedback id'])
feedback_2 = list(df_filtered['Feedback 2'].values)
id_feedback = [{fid: text} for fid, text in zip(feedback_id, feedback_2)]

id_feedback

[{3577258: 'try to do discount. yet totally different. cancel all discount. '},
 {3569525: 'The team should think along the purchase amount & not by product category for benefit to everyone, be it Shopee, Seller & customer. Thabk you '},
 {3538575: 'useless junk'},
 {3532189: 'System shd send the alert to respective logistic team lead instead of let sellers fill up the form as  often fill up few times also no action fr team!'},
 {3531102: 'You did not solve my query, this is just a small envelope but I need to pay a postage fee of $0.52, this was not mentioned when I purchased the vouche'},
 {3488212: 'article says "Earnings from your sales will be credited to your Seller Balance within 2 to 7 working days  " but now agent says upto 25 days'},
 {3479043: 'Did not state need to wait how long before a promotion can be set after price increase.'},
 {3431961: 'It says that this only applies for selected sellers, what about others? Thanks.'},
 {3428859: "Hi We don't have a local base in Sin

In [60]:
# Prompt template for the labelling call. `{pairs}` is the only placeholder;
# generate_labels() fills it via PromptTemplate.format(pairs=...). The doubled
# braces in the example output are escapes that render as literal `{` / `}`
# once the template is formatted.
# NOTE(review): the instructions ask for "one or more" labels, but the example
# output carries a single "label" string per feedback id, and
# 'Positive Comment' is the only label spelled with a space instead of an
# underscore -- confirm both are intended before relying on the labels.
GENERATE_LABELS_PROMPT = '''
You are an linguistics professor tasked with classifying seller feedback for an e-commerce platform. 
Each feedback item should be categorised into one or more appropriate labels from the following list:
['Negative_Complaint','Constructive_Criticism','Design_Feedback','Positive Comment','Neutral']
You are not to write any code, but just use your knowledge to classify the feedback.
Your output should be the feedback IDs and their corresponding label.

Now classify the following feedback:
Feedbacks: {pairs}

Example Output format:
[{{"feedback_id": 123456, "label": "Negative_Complaint"}}, {{"feedback_id": 423456,"label": "Constructive_Criticism"}}, {{"feedback_id": 654321,"label": "Negative_Complaint"}}]

''' 

In [67]:
def get_id_labels(llm_response, pattern=r'\[\{.*?\}\]'):
    """Extract and parse the first JSON list of objects embedded in an LLM reply.

    The reply is searched with ``pattern`` (``re.DOTALL`` is applied, so the
    list may span several lines); the matched text is decoded with
    ``json.loads`` and checked to be a list whose items are all dicts.

    Args:
        llm_response: Raw text returned by the model.
        pattern: Regular expression locating the JSON list. The default
            matches from the first ``[{`` to the first following ``}]``.

    Returns:
        The decoded list of dictionaries.

    Raises:
        TypeError: If ``llm_response`` is not a string.
        ValueError: If no match is found, the matched text is not valid JSON,
            or the decoded value is not a list of dictionaries.
        RuntimeError: If the regular-expression search itself fails
            (for example because ``pattern`` is invalid).
    """
    if not isinstance(llm_response, str):
        raise TypeError("The LLM response must be a string.")

    # Only the regex search is treated as an "unexpected" failure. Keeping the
    # validation errors below outside this handler lets them reach the caller
    # as ValueError instead of being re-wrapped as RuntimeError.
    try:
        match = re.search(pattern, llm_response, re.DOTALL)
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred: {e}") from e

    if not match:
        raise ValueError("No valid JSON list found in the response.")

    try:
        result = json.loads(match.group(0))
    except json.JSONDecodeError as e:
        # Chain the original error so the decode position stays visible.
        raise ValueError(f"Failed to decode JSON: {e}") from e

    # Validate the structure of the result
    if not isinstance(result, list) or not all(isinstance(item, dict) for item in result):
        raise ValueError("Extracted JSON is not a list of dictionaries.")

    return result

def generate_labels(id_feedback_pairs, label_prompt, client):
    """Ask the chat model to label a batch of feedback and parse its reply.

    Args:
        id_feedback_pairs: The id/feedback pairs to classify; substituted
            into the ``{pairs}`` placeholder of ``label_prompt``.
        label_prompt: Prompt template text containing a ``{pairs}`` placeholder.
        client: Chat client exposing ``chat.completions.create``.

    Returns:
        The list of dictionaries extracted from the model reply by
        ``get_id_labels``.

    Raises:
        ValueError, RuntimeError: Propagated from ``get_id_labels`` when the
            reply does not contain a parseable JSON list.
    """
    prompt = PromptTemplate(
        template=label_prompt,
        input_variables=["pairs"],
    )
    final_prompt = prompt.format(pairs=id_feedback_pairs)

    # Generate the completion by interacting with the language model API
    completion = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=[{"role": "user", "content": final_prompt}],
        temperature=0,  # Lowest randomness, so repeated runs stay as consistent as possible
        # Caps the reply length; a reply cut off before the closing `}]`
        # will be rejected by get_id_labels, so keep batches small.
        max_tokens=1024,
        top_p=1,  # Nucleus sampling over the full distribution (no truncation)
        stream=True,  # Enable streaming of the response chunks
        stop=None,  # No custom stop sequences
    )

    # Concatenate the streamed text. Chunks with an empty `choices` list are
    # skipped and a missing delta (None) counts as an empty string.
    response = "".join(
        chunk.choices[0].delta.content or ""
        for chunk in completion
        if chunk.choices
    )

    return get_id_labels(response)

In [None]:
def pair_id_feedback():
    """Placeholder: takes no arguments, does no work, and returns ``None``."""
    return None

In [68]:
batch_pairs = id_feedback[:5]
feedback_labels = generate_labels(batch_pairs, GENERATE_LABELS_PROMPT, client)
feedback_labels


2025-01-27 10:53:54,016 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


[{'feedback_id': 3577258, 'label': 'Negative_Complaint'},
 {'feedback_id': 3569525, 'label': 'Constructive_Criticism'},
 {'feedback_id': 3538575, 'label': 'Negative_Complaint'},
 {'feedback_id': 3532189, 'label': 'Design_Feedback'},
 {'feedback_id': 3531102, 'label': 'Negative_Complaint'}]