In [14]:
from os import path
import pandas as pd
import json
import requests
from tqdm import tqdm
from dotenv import load_dotenv
import os

# Định nghĩa đường dẫn
DATA_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\data"
RESULT_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\results"
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(RESULT_DIR, exist_ok=True)

In [15]:
load_dotenv()

True

In [16]:
# CONFIG
ViABSA_BP_dir = path.join(DATA_DIR, 'ViABSA_Restaurant')
test_file = path.join(ViABSA_BP_dir, 'data_test.csv')
test_df = pd.read_csv(test_file)

In [17]:
aspects = [
   "AMBIENCE#GENERAL",
   "DRINKS#PRICES",
   "DRINKS#QUALITY",
   "DRINKS#STYLE&OPTIONS",
   "FOOD#PRICES",
   "FOOD#QUALITY",
   "FOOD#STYLE&OPTIONS",
   "LOCATION#GENERAL",
   "RESTAURANT#GENERAL",
   "RESTAURANT#MISCELLANEOUS",
   "RESTAURANT#PRICES",
   "SERVICE#GENERAL"
]

In [18]:
sentiment_map = {
    1: "positive",
    2: "negative",
    3: "neutral"
}

def transform_aspect_sentiment(df, start=0, end=None):
    result = [] 
    
    if end is None:
        end = len(df)

    for idx, row in df.iloc[start:end].iterrows():
        entry = {
            "id": str(idx),
            "text": row['Review'],
            "sentiments": []
        }

        for aspect in aspects:
            sentiment = row[f"{aspect}_label"]
            if sentiment == 1:  # chỉ lấy những cái có sentiment
                aspect_sentiment_value = row[aspect]
                mapped_sent = sentiment_map.get(aspect_sentiment_value, "unknown")
                if aspect_sentiment_value != 'none':
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": mapped_sent
                    })
                else:
                    # nếu cột sentiment text bị none nhưng label == 1 thì có thể log ra kiểm tra
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": "unknown"
                    })

        result.append(entry)

    return result

In [19]:
# SETUP DATA
test_df[aspects] = test_df[aspects].fillna('none')

for aspect in aspects:
    test_df[aspect + '_label'] = (test_df[aspect] != 0).astype(int)

test_json = transform_aspect_sentiment(test_df, 0, 100)
test_json[:5]

[{'id': '0',
  'text': 'Đây là 1 trong những quán mà mình thích vì vị trà đậm và thơm cũng như mùi vị đặc trưng hơn hẳn những quán khác nè  Trà sữa trân châu sợi - 46k Trà sữa pha khá ngon, vị trà chát và mùi hương khá rõ, không quá ngọt, rất đúng với gu mình  Trà đào - 45k Vị trà đào ở đây cũng đặc biệt hơn hẳn những quán khác, không phải chua ngọt như thưởng thấy mà có mùi trà rất ngon  Cà phê đá xay - 65k Món đá xay ở đây uống cũng ngon không kém trà nè, mùi vị thơm hương cà phê, vị đắng kết hợp hoàn hảo với độ béo ngọt của whipping cream, không quá đắng, cũng không quá ngọt hay lạt lẽo mà dịu nhẹ, thơm và dễ uống lắm  Trà vải thiết quan âm - 45k Trà vải có mùi vị rất thơm ngon mùi vải mà vẫn nghe rõ vị trà, có chút vị chát nhẹ mùi trà thơm rất thích, không phải chỉ toàn vị syrup vải ngọt gắt như nhiều chỗ khác. Do trà ở đây pha khá đậm nên bạn nào uống mà đang đói sẽ dễ say nha, hoặc ban đêm có thể khó ngủ à, cảnh báo trước  Trà thiết quan âm latte - 47k Ly này thì vị trà rất đậm n

In [20]:
def evaluate_aspect_sentiment(ground_truth, predictions):
    # Chuẩn hóa dữ liệu thành list các tuple để so sánh
    true_aspects = []
    pred_aspects = []

    true_aspect_sentiments = []
    pred_aspect_sentiments = []

    for gt_entry, pred_entry in zip(ground_truth, predictions):
        # ground truth: list of sentiments
        gt_sents = gt_entry['sentiments']
        gt_aspect_set = set()
        gt_aspect_sentiment_set = set()

        for item in gt_sents:
            gt_aspect_set.add(item['aspect'])
            gt_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        true_aspects.append(gt_aspect_set)
        true_aspect_sentiments.append(gt_aspect_sentiment_set)

        # prediction: list of results
        pred_sents = pred_entry['results']
        pred_aspect_set = set()
        pred_aspect_sentiment_set = set()

        for item in pred_sents:
            pred_aspect_set.add(item['aspect'])
            pred_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        pred_aspects.append(pred_aspect_set)
        pred_aspect_sentiments.append(pred_aspect_sentiment_set)

    # Tính theo micro-F1 (gộp hết lại)
    all_true_aspects = set.union(*true_aspects) if true_aspects else set()
    all_pred_aspects = set.union(*pred_aspects) if pred_aspects else set()

    tp_aspect = sum(len(gt & pred) for gt, pred in zip(true_aspects, pred_aspects))
    fp_aspect = sum(len(pred - gt) for gt, pred in zip(true_aspects, pred_aspects))
    fn_aspect = sum(len(gt - pred) for gt, pred in zip(true_aspects, pred_aspects))

    precision_aspect = tp_aspect / (tp_aspect + fp_aspect + 1e-8)
    recall_aspect = tp_aspect / (tp_aspect + fn_aspect + 1e-8)
    f1_aspect = 2 * precision_aspect * recall_aspect / (precision_aspect + recall_aspect + 1e-8)

    # Tính cho sentiment classification
    tp_sentiment = sum(len(gt & pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fp_sentiment = sum(len(pred - gt) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fn_sentiment = sum(len(gt - pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))

    precision_sentiment = tp_sentiment / (tp_sentiment + fp_sentiment + 1e-8)
    recall_sentiment = tp_sentiment / (tp_sentiment + fn_sentiment + 1e-8)
    f1_sentiment = 2 * precision_sentiment * recall_sentiment / (precision_sentiment + recall_sentiment + 1e-8)

    return {
        "Aspect Detection F1": f1_aspect,
        "Sentiment Classification F1": f1_sentiment
    }


In [21]:
gpt_functions = [
    {
        "name": "extract_aspect_sentiment",
        "description": "Extract aspects and sentiments from text, Think step by step exactly",
        "parameters": {
            "type": "object",
            "properties": {
                "results": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "aspect": {
                                "type": "string",
                                "enum": aspects
                            },
                            "sentiment": {
                                "type": "string",
                                "enum": ["positive", "negative", "neutral"]
                            }
                        },
                        "required": ["aspect", "sentiment"]
                    }
                }
            },
            "required": ["results"]
        }
    }
]


In [23]:

# Setup Grok API
def call_grok_api(prompt, max_retries=3):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    for attempt in range(max_retries):
        try:
            response = requests.post(url, headers=headers, json=data, timeout=60)
            if response.status_code == 200:
                result = response.json()
                return result['choices'][0]['message']['content']
            else:
                print(f"API Error (attempt {attempt+1}): {response.status_code} - {response.text}")
                if attempt < max_retries - 1:
                    import time
                    time.sleep(2)  # Wait 2 seconds before retry
                continue
        except requests.exceptions.Timeout:
            print(f"Timeout error (attempt {attempt+1})")
            if attempt < max_retries - 1:
                import time
                time.sleep(2)
            continue
        except Exception as e:
            print(f"Request Error (attempt {attempt+1}): {e}")
            if attempt < max_retries - 1:
                import time
                time.sleep(2)
            continue
    
    return None

def extract_with_grok(text):
    """
    Trích xuất aspect và sentiment sử dụng Grok API
    """
    prompt = f"""
    Extract aspects and sentiments from the following review:
    {text}
    
    Available aspects: {', '.join(aspects)}
    Available sentiments: positive, negative, neutral
    
    Return ONLY a valid JSON object in this exact format:
    {{"results": [{{"aspect": "aspect_name", "sentiment": "sentiment_value"}}]}}
    """
    
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # Tìm JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

print(f"Processing {len(test_json)} samples...")

for i, data in enumerate(tqdm(test_json)):
    try:
        prediction = extract_with_grok(data['text'])
        predictions.append(prediction)
        
        if (i + 1) % 10 == 0:
            temp_file = path.join(RESULT_DIR, f'temp_grok_restaurant_progress_{i+1}.json')
            with open(temp_file, 'w') as f:
                json.dump(predictions, f, indent=2, ensure_ascii=False)
            print(f"Saved progress at sample {i+1}")
            
    except KeyboardInterrupt:
        print(f"\nInterrupted at sample {i+1}. Saving current progress...")
        temp_file = path.join(RESULT_DIR, f'temp_grok_restaurant_interrupted_{i+1}.json')
        with open(temp_file, 'w') as f:
            json.dump(predictions, f, indent=2, ensure_ascii=False)
        print(f"Progress saved to {temp_file}")
        break
    except Exception as e:
        print(f"Error processing sample {i}: {e}")
        predictions.append({"results": []})  # Add empty result to maintain alignment

print(f"Completed processing {len(predictions)} samples")

Processing 100 samples...


 10%|█         | 10/100 [00:51<09:54,  6.61s/it]

Saved progress at sample 10


 16%|█▌        | 16/100 [01:29<09:38,  6.89s/it]

JSON Parse Error: {
 there! I've analyzed the review and extracted the aspects and their corresponding sentiments. Here's the result in the requested JSON format:

{
    "results": [
        {"aspect": "AMBIENCE#GENERAL", "sentiment": "positive"},
        {"aspect": "DRINKS#QUALITY", "sentiment": "positive"},
        {"aspect": "DRINKS#STYLE&OPTIONS", "sentiment": "positive"},
        {"aspect": "FOOD#QUALITY", "sentiment": "positive"},
        {"aspect": "FOOD#STYLE&OPTIONS", "sentiment": "positive"},
        {"aspect": "FOOD#PRICES", "sentiment": "positive"},
        {"aspect": "LOCATION#GENERAL", "sentiment": "positive"},
        {"aspect": "RESTAURANT#GENERAL", "sentiment": "positive"},
        {"aspect": "RESTAURANT#PRICES", "sentiment": "positive"},
        {"aspect": "SERVICE#GENERAL", "sentiment": "positive"}
    ]
}


 20%|██        | 20/100 [01:51<07:16,  5.45s/it]

Saved progress at sample 20


 30%|███       | 30/100 [02:37<07:12,  6.18s/it]

JSON Parse Error: {
, I’m happy to help with extracting aspects and sentiments from the review. Here’s the JSON object based on the provided text and available aspects/sentiments:

```json
{
  "results": [
    {"aspect": "RESTAURANT#GENERAL", "sentiment": "neutral"},
    {"aspect": "FOOD#STYLE&OPTIONS", "sentiment": "positive"},
    {"aspect": "FOOD#QUALITY", "sentiment": "neutral"},
    {"aspect": "FOOD#PRICES", "sentiment": "positive"}
  ]
}
```
Saved progress at sample 30


 40%|████      | 40/100 [03:25<04:41,  4.69s/it]

Saved progress at sample 40


 50%|█████     | 50/100 [04:13<05:47,  6.95s/it]

Saved progress at sample 50


 60%|██████    | 60/100 [05:04<03:32,  5.32s/it]

Saved progress at sample 60


 68%|██████▊   | 68/100 [05:53<04:53,  9.16s/it]

JSON Parse Error: {
thinking>
I will analyze the review and extract the relevant aspects and their corresponding sentiments based on the provided list of available aspects and sentiments. Let's break down the review:

1. "Quán ngay ngã ba Nguyễn Súy-Gò Dầu, hình như mới mở thì phải." - This refers to the location of the restaurant. The sentiment seems neutral as it just states a fact without strong opinion.
   - Aspect: LOCATION#GENERAL
   - Sentiment: neutral

2. "Bữa mình làm bài tập nhóm nên đi khá đông, quán vẫn có bàn rộng đủ chỗ cho cả nhóm 15 người ngồi." - This refers to the general restaurant experience, specifically the seating capacity. The sentiment is positive as it highlights the adequacy of space.
   - Aspect: RESTAURANT#GENERAL
   - Sentiment: positive

3. "Buổi trưa mở nhạc nhẹ, wifi mạnh (chắc vậy nên thấy nhiều dân văn phòng ngồi đây)." - This refers to the ambience of the restaurant, mentioning light music and strong wifi. The sentiment is positive as it implies a p

 70%|███████   | 70/100 [06:04<03:34,  7.14s/it]

Saved progress at sample 70


 80%|████████  | 80/100 [07:05<02:34,  7.72s/it]

Saved progress at sample 80


 90%|█████████ | 90/100 [07:57<00:49,  4.93s/it]

Saved progress at sample 90


100%|██████████| 100/100 [08:48<00:00,  5.28s/it]

Saved progress at sample 100
Completed processing 100 samples





In [24]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.7789699520656579, 'Sentiment Classification F1': 0.684549351209315}


In [25]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Zero-shot-Restaurant_Grok.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)