In [1]:
from os import path
import pandas as pd
import numpy as np
import json
import requests
from tqdm import tqdm
from dotenv import load_dotenv
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import os

# Định nghĩa đường dẫn
DATA_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\data"
RESULT_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\results"
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(RESULT_DIR, exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
load_dotenv()

True

In [3]:
# CONFIG
ViABSA_BP_dir = path.join(DATA_DIR, 'ViABSA_Hotel')
test_file = path.join(ViABSA_BP_dir, 'data_test.csv')
test_df = pd.read_csv(test_file)

In [4]:
aspects = [
    "FACILITIES#CLEANLINESS",
    "FACILITIES#COMFORT",
    "FACILITIES#DESIGN&FEATURES",
    "FACILITIES#GENERAL",
    "FACILITIES#MISCELLANEOUS",
    "FACILITIES#PRICES",
    "FACILITIES#QUALITY",
    "FOOD&DRINKS#MISCELLANEOUS",
    "FOOD&DRINKS#PRICES",
    "FOOD&DRINKS#QUALITY",
    "FOOD&DRINKS#STYLE&OPTIONS",
    "HOTEL#CLEANLINESS",
    "HOTEL#COMFORT",
    "HOTEL#DESIGN&FEATURES",
    "HOTEL#GENERAL",
    "HOTEL#MISCELLANEOUS",
    "HOTEL#PRICES",
    "HOTEL#QUALITY",
    "LOCATION#GENERAL",
    "ROOMS#CLEANLINESS",
    "ROOMS#COMFORT",
    "ROOMS#DESIGN&FEATURES",
    "ROOMS#GENERAL",
    "ROOMS#MISCELLANEOUS",
    "ROOMS#PRICES",
    "ROOMS#QUALITY",
    "ROOM_AMENITIES#CLEANLINESS",
    "ROOM_AMENITIES#COMFORT",
    "ROOM_AMENITIES#DESIGN&FEATURES",
    "ROOM_AMENITIES#GENERAL",
    "ROOM_AMENITIES#MISCELLANEOUS",
    "ROOM_AMENITIES#PRICES",
    "ROOM_AMENITIES#QUALITY",
    "SERVICE#GENERAL"
]

In [5]:
sentiment_map = {
    1: "positive",
    2: "negative",
    3: "neutral"
}

def transform_aspect_sentiment(df, start=0, end=None):
    result = [] 
    
    if end is None:
        end = len(df)

    for idx, row in df.iloc[start:end].iterrows():
        entry = {
            "id": str(idx),
            "text": row['Review'],
            "sentiments": []
        }

        for aspect in aspects:
            sentiment = row[f"{aspect}_label"]
            if sentiment == 1:  # chỉ lấy những cái có sentiment
                aspect_sentiment_value = row[aspect]
                mapped_sent = sentiment_map.get(aspect_sentiment_value, "unknown")
                if aspect_sentiment_value != 'none':
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": mapped_sent
                    })
                else:
                    # nếu cột sentiment text bị none nhưng label == 1 thì có thể log ra kiểm tra
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": "unknown"
                    })

        result.append(entry)

    return result

In [6]:
# SETUP DATA
test_df[aspects] = test_df[aspects].fillna('none')

for aspect in aspects:
    test_df[aspect + '_label'] = (test_df[aspect] != 0).astype(int)

test_json = transform_aspect_sentiment(test_df, 0, 100)
test_json[:5]

[{'id': '0',
  'text': 'Ga giường không sạch, nhân viên quên dọn phòng một ngày.',
  'sentiments': [{'aspect': 'ROOM_AMENITIES#CLEANLINESS',
    'sentiment': 'negative'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'negative'}]},
 {'id': '1',
  'text': 'Nv nhiệt tình, phòng ở sạch sẽ, tiện nghi, vị trí khá thuận tiện cho việc di chuyển đến các địa điểm ăn + chơi Phòng có gián',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#CLEANLINESS', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#COMFORT', 'sentiment': 'positive'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'positive'}]},
 {'id': '2',
  'text': 'Đi bộ ra biển gần, tiện đi lại Phòng view biển nhưng cửa sổ view biển khá bé',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#DESIGN&FEATURES', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#GENERAL', 'sentiment': 'positive'}]},
 {'id': '3',
  'text': 'Tất cả mọi thứ đều sạch sẽ, giường 

In [57]:
'''
    Nếu dùng TD-IDF thì sẽ chỉ chọn dựa vào tần suất từ, không hiểu nghĩa của câu -> bị trùng lặp
    Semantic grouping để chọn example đa dạng về nội dung, không chỉ về các từ
'''
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(test_df['Review'].tolist(), show_progress_bar=True)

# KMeans clustering (70% đa dạng, 30% khó)
n_total = 5
n_diverse = int(n_total * 0.7)
n_hard = n_total - n_diverse

kmeans = KMeans(n_clusters=n_diverse, random_state=42, n_init=10)
kmeans.fit(embeddings)

# Lấy diverse example gần cluster centroid
closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, embeddings)
diverse_examples = test_df.iloc[closest]

print(f"Selected {len(diverse_examples)} diverse examples:")
for i, row in diverse_examples.iterrows():
    print(f"\nExample {i}:\n{row['Review']}")

Batches: 100%|██████████| 19/19 [00:05<00:00,  3.20it/s]


Selected 3 diverse examples:

Example 545:
bữa sáng ngon,nhân viên vui vẻ,tận tình,đặc biết cô bé Ly rất chu đáo và tận tình. đồ ăn sáng hơi it món,phòng tắm quá nhỏ

Example 302:
Phòng sạch sẽ, thoải mái. Nhân viên vui vẻ thái độ phục vục nhiệt tình, thân thiện.

Example 358:
Tiện nghi đầy đủ thoải mái, nhân viên nhiệt tình và rất thân thiện. Tuy không phải khách sạn tốt nhất nhưng thái độ phục vụ làm mình rất ấn tượng và để lại dấu ấn tốt ở đây. Nhược điểm hơi ít phòng và tầng, hơn nữa khách sạn nằm trong con đường mới xây nên di chuyển hay gọi xe hầu như đều phải ra trục đường chính. View không đẹp lắm


In [20]:
def select_few_shot_examples(df, text_column, n_total=5, model_name='all-MiniLM-L6-v2', random_state=42):
    n_diverse = int(n_total * 0.7)
    n_hard = n_total - n_diverse

    model = SentenceTransformer(model_name)
    embeddings = model.encode(df[text_column].tolist(), show_progress_bar=True)

    kmeans = KMeans(n_clusters=n_diverse, random_state=random_state, n_init=10)
    kmeans.fit(embeddings)

    closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, embeddings)
    diverse_df = df.iloc[closest]

    all_indices = np.arange(len(df))
    remaining_indices = list(set(all_indices) - set(closest))
    hard_df = df.iloc[remaining_indices].sample(n=n_hard, random_state=random_state)

    return diverse_df.reset_index(drop=True), hard_df.reset_index(drop=True)

# SET-UP FEW-SHOT EXAMPLES
diverse, hard = select_few_shot_examples(test_df, text_column='Review', n_total=10)

few_shot_json =  transform_aspect_sentiment(diverse, 0, 100) + transform_aspect_sentiment(hard, 0, 100)


Batches: 100%|██████████| 19/19 [00:04<00:00,  4.61it/s]


In [24]:
few_shot_json[:10]

[{'id': '0',
  'text': 'Nhân viên thân thiện, nhiệt tình Nhà tắm nhỏ, tiện nghi chưa ngăn nắp, phòng ốc bí.',
  'sentiments': [{'aspect': 'ROOMS#COMFORT', 'sentiment': 'negative'},
   {'aspect': 'ROOM_AMENITIES#DESIGN&FEATURES', 'sentiment': 'negative'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'positive'}]},
 {'id': '1',
  'text': 'Phòng sạch sẽ, đang khuyến mại nên giá rẻ. Sáng sớm và tối rất ầm ĩ, vì Khách sạn và nhà bên cạnh đang xây dựng, chính vì vậy nên giá phòng đang giảm giá. Lễ tân đứng tuổi lịch sự nhiệt tình. Lễ tân trẻ tuổi ban đầu không cho khách đặt đồ gửi trong phòng gửi đồ, lý do là sợ nhầm lẫn với đồ của khách khác trong phòng, lý do hết sức vô lý, phải đến khi khách ko đồng ý để ngoài góc sảnh vì có máy ảnh lễ tân mới mở phòng gửi đồ. Vị trí xa, xung quanh ít cửa hàng dịch vụ, bù lại xe máy có thể thuê ngay tại khách sạn',
  'sentiments': [{'aspect': 'FACILITIES#DESIGN&FEATURES',
    'sentiment': 'positive'},
   {'aspect': 'HOTEL#COMFORT', 'sentiment': 'negative

In [12]:
def evaluate_aspect_sentiment(ground_truth, predictions):
    # Chuẩn hóa dữ liệu thành list các tuple để so sánh
    true_aspects = []
    pred_aspects = []

    true_aspect_sentiments = []
    pred_aspect_sentiments = []

    for gt_entry, pred_entry in zip(ground_truth, predictions):
        # ground truth: list of sentiments
        gt_sents = gt_entry['sentiments']
        gt_aspect_set = set()
        gt_aspect_sentiment_set = set()

        for item in gt_sents:
            gt_aspect_set.add(item['aspect'])
            gt_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        true_aspects.append(gt_aspect_set)
        true_aspect_sentiments.append(gt_aspect_sentiment_set)

        # prediction: list of results
        pred_sents = pred_entry['results']
        pred_aspect_set = set()
        pred_aspect_sentiment_set = set()

        for item in pred_sents:
            pred_aspect_set.add(item['aspect'])
            pred_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        pred_aspects.append(pred_aspect_set)
        pred_aspect_sentiments.append(pred_aspect_sentiment_set)

    # Tính theo micro-F1 (gộp hết lại)
    all_true_aspects = set.union(*true_aspects) if true_aspects else set()
    all_pred_aspects = set.union(*pred_aspects) if pred_aspects else set()

    tp_aspect = sum(len(gt & pred) for gt, pred in zip(true_aspects, pred_aspects))
    fp_aspect = sum(len(pred - gt) for gt, pred in zip(true_aspects, pred_aspects))
    fn_aspect = sum(len(gt - pred) for gt, pred in zip(true_aspects, pred_aspects))

    precision_aspect = tp_aspect / (tp_aspect + fp_aspect + 1e-8)
    recall_aspect = tp_aspect / (tp_aspect + fn_aspect + 1e-8)
    f1_aspect = 2 * precision_aspect * recall_aspect / (precision_aspect + recall_aspect + 1e-8)

    # Tính cho sentiment classification
    tp_sentiment = sum(len(gt & pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fp_sentiment = sum(len(pred - gt) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fn_sentiment = sum(len(gt - pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))

    precision_sentiment = tp_sentiment / (tp_sentiment + fp_sentiment + 1e-8)
    recall_sentiment = tp_sentiment / (tp_sentiment + fn_sentiment + 1e-8)
    f1_sentiment = 2 * precision_sentiment * recall_sentiment / (precision_sentiment + recall_sentiment + 1e-8)

    return {
        "Aspect Detection F1": f1_aspect,
        "Sentiment Classification F1": f1_sentiment
    }


In [60]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Thêm few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Thêm câu hỏi hiện tại
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # Tìm JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# Dự đoán
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

100%|██████████| 100/100 [04:05<00:00,  2.45s/it]


In [61]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.7214285664238946, 'Sentiment Classification F1': 0.6571428521397108}


In [63]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)

k = 1 với ví dụ gần tâm

In [13]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Thêm few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Thêm câu hỏi hiện tại
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # Tìm JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# Dự đoán
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

  9%|▉         | 9/100 [00:22<05:11,  3.42s/it]

JSON Parse Error: {" there! I'm happy to help with extracting aspects and sentiments from the provided review. Based on the text "Rộng rãi, sạch sẽ. Có chỗ trong phòng không bắt được wifi" and the available aspects and sentiments, here's the result in the requested JSON format:

{"results": [{"aspect": "ROOMS#COMFORT", "sentiment": "positive"}, {"aspect": "ROOMS#CLEANLINESS", "sentiment": "positive"}, {"aspect": "ROOM_AMENITIES#QUALITY", "sentiment": "negative"}]}


100%|██████████| 100/100 [05:57<00:00,  3.57s/it]


In [14]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.709219853148316, 'Sentiment Classification F1': 0.6359338011405922}


In [15]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok-N1-c.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)

k = 1 với ví dụ xa tâm

In [18]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Thêm few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Thêm câu hỏi hiện tại
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # Tìm JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# Dự đoán
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

 73%|███████▎  | 73/100 [03:11<01:55,  4.28s/it]

JSON Parse Error: {" {"results": [
    {"aspect": "LOCATION#GENERAL", "sentiment": "positive"},
    {"aspect": "SERVICE#GENERAL", "sentiment": "positive"},
    {"aspect": "FOOD&DRINKS#QUALITY", "sentiment": "negative"},
    {"aspect": "FOOD&DRINKS#STYLE&OPTIONS", "sentiment": "negative"},
    {"aspect": "HOTEL#QUALITY", "sentiment": "negative"},
    {"aspect": "ROOM_AMENITIES#CLEANLINESS", "sentiment": "negative"},
    {"aspect": "ROOM_AMENITIES#QUALITY", "sentiment": "negative"}
  ]}


100%|██████████| 100/100 [06:03<00:00,  3.64s/it]


In [19]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.7570754666882956, 'Sentiment Classification F1': 0.6910377308407963}


In [None]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok-N1-f.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)

k = 10

In [25]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Thêm few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Thêm câu hỏi hiện tại
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # Tìm JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# Dự đoán
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

 94%|█████████▍| 94/100 [05:01<00:13,  2.23s/it]

JSON Parse Error: {" there! I'm happy to help with extracting aspects and sentiments from the provided review. Here's the result in the exact JSON format you requested:

{"results": [{"aspect": "ROOMS#COMFORT", "sentiment": "positive"}, {"aspect": "ROOM_AMENITIES#GENERAL", "sentiment": "positive"}, {"aspect": "FOOD&DRINKS#STYLE&OPTIONS", "sentiment": "negative"}]}


100%|██████████| 100/100 [05:17<00:00,  3.17s/it]


In [None]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.8624613505235967, 'Sentiment Classification F1': 0.7211177119997286}


In [27]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok-N10.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)