In [1]:
from os import path
import pandas as pd
import numpy as np
import json
import requests
from tqdm import tqdm
from dotenv import load_dotenv
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import os

# ƒê·ªãnh nghƒ©a ƒë∆∞·ªùng d·∫´n
DATA_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\data"
RESULT_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\results"
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(RESULT_DIR, exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
load_dotenv()

True

In [3]:
# CONFIG
ViABSA_BP_dir = path.join(DATA_DIR, 'ViABSA_Hotel')
test_file = path.join(ViABSA_BP_dir, 'data_test.csv')
test_df = pd.read_csv(test_file)

In [4]:
aspects = [
    "FACILITIES#CLEANLINESS",
    "FACILITIES#COMFORT",
    "FACILITIES#DESIGN&FEATURES",
    "FACILITIES#GENERAL",
    "FACILITIES#MISCELLANEOUS",
    "FACILITIES#PRICES",
    "FACILITIES#QUALITY",
    "FOOD&DRINKS#MISCELLANEOUS",
    "FOOD&DRINKS#PRICES",
    "FOOD&DRINKS#QUALITY",
    "FOOD&DRINKS#STYLE&OPTIONS",
    "HOTEL#CLEANLINESS",
    "HOTEL#COMFORT",
    "HOTEL#DESIGN&FEATURES",
    "HOTEL#GENERAL",
    "HOTEL#MISCELLANEOUS",
    "HOTEL#PRICES",
    "HOTEL#QUALITY",
    "LOCATION#GENERAL",
    "ROOMS#CLEANLINESS",
    "ROOMS#COMFORT",
    "ROOMS#DESIGN&FEATURES",
    "ROOMS#GENERAL",
    "ROOMS#MISCELLANEOUS",
    "ROOMS#PRICES",
    "ROOMS#QUALITY",
    "ROOM_AMENITIES#CLEANLINESS",
    "ROOM_AMENITIES#COMFORT",
    "ROOM_AMENITIES#DESIGN&FEATURES",
    "ROOM_AMENITIES#GENERAL",
    "ROOM_AMENITIES#MISCELLANEOUS",
    "ROOM_AMENITIES#PRICES",
    "ROOM_AMENITIES#QUALITY",
    "SERVICE#GENERAL"
]

In [5]:
sentiment_map = {
    1: "positive",
    2: "negative",
    3: "neutral"
}

def transform_aspect_sentiment(df, start=0, end=None):
    result = [] 
    
    if end is None:
        end = len(df)

    for idx, row in df.iloc[start:end].iterrows():
        entry = {
            "id": str(idx),
            "text": row['Review'],
            "sentiments": []
        }

        for aspect in aspects:
            sentiment = row[f"{aspect}_label"]
            if sentiment == 1:  # ch·ªâ l·∫•y nh·ªØng c√°i c√≥ sentiment
                aspect_sentiment_value = row[aspect]
                mapped_sent = sentiment_map.get(aspect_sentiment_value, "unknown")
                if aspect_sentiment_value != 'none':
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": mapped_sent
                    })
                else:
                    # n·∫øu c·ªôt sentiment text b·ªã none nh∆∞ng label == 1 th√¨ c√≥ th·ªÉ log ra ki·ªÉm tra
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": "unknown"
                    })

        result.append(entry)

    return result

In [6]:
# SETUP DATA
test_df[aspects] = test_df[aspects].fillna('none')

for aspect in aspects:
    test_df[aspect + '_label'] = (test_df[aspect] != 0).astype(int)

test_json = transform_aspect_sentiment(test_df, 0, 100)
test_json[:5]

[{'id': '0',
  'text': 'Ga gi∆∞∆°ÃÄng kh√¥ng saÃ£ch, nh√¢n vi√™n qu√™n doÃ£n phoÃÄng m√¥Ã£t ngaÃÄy.',
  'sentiments': [{'aspect': 'ROOM_AMENITIES#CLEANLINESS',
    'sentiment': 'negative'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'negative'}]},
 {'id': '1',
  'text': 'Nv nhi·ªát t√¨nh, ph√≤ng ·ªü s·∫°ch s·∫Ω, ti·ªán nghi, v·ªã tr√≠ kh√° thu·∫≠n ti·ªán cho vi·ªác di chuy·ªÉn ƒë·∫øn c√°c ƒë·ªãa ƒëi·ªÉm ƒÉn + ch∆°i Ph√≤ng c√≥ gi√°n',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#CLEANLINESS', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#COMFORT', 'sentiment': 'positive'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'positive'}]},
 {'id': '2',
  'text': 'ƒêi b·ªô ra bi·ªÉn g·∫ßn, ti·ªán ƒëi l·∫°i Ph√≤ng view bi·ªÉn nh∆∞ng c·ª≠a s·ªï view bi·ªÉn kh√° b√©',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#DESIGN&FEATURES', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#GENERAL', 'sentiment'

In [57]:
'''
    N·∫øu d√πng TD-IDF th√¨ s·∫Ω ch·ªâ ch·ªçn d·ª±a v√†o t·∫ßn su·∫•t t·ª´, kh√¥ng hi·ªÉu nghƒ©a c·ªßa c√¢u -> b·ªã tr√πng l·∫∑p
    Semantic grouping ƒë·ªÉ ch·ªçn example ƒëa d·∫°ng v·ªÅ n·ªôi dung, kh√¥ng ch·ªâ v·ªÅ c√°c t·ª´
'''
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(test_df['Review'].tolist(), show_progress_bar=True)

# KMeans clustering (70% ƒëa d·∫°ng, 30% kh√≥)
n_total = 5
n_diverse = int(n_total * 0.7)
n_hard = n_total - n_diverse

kmeans = KMeans(n_clusters=n_diverse, random_state=42, n_init=10)
kmeans.fit(embeddings)

# L·∫•y diverse example g·∫ßn cluster centroid
closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, embeddings)
diverse_examples = test_df.iloc[closest]

print(f"Selected {len(diverse_examples)} diverse examples:")
for i, row in diverse_examples.iterrows():
    print(f"\nExample {i}:\n{row['Review']}")

Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 19/19 [00:05<00:00,  3.20it/s]


Selected 3 diverse examples:

Example 545:
b·ªØa s√°ng ngon,nh√¢n vi√™n vui v·∫ª,t·∫≠n t√¨nh,ƒë·∫∑c bi·∫øt c√¥ b√© Ly r·∫•t chu ƒë√°o v√† t·∫≠n t√¨nh. ƒë·ªì ƒÉn s√°ng h∆°i it m√≥n,ph√≤ng t·∫Øm qu√° nh·ªè

Example 302:
Ph√≤ng s·∫°ch s·∫Ω, tho·∫£i m√°i. Nh√¢n vi√™n vui v·∫ª th√°i ƒë·ªô ph·ª•c v·ª•c nhi·ªát t√¨nh, th√¢n thi·ªán.

Example 358:
Ti·ªán nghi ƒë·∫ßy ƒë·ªß tho·∫£i m√°i, nh√¢n vi√™n nhi·ªát t√¨nh v√† r·∫•t th√¢n thi·ªán. Tuy kh√¥ng ph·∫£i kh√°ch s·∫°n t·ªët nh·∫•t nh∆∞ng th√°i ƒë·ªô ph·ª•c v·ª• l√†m m√¨nh r·∫•t ·∫•n t∆∞·ª£ng v√† ƒë·ªÉ l·∫°i d·∫•u ·∫•n t·ªët ·ªü ƒë√¢y. Nh∆∞·ª£c ƒëi·ªÉm h∆°i √≠t ph√≤ng v√† t·∫ßng, h∆°n n·ªØa kh√°ch s·∫°n n·∫±m trong con ƒë∆∞·ªùng m·ªõi x√¢y n√™n di chuy·ªÉn hay g·ªçi xe h·∫ßu nh∆∞ ƒë·ªÅu ph·∫£i ra tr·ª•c ƒë∆∞·ªùng ch√≠nh. View kh√¥ng ƒë·∫πp l·∫Øm


In [20]:
def select_few_shot_examples(df, text_column, n_total=5, model_name='all-MiniLM-L6-v2', random_state=42):
    n_diverse = int(n_total * 0.7)
    n_hard = n_total - n_diverse

    model = SentenceTransformer(model_name)
    embeddings = model.encode(df[text_column].tolist(), show_progress_bar=True)

    kmeans = KMeans(n_clusters=n_diverse, random_state=random_state, n_init=10)
    kmeans.fit(embeddings)

    closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, embeddings)
    diverse_df = df.iloc[closest]

    all_indices = np.arange(len(df))
    remaining_indices = list(set(all_indices) - set(closest))
    hard_df = df.iloc[remaining_indices].sample(n=n_hard, random_state=random_state)

    return diverse_df.reset_index(drop=True), hard_df.reset_index(drop=True)

# SET-UP FEW-SHOT EXAMPLES
diverse, hard = select_few_shot_examples(test_df, text_column='Review', n_total=10)

few_shot_json =  transform_aspect_sentiment(diverse, 0, 100) + transform_aspect_sentiment(hard, 0, 100)


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 19/19 [00:04<00:00,  4.61it/s]


In [24]:
few_shot_json[:10]

[{'id': '0',
  'text': 'Nh√¢n vi√™n th√¢n thi·ªán, nhi·ªát t√¨nh Nh√† t·∫Øm nh·ªè, ti·ªán nghi ch∆∞a ngƒÉn n·∫Øp, ph√≤ng ·ªëc b√≠.',
  'sentiments': [{'aspect': 'ROOMS#COMFORT', 'sentiment': 'negative'},
   {'aspect': 'ROOM_AMENITIES#DESIGN&FEATURES', 'sentiment': 'negative'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'positive'}]},
 {'id': '1',
  'text': 'Ph√≤ng s·∫°ch s·∫Ω, ƒëang khuy·∫øn m·∫°i n√™n gi√° r·∫ª. S√°ng s·ªõm v√† t·ªëi r·∫•t ·∫ßm ƒ©, v√¨ Kh√°ch s·∫°n v√† nh√† b√™n c·∫°nh ƒëang x√¢y d·ª±ng, ch√≠nh v√¨ v·∫≠y n√™n gi√° ph√≤ng ƒëang gi·∫£m gi√°. L·ªÖ t√¢n ƒë·ª©ng tu·ªïi l·ªãch s·ª± nhi·ªát t√¨nh. L·ªÖ t√¢n tr·∫ª tu·ªïi ban ƒë·∫ßu kh√¥ng cho kh√°ch ƒë·∫∑t ƒë·ªì g·ª≠i trong ph√≤ng g·ª≠i ƒë·ªì, l√Ω do l√† s·ª£ nh·∫ßm l·∫´n v·ªõi ƒë·ªì c·ªßa kh√°ch kh√°c trong ph√≤ng, l√Ω do h·∫øt s·ª©c v√¥ l√Ω, ph·∫£i ƒë·∫øn khi kh√°ch ko ƒë·ªìng √Ω ƒë·ªÉ ngo√†i g√≥c s·∫£nh v√¨ c√≥ m√°y ·∫£nh l·ªÖ t√¢n m·ªõi m·ªü ph√≤ng g·ª≠i ƒë·ªì. V·ªã tr√≠ xa, xung quanh √≠t c·ª≠a h√†ng d·ªãch v·ª•, b√π 

In [12]:
def evaluate_aspect_sentiment(ground_truth, predictions):
    # Chu·∫©n h√≥a d·ªØ li·ªáu th√†nh list c√°c tuple ƒë·ªÉ so s√°nh
    true_aspects = []
    pred_aspects = []

    true_aspect_sentiments = []
    pred_aspect_sentiments = []

    for gt_entry, pred_entry in zip(ground_truth, predictions):
        # ground truth: list of sentiments
        gt_sents = gt_entry['sentiments']
        gt_aspect_set = set()
        gt_aspect_sentiment_set = set()

        for item in gt_sents:
            gt_aspect_set.add(item['aspect'])
            gt_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        true_aspects.append(gt_aspect_set)
        true_aspect_sentiments.append(gt_aspect_sentiment_set)

        # prediction: list of results
        pred_sents = pred_entry['results']
        pred_aspect_set = set()
        pred_aspect_sentiment_set = set()

        for item in pred_sents:
            pred_aspect_set.add(item['aspect'])
            pred_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        pred_aspects.append(pred_aspect_set)
        pred_aspect_sentiments.append(pred_aspect_sentiment_set)

    # T√≠nh theo micro-F1 (g·ªôp h·∫øt l·∫°i)
    all_true_aspects = set.union(*true_aspects) if true_aspects else set()
    all_pred_aspects = set.union(*pred_aspects) if pred_aspects else set()

    tp_aspect = sum(len(gt & pred) for gt, pred in zip(true_aspects, pred_aspects))
    fp_aspect = sum(len(pred - gt) for gt, pred in zip(true_aspects, pred_aspects))
    fn_aspect = sum(len(gt - pred) for gt, pred in zip(true_aspects, pred_aspects))

    precision_aspect = tp_aspect / (tp_aspect + fp_aspect + 1e-8)
    recall_aspect = tp_aspect / (tp_aspect + fn_aspect + 1e-8)
    f1_aspect = 2 * precision_aspect * recall_aspect / (precision_aspect + recall_aspect + 1e-8)

    # T√≠nh cho sentiment classification
    tp_sentiment = sum(len(gt & pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fp_sentiment = sum(len(pred - gt) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fn_sentiment = sum(len(gt - pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))

    precision_sentiment = tp_sentiment / (tp_sentiment + fp_sentiment + 1e-8)
    recall_sentiment = tp_sentiment / (tp_sentiment + fn_sentiment + 1e-8)
    f1_sentiment = 2 * precision_sentiment * recall_sentiment / (precision_sentiment + recall_sentiment + 1e-8)

    return {
        "Aspect Detection F1": f1_aspect,
        "Sentiment Classification F1": f1_sentiment
    }


In [60]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Th√™m few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Th√™m c√¢u h·ªèi hi·ªán t·∫°i
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # T√¨m JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# D·ª± ƒëo√°n
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [04:05<00:00,  2.45s/it]


In [61]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.7214285664238946, 'Sentiment Classification F1': 0.6571428521397108}


In [63]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)

k = 1 v·ªõi v√≠ d·ª• g·∫ßn t√¢m

In [13]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Th√™m few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Th√™m c√¢u h·ªèi hi·ªán t·∫°i
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # T√¨m JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# D·ª± ƒëo√°n
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

  9%|‚ñâ         | 9/100 [00:22<05:11,  3.42s/it]

JSON Parse Error: {" there! I'm happy to help with extracting aspects and sentiments from the provided review. Based on the text "R·ªông r√£i, s·∫°ch s·∫Ω. C√≥ ch·ªó trong ph√≤ng kh√¥ng b·∫Øt ƒë∆∞·ª£c wifi" and the available aspects and sentiments, here's the result in the requested JSON format:

{"results": [{"aspect": "ROOMS#COMFORT", "sentiment": "positive"}, {"aspect": "ROOMS#CLEANLINESS", "sentiment": "positive"}, {"aspect": "ROOM_AMENITIES#QUALITY", "sentiment": "negative"}]}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [05:57<00:00,  3.57s/it]


In [14]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.709219853148316, 'Sentiment Classification F1': 0.6359338011405922}


In [15]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok-N1-c.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)

k = 1 v·ªõi v√≠ d·ª• xa t√¢m

In [18]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Th√™m few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Th√™m c√¢u h·ªèi hi·ªán t·∫°i
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # T√¨m JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# D·ª± ƒëo√°n
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 73/100 [03:11<01:55,  4.28s/it]

JSON Parse Error: {" {"results": [
    {"aspect": "LOCATION#GENERAL", "sentiment": "positive"},
    {"aspect": "SERVICE#GENERAL", "sentiment": "positive"},
    {"aspect": "FOOD&DRINKS#QUALITY", "sentiment": "negative"},
    {"aspect": "FOOD&DRINKS#STYLE&OPTIONS", "sentiment": "negative"},
    {"aspect": "HOTEL#QUALITY", "sentiment": "negative"},
    {"aspect": "ROOM_AMENITIES#CLEANLINESS", "sentiment": "negative"},
    {"aspect": "ROOM_AMENITIES#QUALITY", "sentiment": "negative"}
  ]}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [06:03<00:00,  3.64s/it]


In [19]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.7570754666882956, 'Sentiment Classification F1': 0.6910377308407963}


In [None]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok-N1-f.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)

k = 10

In [25]:
# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def create_clustering_prompt(text, few_shot_examples):
    prompt = "You are an AI assistant that extracts aspects and their sentiments from text.\n\n"
    
    # Th√™m few-shot examples
    for ex in few_shot_examples:
        prompt += f"Extract aspects and sentiments from the following review:\n{ex['text']}\n"
        prompt += f"Result: {json.dumps({'results': ex['sentiments']}, ensure_ascii=False)}\n\n"
    
    # Th√™m c√¢u h·ªèi hi·ªán t·∫°i
    prompt += f"Extract aspects and sentiments from the following review:\n{text}\n"
    prompt += f"Available aspects: {', '.join(aspects)}\n"
    prompt += "Available sentiments: positive, negative, neutral\n"
    prompt += "Return ONLY a valid JSON object in this exact format:\n"
    prompt += '{"results": [{"aspect": "aspect_name", "sentiment": "sentiment_value"}]}'
    
    return prompt

def extract_with_grok_clustering(text, few_shot_examples):
    prompt = create_clustering_prompt(text, few_shot_examples)
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # T√¨m JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

# D·ª± ƒëo√°n
for data in tqdm(test_json):
    prediction = extract_with_grok_clustering(data['text'], few_shot_json)
    predictions.append(prediction)

 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 94/100 [05:01<00:13,  2.23s/it]

JSON Parse Error: {" there! I'm happy to help with extracting aspects and sentiments from the provided review. Here's the result in the exact JSON format you requested:

{"results": [{"aspect": "ROOMS#COMFORT", "sentiment": "positive"}, {"aspect": "ROOM_AMENITIES#GENERAL", "sentiment": "positive"}, {"aspect": "FOOD&DRINKS#STYLE&OPTIONS", "sentiment": "negative"}]}


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [05:17<00:00,  3.17s/it]


In [None]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.8624613505235967, 'Sentiment Classification F1': 0.7211177119997286}


In [27]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Few-Hotel-Grok-N10.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)