In [11]:
from os import path
import pandas as pd
import numpy as np
import json
import requests
from tqdm import tqdm
from dotenv import load_dotenv
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from sentence_transformers import SentenceTransformer
import os

# Định nghĩa đường dẫn
DATA_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\data"
RESULT_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\results"
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(RESULT_DIR, exist_ok=True)

In [12]:
load_dotenv()

True

In [13]:
# CONFIG
ViABSA_BP_dir = path.join(DATA_DIR, 'ViABSA_Hotel')
test_file = path.join(ViABSA_BP_dir, 'data_test.csv')
test_df = pd.read_csv(test_file)

In [14]:
aspects = [
    "FACILITIES#CLEANLINESS",
    "FACILITIES#COMFORT",
    "FACILITIES#DESIGN&FEATURES",
    "FACILITIES#GENERAL",
    "FACILITIES#MISCELLANEOUS",
    "FACILITIES#PRICES",
    "FACILITIES#QUALITY",
    "FOOD&DRINKS#MISCELLANEOUS",
    "FOOD&DRINKS#PRICES",
    "FOOD&DRINKS#QUALITY",
    "FOOD&DRINKS#STYLE&OPTIONS",
    "HOTEL#CLEANLINESS",
    "HOTEL#COMFORT",
    "HOTEL#DESIGN&FEATURES",
    "HOTEL#GENERAL",
    "HOTEL#MISCELLANEOUS",
    "HOTEL#PRICES",
    "HOTEL#QUALITY",
    "LOCATION#GENERAL",
    "ROOMS#CLEANLINESS",
    "ROOMS#COMFORT",
    "ROOMS#DESIGN&FEATURES",
    "ROOMS#GENERAL",
    "ROOMS#MISCELLANEOUS",
    "ROOMS#PRICES",
    "ROOMS#QUALITY",
    "ROOM_AMENITIES#CLEANLINESS",
    "ROOM_AMENITIES#COMFORT",
    "ROOM_AMENITIES#DESIGN&FEATURES",
    "ROOM_AMENITIES#GENERAL",
    "ROOM_AMENITIES#MISCELLANEOUS",
    "ROOM_AMENITIES#PRICES",
    "ROOM_AMENITIES#QUALITY",
    "SERVICE#GENERAL"
]

In [15]:
sentiment_map = {
    1: "positive",
    2: "negative",
    3: "neutral"
}

def transform_aspect_sentiment(df, start=0, end=None):
    result = [] 
    
    if end is None:
        end = len(df)

    for idx, row in df.iloc[start:end].iterrows():
        entry = {
            "id": str(idx),
            "text": row['Review'],
            "sentiments": []
        }

        for aspect in aspects:
            sentiment = row[f"{aspect}_label"]
            if sentiment == 1:  # chỉ lấy những cái có sentiment
                aspect_sentiment_value = row[aspect]
                mapped_sent = sentiment_map.get(aspect_sentiment_value, "unknown")
                if aspect_sentiment_value != 'none':
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": mapped_sent
                    })
                else:
                    # nếu cột sentiment text bị none nhưng label == 1 thì có thể log ra kiểm tra
                    entry["sentiments"].append({
                        "aspect": aspect,
                        "sentiment": "unknown"
                    })

        result.append(entry)

    return result

In [16]:
# SETUP DATA
test_df[aspects] = test_df[aspects].fillna('none')

for aspect in aspects:
    test_df[aspect + '_label'] = (test_df[aspect] != 0).astype(int)

test_json = transform_aspect_sentiment(test_df, 0, 100)
test_json[:5]

[{'id': '0',
  'text': 'Ga giường không sạch, nhân viên quên dọn phòng một ngày.',
  'sentiments': [{'aspect': 'ROOM_AMENITIES#CLEANLINESS',
    'sentiment': 'negative'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'negative'}]},
 {'id': '1',
  'text': 'Nv nhiệt tình, phòng ở sạch sẽ, tiện nghi, vị trí khá thuận tiện cho việc di chuyển đến các địa điểm ăn + chơi Phòng có gián',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#CLEANLINESS', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#COMFORT', 'sentiment': 'positive'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'positive'}]},
 {'id': '2',
  'text': 'Đi bộ ra biển gần, tiện đi lại Phòng view biển nhưng cửa sổ view biển khá bé',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#DESIGN&FEATURES', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#GENERAL', 'sentiment': 'positive'}]},
 {'id': '3',
  'text': 'Tất cả mọi thứ đều sạch sẽ, giường 

In [17]:
def evaluate_aspect_sentiment(ground_truth, predictions):
    # Chuẩn hóa dữ liệu thành list các tuple để so sánh
    true_aspects = []
    pred_aspects = []

    true_aspect_sentiments = []
    pred_aspect_sentiments = []

    for gt_entry, pred_entry in zip(ground_truth, predictions):
        # ground truth: list of sentiments
        gt_sents = gt_entry['sentiments']
        gt_aspect_set = set()
        gt_aspect_sentiment_set = set()

        for item in gt_sents:
            gt_aspect_set.add(item['aspect'])
            gt_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        true_aspects.append(gt_aspect_set)
        true_aspect_sentiments.append(gt_aspect_sentiment_set)

        # prediction: list of results
        pred_sents = pred_entry['results']
        pred_aspect_set = set()
        pred_aspect_sentiment_set = set()

        for item in pred_sents:
            pred_aspect_set.add(item['aspect'])
            pred_aspect_sentiment_set.add((item['aspect'], item['sentiment']))

        pred_aspects.append(pred_aspect_set)
        pred_aspect_sentiments.append(pred_aspect_sentiment_set)

    # Tính theo micro-F1 (gộp hết lại)
    all_true_aspects = set.union(*true_aspects) if true_aspects else set()
    all_pred_aspects = set.union(*pred_aspects) if pred_aspects else set()

    tp_aspect = sum(len(gt & pred) for gt, pred in zip(true_aspects, pred_aspects))
    fp_aspect = sum(len(pred - gt) for gt, pred in zip(true_aspects, pred_aspects))
    fn_aspect = sum(len(gt - pred) for gt, pred in zip(true_aspects, pred_aspects))

    precision_aspect = tp_aspect / (tp_aspect + fp_aspect + 1e-8)
    recall_aspect = tp_aspect / (tp_aspect + fn_aspect + 1e-8)
    f1_aspect = 2 * precision_aspect * recall_aspect / (precision_aspect + recall_aspect + 1e-8)

    # Tính cho sentiment classification
    tp_sentiment = sum(len(gt & pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fp_sentiment = sum(len(pred - gt) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))
    fn_sentiment = sum(len(gt - pred) for gt, pred in zip(true_aspect_sentiments, pred_aspect_sentiments))

    precision_sentiment = tp_sentiment / (tp_sentiment + fp_sentiment + 1e-8)
    recall_sentiment = tp_sentiment / (tp_sentiment + fn_sentiment + 1e-8)
    f1_sentiment = 2 * precision_sentiment * recall_sentiment / (precision_sentiment + recall_sentiment + 1e-8)

    return {
        "Aspect Detection F1": f1_aspect,
        "Sentiment Classification F1": f1_sentiment
    }


In [18]:

# Setup Grok API
def call_grok_api(prompt):
    url = "https://api.x.ai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.getenv('GROK_API_KEY')}",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text. Think step by step exactly."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    try:
        response = requests.post(url, headers=headers, json=data, timeout=30)
        if response.status_code == 200:
            result = response.json()
            return result['choices'][0]['message']['content']
        else:
            print(f"API Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Request Error: {e}")
        return None

def extract_with_grok(text):
    """
    Trích xuất aspect và sentiment sử dụng Grok API
    """
    prompt = f"""
    Extract aspects and sentiments from the following review:
    {text}
    
    Available aspects: {', '.join(aspects)}
    Available sentiments: positive, negative, neutral
    
    Return ONLY a valid JSON object in this exact format:
    {{"results": [{{"aspect": "aspect_name", "sentiment": "sentiment_value"}}]}}
    """
    
    response_text = call_grok_api(prompt)
    
    if response_text:
        try:
            # Tìm JSON trong response
            start_idx = response_text.find('{')
            end_idx = response_text.rfind('}') + 1
            
            if start_idx != -1 and end_idx != -1:
                json_str = response_text[start_idx:end_idx]
                parsed_output = json.loads(json_str)
                return parsed_output
            else:
                return {"results": []}
        except json.JSONDecodeError:
            print(f"JSON Parse Error: {response_text}")
            return {"results": []}
    else:
        return {"results": []}

predictions = []

for data in tqdm(test_json):
    prediction = extract_with_grok(data['text'])
    predictions.append(prediction)

 51%|█████     | 51/100 [03:42<06:29,  7.95s/it]

JSON Parse Error: {
, I’m going to break this down step by step to extract the aspects and their corresponding sentiments from the provided review text. I’ll analyze each part of the review, match it to the available aspects, determine the sentiment, and then format the output as a JSON object.

**Step 1: Analyze the review text sentence by sentence.**

- "Địa thế khách sạn rất đẹp, view tốt, gần các chỗ vui chơi."
  - This talks about the location of the hotel, describing it as beautiful, with a good view and close to entertainment spots. This relates to the aspect "LOCATION#GENERAL" and the sentiment is positive.
  
- "Nhân viên rất thân thiện và nhiệt tình. Đặc biệt là bác bảo vệ cực đáng yêu."
  - This refers to the staff being friendly and enthusiastic, with a special mention of a lovable security guard. This corresponds to "SERVICE#GENERAL" with a positive sentiment.
  
- "Phòng hơi nhỏ tuy nhiên bài trí rất hợp lí."
  - This mentions the room being a bit small but well-arranged.

100%|██████████| 100/100 [07:27<00:00,  4.48s/it]


In [20]:
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.6690997517013575, 'Sentiment Classification F1': 0.6075334093487214}


In [21]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_CoT-Hotel_Grok.json')
with open(result_file, 'w') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)