In [1]:
from os import path
import pandas as pd
import numpy as np
import json
from openai import OpenAI
from tqdm import tqdm
from dotenv import load_dotenv
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from sentence_transformers import SentenceTransformer
import os

# Absolute locations of the input data and of the experiment outputs.
DATA_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\data"
RESULT_DIR = r"c:\Users\Admin\Python\ABSA_Prompting\results"

# Create both folders up front; a folder that already exists is left as it is.
for _folder in (DATA_DIR, RESULT_DIR):
    os.makedirs(_folder, exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the API key that OpenAI() picks up later — confirm.
load_dotenv()

True

In [3]:
# CONFIG
# Paths of the hotel dataset folder and of its test split, both resolved under DATA_DIR.
ViABSA_BP_dir = path.join(DATA_DIR, 'ViABSA_Hotel')
test_file = path.join(ViABSA_BP_dir, 'data_test.csv')
# Load the test split; later cells read a 'Review' column and one column per aspect from it.
test_df = pd.read_csv(test_file)

In [4]:
# Attribute names shared by every entity that carries the full attribute range.
_COMMON_ATTRIBUTES = (
    "CLEANLINESS",
    "COMFORT",
    "DESIGN&FEATURES",
    "GENERAL",
    "MISCELLANEOUS",
    "PRICES",
    "QUALITY",
)

# Entity -> attributes, listed in the order the labels must appear in `aspects`.
_ENTITY_ATTRIBUTES = {
    "FACILITIES": _COMMON_ATTRIBUTES,
    "FOOD&DRINKS": ("MISCELLANEOUS", "PRICES", "QUALITY", "STYLE&OPTIONS"),
    "HOTEL": _COMMON_ATTRIBUTES,
    "LOCATION": ("GENERAL",),
    "ROOMS": _COMMON_ATTRIBUTES,
    "ROOM_AMENITIES": _COMMON_ATTRIBUTES,
    "SERVICE": ("GENERAL",),
}

# Flat list of "ENTITY#ATTRIBUTE" labels. The same strings are used as column names
# of the test frame and as the allowed values of the model's "aspect" field.
aspects = [
    f"{entity}#{attribute}"
    for entity, attributes in _ENTITY_ATTRIBUTES.items()
    for attribute in attributes
]

In [5]:
# Numeric sentiment codes stored in the aspect columns -> readable labels.
sentiment_map = {1: "positive", 2: "negative", 3: "neutral"}


def transform_aspect_sentiment(df, start=0, end=None):
    """Turn rows of an annotated review frame into a list of record dicts.

    Args:
        df: DataFrame with a 'Review' column and, for every name in the
            module-level `aspects` list, a column of that name holding the
            sentiment code plus a '<name>_label' column flagging presence.
        start: Position of the first row to convert (used with `iloc`).
        end: Position one past the last row to convert; None means all
            remaining rows.

    Returns:
        A list with one dict per row: 'id' (the row's index label as a string),
        'text' (the review) and 'sentiments', a list of
        {'aspect', 'sentiment'} dicts for every aspect whose label equals 1.
        Codes missing from `sentiment_map`, and the 'none' placeholder, are
        reported as "unknown".
    """
    def readable(code):
        # 'none' means the label says "present" but no sentiment code was recorded.
        return sentiment_map.get(code, "unknown") if code != 'none' else "unknown"

    stop = len(df) if end is None else end

    return [
        {
            "id": str(row_id),
            "text": review['Review'],
            # Only keep the aspects flagged as carrying a sentiment.
            "sentiments": [
                {"aspect": name, "sentiment": readable(review[name])}
                for name in aspects
                if review[f"{name}_label"] == 1
            ],
        }
        for row_id, review in df.iloc[start:stop].iterrows()
    ]

In [6]:
# SETUP DATA
# Missing annotations are stored as the placeholder string 'none'.
test_df[aspects] = test_df[aspects].fillna('none')

# An aspect counts as present only when it holds a real annotation, i.e. neither 0
# nor the 'none' placeholder written above. Testing `!= 0` alone would flag every
# missing value as present and add it to the ground truth with sentiment "unknown".
for aspect in aspects:
    test_df[aspect + '_label'] = (
        (test_df[aspect] != 0) & (test_df[aspect] != 'none')
    ).astype(int)

# Build the ground-truth records for the first 100 test rows and preview a few.
test_json = transform_aspect_sentiment(test_df, 0, 100)
test_json[:5]

[{'id': '0',
  'text': 'Ga giường không sạch, nhân viên quên dọn phòng một ngày.',
  'sentiments': [{'aspect': 'ROOM_AMENITIES#CLEANLINESS',
    'sentiment': 'negative'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'negative'}]},
 {'id': '1',
  'text': 'Nv nhiệt tình, phòng ở sạch sẽ, tiện nghi, vị trí khá thuận tiện cho việc di chuyển đến các địa điểm ăn + chơi Phòng có gián',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#CLEANLINESS', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#COMFORT', 'sentiment': 'positive'},
   {'aspect': 'SERVICE#GENERAL', 'sentiment': 'positive'}]},
 {'id': '2',
  'text': 'Đi bộ ra biển gần, tiện đi lại Phòng view biển nhưng cửa sổ view biển khá bé',
  'sentiments': [{'aspect': 'LOCATION#GENERAL', 'sentiment': 'positive'},
   {'aspect': 'ROOMS#DESIGN&FEATURES', 'sentiment': 'neutral'},
   {'aspect': 'ROOMS#GENERAL', 'sentiment': 'positive'}]},
 {'id': '3',
  'text': 'Tất cả mọi thứ đều sạch sẽ, giường 

In [7]:
def _annotation_pairs(items):
    """Return the set of (aspect, sentiment) tuples found in a list of annotation dicts."""
    return {(item['aspect'], item['sentiment']) for item in items}


def _micro_f1(true_sets, pred_sets):
    """Micro-averaged F1 over two index-aligned lists of sets (counts pooled over all entries)."""
    tp = sum(len(gt & pred) for gt, pred in zip(true_sets, pred_sets))
    fp = sum(len(pred - gt) for gt, pred in zip(true_sets, pred_sets))
    fn = sum(len(gt - pred) for gt, pred in zip(true_sets, pred_sets))

    # The small epsilon keeps every division defined when a count is zero.
    precision = tp / (tp + fp + 1e-8)
    recall = tp / (tp + fn + 1e-8)
    return 2 * precision * recall / (precision + recall + 1e-8)


def evaluate_aspect_sentiment(ground_truth, predictions):
    """Score predicted aspects and sentiments against the ground truth with micro-F1.

    Args:
        ground_truth: Iterable of dicts whose 'sentiments' entry is a list of
            {'aspect', 'sentiment'} dicts.
        predictions: Iterable of dicts whose 'results' entry is a list of
            {'aspect', 'sentiment'} dicts; entry i is compared with ground-truth
            entry i.

    Returns:
        A dict with two floats:
        "Aspect Detection F1" compares the sets of aspect names only;
        "Sentiment Classification F1" compares (aspect, sentiment) pairs, so a
        pair only counts as correct when both parts match.

    Raises:
        KeyError: If an entry lacks 'sentiments' / 'results', or an item lacks
            'aspect' / 'sentiment'.

    Warns:
        UserWarning: If the two inputs differ in length. Entries are paired
            positionally and the surplus entries are ignored, which makes the
            scores unreliable, so the mismatch is reported instead of being
            silently dropped.
    """
    import warnings

    ground_truth = list(ground_truth)
    predictions = list(predictions)
    if len(ground_truth) != len(predictions):
        warnings.warn(
            "ground_truth and predictions have different lengths "
            f"({len(ground_truth)} vs {len(predictions)}); surplus entries are ignored",
            stacklevel=2,
        )

    # Normalise every entry to a set of (aspect, sentiment) tuples for comparison.
    paired = list(zip(ground_truth, predictions))
    true_pairs = [_annotation_pairs(gt_entry['sentiments']) for gt_entry, _ in paired]
    pred_pairs = [_annotation_pairs(pred_entry['results']) for _, pred_entry in paired]

    # Aspect detection ignores the sentiment part of each pair.
    true_aspects = [{aspect for aspect, _ in pairs} for pairs in true_pairs]
    pred_aspects = [{aspect for aspect, _ in pairs} for pairs in pred_pairs]

    return {
        "Aspect Detection F1": _micro_f1(true_aspects, pred_aspects),
        "Sentiment Classification F1": _micro_f1(true_pairs, pred_pairs)
    }


In [8]:
# JSON schema of a single extracted item: one aspect label paired with one sentiment.
_pair_schema = {
    "type": "object",
    "properties": {
        "aspect": {"type": "string", "enum": aspects},
        "sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
    },
    "required": ["aspect", "sentiment"],
}

# Function definition handed to the chat completion call: the model is expected to
# return its answer as a "results" array of the items described above.
gpt_functions = [
    {
        "name": "extract_aspect_sentiment",
        "description": "Extract aspects and sentiments from text, Think step by step exactly",
        "parameters": {
            "type": "object",
            "properties": {
                "results": {"type": "array", "items": _pair_schema},
            },
            "required": ["results"],
        },
    }
]


In [9]:
client = OpenAI()
predictions = []

# Predict: one request per review. Exactly one prediction is appended per review so
# that `predictions` stays index-aligned with `test_json` for the evaluation step.
for data in tqdm(test_json):
    messages = [
        {"role": "system", "content": "You are an AI assistant that extracts aspects and their sentiments from text."},
        {"role": "user", "content": f"Extract aspects and sentiments from the following review:\n{data['text']}"}
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        functions=gpt_functions,
        # Force the function call: left on automatic, the model may reply in plain
        # text, in which case `message.function_call` is None.
        function_call={"name": gpt_functions[0]["name"]},
        temperature=0
    )

    call = response.choices[0].message.function_call
    parsed_output = None
    if call is not None:
        try:
            parsed_output = json.loads(call.arguments)
        except json.JSONDecodeError:
            parsed_output = None  # malformed or truncated argument string

    # Fall back to an empty prediction instead of aborting the whole run (and losing
    # every answer collected so far) when a single reply is unusable.
    if not (isinstance(parsed_output, dict) and isinstance(parsed_output.get("results"), list)):
        tqdm.write(f"No usable function call for review id {data['id']}; recording an empty prediction.")
        parsed_output = {"results": []}

    predictions.append(parsed_output)

100%|██████████| 100/100 [03:11<00:00,  1.92s/it]


In [10]:
# Score the model output against the ground-truth records built from the test rows.
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

{'Aspect Detection F1': 0.657831320304979, 'Sentiment Classification F1': 0.5893491074216729}


In [11]:
result_file = path.join(RESULT_DIR, 'ViABSA_BP_CoT-Hotel-OpenAi.json')
# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding is
# pinned to UTF-8 rather than left to the platform default, which may be unable to
# encode them.
with open(result_file, 'w', encoding='utf-8') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)