In [12]:
from os import path
import pandas as pd
import json
import google.generativeai as genai
from tqdm import tqdm
from dotenv import load_dotenv
import os

# Define paths.
# The defaults are the original author's local Windows folders. Set the
# ABSA_DATA_DIR / ABSA_RESULT_DIR environment variables before starting the
# kernel to run the notebook on another machine without editing this cell.
DATA_DIR = os.getenv("ABSA_DATA_DIR", r"c:\Users\Admin\Python\ABSA_Prompting\data")
RESULT_DIR = os.getenv("ABSA_RESULT_DIR", r"c:\Users\Admin\Python\ABSA_Prompting\results")
# exist_ok=True makes re-running this cell harmless when the folders already exist.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(RESULT_DIR, exist_ok=True)

In [None]:
# Load variables from a local .env file (if any) into the process environment;
# later cells read GEMINI_API_KEY through os.getenv.
load_dotenv()

In [None]:
# CONFIG
# Locate the ViABSA_BP dataset folder under DATA_DIR and load its test split.
ViABSA_BP_dir = path.join(DATA_DIR, 'ViABSA_BP')
test_file = path.join(ViABSA_BP_dir, 'data_test.csv')
# Later cells read a 'data' text column plus one column per aspect from this frame.
test_df = pd.read_csv(test_file)

In [None]:
def transform_aspect_sentiment(df, start=0, end=None):
    """Turn a positional slice of an annotated frame into a list of review records.

    Args:
        df: DataFrame with a 'data' column and, for every aspect name listed
            below, a '<aspect>' column and a '<aspect>_label' column.
        start: First row position (as used by ``df.iloc``) to convert.
        end: Row position to stop before; ``None`` means up to ``len(df)``.

    Returns:
        A list with one dict per row:
        ``{"id": <index label as str>, "text": <row['data']>, "sentiments": [...]}``.
        "sentiments" holds one ``{"aspect", "sentiment"}`` dict for each aspect
        whose label equals 1, in the fixed aspect order below. When the label
        is 1 but the aspect column holds 'none', the sentiment is "unknown".
    """
    aspect_names = (
        "stayingpower", "texture", "smell", "price",
        "others", "colour", "shipping", "packing",
    )
    stop = len(df) if end is None else end

    records = []
    for row_label, record in df.iloc[start:stop].iterrows():
        review_text = record['data']

        found = []
        # Only aspects flagged with label == 1 contribute an entry.
        for name in (n for n in aspect_names if record[f"{n}_label"] == 1):
            value = record[name]
            # A flagged aspect whose text value is 'none' is an inconsistent
            # annotation; it is kept, marked "unknown", rather than dropped.
            found.append({
                "aspect": name,
                "sentiment": value if value != 'none' else "unknown",
            })

        records.append({"id": str(row_label), "text": review_text, "sentiments": found})

    return records

In [None]:
def _micro_f1(true_sets, pred_sets):
    """Micro-averaged F1 over paired per-review sets (counts pooled over all reviews)."""
    tp = fp = fn = 0
    for gt, pred in zip(true_sets, pred_sets):
        tp += len(gt & pred)
        fp += len(pred - gt)
        fn += len(gt - pred)

    # The 1e-8 terms keep every ratio defined when a denominator would be zero.
    precision = tp / (tp + fp + 1e-8)
    recall = tp / (tp + fn + 1e-8)
    return 2 * precision * recall / (precision + recall + 1e-8)


def _prediction_sets(pred_entry):
    """Collect (aspects, (aspect, sentiment) pairs) from one model output, skipping malformed parts."""
    aspect_set, pair_set = set(), set()

    items = pred_entry.get('results') if isinstance(pred_entry, dict) else None
    if not isinstance(items, (list, tuple)):
        # Missing or non-list "results": treat as "nothing predicted".
        return aspect_set, pair_set

    for item in items:
        if not isinstance(item, dict) or 'aspect' not in item:
            continue
        aspect = item['aspect']
        try:
            aspect_set.add(aspect)
        except TypeError:
            # Unhashable aspect (e.g. a list): cannot be compared, skip the item.
            continue
        if 'sentiment' in item:
            try:
                pair_set.add((aspect, item['sentiment']))
            except TypeError:
                # Unhashable sentiment: the aspect still counts as detected.
                pass

    return aspect_set, pair_set


def evaluate_aspect_sentiment(ground_truth, predictions):
    """Score model predictions against gold annotations with micro-averaged F1.

    Args:
        ground_truth: Sequence of dicts, each with a 'sentiments' list of
            ``{"aspect": ..., "sentiment": ...}`` dicts.
        predictions: Sequence of dicts, each expected to hold a 'results' list
            of ``{"aspect": ..., "sentiment": ...}`` dicts. Because these come
            from parsed model output, malformed entries (no 'results' key,
            non-dict items, missing or unhashable fields) are tolerated and
            simply contribute no prediction instead of raising.

    Returns:
        ``{"Aspect Detection F1": float, "Sentiment Classification F1": float}``.
        The first compares the sets of aspect names per review; the second
        compares ``(aspect, sentiment)`` pairs, so a pair only counts as correct
        when both the aspect and its sentiment match.

    Raises:
        KeyError: If a ground-truth entry lacks 'sentiments', or one of its
            items lacks 'aspect' or 'sentiment'.

    Note:
        Entries are paired positionally with ``zip``; if the two sequences
        differ in length, the surplus entries of the longer one are ignored.
    """
    true_aspects, pred_aspects = [], []
    true_aspect_sentiments, pred_aspect_sentiments = [], []

    for gt_entry, pred_entry in zip(ground_truth, predictions):
        # Ground truth is produced by our own code, so it is indexed strictly:
        # a malformed gold entry should fail loudly rather than skew the score.
        gt_sents = gt_entry['sentiments']
        true_aspects.append({item['aspect'] for item in gt_sents})
        true_aspect_sentiments.append(
            {(item['aspect'], item['sentiment']) for item in gt_sents}
        )

        pred_aspect_set, pred_pair_set = _prediction_sets(pred_entry)
        pred_aspects.append(pred_aspect_set)
        pred_aspect_sentiments.append(pred_pair_set)

    return {
        "Aspect Detection F1": _micro_f1(true_aspects, pred_aspects),
        "Sentiment Classification F1": _micro_f1(true_aspect_sentiments, pred_aspect_sentiments),
    }


In [None]:
# SETUP DATA

# Aspect columns of the dataset. This module-level list is also read by
# extract_with_gemini when it builds its prompt.
aspects = ['stayingpower', 'texture', 'smell', 'price', 'others', 'colour', 'shipping', 'packing']
# Missing annotations become the literal string 'none' so they can be compared uniformly below.
test_df[aspects] = test_df[aspects].fillna('none')

# Add a binary '<aspect>_label' column: 1 when the aspect holds a value other than 'none', else 0.
for aspect in aspects:
    test_df[aspect + '_label'] = (test_df[aspect] != 'none').astype(int)

In [None]:
# Convert the first 100 rows of the test split into evaluation records,
# then preview the first five as the cell output.
test_json = transform_aspect_sentiment(test_df, 0, 100)
test_json[:5]

In [None]:
# Setup Gemini
# The API key is read from the environment; os.getenv returns None when GEMINI_API_KEY is unset.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
# Module-level model handle used by extract_with_gemini.
model = genai.GenerativeModel('gemini-2.5-flash')

def _parse_results(response_text):
    """Parse the outermost ``{...}`` span of a model reply into ``{"results": [...]}``.

    Only items that are dicts with string 'aspect' and 'sentiment' values are
    kept, so the result always has the shape the evaluation code expects.
    A reply without a usable ``{...}`` span, or whose JSON is not an object
    with a list under "results", yields ``{"results": []}``.

    Raises:
        json.JSONDecodeError: If the extracted span is not valid JSON.
    """
    start_idx = response_text.find('{')
    end_idx = response_text.rfind('}')
    # Both braces must exist and be in order; text around them (e.g. markdown
    # code fences) is ignored.
    if start_idx == -1 or end_idx < start_idx:
        return {"results": []}

    parsed = json.loads(response_text[start_idx:end_idx + 1])
    results = parsed.get("results") if isinstance(parsed, dict) else None
    if not isinstance(results, list):
        return {"results": []}

    return {
        "results": [
            item for item in results
            if isinstance(item, dict)
            and isinstance(item.get("aspect"), str)
            and isinstance(item.get("sentiment"), str)
        ]
    }


def extract_with_gemini(text):
    """Ask the module-level Gemini ``model`` for the aspects and sentiments in a review.

    Args:
        text: The review text to analyse.

    Returns:
        ``{"results": [{"aspect": str, "sentiment": str}, ...]}``. Any failure
        (API error, unparsable or unexpectedly shaped reply) is reported with a
        printed "Error: ..." line where applicable and results in
        ``{"results": []}``, so a batch loop over many reviews keeps running.

    Relies on the module-level names ``model`` and ``aspects``.
    """
    prompt = f"""
    You are an AI assistant that extracts aspects and their sentiments from text.
    
    Extract aspects and sentiments from the following review:
    {text}
    
    Available aspects: {', '.join(aspects)}
    Available sentiments: positive, negative, neutral
    
    Return ONLY a valid JSON object in this exact format:
    {{"results": [{{"aspect": "aspect_name", "sentiment": "sentiment_value"}}]}}
    """

    try:
        response = model.generate_content(prompt)
        # Locate and validate the JSON object inside the reply.
        return _parse_results(response.text.strip())
    except Exception as e:
        # Deliberate best-effort: one failed review must not abort the whole run.
        print(f"Error: {e}")
        return {"results": []}

# Query Gemini once per review, in order, so predictions[i] corresponds to test_json[i].
# Appending inside a plain loop keeps the partial results available if the run is interrupted.
predictions = []

for data in tqdm(test_json):
    prediction = extract_with_gemini(data['text'])
    predictions.append(prediction)

In [4]:
# Check API key
gemini_api_key = os.getenv("GEMINI_API_KEY")
if gemini_api_key:
    # Report only that the key exists. Echoing any part of it would store a
    # piece of the secret in the saved notebook output.
    print(f"✅ GEMINI_API_KEY found ({len(gemini_api_key)} characters, value hidden)")
else:
    print("❌ GEMINI_API_KEY not found!")
    print("Bạn có thể set API key bằng:")
    print("os.environ['GEMINI_API_KEY'] = 'your-api-key-here'")

NameError: name 'os' is not defined

In [None]:
# Test Gemini models để tìm model name đúng
def test_gemini_models():
    """
    Test các model names khác nhau để tìm model đúng
    """
    possible_models = [
        "gemini-1.5-flash",      # Most likely correct
        "gemini-1.5-pro", 
        "gemini-2.0-flash-exp",
        "gemini-2.5-flash",      # Current one being used
        "gemini-pro",
        "gemini-flash",
        "models/gemini-1.5-flash",
        "models/gemini-1.5-pro"
    ]
    
    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
    
    for model_name in possible_models:
        print(f"Testing model: {model_name}")
        try:
            test_model = genai.GenerativeModel(model_name)
            response = test_model.generate_content("Hello, just say 'Hi' back")
            print(f"✅ SUCCESS with model: {model_name}")
            print(f"Response: {response.text}")
            return model_name
        except Exception as e:
            print(f"❌ FAILED: {e}")
    
    return None

# Test models
# Runs the probe (which calls the Gemini API) and reports the first model name that responded, or None.
working_model = test_gemini_models()
print(f"\nWorking model found: {working_model}")

In [None]:
# Score the Gemini predictions against the ground-truth records built from the test split.
scores = evaluate_aspect_sentiment(test_json, predictions)
print(scores)

In [None]:
# Persist the raw predictions in the results folder.
result_file = path.join(RESULT_DIR, 'ViABSA_BP_Zero-shot_Gemini.json')
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly; the platform default encoding (e.g. a Windows
# code page) may be unable to encode them.
with open(result_file, 'w', encoding='utf-8') as f:
    json.dump(predictions, f, indent=4, ensure_ascii=False)