In [1]:
import torch
import torch.nn as nn
# from transformers import BertModel
# from transformers.modeling_outputs import SequenceClassifierOutput
# from transformers import PretrainedConfig, PreTrainedModel, BertTokenizer
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import re
import json
import numpy as np
import fasttext
from sklearn.svm import SVC
import pickle

import warnings

# Suppress UserWarning-category warnings coming from sklearn modules (this includes the old InconsistentVersionWarning)
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")

import joblib
import os

nltk.download('punkt')
nltk.download('punkt_tab')
!wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz
!gunzip cc.en.300.bin.gz   # extracts cc.en.300.bin

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load FastText model
print("Loading fastText model...")
fasttext_model = fasttext.load_model("cc.en.300.bin")



[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


--2025-11-13 14:37:05--  https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 13.225.143.54, 13.225.143.109, 13.225.143.122, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|13.225.143.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4503593528 (4.2G) [application/octet-stream]
Saving to: ‘cc.en.300.bin.gz’


2025-11-13 14:37:20 (284 MB/s) - ‘cc.en.300.bin.gz’ saved [4503593528/4503593528]

Loading fastText model...


In [2]:
def get_embedding(text):
    """Return the fastText sentence vector for ``text``.

    The input is coerced with ``str()`` and line breaks are replaced by
    spaces before embedding, because fastText's ``get_sentence_vector``
    refuses strings that contain a newline character.

    Args:
        text: Any object; its ``str()`` form is what gets embedded.

    Returns:
        Whatever ``fasttext_model.get_sentence_vector`` returns for the
        single-line text.
    """
    single_line = str(text).replace("\n", " ")
    return fasttext_model.get_sentence_vector(single_line)

class SentenceAspectSVM:
    """Sentence-level aspect filter backed by a pickled classifier.

    The classifier is loaded from ``model_path`` (or from a per-aspect default
    path). When the file does not exist, ``self.model`` is ``None`` and the
    filter passes every sentence through unchanged.
    """

    def __init__(self, aspect, model_path=None):
        """Load the classifier for ``aspect``.

        Args:
            aspect: Aspect name; used to build the default checkpoint path.
            model_path: Optional explicit path to the pickled model.
        """
        self.aspect = aspect
        checkpoint = (
            f"/kaggle/input/sudo-svm-phase1/{aspect}_best_model.pkl"
            if model_path is None
            else model_path
        )
        try:
            # NOTE: pickle.load executes arbitrary code; only load trusted files.
            with open(checkpoint, "rb") as handle:
                self.model = pickle.load(handle)
        except FileNotFoundError:
            print(f"Sentence SVM model not found: {checkpoint}")
            self.model = None

    def filter_positive_sentences(self, sentences):
        """Return the sentences the classifier labels as 1 (belonging to the aspect).

        Args:
            sentences: Sequence of sentence strings.

        Returns:
            ``[]`` for empty input; the input itself when no model is loaded
            or when prediction raises; otherwise a new list with the
            sentences whose predicted label is 1.
        """
        if not sentences:
            return []
        if self.model is None:
            # No model available: keep everything so the pipeline keeps running.
            return sentences
        try:
            features = np.vstack(list(map(get_embedding, sentences)))
            predicted = self.model.predict(features)
            kept = []
            for sentence, label in zip(sentences, predicted):
                if int(label) == 1:
                    kept.append(sentence)
            return kept
        except Exception as exc:
            print(f"Sentence SVM prediction failed: {exc}")
            return sentences


class SVMComparativeModel:
    """Pairwise comparison classifier over concatenated text embeddings."""

    def __init__(self, aspect, model_path=None):
        """Load the pickled comparison model for ``aspect``.

        Args:
            aspect: Aspect name; used to build the default checkpoint path.
            model_path: Optional explicit path to the pickled model.
        """
        self.aspect = aspect
        checkpoint = (
            f"/kaggle/input/sudo-svm-checkpoint-phase2/{aspect}_best_model.pkl"
            if model_path is None
            else model_path
        )
        try:
            # NOTE: pickle.load executes arbitrary code; only load trusted files.
            with open(checkpoint, 'rb') as handle:
                self.model = pickle.load(handle)
        except FileNotFoundError:
            print(f"Model file not found: {checkpoint}")
            # Fall back to a default SVC when the checkpoint is missing.
            # NOTE(review): this SVC is never fitted in this class, so predict()
            # presumably fails and returns the fallback label 2 - confirm intent.
            self.model = SVC(class_weight="balanced", random_state=42)

    def predict(self, text_1, text_2):
        """Predict the comparison label for a pair of texts.

        Both texts are embedded with ``get_embedding``, the two vectors are
        concatenated into a single-row feature matrix, and the model's first
        prediction is returned as an ``int``.

        Returns:
            The predicted label, or ``2`` (no comparison) if anything raises.
        """
        try:
            pair = [get_embedding(text_1), get_embedding(text_2)]
            features = np.concatenate(pair).reshape(1, -1)
            return int(self.model.predict(features)[0])
        except Exception as exc:
            print(f"SVM prediction failed: {exc}")
            return 2  # Fallback to no comparison



class OverallModel(nn.Module):
    """Two-stage comparative pipeline for a single aspect.

    Stage 1 keeps only the sentences of each review that the per-aspect
    sentence SVM assigns to the aspect; stage 2 feeds the two filtered texts
    to the comparison SVM. Label ``2`` ("no comparison") is returned whenever
    either review has no usable sentences or the comparison fails.
    """

    def __init__(self, aspect, device):
        """Build the sentence filter and comparison model for ``aspect``.

        Args:
            aspect: One of 'appearance', 'aroma', 'palate', 'taste'
                (any other value raises ``KeyError``).
            device: Stored on the instance; not otherwise used in this class.
        """
        super().__init__()
        self.aspect = aspect
        self.device = device

        self.aspect_index = {
            'appearance': 0,
            'aroma': 1,
            'palate': 2,
            'taste': 3,
        }
        self.aspect_idx = self.aspect_index[aspect]

        # The per-aspect sentence SVM trained in phase 1 replaces SLAC-BERT,
        # so no BERT tokenizer is needed any more.
        self.sentence_svm = SentenceAspectSVM(aspect)

        # Comparison SVM (unchanged).
        self.svm_model = SVMComparativeModel(aspect)

    @staticmethod
    def _truncate_seq(tokens, max_length):
        """Pop items off the end of ``tokens`` in place until it fits ``max_length``."""
        while len(tokens) > max_length:
            tokens.pop()
        return tokens

    @staticmethod
    def _split_clean_sentences(text):
        """Lower-case ``text``, split it into sentences and normalise each one.

        Sentences with at most one token (per ``word_tokenize``) are dropped;
        in the remaining ones every run of non-word characters becomes a
        single space and surrounding whitespace is stripped.
        """
        cleaned = []
        for raw_sentence in sent_tokenize(text.lower()):
            if len(word_tokenize(raw_sentence)) <= 1:
                continue
            cleaned.append(re.sub(r'\W+', ' ', raw_sentence).strip())
        return cleaned

    def _get_aspect_sentences(self, review_sentences):
        """Return the subset of ``review_sentences`` kept by the sentence SVM."""
        if len(review_sentences) == 0:
            return []
        # The per-aspect sentence SVM is used here instead of SLAC-BERT.
        return self.sentence_svm.filter_positive_sentences(review_sentences)

    def forward(self, review_1, review_2):
        """Compare two raw review texts on this model's aspect.

        Returns:
            The comparison SVM's label, or ``2`` when either review yields no
            sentences / no aspect sentences, or when prediction raises.
        """
        sentences_1 = self._split_clean_sentences(review_1)
        sentences_2 = self._split_clean_sentences(review_2)
        if min(len(sentences_1), len(sentences_2)) == 0:
            return 2

        aspect_1 = self._get_aspect_sentences(sentences_1)
        aspect_2 = self._get_aspect_sentences(sentences_2)
        if min(len(aspect_1), len(aspect_2)) == 0:
            return 2

        # Each review's aspect sentences are merged into one text.
        text_1 = " ".join(aspect_1)
        text_2 = " ".join(aspect_2)

        try:
            return self.svm_model.predict(text_1, text_2)
        except Exception as exc:
            print(f"SVM prediction failed: {exc}")
            return 2


In [3]:
# Label vocabulary for the comparison task.
negative_label = 2                            # "No comparison"
positive_labels = [-1, 0, 1]                  # A<B, A=B, A>B
labels = positive_labels + [negative_label]   # every label: [-1, 0, 1, 2]

from collections import defaultdict
def compute_metrics(true_labels, pred_labels):
    """Count per-class true positives, false positives and false negatives.

    Only the labels in the module-level ``positive_labels`` are scored; the
    "no comparison" label never receives a TP/FP/FN of its own.

    For each (true, pred) pair:
      * equal and positive       -> TP for that label
      * different, true positive -> FN for the true label
      * different, pred positive -> FP for the predicted label

    A mismatch therefore counts as a miss or a spurious prediction even when
    the other side is a value outside the known label set, instead of being
    silently ignored.

    Args:
        true_labels: Iterable of gold labels.
        pred_labels: Iterable of predicted labels, paired with ``true_labels``
            by ``zip`` (extra items in the longer one are ignored).

    Returns:
        Tuple ``(tp, fp, fn)`` of ``defaultdict(int)`` keyed by label.
    """
    tp = defaultdict(int)
    fp = defaultdict(int)
    fn = defaultdict(int)

    for true, pred in zip(true_labels, pred_labels):
        if true == pred:
            # A correct "no comparison" pair is not a true positive.
            if true in positive_labels:
                tp[true] += 1
            continue

        # The gold positive label was missed.
        if true in positive_labels:
            fn[true] += 1
        # A positive label was predicted where it does not belong.
        if pred in positive_labels:
            fp[pred] += 1

    return tp, fp, fn


In [4]:
from tqdm import tqdm

# Define a function to run evaluation for all aspects
def overall_evaluate_all_aspects(eval_dataset):
    """Evaluate all four aspect models jointly on ``eval_dataset``.

    One ``OverallModel`` per aspect is run over every sample; predictions and
    gold labels of all aspects are then pooled into flat lists and scored
    together with ``compute_metrics``.

    Args:
        eval_dataset: Iterable of samples (re-iterated once per aspect), each
            indexable by 'reviewText_1', 'reviewText_2' and the four aspect
            names.

    Returns:
        dict with 'micro_f1', 'macro_f1', 'micro_precision',
        'macro_precision', 'micro_recall' and 'macro_recall'. Ratios whose
        denominator is zero are reported as 0.
    """
    aspects = ['appearance', 'aroma', 'palate', 'taste']

    def _ratio(numerator, denominator):
        # Guarded division: an undefined ratio is reported as 0.
        return numerator / denominator if denominator > 0 else 0

    def _f1(precision, recall):
        # Harmonic mean of precision and recall, 0 when both are 0.
        return _ratio(2 * precision * recall, precision + recall)

    # Run each aspect model over the dataset and keep its predictions.
    aspect_predictions = {}
    for aspect in aspects:
        model = OverallModel(aspect, device)
        aspect_predictions[aspect] = [
            model(sample['reviewText_1'], sample['reviewText_2'])
            for sample in tqdm(eval_dataset, desc=aspect)
        ]
        del model

    # Pool predictions and gold labels sample by sample, aspect by aspect.
    # Each sample is fetched once rather than once per aspect.
    all_predictions_flat = []
    all_true_labels_flat = []
    for i, sample in enumerate(eval_dataset):
        for aspect in aspects:
            all_predictions_flat.append(aspect_predictions[aspect][i])
            all_true_labels_flat.append(sample[aspect])

    tp, fp, fn = compute_metrics(all_true_labels_flat, all_predictions_flat)

    # Per-class precision / recall / F1 (0 when a class has no support).
    class_metrics = {}
    for label in positive_labels:
        precision = _ratio(tp[label], tp[label] + fp[label])
        recall = _ratio(tp[label], tp[label] + fn[label])
        class_metrics[label] = {
            'precision': precision,
            'recall': recall,
            'f1': _f1(precision, recall),
            'tp': tp[label],
            'fp': fp[label],
            'fn': fn[label]
        }

    # Macro metrics: unweighted mean over the positive classes.
    n_classes = len(positive_labels)
    macro_precision = sum(m['precision'] for m in class_metrics.values()) / n_classes
    macro_recall = sum(m['recall'] for m in class_metrics.values()) / n_classes
    macro_f1 = sum(m['f1'] for m in class_metrics.values()) / n_classes

    # Micro metrics: computed from the summed TP / FP / FN counts.
    total_tp = sum(tp[label] for label in positive_labels)
    total_fp = sum(fp[label] for label in positive_labels)
    total_fn = sum(fn[label] for label in positive_labels)

    micro_precision = _ratio(total_tp, total_tp + total_fp)
    micro_recall = _ratio(total_tp, total_tp + total_fn)
    micro_f1 = _f1(micro_precision, micro_recall)

    return {
        'micro_f1': micro_f1,
        'macro_f1': macro_f1,
        'micro_precision': micro_precision,
        'macro_precision': macro_precision,
        'micro_recall': micro_recall,
        'macro_recall': macro_recall
    }


In [5]:
# Define a function to evaluate each aspect separately
def aspect_wise_evaluate(eval_dataset):
    """Evaluate each aspect model on its own and report per-aspect metrics.

    For every aspect an ``OverallModel`` is run over all samples, scored with
    ``compute_metrics`` against that aspect's gold labels, and the micro and
    macro precision / recall / F1 are printed.

    Args:
        eval_dataset: Iterable of samples indexable by 'reviewText_1',
            'reviewText_2' and the aspect names.

    Returns:
        dict mapping aspect name to a dict with the micro/macro metrics and a
        'class_metrics' dict (per positive label: precision, recall, f1, tp,
        fp, fn). Ratios with a zero denominator are reported as 0.
    """
    aspect_results = {}

    for aspect in ('appearance', 'aroma', 'palate', 'taste'):
        print(f"Evaluating {aspect} model for aspect-wise metrics...")
        model = OverallModel(aspect, device)

        predictions, true_labels = [], []
        for sample in tqdm(eval_dataset):
            predictions.append(model(sample['reviewText_1'], sample['reviewText_2']))
            true_labels.append(sample[aspect])

        # Per-class TP / FP / FN counts for this aspect only.
        tp, fp, fn = compute_metrics(true_labels, predictions)

        class_metrics = {}
        for label in positive_labels:
            hits, spurious, missed = tp[label], fp[label], fn[label]

            # A class with no predictions / no gold instances scores 0.
            precision = 0 if hits + spurious == 0 else hits / (hits + spurious)
            recall = 0 if hits + missed == 0 else hits / (hits + missed)
            if precision + recall == 0:
                f1 = 0
            else:
                f1 = 2 * precision * recall / (precision + recall)

            class_metrics[label] = dict(
                precision=precision,
                recall=recall,
                f1=f1,
                tp=hits,
                fp=spurious,
                fn=missed,
            )

        # Macro: unweighted mean over the positive classes.
        n_classes = len(positive_labels)
        per_class = class_metrics.values()
        macro_precision = sum(entry['precision'] for entry in per_class) / n_classes
        macro_recall = sum(entry['recall'] for entry in per_class) / n_classes
        macro_f1 = sum(entry['f1'] for entry in per_class) / n_classes

        # Micro: computed from the summed counts.
        total_tp = sum(tp[label] for label in positive_labels)
        total_fp = sum(fp[label] for label in positive_labels)
        total_fn = sum(fn[label] for label in positive_labels)

        predicted_total = total_tp + total_fp
        gold_total = total_tp + total_fn
        micro_precision = total_tp / predicted_total if predicted_total > 0 else 0
        micro_recall = total_tp / gold_total if gold_total > 0 else 0
        if (micro_precision + micro_recall) > 0:
            micro_f1 = 2 * micro_precision * micro_recall / (micro_precision + micro_recall)
        else:
            micro_f1 = 0

        aspect_results[aspect] = {
            'micro_precision': micro_precision,
            'micro_recall': micro_recall,
            'micro_f1': micro_f1,
            'macro_precision': macro_precision,
            'macro_recall': macro_recall,
            'macro_f1': macro_f1,
            'class_metrics': class_metrics
        }

        print(f"{aspect} - Micro P/R/F1: {micro_precision:.4f}/{micro_recall:.4f}/{micro_f1:.4f}")
        print(f"{aspect} - Macro P/R/F1: {macro_precision:.4f}/{macro_recall:.4f}/{macro_f1:.4f}")

        del model

    return aspect_results


In [6]:
import datasets

# Score the pooled (all-aspect) metrics on the dataset's test split.
eval_dataset = datasets.load_dataset(
    path="lengocquangLAB/beer-com-reviews",
    split="test",
)
results = overall_evaluate_all_aspects(eval_dataset)
print(results)


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/977k [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/142k [00:00<?, ?B/s]

data/validation-00000-of-00001.parquet:   0%|          | 0.00/133k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3320 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/420 [00:00<?, ? examples/s]

100%|██████████| 410/410 [00:06<00:00, 60.51it/s]
100%|██████████| 410/410 [00:08<00:00, 46.08it/s]
100%|██████████| 410/410 [00:12<00:00, 32.69it/s]
100%|██████████| 410/410 [00:14<00:00, 27.89it/s]


{'micro_f1': 0.508029197080292, 'macro_f1': 0.5105251995880314, 'micro_precision': 0.525679758308157, 'macro_precision': 0.5282806575736697, 'micro_recall': 0.4915254237288136, 'macro_recall': 0.49764419759924433}


In [7]:
# Per-aspect evaluation: banner, run, then a short F1 summary per aspect.
print("="*70, "ASPECT-WISE EVALUATION", "="*70, sep="\n")

aspect_results = aspect_wise_evaluate(eval_dataset)

# One summary block (micro / macro F1) for each aspect.
for aspect, metrics in aspect_results.items():
    summary_lines = (
        f"\n{aspect.upper()} Results:",
        f"  Micro F1: {metrics['micro_f1']:.4f}",
        f"  Macro F1: {metrics['macro_f1']:.4f}",
    )
    for line in summary_lines:
        print(line)
print()




ASPECT-WISE EVALUATION
Evaluating appearance model for aspect-wise metrics...


100%|██████████| 410/410 [00:06<00:00, 61.05it/s]


appearance - Micro P/R/F1: 0.5887/0.5428/0.5648
appearance - Macro P/R/F1: 0.6205/0.5420/0.5708
Evaluating aroma model for aspect-wise metrics...


100%|██████████| 410/410 [00:08<00:00, 47.03it/s]


aroma - Micro P/R/F1: 0.5268/0.4979/0.5119
aroma - Macro P/R/F1: 0.5277/0.5167/0.5127
Evaluating palate model for aspect-wise metrics...


100%|██████████| 410/410 [00:12<00:00, 32.39it/s]


palate - Micro P/R/F1: 0.5732/0.5193/0.5449
palate - Macro P/R/F1: 0.5735/0.5325/0.5485
Evaluating taste model for aspect-wise metrics...


100%|██████████| 410/410 [00:14<00:00, 27.43it/s]

taste - Micro P/R/F1: 0.4594/0.4373/0.4481
taste - Macro P/R/F1: 0.4644/0.4403/0.4470

APPEARANCE Results:
  Micro F1: 0.5648
  Macro F1: 0.5708

AROMA Results:
  Micro F1: 0.5119
  Macro F1: 0.5127

PALATE Results:
  Micro F1: 0.5449
  Macro F1: 0.5485

TASTE Results:
  Micro F1: 0.4481
  Macro F1: 0.4470


APPEARANCE Results:
  Micro F1: 0.5648
  Macro F1: 0.5708

AROMA Results:
  Micro F1: 0.5119
  Macro F1: 0.5127

PALATE Results:
  Micro F1: 0.5449
  Macro F1: 0.5485

TASTE Results:
  Micro F1: 0.4481
  Macro F1: 0.4470




