In [None]:
!pip install underthesea

Collecting underthesea
  Downloading underthesea-6.8.4-py3-none-any.whl.metadata (15 kB)
Collecting python-crfsuite>=0.9.6 (from underthesea)
  Downloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting underthesea-core==1.0.4 (from underthesea)
  Downloading underthesea_core-1.0.4-cp311-cp311-manylinux2010_x86_64.whl.metadata (1.7 kB)
Downloading underthesea-6.8.4-py3-none-any.whl (20.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.9/20.9 MB[0m [31m89.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading underthesea_core-1.0.4-cp311-cp311-manylinux2010_x86_64.whl (657 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m657.8/657.8 kB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m70.6 MB/s[0m eta [36m

In [None]:
import pandas as pd
import numpy as np
from underthesea import word_tokenize
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
from datasets import Dataset
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
import warnings
# Silences every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
from google.colab import drive
drive.mount('/content/drive')

# Load the annotated comments. Every name in `aspects` is a column that is read
# per row below: an empty cell means the aspect is not mentioned, -1 is a
# negative and 1 a positive annotation.
df = pd.read_excel("drive/My Drive/2025_ABSA/processed_result_full_new.xlsx", sheet_name="Sheet 1")
aspects = ["CHẤT LƯỢNG", "GIAO HÀNG", "HÌNH THỨC", "ĐÓNG GÓI", "GIÁ TIỀN", "ĐÁNH GIÁ CHUNG", "KHÍA CẠNH KHÁC"]

# word_tokenize is applied to every comment cell, so rows without any comment
# text are dropped first and the remaining cells are coerced to str; a single
# blank or numeric Excel cell would otherwise abort the whole notebook cell.
df = df.dropna(subset=["comment"]).copy()
df['comment'] = df['comment'].astype(str).apply(lambda x: word_tokenize(x, format="text"))

# Expand each comment into one row per aspect.
# Class ids: 0 = not mentioned, 1 = negative, 2 = positive.
sentiment_to_label = {-1: 1, 1: 2}
rows = []
skipped = 0  # annotations whose value is neither empty, -1 nor 1
for _, row in df.iterrows():
    comment = row["comment"]
    for aspect in aspects:
        value = row[aspect]
        if pd.isna(value):
            label = 0
        else:
            label = sentiment_to_label.get(int(value))
            if label is None:
                # Any other value is left out of the dataset; count it so
                # the loss of data is visible instead of silent.
                skipped += 1
                continue
        rows.append({
            "comment": comment,
            "aspect": aspect,
            "label": label
        })

if skipped:
    print(f"Skipped {skipped} aspect annotations with a value other than -1/1")

# Passing the column names keeps the frame usable even when no row was produced.
df_joint = pd.DataFrame(rows, columns=["comment", "aspect", "label"])

# Model input: the segmented comment followed by the aspect name.
# NOTE(review): "[SEP]" is inserted as plain text; confirm that it corresponds
# to the separator of the PhoBERT tokenizer (tokenizer.sep_token), otherwise
# consider passing (comment, aspect) to the tokenizer as a text pair.
df_joint["input_text"] = df_joint["comment"] + " [SEP] " + df_joint["aspect"]

# Key used to stratify the folds: one stratum per (aspect, label) combination.
df_joint["stratify_label"] = df_joint["aspect"] + "_" + df_joint["label"].astype(str)
print(f"Tổng số mẫu: {len(df_joint)}")

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# K-fold cross-validation setup.
# Folds are stratified on "<aspect>_<label>" so that every fold keeps the same
# mix of aspect/label combinations.
# NOTE(review): the split is made over (comment, aspect) rows, so the same
# comment text can end up in both the training and the validation part of a
# fold (paired with different aspects). Confirm this is acceptable, or split
# by comment instead.
num_folds = 5
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

# Class ids and their display names, in matching order
# (0 = not mentioned, 1 = negative, 2 = positive).
label_ids = [0, 1, 2]
label_names = ["KHÔNG", "TIÊU CỰC", "TÍCH CỰC"]

# Mixed precision needs a CUDA device; requesting it unconditionally makes the
# run fail on a CPU-only runtime even though `device` falls back to "cpu".
use_fp16 = torch.cuda.is_available()


def build_dataset(frame):
    """Tokenize ``frame["input_text"]`` and attach ``frame["label"]`` as ``labels``.

    Texts are truncated to 128 tokens and padded to the longest sequence of
    the frame. Returns a ``datasets.Dataset`` ready to be passed to ``Trainer``.
    """
    encodings = tokenizer(
        frame["input_text"].tolist(),
        truncation=True,
        padding=True,
        max_length=128
    )
    return Dataset.from_dict({
        **encodings,
        "labels": frame["label"].tolist()
    })


def compute_metrics(p):
    """Summarise one evaluation pass of the Trainer.

    Args:
        p: the (predictions, label_ids) pair handed over by ``Trainer``;
           predictions are per-class scores, one row per sample.

    Returns:
        dict with accuracy, macro precision/recall/F1 and weighted F1.
    """
    scores, labels = p
    predictions = np.argmax(scores, axis=1)

    # `labels=` pins the three classes explicitly: without it the report
    # raises when a class is absent, because `target_names` would no longer
    # match the number of classes found in the data.
    report = classification_report(
        labels,
        predictions,
        labels=label_ids,
        target_names=label_names,
        output_dict=True,
        zero_division=0
    )

    return {
        # Computed directly so it does not depend on the report exposing an
        # "accuracy" entry.
        "accuracy": float(np.mean(predictions == labels)),
        "macro_avg_precision": report["macro avg"]["precision"],
        "macro_avg_recall": report["macro avg"]["recall"],
        "macro_avg_f1": report["macro avg"]["f1-score"],
        "weighted_avg_f1": report["weighted avg"]["f1-score"]
    }


# Store results for each fold
fold_results = []

for fold, (train_idx, val_idx) in enumerate(skf.split(df_joint, df_joint["stratify_label"])):
    print(f"\n{'='*40}")
    print(f"Fold {fold + 1}/{num_folds}")
    print(f"{'='*40}")

    # Split data
    train_df = df_joint.iloc[train_idx]
    val_df = df_joint.iloc[val_idx]

    # Tokenize data and create datasets
    train_dataset = build_dataset(train_df)
    val_dataset = build_dataset(val_df)

    # Fresh model for each fold. The classification head is newly initialised
    # (it is not part of the checkpoint), so the RNG is seeded first to make
    # that initialisation repeatable across runs.
    torch.manual_seed(42)
    model = AutoModelForSequenceClassification.from_pretrained(
        "vinai/phobert-base",
        num_labels=3
    )
    model.to(device)

    # Training arguments with early stopping: the validation fold is evaluated
    # after every epoch, the checkpoint with the lowest eval loss is restored
    # at the end, and the evaluation schedule matches the save schedule as
    # `load_best_model_at_end` requires.
    # NOTE(review): checkpoint selection uses the same fold that is reported
    # below, so the per-fold scores are slightly optimistic; hold out a
    # separate dev split if an unbiased estimate is required.
    training_args = TrainingArguments(
        output_dir=f"./joint_absa_model_fold_{fold}",
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=5,  # upper bound; early stopping may end sooner
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_dir=f"./logs_joint_fold_{fold}",
        fp16=use_fp16,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        save_total_limit=2,
        report_to="none",
        learning_rate=2e-5,  # Lower learning rate for fine-tuning
        weight_decay=0.01
    )

    # Create Trainer with early stopping: training stops once the eval loss
    # has not improved for two consecutive evaluations.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    # Train the model
    trainer.train()

    # Evaluate on validation set and keep the predictions next to the inputs
    # for detailed analysis.
    val_results = trainer.predict(val_dataset)
    val_df = val_df.assign(
        y_true=val_results.label_ids,
        y_pred=np.argmax(val_results.predictions, axis=1)
    )

    # Print classification report for each aspect
    print("\nValidation Results:")
    for aspect in aspects:
        print(f"\n📌 Aspect: {aspect}")
        df_subset = val_df[val_df["aspect"] == aspect]
        print(classification_report(
            df_subset["y_true"],
            df_subset["y_pred"],
            labels=label_ids,
            digits=4,
            target_names=label_names,
            zero_division=0
        ))

    # Save fold results.
    # NOTE: the model object itself is stored (on whatever device it was
    # trained on), so the models of all folds are held in memory together.
    fold_results.append({
        "fold": fold + 1,
        "model": model,
        "val_df": val_df,
        "metrics": val_results.metrics
    })



Mounted at /content/drive
Tổng số mẫu: 68532


config.json:   0%|          | 0.00/557 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

bpe.codes: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]


Fold 1/5


pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Step,Training Loss
500,0.6494
1000,0.5549
1500,0.4858
2000,0.4153
2500,0.3616
3000,0.3532
3500,0.3341
4000,0.2907
4500,0.2839
5000,0.2802



Validation Results:

📌 Aspect: CHẤT LƯỢNG
              precision    recall  f1-score   support

       KHÔNG     0.8112    0.8215    0.8163       549
    TIÊU CỰC     0.9034    0.8875    0.8954       622
    TÍCH CỰC     0.9280    0.9331    0.9305       732

    accuracy                         0.8860      1903
   macro avg     0.8809    0.8807    0.8807      1903
weighted avg     0.8863    0.8860    0.8861      1903


📌 Aspect: GIAO HÀNG
              precision    recall  f1-score   support

       KHÔNG     0.9633    0.9640    0.9636      1306
    TIÊU CỰC     0.8213    0.8465    0.8337       228
    TÍCH CỰC     0.9659    0.9487    0.9572       448

    accuracy                         0.9470      1982
   macro avg     0.9168    0.9197    0.9182      1982
weighted avg     0.9475    0.9470    0.9472      1982


📌 Aspect: HÌNH THỨC
              precision    recall  f1-score   support

       KHÔNG     0.9320    0.9170    0.9244      1494
    TIÊU CỰC     0.5465    0.5000    0.5222 

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.6554
1000,0.5443
1500,0.503
2000,0.4359
2500,0.3749
3000,0.369
3500,0.3408
4000,0.3176
4500,0.2965
5000,0.3056



Validation Results:

📌 Aspect: CHẤT LƯỢNG
              precision    recall  f1-score   support

       KHÔNG     0.8092    0.8342    0.8215       549
    TIÊU CỰC     0.9160    0.8953    0.9055       621
    TÍCH CỰC     0.9177    0.9139    0.9158       732

    accuracy                         0.8849      1902
   macro avg     0.8810    0.8812    0.8810      1902
weighted avg     0.8858    0.8849    0.8852      1902


📌 Aspect: GIAO HÀNG
              precision    recall  f1-score   support

       KHÔNG     0.9628    0.9709    0.9669      1307
    TIÊU CỰC     0.8756    0.8333    0.8539       228
    TÍCH CỰC     0.9577    0.9577    0.9577       449

    accuracy                         0.9521      1984
   macro avg     0.9320    0.9206    0.9262      1984
weighted avg     0.9516    0.9521    0.9518      1984


📌 Aspect: HÌNH THỨC
              precision    recall  f1-score   support

       KHÔNG     0.9378    0.9378    0.9378      1494
    TIÊU CỰC     0.6866    0.4894    0.5714 

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.6501
1000,0.5401
1500,0.4477
2000,0.3917
2500,0.3586
3000,0.3341
3500,0.3258
4000,0.2752
4500,0.2959
5000,0.2849


Step,Training Loss
500,0.6501
1000,0.5401
1500,0.4477
2000,0.3917
2500,0.3586
3000,0.3341
3500,0.3258
4000,0.2752
4500,0.2959
5000,0.2849



Validation Results:

📌 Aspect: CHẤT LƯỢNG
              precision    recall  f1-score   support

       KHÔNG     0.8435    0.8051    0.8239       549
    TIÊU CỰC     0.9177    0.9163    0.9170       621
    TÍCH CỰC     0.9075    0.9398    0.9234       731

    accuracy                         0.8932      1901
   macro avg     0.8896    0.8871    0.8881      1901
weighted avg     0.8924    0.8932    0.8926      1901


📌 Aspect: GIAO HÀNG
              precision    recall  f1-score   support

       KHÔNG     0.9676    0.9594    0.9635      1307
    TIÊU CỰC     0.8333    0.8370    0.8352       227
    TÍCH CỰC     0.9498    0.9710    0.9603       448

    accuracy                         0.9480      1982
   macro avg     0.9169    0.9225    0.9196      1982
weighted avg     0.9482    0.9480    0.9481      1982


📌 Aspect: HÌNH THỨC
              precision    recall  f1-score   support

       KHÔNG     0.9333    0.9183    0.9257      1493
    TIÊU CỰC     0.5326    0.5158    0.5241 

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.6607
1000,0.5295
1500,0.5001
2000,0.4483
2500,0.3968
3000,0.359
3500,0.3379
4000,0.3053
4500,0.3082
5000,0.2902



Validation Results:

📌 Aspect: CHẤT LƯỢNG
              precision    recall  f1-score   support

       KHÔNG     0.8007    0.8051    0.8029       549
    TIÊU CỰC     0.8987    0.8857    0.8921       621
    TÍCH CỰC     0.9119    0.9194    0.9156       732

    accuracy                         0.8754      1902
   macro avg     0.8704    0.8701    0.8702      1902
weighted avg     0.8755    0.8754    0.8754      1902


📌 Aspect: GIAO HÀNG
              precision    recall  f1-score   support

       KHÔNG     0.9587    0.9602    0.9595      1307
    TIÊU CỰC     0.8194    0.8194    0.8194       227
    TÍCH CỰC     0.9596    0.9554    0.9575       448

    accuracy                         0.9430      1982
   macro avg     0.9126    0.9117    0.9121      1982
weighted avg     0.9430    0.9430    0.9430      1982


📌 Aspect: HÌNH THỨC
              precision    recall  f1-score   support

       KHÔNG     0.9400    0.9230    0.9314      1493
    TIÊU CỰC     0.5444    0.5213    0.5326 

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.669
1000,0.5526
1500,0.4721
2000,0.4032
2500,0.3772
3000,0.3526
3500,0.3254
4000,0.2869
4500,0.2967
5000,0.2869



Validation Results:

📌 Aspect: CHẤT LƯỢNG
              precision    recall  f1-score   support

       KHÔNG     0.7910    0.8327    0.8113       550
    TIÊU CỰC     0.9272    0.8824    0.9043       621
    TÍCH CỰC     0.9100    0.9112    0.9106       732

    accuracy                         0.8791      1903
   macro avg     0.8761    0.8755    0.8754      1903
weighted avg     0.8812    0.8791    0.8798      1903


📌 Aspect: GIAO HÀNG
              precision    recall  f1-score   support

       KHÔNG     0.9596    0.9625    0.9610      1307
    TIÊU CỰC     0.8387    0.8018    0.8198       227
    TÍCH CỰC     0.9581    0.9710    0.9645       448

    accuracy                         0.9460      1982
   macro avg     0.9188    0.9118    0.9151      1982
weighted avg     0.9454    0.9460    0.9457      1982


📌 Aspect: HÌNH THỨC
              precision    recall  f1-score   support

       KHÔNG     0.9252    0.9283    0.9268      1493
    TIÊU CỰC     0.6765    0.4894    0.5679 

In [None]:
import pickle

# Persist fold_results (the per-fold list built by the cross-validation loop,
# including the model stored under "model") to a pickle file on the mounted drive.
with open('drive/My Drive/2025_ABSA/fold_results.pkl', 'wb') as out_file:
    pickle.Pickler(out_file).dump(fold_results)
