In [1]:
import json
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score

from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

from sklearn.model_selection import train_test_split
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

In [2]:
!pip install einops



In [3]:
with open('/kaggle/input/data-ds200/data_labeled_soft_cleaned.json', 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

def build_text(item):
    parts = [item.get("post_content", "")]
    if 'comment' in item:
        parts += item['comment'].get('parent_comment_texts', [])
        if item['comment'].get('comment_text'):
            parts.append(item['comment']['comment_text'])
    img_descs = [desc['image_description'] for desc in item.get("image_descriptions", [])]
    parts += img_descs
    return "\n".join(parts)

In [4]:
data = []
for item in raw_data:
    text = build_text(item)
    aspects = [item.get("Aspect_1", "Other"), item.get("Aspect_2") or "null"]
    sentiments = item.get("Sentiment", ["null", "null"])
    data.append({
        "text": text,
        "aspect_1": aspects[0],
        "aspect_2": aspects[1],
        "sentiment_1": sentiments[0],
        "sentiment_2": sentiments[1] if aspects[1] != "null" else "null"
    })

In [5]:
df = pd.DataFrame(data)
df.head()

Unnamed: 0,text,aspect_1,aspect_2,sentiment_1,sentiment_2
0,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Other,negative,negative
1,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Other,negative,negative
2,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Health,negative,negative
3,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Health,negative,negative
4,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Other,negative,negative


In [6]:
all_aspects = pd.concat([df['aspect_1'], df['aspect_2']]).unique()
aspect_encoder = LabelEncoder()
aspect_encoder.fit(all_aspects)

df['aspect_1_enc'] = aspect_encoder.transform(df['aspect_1'])
df['aspect_2_enc'] = aspect_encoder.transform(df['aspect_2'])

In [7]:
print(aspect_encoder.classes_)

['Art' 'Fashion' 'Food' 'Health' 'Law' 'Other' 'Sport' 'null']


In [8]:
# sentiment_map = {"positive": 1.0, "neutral": 0.0, "negative": -1.0}
# df["sentiment_1_score"] = df["sentiment_1"].map(sentiment_map)
# df["sentiment_2_score"] = df["sentiment_2"].map(sentiment_map)

In [9]:
all_aspects = pd.concat([df['sentiment_1'], df['sentiment_2']]).unique()
sentiment_encoder = LabelEncoder()
sentiment_encoder.fit(all_aspects)

df['sentiment_1_enc'] = sentiment_encoder.transform(df['sentiment_1'])
df['sentiment_2_enc'] = sentiment_encoder.transform(df['sentiment_2'])

In [10]:
print(sentiment_encoder.classes_)

['negative' 'neutral' 'null' 'positive']


In [11]:
from collections import Counter
import numpy as np
import torch

def compute_class_weights(name, combined_labels):
    counts = Counter(combined_labels)
    total = sum(counts.values())
    print(f"\n{name} distribution:")
    for k, v in sorted(counts.items()):
        print(f"  Class {k}: {v} ({v/total:.2%})")

    num_classes = len(counts)
    weights = [total / (num_classes * counts[i]) for i in range(num_classes)]
    print(f"{name} weights: {np.round(weights, 3)}")
    return torch.tensor(weights, dtype=torch.float32)


In [12]:
# Gộp nhãn aspect
combined_aspect = pd.concat([df["aspect_1_enc"], df["aspect_2_enc"]]).tolist()
w_aspect = compute_class_weights("Aspect", combined_aspect)

# Gộp nhãn sentiment
combined_sentiment = pd.concat([df["sentiment_1_enc"], df["sentiment_2_enc"]]).tolist()
w_sentiment = compute_class_weights("Sentiment", combined_sentiment)



Aspect distribution:
  Class 0: 579 (4.85%)
  Class 1: 344 (2.88%)
  Class 2: 809 (6.77%)
  Class 3: 1894 (15.86%)
  Class 4: 2399 (20.09%)
  Class 5: 3294 (27.58%)
  Class 6: 624 (5.22%)
  Class 7: 2001 (16.75%)
Aspect weights: [2.579 4.34  1.845 0.788 0.622 0.453 2.393 0.746]

Sentiment distribution:
  Class 0: 6488 (54.32%)
  Class 1: 885 (7.41%)
  Class 2: 2001 (16.75%)
  Class 3: 2570 (21.52%)
Sentiment weights: [0.46  3.374 1.492 1.162]


In [13]:
df.head()

Unnamed: 0,text,aspect_1,aspect_2,sentiment_1,sentiment_2,aspect_1_enc,aspect_2_enc,sentiment_1_enc,sentiment_2_enc
0,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Other,negative,negative,4,5,0,0
1,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Other,negative,negative,4,5,0,0
2,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Health,negative,negative,4,3,0,0
3,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Health,negative,negative,4,3,0,0
4,TÒA ĐÃ TUYÊN PHẠT BỐ RUỘT 6 NĂM TÙ Bị cáo và...,Law,Other,negative,negative,4,5,0,0


In [14]:
tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v3")  # Tokenizer for Jina embeddings
model_token = AutoModel.from_pretrained("jinaai/jina-embeddings-v3", 
                                                   trust_remote_code=True,
                                                   torch_dtype=torch.float32)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_token.to(device)
model_token.eval()

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration_xlm_roberta.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- configuration_xlm_roberta.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_lora.py: 0.00B [00:00, ?B/s]

modeling_xlm_roberta.py: 0.00B [00:00, ?B/s]

embedding.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- embedding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


rotary.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- rotary.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


mlp.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- mlp.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


xlm_padding.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- xlm_padding.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


mha.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- mha.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


block.py: 0.00B [00:00, ?B/s]

stochastic_depth.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- stochastic_depth.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- block.py
- stochastic_depth.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/jinaai/xlm-roberta-flash-implementation:
- modeling_xlm_roberta.py
- embedding.py
- rotary.py
- mlp.py
- xlm_padding.py
- mha.py
- block.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following fi

model.safetensors:   0%|          | 0.00/1.14G [00:00<?, ?B/s]

XLMRobertaLoRA(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): ParametrizedEmbedding(
        250002, 1024, padding_idx=1
        (parametrizations): ModuleDict(
          (weight): ParametrizationList(
            (0): LoRAParametrization()
          )
        )
      )
      (token_type_embeddings): ParametrizedEmbedding(
        1, 1024
        (parametrizations): ModuleDict(
          (weight): ParametrizationList(
            (0): LoRAParametrization()
          )
        )
      )
    )
    (emb_drop): Dropout(p=0.1, inplace=False)
    (emb_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    (encoder): XLMRobertaEncoder(
      (layers): ModuleList(
        (0-23): 24 x Block(
          (mixer): MHA(
            (rotary_emb): RotaryEmbedding()
            (Wqkv): ParametrizedLinearResidual(
              in_features=1024, out_features=3072, bias=True
              (parametrizations): ModuleDict(
                (weight): 

In [15]:
def get_embedding_full(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256, padding='max_length')
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model_token(**inputs)
        return outputs.last_hidden_state.squeeze(0)

embeddings = []
for text in tqdm(df["text"], desc="Encoding"):
    emb = get_embedding_full(text)
    embeddings.append(emb)

Encoding: 100%|██████████| 5972/5972 [03:49<00:00, 25.97it/s]


In [16]:
X = pad_sequence(embeddings, batch_first=True)
y_aspect = torch.tensor(df[["aspect_1_enc", "aspect_2_enc"]].values, dtype=torch.long)
y_sentiment = torch.tensor(df[["sentiment_1_enc", "sentiment_2_enc"]].values, dtype=torch.long)

X_cpu = X.cpu()
y_aspect_cpu = y_aspect.cpu()
y_sentiment_cpu = y_sentiment.cpu()

X_temp, X_test, y_a_temp, y_a_test, y_s_temp, y_s_test = train_test_split(
    X_cpu, y_aspect_cpu, y_sentiment_cpu, test_size=0.1, random_state=42
)

X_train, X_val, y_a_train, y_a_val, y_s_train, y_s_val = train_test_split(
    X_temp, y_a_temp, y_s_temp, test_size=0.2222, random_state=42
)


In [17]:
X_train.shape

torch.Size([4179, 256, 1024])

In [18]:
class MultiTaskClassifier(nn.Module):
    def __init__(self, input_dim, lstm_hidden_dim, lstm_layers, hidden_dims, 
                 num_aspects, num_sentiments, task_specific_dims=None):
        super().__init__()
        
        # BiLSTM layer
        self.bilstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=lstm_hidden_dim,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True
        )
        
        # Enhanced shared fully connected layers
        layers = []
        prev_dim = lstm_hidden_dim * 2  # *2 because bidirectional
        for hdim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hdim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.3))
            layers.append(nn.BatchNorm1d(hdim))  # Thêm BatchNorm để ổn định training
            prev_dim = hdim
        self.shared = nn.Sequential(*layers)
        
        # Task-specific dimensions (default nếu không được truyền vào)
        if task_specific_dims is None:
            task_specific_dims = [prev_dim // 2, prev_dim // 4]
        
        # Task-specific layers cho aspect 1
        self.aspect_1_layers = self._create_task_head(prev_dim, task_specific_dims, num_aspects)
        
        # Task-specific layers cho aspect 2  
        self.aspect_2_layers = self._create_task_head(prev_dim, task_specific_dims, num_aspects)
        
        # Task-specific layers cho sentiment 1
        self.sentiment_1_layers = self._create_task_head(prev_dim, task_specific_dims, num_sentiments)
        
        # Task-specific layers cho sentiment 2
        self.sentiment_2_layers = self._create_task_head(prev_dim, task_specific_dims, num_sentiments)
        
    def _create_task_head(self, input_dim, hidden_dims, output_dim):
        """Tạo task-specific head với nhiều lớp linear"""
        layers = []
        prev_dim = input_dim
        
        # Thêm các lớp ẩn
        for hdim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hdim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.2))  # Dropout thấp hơn cho task-specific layers
            prev_dim = hdim
            
        # Lớp output cuối cùng
        layers.append(nn.Linear(prev_dim, output_dim))
        
        return nn.Sequential(*layers)
        
    def forward(self, x):
        # BiLSTM processing
        lstm_out, _ = self.bilstm(x)
        pooled = torch.mean(lstm_out, dim=1)  # Global average pooling
        
        # Shared feature extraction
        shared_features = self.shared(pooled)
        
        # Pass shared features through task-specific heads
        a1 = self.aspect_1_layers(shared_features)
        a2 = self.aspect_2_layers(shared_features)
        s1 = self.sentiment_1_layers(shared_features)
        s2 = self.sentiment_2_layers(shared_features)
        
        return a1, a2, s1, s2

In [19]:
class Trainer:
    def __init__(self, model, train_data, val_data, device='cuda',
                 lr=1e-3, batch_size=64, num_epochs=10, step_size=10, gamma=0.5,
                 class_weights=None):
        self.model = model.to(device)
        self.train_data = train_data
        self.val_data = val_data
        self.device = device
        self.lr = lr
        self.batch_size = batch_size
        self.num_epochs = num_epochs

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=step_size, gamma=gamma)

        self.ce_a1 = nn.CrossEntropyLoss(weight=class_weights['a1'].to(device) if class_weights and 'a1' in class_weights else None)
        self.ce_a2 = nn.CrossEntropyLoss(weight=class_weights['a2'].to(device) if class_weights and 'a2' in class_weights else None)
        self.ce_s1 = nn.CrossEntropyLoss(weight=class_weights['s1'].to(device) if class_weights and 's1' in class_weights else None)
        self.ce_s2 = nn.CrossEntropyLoss(weight=class_weights['s2'].to(device) if class_weights and 's2' in class_weights else None)

    def get_batches(self, X, y_aspect, y_sentiment):
        n = len(X)
        for i in range(0, n, self.batch_size):
            yield (
                X[i:i+self.batch_size],
                y_aspect[i:i+self.batch_size],
                y_sentiment[i:i+self.batch_size]
            )

    def train_epoch(self):
        self.model.train()
        total_loss = 0
        total_batches = 0
        X, y_aspect, y_sentiment = self.train_data
        for xb, yb_a, yb_s in self.get_batches(X, y_aspect, y_sentiment):
            xb = xb.to(self.device)
            yb_a = yb_a.to(self.device)
            yb_s = yb_s.to(self.device)

            self.optimizer.zero_grad()
            out_a1, out_a2, out_s1, out_s2 = self.model(xb)

            loss_a1 = self.ce_a1(out_a1, yb_a[:, 0])
            loss_a2 = self.ce_a2(out_a2, yb_a[:, 1])
            loss_s1 = self.ce_s1(out_s1, yb_s[:, 0])
            loss_s2 = self.ce_s2(out_s2, yb_s[:, 1])

            loss = loss_a1 + loss_a2 + loss_s1 + loss_s2
            # loss = loss_a1 * 3 + loss_a2 * 2 + loss_s1 * 2 + loss_s2
            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()
            total_batches += 1

        mean_loss = total_loss / total_batches
        return mean_loss


    def eval_epoch(self):
        self.model.eval()
        total_loss = 0
        total_batches = 0
        y_true_a1, y_pred_a1 = [], []
        y_true_a2, y_pred_a2 = [], []
        y_true_s1, y_pred_s1 = [], []
        y_true_s2, y_pred_s2 = [], []
    
        X, y_aspect, y_sentiment = self.val_data
        with torch.no_grad():
            for xb, yb_a, yb_s in self.get_batches(X, y_aspect, y_sentiment):
                xb = xb.to(self.device)
                yb_a = yb_a.to(self.device)
                yb_s = yb_s.to(self.device)
    
                out_a1, out_a2, out_s1, out_s2 = self.model(xb)

                loss_a1 = self.ce_a1(out_a1, yb_a[:, 0])
                loss_a2 = self.ce_a2(out_a2, yb_a[:, 1])
                loss_s1 = self.ce_s1(out_s1, yb_s[:, 0])
                loss_s2 = self.ce_s2(out_s2, yb_s[:, 1])

                loss = loss_a1 + loss_a2 + loss_s1 + loss_s2
                # loss = loss_a1 * 3 + loss_a2 * 2 + loss_s1 * 2 + loss_s2

                total_loss += loss.item()
                total_batches += 1

                y_true_a1 += yb_a[:, 0].cpu().tolist()
                y_pred_a1 += out_a1.argmax(dim=1).cpu().tolist()
                y_true_a2 += yb_a[:, 1].cpu().tolist()
                y_pred_a2 += out_a2.argmax(dim=1).cpu().tolist()
                y_true_s1 += yb_s[:, 0].cpu().tolist()
                y_pred_s1 += out_s1.argmax(dim=1).cpu().tolist()
                y_true_s2 += yb_s[:, 1].cpu().tolist()
                y_pred_s2 += out_s2.argmax(dim=1).cpu().tolist()

        f1_a1 = f1_score(y_true_a1, y_pred_a1, average="macro")
        f1_a2 = f1_score(y_true_a2, y_pred_a2, average="macro")
        f1_s1 = f1_score(y_true_s1, y_pred_s1, average="macro")
        f1_s2 = f1_score(y_true_s2, y_pred_s2, average="macro")
        avg_f1 = (f1_a1 + f1_a2 + f1_s1 + f1_s2) / 4

        mean_loss = total_loss / total_batches
        print(f"F1 Scores — Aspect1: {f1_a1:.4f}, Aspect2: {f1_a2:.4f}, Sent1: {f1_s1:.4f}, Sent2: {f1_s2:.4f} | Avg: {avg_f1:.4f}")
        return mean_loss


    def train(self):
        for epoch in range(1, self.num_epochs + 1):
            train_loss = self.train_epoch()
            val_loss = self.eval_epoch()
            self.scheduler.step()

            current_lr = self.scheduler.get_last_lr()[0]
            print(f"Epoch {epoch}/{self.num_epochs} | LR: {current_lr:.6f} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")


In [20]:
def get_batch(X, y_a, y_s, batch_size):
    for i in range(0, len(X), batch_size):
        yield X[i:i+batch_size], y_a[i:i+batch_size], y_s[i:i+batch_size]

In [21]:
num_aspects = len(aspect_encoder.classes_)
num_sentiments = len(sentiment_encoder.classes_)

In [22]:
model = MultiTaskClassifier(
    input_dim=1024,
    lstm_hidden_dim=256,
    lstm_layers=2,
    hidden_dims=[1024, 512, 256, 128],  # 4 lớp shared
    num_aspects=num_aspects,
    num_sentiments=num_sentiments,
    task_specific_dims=[128, 64, 32]    # 3 lớp ẩn cho mỗi task head
)
device = "cuda" if torch.cuda.is_available() else "cpu"

trainer = Trainer(
    model=model,
    train_data=(X_train, y_a_train, y_s_train),
    val_data=(X_val, y_a_val, y_s_val),
    device=device,
    lr=4e-3,
    batch_size=32,
    num_epochs=200,
    step_size=10,
    gamma=0.75,
    class_weights={
        'a1': w_aspect,
        'a2': w_aspect,
        's1': w_sentiment,
        's2': w_sentiment
    }
)

In [23]:
trainer.train()

F1 Scores — Aspect1: 0.2824, Aspect2: 0.1382, Sent1: 0.4377, Sent2: 0.3406 | Avg: 0.2997
Epoch 1/200 | LR: 0.004000 | Train Loss: 5.5631 | Val Loss: 5.1578
F1 Scores — Aspect1: 0.3146, Aspect2: 0.1594, Sent1: 0.4475, Sent2: 0.3332 | Avg: 0.3137
Epoch 2/200 | LR: 0.004000 | Train Loss: 5.0950 | Val Loss: 4.8757
F1 Scores — Aspect1: 0.2808, Aspect2: 0.2138, Sent1: 0.4708, Sent2: 0.3072 | Avg: 0.3182
Epoch 3/200 | LR: 0.004000 | Train Loss: 4.9205 | Val Loss: 5.0579
F1 Scores — Aspect1: 0.4000, Aspect2: 0.1664, Sent1: 0.4424, Sent2: 0.3378 | Avg: 0.3366
Epoch 4/200 | LR: 0.004000 | Train Loss: 4.9092 | Val Loss: 5.5573
F1 Scores — Aspect1: 0.4393, Aspect2: 0.2035, Sent1: 0.4527, Sent2: 0.3969 | Avg: 0.3731
Epoch 5/200 | LR: 0.004000 | Train Loss: 4.7311 | Val Loss: 4.6288
F1 Scores — Aspect1: 0.4224, Aspect2: 0.1716, Sent1: 0.4635, Sent2: 0.3136 | Avg: 0.3428
Epoch 6/200 | LR: 0.004000 | Train Loss: 4.7634 | Val Loss: 5.3311
F1 Scores — Aspect1: 0.5449, Aspect2: 0.1910, Sent1: 0.4582, Sen

In [24]:
from sklearn.metrics import classification_report

def evaluate_on_test(model, test_data, batch_size=8, device='cuda'):
    model.eval()
    X_test, y_a_test, y_s_test = test_data

    y_true_a1, y_pred_a1 = [], []
    y_true_a2, y_pred_a2 = [], []
    y_true_s1, y_pred_s1 = [], []
    y_true_s2, y_pred_s2 = [], []

    with torch.no_grad():
        for i in range(0, len(X_test), batch_size):
            xb = X_test[i:i+batch_size].to(device)
            yb_a = y_a_test[i:i+batch_size].to(device)
            yb_s = y_s_test[i:i+batch_size].to(device)

            out_a1, out_a2, out_s1, out_s2 = model(xb)

            y_true_a1 += yb_a[:, 0].cpu().tolist()
            y_pred_a1 += out_a1.argmax(dim=1).cpu().tolist()
            y_true_a2 += yb_a[:, 1].cpu().tolist()
            y_pred_a2 += out_a2.argmax(dim=1).cpu().tolist()
            y_true_s1 += yb_s[:, 0].cpu().tolist()
            y_pred_s1 += out_s1.argmax(dim=1).cpu().tolist()
            y_true_s2 += yb_s[:, 1].cpu().tolist()
            y_pred_s2 += out_s2.argmax(dim=1).cpu().tolist()

    print("\n--- Test Evaluation ---")
    print("Aspect 1:")
    print(classification_report(y_true_a1, y_pred_a1, digits=4))
    print("Aspect 2:")
    print(classification_report(y_true_a2, y_pred_a2, digits=4))
    print("Sentiment 1:")
    print(classification_report(y_true_s1, y_pred_s1, digits=4))
    print("Sentiment 2:")
    print(classification_report(y_true_s2, y_pred_s2, digits=4))


In [25]:
evaluate_on_test(model, (X_test, y_a_test, y_s_test), batch_size=8, device=device)


--- Test Evaluation ---
Aspect 1:
              precision    recall  f1-score   support

           0     0.8649    0.9143    0.8889        35
           1     0.7742    0.7059    0.7385        34
           2     0.5658    0.8600    0.6825        50
           3     0.6289    0.4692    0.5374       130
           4     0.6138    0.8162    0.7007       185
           5     0.7255    0.3458    0.4684       107
           6     0.9333    0.9825    0.9573        57

    accuracy                         0.6756       598
   macro avg     0.7295    0.7277    0.7105       598
weighted avg     0.6873    0.6756    0.6597       598

Aspect 2:
              precision    recall  f1-score   support

           0     0.6667    0.4444    0.5333         9
           1     0.3571    0.7692    0.4878        13
           2     0.6087    0.7568    0.6747        37
           3     0.5294    0.4821    0.5047        56
           4     0.7115    0.5873    0.6435        63
           5     0.6158    0.5896

In [26]:
test_text = """Khoảnh khắc Quang Hùng MasterD "flex" sự học tiếng Hàn cấp tốc để giao lưu với anh Long. 
Fan boy này đi đu idol thành công quá rồi 🤣🤣 
Video: Phạm Thanh Hoa 
Gọi ộp pa thì đội tóc giả nữa a kkk"""


In [27]:
def predict(text):
    model.eval()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256, padding='max_length')
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model_token(**inputs)
        last_hidden = outputs.last_hidden_state  # shape (1, seq_len, 768)

        # Truyền toàn bộ chuỗi token embeddings vào model (model đã có LSTM)
        out_a1, out_a2, out_s1, out_s2 = model(last_hidden)

        pred_a1 = torch.argmax(out_a1, dim=1).cpu().item()
        pred_a2 = torch.argmax(out_a2, dim=1).cpu().item()
        pred_s1 = torch.argmax(out_s1, dim=1).cpu().item()
        pred_s2 = torch.argmax(out_s2, dim=1).cpu().item()

        aspect_1_label = aspect_encoder.inverse_transform([pred_a1])[0]
        aspect_2_label = aspect_encoder.inverse_transform([pred_a2])[0]
        sentiment_1_label = sentiment_encoder.inverse_transform([pred_s1])[0]
        sentiment_2_label = sentiment_encoder.inverse_transform([pred_s2])[0]

        print("=== PREDICTION RESULT ===")
        print(f"Aspect 1:    {aspect_1_label}")
        print(f"Sentiment 1: {sentiment_1_label}")
        print(f"Aspect 2:    {aspect_2_label}")
        print(f"Sentiment 2: {sentiment_2_label}")


In [28]:
predict(test_text)

=== PREDICTION RESULT ===
Aspect 1:    Art
Sentiment 1: positive
Aspect 2:    null
Sentiment 2: null


In [29]:
import torch
import joblib
import os

SAVE_DIR = "saved_model"

tokenizer.save_pretrained(SAVE_DIR)
model_token.save_pretrained(SAVE_DIR)

torch.save(model.state_dict(), "classifier.pt")

model_config = {
    "input_dim": 1024,
    "lstm_hidden_dim": 256,
    "lstm_layers": 2,
    "hidden_dims": [1024, 512, 256, 128],
    "num_aspects": num_aspects,
    "num_sentiments": num_sentiments,
    "task_specific_dims": [128, 64, 32]
}

import json
with open("classifier_config.json", "w") as f:
    json.dump(model_config, f)
