In [1]:
import warnings
import logging
import os
warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"
logging.getLogger("transformers.tokenization_utils_base").setLevel(logging.ERROR)

# Import Library

In [2]:
import os, sys, random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.amp import autocast, GradScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
from transformers import AutoTokenizer, AutoModel
from tqdm.auto import tqdm

# Define Config & Prompt

In [3]:
# ====================CONFIG======================
import random, numpy as np, torch
import unicodedata, re

SEED = 42
BASE_MODEL     = "vinai/phobert-base"
MAX_LEN        = 256
BATCH_SIZE     = 64
NUM_EPOCHS     = 8
LABEL_SMOOTH   = 0.1
CLIP_GRAD_NORM = 1.0
WEIGHT_DECAY   = 3e-4
LR_BACKBONE    = 1e-5
LR_HEAD        = 1e-4
NEG_PER_SENT   = 2

ASPECTS = ["giang_vien", "chuong_trinh", "co_so_vat_chat", "khac"]
ASPECTS_VI = ["giang_vien", "chuong_trinh", "co_so_vat_chat", "khac"]
ASPECTS_EN = ["lecturer", "training_program", "facility", "others"]


def _norm_store(s: str) -> str:
    s = unicodedata.normalize("NFC", str(s)).strip()
    s = re.sub(r"\s+", " ", s)
    return s


def _norm_match(s: str) -> str:
    s = s.lower()
    s = unicodedata.normalize("NFD", s)
    return "".join(ch for ch in s if unicodedata.category(ch) != "Mn")


def _no_diacritics_set(kws: set) -> set:
    return kws | {_norm_match(k) for k in kws}

# ===== PROMPTS =====
ASPECT_PROMPTS = {
    "giang_vien": {
        "_default": (
            "ƒê√ÅNH GI√Å ph·∫ßn li√™n quan GI·∫¢NG VI√äN (gi·∫£ng d·∫°y, th√°i ƒë·ªô, h·ªó tr·ª£, ch·∫•m ƒëi·ªÉm, ƒë√∫ng gi·ªù). N·∫øu c√¢u kh√¥ng nh·∫Øc r√µ ƒë·∫øn GI·∫¢NG VI√äN -> NONE. M·ªói aspect ƒë√°nh gi√° ƒë·ªôc l·∫≠p (v√≠ d·ª•: gi·∫£ng vi√™n ƒëi d·∫°y tr·ªÖ nh∆∞ng m·∫°ng wifi t·ªët -> gi·∫£ng vi√™n NEGATIVE, c∆° s·ªü v·∫≠t ch·∫•t POSITIVE). NEGATIVE khi ph√†n n√†n tr·ªÖ, kh√≥ hi·ªÉu, thi·∫øu h·ªó tr·ª£; POSITIVE khi ƒë∆∞·ª£c khen ƒë√∫ng gi·ªù, nhi·ªát t√¨nh, d·ªÖ hi·ªÉu; kh√¥ng r√µ -> NEUTRAL."
        ),
        "giang_day": (
            "ƒê√ÅNH GI√Å GI·∫¢NG D·∫†Y c·ªßa GI·∫¢NG VI√äN. N·∫øu c√¢u kh√¥ng n√≥i v·ªÅ b√†i gi·∫£ng, c√°ch truy·ªÅn ƒë·∫°t, ph∆∞∆°ng ph√°p -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi kh√≥ hi·ªÉu, qu√° nhanh/ch·∫≠m, thi·∫øu v√≠ d·ª•; POSITIVE khi d·ªÖ hi·ªÉu, nhi·ªÅu v√≠ d·ª•, r√µ r√†ng; kh√¥ng r√µ -> NEUTRAL."
        ),
        "dung_gio": (
            "ƒê√ÅNH GI√Å ƒê√öNG GI·ªú c·ªßa GI·∫¢NG VI√äN. N·∫øu c√¢u kh√¥ng nh·∫Øc vi·ªác v√†o l·ªõp, b·∫Øt ƒë·∫ßu/k·∫øt th√∫c ti·∫øt -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi tr·ªÖ, b·ªè ti·∫øt; POSITIVE khi ƒë√∫ng gi·ªù, gi·ªØ l·ªãch; kh√¥ng r√µ -> NEUTRAL."
        ),
        "ho_tro": (
            "ƒê√ÅNH GI√Å H·ªñ TR·ª¢/T∆Ø V·∫§N c·ªßa GI·∫¢NG VI√äN. N·∫øu c√¢u kh√¥ng nh·∫Øc h·ªó tr·ª£, ph·∫£n h·ªìi, gi·∫£i ƒë√°p -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi ch·∫≠m ph·∫£n h·ªìi, kh√¥ng gi√∫p; POSITIVE khi nhi·ªát t√¨nh, ph·∫£n h·ªìi nhanh; kh√¥ng r√µ -> NEUTRAL."
        ),
        "cham_diem": (
            "ƒê√ÅNH GI√Å CH·∫§M ƒêI·ªÇM c·ªßa GI·∫¢NG VI√äN. N·∫øu c√¢u kh√¥ng n√≥i ƒëi·ªÉm, rubric, ph√∫c kh·∫£o -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi kh√¥ng c√¥ng b·∫±ng, kh√≥ hi·ªÉu; POSITIVE khi minh b·∫°ch, c√¥ng b·∫±ng; kh√¥ng r√µ -> NEUTRAL."
        ),
        "thai_do": (
            "ƒê√ÅNH GI√Å TH√ÅI ƒê·ªò/T√ÅC PHONG c·ªßa GI·∫¢NG VI√äN. N·∫øu c√¢u kh√¥ng nh·∫Øc th√°i ƒë·ªô, giao ti·∫øp -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi th√¥ l·ªó, thi·∫øu t√¥n tr·ªçng; POSITIVE khi th√¢n thi·ªán, t√¥n tr·ªçng; kh√¥ng r√µ -> NEUTRAL."
        ),
    },
    "chuong_trinh": {
        "_default": (
            "ƒê√ÅNH GI√Å CH∆Ø∆†NG TR√åNH ƒê√ÄO T·∫†O (m√¥n h·ªçc, t√≠n ch·ªâ, n·ªôi dung, l·ªô tr√¨nh, l·ªãch). N·∫øu c√¢u kh√¥ng nh·∫Øc r√µ ƒë·∫øn ch∆∞∆°ng tr√¨nh -> NONE. M·ªói aspect ƒë√°nh gi√° ƒë·ªôc l·∫≠p (v√≠ d·ª•: l·ªãch h·ªçc d√†y nh∆∞ng gi·∫£ng vi√™n h·ªó tr·ª£ t·ªët -> ch∆∞∆°ng tr√¨nh NEGATIVE, gi·∫£ng vi√™n POSITIVE). NEGATIVE khi qu√° t·∫£i, l·∫°c h·∫≠u, tr√πng l·∫∑p; POSITIVE khi h·ª£p l√Ω, c·∫≠p nh·∫≠t, th·ª±c t·∫ø; kh√¥ng r√µ -> NEUTRAL."
        ),
        "noi_dung": (
            "ƒê√ÅNH GI√Å N·ªòI DUNG CH∆Ø∆†NG TR√åNH. N·∫øu c√¢u kh√¥ng n√≥i n·ªôi dung m√¥n, h·ªçc li·ªáu, l·ªô tr√¨nh -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi l·∫°c h·∫≠u, tr√πng l·∫∑p, thi·∫øu th·ª±c t·∫ø; POSITIVE khi c·∫≠p nh·∫≠t, h·ªØu √≠ch; kh√¥ng r√µ -> NEUTRAL."
        ),
        "lich_hoc": (
            "ƒê√ÅNH GI√Å L·ªäCH H·ªåC/K·∫æ HO·∫†CH. N·∫øu c√¢u kh√¥ng nh·∫Øc l·ªãch, th·ªùi kh√≥a bi·ªÉu, x·∫øp ca -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi d·ªìn d·∫≠p, tr√πng l·ªãch, ƒë·ªïi l·ªãch li√™n t·ª•c; POSITIVE khi r√µ r√†ng, h·ª£p l√Ω; kh√¥ng r√µ -> NEUTRAL."
        ),
        "tin_chi": (
            "ƒê√ÅNH GI√Å T√çN CH·ªà/H·ªåC PH·∫¶N. N·∫øu c√¢u kh√¥ng n√≥i t√≠n ch·ªâ, ƒëƒÉng k√Ω h·ªçc ph·∫ßn, ti√™n quy·∫øt -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi b·∫•t h·ª£p l√Ω, kh√≥ ƒëƒÉng k√Ω; POSITIVE khi ph√¢n b·ªï h·ª£p l√Ω, d·ªÖ ƒëƒÉng k√Ω; kh√¥ng r√µ -> NEUTRAL."
        ),
        "de_cuong": (
            "ƒê√ÅNH GI√Å ƒê·ªÄ C∆Ø∆†NG/GI√ÅO TR√åNH. N·∫øu c√¢u kh√¥ng nh·∫Øc ƒë·ªÅ c∆∞∆°ng, t√†i li·ªáu, rubric -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi thi·∫øu r√µ r√†ng, thi·∫øu t√†i li·ªáu; POSITIVE khi ƒë·∫ßy ƒë·ªß, minh b·∫°ch; kh√¥ng r√µ -> NEUTRAL."
        ),
    },
    "co_so_vat_chat": {
        "_default": (
            "ƒê√ÅNH GI√Å C∆† S·ªû V·∫¨T CH·∫§T (m·∫°ng, ph√≤ng h·ªçc, ph√≤ng th√≠ nghi·ªám, thi·∫øt b·ªã, th∆∞ vi·ªán, g·ª≠i xe, v·ªá sinh, c·ªïng ƒë√†o t·∫°o). N·∫øu c√¢u kh√¥ng nh·∫Øc r√µ ƒë·∫øn c∆° s·ªü v·∫≠t ch·∫•t -> NONE. M·ªói aspect ƒë√°nh gi√° ƒë·ªôc l·∫≠p (v√≠ d·ª•: ph√≤ng h·ªçc n√≥ng nh∆∞ng th·∫ßy c√¥ d·∫°y d·ªÖ hi·ªÉu -> c∆° s·ªü v·∫≠t ch·∫•t NEGATIVE, gi·∫£ng vi√™n POSITIVE). NEGATIVE khi ph√†n n√†n h·ªèng, thi·∫øu, b·∫©n; POSITIVE khi khen ƒë·∫ßy ƒë·ªß, s·∫°ch, hi·ªán ƒë·∫°i; kh√¥ng r√µ -> NEUTRAL."
        ),
        "mang": (
            "ƒê√ÅNH GI√Å M·∫†NG/WI-FI. N·∫øu c√¢u kh√¥ng n√≥i m·∫°ng, wifi, internet -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi ch·∫≠m, r·ªõt k·∫øt n·ªëi; POSITIVE khi nhanh, ·ªïn ƒë·ªãnh; kh√¥ng r√µ -> NEUTRAL."
        ),
        "phong_hoc": (
            "ƒê√ÅNH GI√Å PH√íNG H·ªåC. N·∫øu c√¢u kh√¥ng n√≥i ph√≤ng h·ªçc, b√†n gh·∫ø, ƒëi·ªÅu h√≤a, ti·∫øng ·ªìn -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi n√≥ng, ·ªìn, xu·ªëng c·∫•p; POSITIVE khi m√°t, s·∫°ch, ƒë·ªß ti·ªán nghi; kh√¥ng r√µ -> NEUTRAL."
        ),
        "phong_thi_nghiem": (
            "ƒê√ÅNH GI√Å PH√íNG TH√ç NGHI·ªÜM/TH·ª∞C H√ÄNH. N·∫øu c√¢u kh√¥ng nh·∫Øc lab, thi·∫øt b·ªã th·ª±c h√†nh -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi thi·∫øu m√°y, ph·∫ßn m·ªÅm l·ªói; POSITIVE khi ƒë·∫ßy ƒë·ªß, hi·ªán ƒë·∫°i; kh√¥ng r√µ -> NEUTRAL."
        ),
        "thiet_bi": (
            "ƒê√ÅNH GI√Å THI·∫æT B·ªä GI·∫¢NG D·∫†Y. N·∫øu c√¢u kh√¥ng n√≥i m√°y chi·∫øu, micro, loa, TV -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi h·ªèng, √¢m k√©m; POSITIVE khi ho·∫°t ƒë·ªông t·ªët, r√µ r√†ng; kh√¥ng r√µ -> NEUTRAL."
        ),
        "thu_vien": (
            "ƒê√ÅNH GI√Å TH∆Ø VI·ªÜN. N·∫øu c√¢u kh√¥ng nh·∫Øc th∆∞ vi·ªán, t√†i li·ªáu, ch·ªó ng·ªìi -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi thi·∫øu t√†i li·ªáu, ch·∫≠t, ·ªìn; POSITIVE khi phong ph√∫, y√™n tƒ©nh; kh√¥ng r√µ -> NEUTRAL."
        ),
        "giu_xe_ve_sinh": (
            "ƒê√ÅNH GI√Å GI·ªÆ XE/NH√Ä V·ªÜ SINH. N·∫øu c√¢u kh√¥ng n√≥i g·ª≠i xe ho·∫∑c nh√† v·ªá sinh -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi b·∫©n, ƒë·∫Øt, m√πi kh√≥ ch·ªãu; POSITIVE khi s·∫°ch, thu·∫≠n ti·ªán; kh√¥ng r√µ -> NEUTRAL."
        ),
        "cong_quan_ly_dao_tao": (
            "ƒê√ÅNH GI√Å C·ªîNG/TRANG QU·∫¢N L√ù ƒê√ÄO T·∫†O. N·∫øu c√¢u kh√¥ng nh·∫Øc c·ªïng ƒë√†o t·∫°o, ƒëƒÉng nh·∫≠p, tra c·ª©u -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi qu√° t·∫£i, treo, kh√≥ d√πng; POSITIVE khi ·ªïn ƒë·ªãnh, d·ªÖ d√πng; kh√¥ng r√µ -> NEUTRAL."
        ),
    },
    "khac": {
        "_default": (
            "ƒê√ÅNH GI√Å NH√ìM KH√ÅC (h·ªçc ph√≠, h·ªçc b·ªïng, h√†nh ch√≠nh, CLB, KTX, m·ªôt c·ª≠a, ƒëƒÉng k√Ω t√≠n ch·ªâ, ƒëi·ªÉm r√®n luy·ªán). N·∫øu c√¢u kh√¥ng nh·∫Øc r√µ ƒë·∫øn nh√≥m n√†y -> NONE. M·ªói aspect ƒë√°nh gi√° ƒë·ªôc l·∫≠p (v√≠ d·ª•: h·ªçc ph√≠ tƒÉng nh∆∞ng ph√≤ng h·ªçc t·ªët -> nh√≥m kh√°c NEGATIVE, c∆° s·ªü v·∫≠t ch·∫•t POSITIVE). NEGATIVE khi ph√†n n√†n kh√≥ khƒÉn, ch·∫≠m tr·ªÖ; POSITIVE khi khen r√µ r√†ng, nhanh ch√≥ng; kh√¥ng r√µ -> NEUTRAL."
        ),
        "hoc_phi": (
            "ƒê√ÅNH GI√Å H·ªåC PH√ç. N·∫øu c√¢u kh√¥ng nh·∫Øc h·ªçc ph√≠, m·ª©c thu, ƒë√≥ng ti·ªÅn -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi ƒë·∫Øt, tƒÉng, thi·∫øu minh b·∫°ch; POSITIVE khi h·ª£p l√Ω, minh b·∫°ch; kh√¥ng r√µ -> NEUTRAL."
        ),
        "hoc_bong": (
            "ƒê√ÅNH GI√Å H·ªåC B·ªîNG. N·∫øu c√¢u kh√¥ng n√≥i ti√™u ch√≠, quy tr√¨nh, k·∫øt qu·∫£ h·ªçc b·ªïng -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi kh√≥, ch·∫≠m, kh√¥ng r√µ; POSITIVE khi d·ªÖ, minh b·∫°ch, k·ªãp th·ªùi; kh√¥ng r√µ -> NEUTRAL."
        ),
        "hanh_chinh": (
            "ƒê√ÅNH GI√Å TH·ª¶ T·ª§C H√ÄNH CH√çNH/CTSV. N·∫øu c√¢u kh√¥ng nh·∫Øc h·ªì s∆°, gi·∫•y t·ªù, x·ª≠ l√Ω -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi r∆∞·ªùm r√†, ch·∫≠m, thi·∫øu ph·∫£n h·ªìi; POSITIVE khi nhanh, r√µ r√†ng; kh√¥ng r√µ -> NEUTRAL."
        ),
        "clb": (
            "ƒê√ÅNH GI√Å CLB/HO·∫†T ƒê·ªòNG NGO·∫†I KH√ìA. N·∫øu c√¢u kh√¥ng n√≥i CLB, s·ª± ki·ªán, ho·∫°t ƒë·ªông sinh vi√™n -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi √≠t ho·∫°t ƒë·ªông, thi·∫øu h·∫•p d·∫´n; POSITIVE khi s√¥i n·ªïi, h·ªØu √≠ch; kh√¥ng r√µ -> NEUTRAL."
        ),
        "ktx": (
            "ƒê√ÅNH GI√Å K√ù T√öC X√Å. N·∫øu c√¢u kh√¥ng nh·∫Øc ph√≤ng KTX, an ninh, ƒëi·ªán n∆∞·ªõc -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi ch·∫≠t, m·∫•t an ninh, thi·∫øu ƒëi·ªán n∆∞·ªõc; POSITIVE khi s·∫°ch, an to√†n, ƒë·∫ßy ƒë·ªß; kh√¥ng r√µ -> NEUTRAL."
        ),
        "mot_cua": (
            "ƒê√ÅNH GI√Å VƒÇN PH√íNG M·ªòT C·ª¨A. N·∫øu c√¢u kh√¥ng nh·∫Øc m·ªôt c·ª≠a, ti·∫øp nh·∫≠n, tr·∫£ k·∫øt qu·∫£ -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi ch·ªù l√¢u, ƒë√¥ng, x·ª≠ l√Ω ch·∫≠m; POSITIVE khi nhanh, r√µ r√†ng; kh√¥ng r√µ -> NEUTRAL."
        ),
        "dang_ky_tin": (
            "ƒê√ÅNH GI√Å ƒêƒÇNG K√ù T√çN CH·ªà. N·∫øu c√¢u kh√¥ng n√≥i ƒëƒÉng k√Ω m√¥n, h·ªá th·ªëng ƒëƒÉng k√Ω -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi qu√° t·∫£i, l·ªói, kh√≥ d√πng; POSITIVE khi ·ªïn ƒë·ªãnh, d·ªÖ d√πng; kh√¥ng r√µ -> NEUTRAL."
        ),
        "diem_ren_luyen": (
            "ƒê√ÅNH GI√Å ƒêI·ªÇM R√àN LUY·ªÜN. N·∫øu c√¢u kh√¥ng nh·∫Øc DRL, minh ch·ª©ng, quy tr√¨nh -> NONE. M·ªói aspect ƒë·ªôc l·∫≠p. NEGATIVE khi kh√≥, kh√¥ng c√¥ng b·∫±ng; POSITIVE khi r√µ r√†ng, c√¥ng b·∫±ng; kh√¥ng r√µ -> NEUTRAL."
        ),
    },
}

SUBTOPIC_KW = {
    "giang_vien": {
        "dung_gio": _no_diacritics_set({
            "ƒëi d·∫°y","l√™n l·ªõp","v√†o l·ªõp","b·∫Øt ƒë·∫ßu ti·∫øt","k·∫øt th√∫c ti·∫øt",
            "gi·∫£ng vi√™n","gi√°o vi√™n","th·∫ßy gi√°o","c√¥ gi√°o","th·∫ßy c√¥",
            "gi·∫£ng vi√™n ƒëi d·∫°y","gi·∫£ng vi√™n l√™n l·ªõp","gi·∫£ng vi√™n v√†o l·ªõp",
            "th·∫ßy ƒëi d·∫°y","c√¥ ƒëi d·∫°y","th·∫ßy l√™n l·ªõp","c√¥ l√™n l·ªõp"
        }),
        "cham_diem": _no_diacritics_set({
            "ch·∫•m ƒëi·ªÉm","thang ƒëi·ªÉm","ƒëi·ªÉm thi","ƒëi·ªÉm th√†nh ph·∫ßn","ƒëi·ªÉm t·ªïng k·∫øt","ph√∫c kh·∫£o",
            "ƒëi·ªÉm gi·ªØa k·ª≥","ƒëi·ªÉm cu·ªëi k·ª≥","ƒëi·ªÉm nh√≥m","ƒëi·ªÉm c√° nh√¢n","ƒëi·ªÉm bonus",
            "ƒëi·ªÉm chuy√™n c·∫ßn","ƒëi·ªÉm chuy√™n ƒë·ªÅ","rubric","grading",
            "gi·∫£ng vi√™n","gi√°o vi√™n","th·∫ßy gi√°o","c√¥ gi√°o",
            "gi·∫£ng vi√™n ch·∫•m ƒëi·ªÉm","th·∫ßy ch·∫•m ƒëi·ªÉm","c√¥ ch·∫•m ƒëi·ªÉm","gi√°o vi√™n ch·∫•m ƒëi·ªÉm",
            "th·∫ßy gi√°o ch·∫•m ƒëi·ªÉm","c√¥ gi√°o ch·∫•m ƒëi·ªÉm"
        }),
        "ho_tro": _no_diacritics_set({
            "t∆∞ v·∫•n h·ªçc t·∫≠p","gi·∫£i ƒë√°p h·ªçc t·∫≠p","ph·∫£n h·ªìi h·ªçc t·∫≠p","cvht",
            "c·ªë v·∫•n h·ªçc t·∫≠p","h∆∞·ªõng d·∫´n h·ªçc t·∫≠p","trao ƒë·ªïi h·ªçc t·∫≠p","h·ªèi ƒë√°p h·ªçc t·∫≠p",
            "t∆∞ v·∫•n sinh vi√™n","gi·∫£i ƒë√°p sinh vi√™n","ph·∫£n h·ªìi sinh vi√™n",
            "c·ªë v·∫•n sinh vi√™n","h∆∞·ªõng d·∫´n sinh vi√™n",
            "gi·∫£ng vi√™n","gi√°o vi√™n","th·∫ßy gi√°o","c√¥ gi√°o",
            "gi·∫£ng vi√™n t∆∞ v·∫•n","gi·∫£ng vi√™n h∆∞·ªõng d·∫´n","gi·∫£ng vi√™n gi·∫£i ƒë√°p",
            "th·∫ßy t∆∞ v·∫•n","c√¥ t∆∞ v·∫•n","th·∫ßy h∆∞·ªõng d·∫´n","c√¥ h∆∞·ªõng d·∫´n",
            "th·∫ßy gi√°o t∆∞ v·∫•n","c√¥ gi√°o t∆∞ v·∫•n"
        }),
        "thai_do": _no_diacritics_set({
            "th√°i ƒë·ªô","·ª©ng x·ª≠","t√°c phong","phong th√°i","giao ti·∫øp","c√°ch n√≥i",
            "ng·ªØ ƒëi·ªáu","h√†nh vi","c·ª≠ ch·ªâ","c√°ch c∆∞ x·ª≠","th√°i ƒë·ªô gi·∫£ng vi√™n",
            "phong c√°ch","t∆∞∆°ng t√°c","th√°i ƒë·ªô l·ªõp","ng√¥n ng·ªØ c∆° th·ªÉ",
            "gi·∫£ng vi√™n","gi√°o vi√™n","th·∫ßy gi√°o","c√¥ gi√°o",
            "th√°i ƒë·ªô th·∫ßy","th√°i ƒë·ªô c√¥","th√°i ƒë·ªô gi√°o vi√™n",
            "th·∫ßy gi√°o th√°i ƒë·ªô","c√¥ gi√°o th√°i ƒë·ªô","gi·∫£ng vi√™n th√°i ƒë·ªô"
        }),
        "giang_day": _no_diacritics_set({
            "gi·∫£ng d·∫°y","truy·ªÅn ƒë·∫°t","di·ªÖn ƒë·∫°t","v√≠ d·ª•","b√†i gi·∫£ng","slide","ghi ch√∫",
            "√¥n t·∫≠p","b√†i h·ªçc","ph∆∞∆°ng ph√°p","th·ª±c h√†nh","l√Ω thuy·∫øt",
            "th·∫£o lu·∫≠n","minh h·ªça","slide gi·∫£ng","slide b√†i","gi·∫£i th√≠ch","phong c√°ch gi·∫£ng",
            "gi·∫£ng vi√™n","gi√°o vi√™n","th·∫ßy gi√°o","c√¥ gi√°o",
            "gi·∫£ng vi√™n gi·∫£ng d·∫°y","th·∫ßy gi·∫£ng","c√¥ gi·∫£ng","gi√°o vi√™n gi·∫£ng d·∫°y",
            "th·∫ßy gi√°o gi·∫£ng d·∫°y","c√¥ gi√°o gi·∫£ng d·∫°y"
        }),
    },

    "chuong_trinh": {
        "lich_hoc": _no_diacritics_set({
            "l·ªãch h·ªçc","th·ªùi kh√≥a bi·ªÉu","th·ªùi kho√° bi·ªÉu","k·∫ø ho·∫°ch h·ªçc t·∫≠p","x·∫øp l·ªãch","tr√πng l·ªãch",
            "ƒë·ªïi l·ªãch","l·ªãch thi","l·ªãch h·ªçc th√™m","ca t·ªëi","online","offline","ca s√°ng",
            "ca chi·ªÅu","h·ªçc b√π","thi d·ªìn","thi li√™n t·ª•c","x·∫øp ca","th·ªùi gian h·ªçc","l·ªãch ki·ªÉm tra"
        }),
        "tin_chi": _no_diacritics_set({
            "t√≠n ch·ªâ","h·ªçc ph·∫ßn","ti√™n quy·∫øt","song h√†nh","ƒëƒÉng k√Ω h·ªçc ph·∫ßn","n·ª£ m√¥n",
            "ƒë·ªß t√≠n","s·ªë t√≠n","kh·ªëi l∆∞·ª£ng h·ªçc","ƒëi·ªÅu ki·ªán h·ªçc ph·∫ßn","m√£ m√¥n","t·∫£i h·ªçc",
            "ph√¢n b·ªï h·ªçc ph·∫ßn","l·ªô tr√¨nh h·ªçc","s·ªë h·ªçc ph·∫ßn"
        }),
        "de_cuong": _no_diacritics_set({
            "ƒë·ªÅ c∆∞∆°ng","syllabus","gi√°o tr√¨nh","t√†i li·ªáu b·∫Øt bu·ªôc m√¥n h·ªçc","t√†i li·ªáu tham kh·∫£o m√¥n h·ªçc",
            "m·ª•c ti√™u h·ªçc ph·∫ßn","k·∫ø ho·∫°ch m√¥n","outline","k·∫ø ho·∫°ch gi·∫£ng d·∫°y","ph√¢n b·ªï ƒëi·ªÉm",
            "t√†i li·ªáu h·ªçc m√¥n h·ªçc","h∆∞·ªõng d·∫´n m√¥n h·ªçc","ph√¢n ph·ªëi ch∆∞∆°ng tr√¨nh","khung ƒëi·ªÉm m√¥n h·ªçc","thang ƒë√°nh gi√° m√¥n h·ªçc"
        }),
        "noi_dung": _no_diacritics_set({
            "n·ªôi dung","th·ª±c t·∫ø","th·ª±c ti·ªÖn","l·ªô tr√¨nh","khung ch∆∞∆°ng tr√¨nh",
            "c·∫≠p nh·∫≠t","ƒë·ªãnh h∆∞·ªõng ngh·ªÅ","ki·∫øn th·ª©c","module",
            "chuy√™n ƒë·ªÅ","c·∫•u tr√∫c m√¥n","ch∆∞∆°ng tr√¨nh h·ªçc","ƒë·ªÅ m·ª•c","m√¥n h·ªçc","h·ªçc li·ªáu"
        }),
    },

    "co_so_vat_chat": {
        "mang": _no_diacritics_set({
            "m·∫°ng wifi","wifi","wi-fi","wi fi","ƒëƒÉng nh·∫≠p wifi", "ping wifi","bƒÉng th√¥ng wifi","wifi tr∆∞·ªùng"
        }),
        "phong_hoc": _no_diacritics_set({
            "ph√≤ng h·ªçc","√°nh s√°ng","ƒë√®n ph√≤ng h·ªçc","m√°y l·∫°nh","ƒëi·ªÅu h√≤a","ƒëi·ªÅu ho√†","qu·∫°t",
            "b√†n gh·∫ø ph√≤ng h·ªçc","·ªï ƒëi·ªán ph√≤ng h·ªçc","·ªï c·∫Øm ph√≤ng h·ªçc","c√°ch √¢m","s√†n nh√†","r√®m c·ª≠a","tr·∫ßn nh√†","b·∫£ng vi·∫øt"
        }),
        "phong_thi_nghiem": _no_diacritics_set({
            "ph√≤ng th√≠ nghi·ªám","ph√≤ng th·ª±c h√†nh","lab","ph√≤ng lab","m√°y th·ª±c h√†nh",
            "c√†i ph·∫ßn m·ªÅm","thi·∫øt b·ªã th√≠ nghi·ªám","d·ª•ng c·ª• lab","ph√≤ng m√°y","thi·∫øt b·ªã lab"
        }),
        "thiet_bi": _no_diacritics_set({
            "m√°y chi·∫øu","micro","mic","loa","√¢m thanh","tivi","c√°p","hdmi","adapter",
            "thi·∫øt b·ªã gi·∫£ng d·∫°y","m√°y quay","camera l·ªõp","loa bluetooth","√¢m l∆∞·ª£ng",
            "ƒë·∫ßu n·ªëi","b·ªô chia","thi·∫øt b·ªã ph√≤ng h·ªçc","tv ph√≤ng h·ªçc"
        }),
        "thu_vien": _no_diacritics_set({
            "th∆∞ vi·ªán","m∆∞·ª£n s√°ch","tr·∫£ s√°ch","t√†i li·ªáu s·ªë th∆∞ vi·ªán","ebook th∆∞ vi·ªán","ch·ªó ng·ªìi th∆∞ vi·ªán",
            "b√†n ƒë·ªçc","y√™n tƒ©nh th∆∞ vi·ªán","gi·ªù m·ªü c·ª≠a th∆∞ vi·ªán","m∆∞·ª£n gi√°o tr√¨nh","tra c·ª©u s√°ch","wifi th∆∞ vi·ªán",
            "tra c·ª©u th∆∞ vi·ªán","k·ªá s√°ch","t√†i nguy√™n s·ªë th∆∞ vi·ªán","khu ƒë·ªçc","m∆∞·ª£n t√†i li·ªáu th∆∞ vi·ªán","m∆∞·ª£n thi·∫øt b·ªã th∆∞ vi·ªán",
            "t√†i li·ªáu th∆∞ vi·ªán","t√†i li·ªáu m∆∞·ª£n th∆∞ vi·ªán","s√°ch th∆∞ vi·ªán"
        }),
        "giu_xe_ve_sinh": _no_diacritics_set({
            "b√£i gi·ªØ xe","nh√† gi·ªØ xe","g·ª≠i xe","th·∫ª xe","qu·∫πt th·∫ª","ph√≠ g·ª≠i xe",
            "nh√† v·ªá sinh","toilet","gi·∫•y v·ªá sinh","n∆∞·ªõc r·ª≠a tay",
            "·ªëng n∆∞·ªõc nh√† v·ªá sinh","c·ªëng tho√°t nh√† v·ªá sinh","s√†n nh√† v·ªá sinh","wc nh√† v·ªá sinh"
        }),
        "cong_quan_ly_dao_tao": _no_diacritics_set({
            "trang qu·∫£n l√Ω ƒë√†o t·∫°o","c·ªïng ƒë√†o t·∫°o","h·ªá th·ªëng ƒë√†o t·∫°o","portal","c·ªïng th√¥ng tin",
            "ƒëƒÉng nh·∫≠p c·ªïng ƒë√†o t·∫°o","qu√™n m·∫≠t kh·∫©u","reset m·∫≠t kh·∫©u","qu√° t·∫£i","treo","tra c·ª©u ƒëi·ªÉm",
            "web ƒë√†o t·∫°o","c·ªïng sinh vi√™n","h·ªá th·ªëng online","trang web ƒë√†o t·∫°o"
        }),
    },

    "khac": {
        "hoc_phi": _no_diacritics_set({
            "h·ªçc ph√≠","thu th√™m","bi√™n lai","mi·ªÖn gi·∫£m","ch√≠nh s√°ch h·ªçc ph√≠","c√¥ng khai h·ªçc ph√≠",
            "ƒë√≥ng ti·ªÅn","n·ªôp h·ªçc ph√≠","thu ti·ªÅn","ho√° ƒë∆°n h·ªçc ph√≠","ch√≠nh s√°ch","phi·∫øu thu","bi√™n nh·∫≠n",
            "ƒë√≥ng h·ªçc","n·ªôp l·ªá ph√≠","ph√≠ h·ªçc","thanh to√°n h·ªçc ph√≠","bi√™n lai h·ªçc ph√≠","phi·∫øu thu h·ªçc ph√≠"
        }),
        "hoc_bong": _no_diacritics_set({
            "h·ªçc b·ªïng","h·ªçc b·ªïng kkht","ti√™u ch√≠ h·ªçc b·ªïng","ƒëi·ªÉm chu·∫©n h·ªçc b·ªïng",
            "n·ªôp h·ªì s∆° h·ªçc b·ªïng","k·∫øt qu·∫£ h·ªçc b·ªïng","tr·ªÖ h·∫°n h·ªçc b·ªïng","x√©t h·ªçc b·ªïng",
            "ƒëi·ªÅu ki·ªán h·ªçc b·ªïng","qu·ªπ h·ªçc b·ªïng","th√¥ng b√°o h·ªçc b·ªïng","h·ªì s∆° h·ªçc b·ªïng","ƒëi·ªÉm x√©t"
        }),
        "hanh_chinh": _no_diacritics_set({
            "th·ªß t·ª•c h√†nh ch√≠nh","h√†nh ch√≠nh","gi·∫•y t·ªù h√†nh ch√≠nh","ƒë√≥ng d·∫•u","x√°c nh·∫≠n sinh vi√™n","gi·∫•y x√°c nh·∫≠n",
            "ph√≤ng ctsv","ti·∫øp nh·∫≠n h·ªì s∆° h√†nh ch√≠nh","tr·∫£ k·∫øt qu·∫£ h√†nh ch√≠nh","xin gi·∫•y t·ªù h√†nh ch√≠nh","n·ªôp h·ªì s∆° h√†nh ch√≠nh","bi·ªÉu m·∫´u h√†nh ch√≠nh",
            "ph√≤ng ƒë√†o t·∫°o h√†nh ch√≠nh","ch·ª©ng nh·∫≠n h√†nh ch√≠nh","x√°c minh h√†nh ch√≠nh","gi·∫•y ph√©p h√†nh ch√≠nh","b·∫£n sao h√†nh ch√≠nh","vƒÉn th∆∞ h√†nh ch√≠nh"
        }),
        "clb": _no_diacritics_set({
            "c√¢u l·∫°c b·ªô","clb","tuy·ªÉn th√†nh vi√™n","ho·∫°t ƒë·ªông clb","ngo·∫°i kh√≥a","s·ª± ki·ªán","workshop",
            "ƒëƒÉng k√Ω clb","ƒëo√†n h·ªôi","event","team","cu·ªôc thi","ho·∫°t ƒë·ªông sv",
            "ho·∫°t ƒë·ªông ngo·∫°i kho√°","nh√≥m sinh vi√™n","s·ª± ki·ªán tr∆∞·ªùng","ƒëƒÉng k√Ω tham gia"
        }),
        "ktx": _no_diacritics_set({
            "k√Ω t√∫c x√°","k√≠ t√∫c x√°","ktx","·ªü gh√©p","ph√≤ng ktx","b·∫£o v·ªá ktx","gi·ªù gi·ªõi nghi√™m",
            "ƒëi·ªán ktx","n∆∞·ªõc ktx","khu ·ªü ktx","an ninh ktx","ph√≤ng chung ktx",
            "to√† ktx","khu v·ª±c ·ªü ktx","qu·∫£n l√Ω ktx"
        }),
        "mot_cua": _no_diacritics_set({
            "vƒÉn ph√≤ng m·ªôt c·ª≠a","vp1c","ph√≤ng m·ªôt c·ª≠a","n·ªôp h·ªì s∆° m·ªôt c·ª≠a","s·ªë th·ª© t·ª±","l·∫•y gi·∫•y m·ªôt c·ª≠a","tr·∫£ gi·∫•y m·ªôt c·ª≠a","tr·∫£ k·∫øt qu·∫£ m·ªôt c·ª≠a",
            "h·ªì s∆° m·ªôt c·ª≠a","gi·∫•y t·ªù m·ªôt c·ª≠a","s·ªë l∆∞·ª£t m·ªôt c·ª≠a","qu·∫ßy ti·∫øp nh·∫≠n m·ªôt c·ª≠a","m·ªôt c·ª≠a"
        }),
        "dang_ky_tin": _no_diacritics_set({
            "ƒëƒÉng k√Ω m√¥n","ƒëƒÉng k√Ω t√≠n ch·ªâ","ƒëk t√≠n","ƒëk m√¥n","server ƒëƒÉng k√Ω",
            "x·∫øp l·ªãch t·ª± ƒë·ªông","l·ªçc tr√πng l·ªãch","h·ªá th·ªëng ƒëƒÉng k√Ω",
            "ƒëƒÉng k√Ω online","ch·ªçn m√¥n","m·ªü l·ªõp","ƒë√≥ng l·ªõp","s·∫Øp l·ªãch","h·ªá th·ªëng ƒëƒÉng k√Ω t√≠n ch·ªâ"
        }),
        "diem_ren_luyen": _no_diacritics_set({
            "ƒëi·ªÉm r√®n luy·ªán","drl","ƒë√°nh gi√° r√®n luy·ªán","minh ch·ª©ng drl","ch·∫•m drl",
            "minh ch·ª©ng","ƒëi·ªÉm rl","b·∫£ng drl","ƒë√°nh gi√° c√° nh√¢n","ƒë√°nh gi√° t·∫≠p th·ªÉ"
        }),
    },
}


def _aspect_has_kw(aspect_vi: str, s_norm: str) -> bool:
    """Ki·ªÉm tra aspect c√≥ keyword trong sentence kh√¥ng (ch·ªâ keywords >= 3 k√Ω t·ª±)"""
    for kws in SUBTOPIC_KW.get(aspect_vi, {}).values():
        for kw in kws:
            kw_norm = _norm_match(kw)
            if len(kw_norm) >= 3 and kw_norm in s_norm:
                return True
    return False

def _pick_subprompt(aspect: str, sentence: str) -> str:
    s = _norm_match(str(sentence))
    for sub, kws in SUBTOPIC_KW.get(aspect, {}).items():
        for kw in kws:
            kw_norm = _norm_match(kw)
            if len(kw_norm) >= 3 and kw_norm in s:
                return ASPECT_PROMPTS[aspect].get(sub, ASPECT_PROMPTS[aspect]["_default"])
    return ASPECT_PROMPTS[aspect]["_default"]


In [4]:
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


Device: cuda


# Load Data


In [5]:
from pathlib import Path
import pandas as pd

train_df = pd.read_csv("train_data.csv")
val_df   = pd.read_csv("train.csv")
train_df["sentence"] = train_df["sentence"].apply(_norm_store).astype(str)
val_df["sentence"]   = val_df["sentence"].apply(_norm_store).astype(str)

train_df = pd.concat([train_df, val_df], ignore_index=True)
train_df = train_df.drop_duplicates(subset=["sentence"]).reset_index(drop=True)

topic_map = {v: i for i, v in enumerate(sorted(train_df["topic"].dropna().unique()))}
sent_map  = {v: i for i, v in enumerate(sorted(train_df["sentiment"].dropna().unique()))}

train_df["topic_id"]     = train_df["topic"].map(topic_map).astype(int)
train_df["sentiment_id"] = train_df["sentiment"].map(sent_map).astype(int)

print(f"‚úÖ Loaded {len(train_df)} samples")


‚úÖ Loaded 20221 samples


# Build Pairs & Dataset


In [6]:
# ===== Build train pairs =====
def build_pairs_df(base_df, neg_per_sent=1, rng_seed=42, skip_hard_negatives=True):
    """
    Build training pairs from base dataframe.

    Args:
        base_df: DataFrame with columns [sentence, topic_id, sentiment_id]
        neg_per_sent: Number of negative samples per sentence
        rng_seed: Random seed
        skip_hard_negatives: If True, t·∫°o negative pairs c√≥ ƒëi·ªÅu ki·ªán:
                            - ∆Øu ti√™n easy negatives (aspects kh√¥ng c√≥ keywords)
                            - T·∫°o m·ªôt ph·∫ßn hard negatives (t·ª∑ l·ªá th·∫•p) ƒë·ªÉ model h·ªçc reject aspects c√≥ keywords nh∆∞ng kh√¥ng li√™n quan
                            - ƒêi·ªÅu n√†y gi√∫p model h·ªçc c·∫£ reject v√† multi-topic detection
    """
    rng = random.Random(rng_seed)
    rows = []

    for _, r in base_df.iterrows():
        s = str(r["sentence"])
        t_id = int(r["topic_id"])
        y = int(r["sentiment_id"])
        t_key = ASPECTS[t_id]

        rows.append({"text": s, "aspect": t_key, "label": y + 1})
       
        others = [a for a in ASPECTS if a != t_key]

        s_norm = _norm_match(s)
        hard = [a for a in others if _aspect_has_kw(a, s_norm)]
        easy = [a for a in others if a not in hard]

        if skip_hard_negatives:
            picks = []

   
            if easy:
                n_easy = min(neg_per_sent, len(easy))
                picks.extend(rng.sample(easy, k=n_easy))

            if hard and len(picks) < neg_per_sent:
                n_hard = min(1, neg_per_sent - len(picks))
                picks.extend(rng.sample(hard, k=min(n_hard, len(hard))))
            for a in picks:
                rows.append({"text": s, "aspect": a, "label": 0})
        else:
            picks = []
            if hard:
                hard_shuf = hard[:]
                rng.shuffle(hard_shuf)
                picks.extend(hard_shuf[:neg_per_sent])
            if len(picks) < neg_per_sent and easy:
                rest = neg_per_sent - len(picks)
                picks.extend(rng.sample(easy, k=min(rest, len(easy))))

            for a in picks[:neg_per_sent]:
                rows.append({"text": s, "aspect": a, "label": 0})

    return pd.DataFrame(rows)


# ---- tokenizer ----
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)

def encode_pair(tokenizer, text, prompt, max_len):
    return tokenizer(
        prompt, text,
        truncation="only_second",
        padding="max_length",
        max_length=max_len,
        return_tensors="pt",
        return_overflowing_tokens=False
    )


# ---- dataset ----
class PairABSADataset(Dataset):
    def __init__(self, df_pairs, tokenizer, max_len=MAX_LEN):
        self.df = df_pairs.reset_index(drop=True)
        self.tok = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        s = self.df.loc[i, "text"]
        a = self.df.loc[i, "aspect"]

        prompt = _pick_subprompt(a, s)
        y = int(self.df.loc[i, "label"])
        enc = encode_pair(self.tok, s, prompt, self.max_len)

        item = {k: v.squeeze(0) for k, v in enc.items()}
        item["labels"] = torch.tensor(y, dtype=torch.long)
        return item


# ---- dataloader ----
def make_loader(df_pairs, tokenizer, batch_size=64, max_len=MAX_LEN, shuffle=True):
    ds = PairABSADataset(df_pairs, tokenizer, max_len=max_len)
    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
    )


# Define Model & Optimizer


In [7]:
# === Model: Head 2 t·∫ßng + GELU + LayerNorm ===
class PhoBERTPairABSA(nn.Module):
    def __init__(self, base_model="vinai/phobert-base", num_cls=4, dropout=0.3):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(base_model)
        hidden_size = self.backbone.config.hidden_size
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_size, hidden_size),
            nn.GELU(),
            nn.LayerNorm(hidden_size),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_cls)
        )

    def forward(self, input_ids, attention_mask):
        out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        cls = out.last_hidden_state[:, 0, :]
        return self.classifier(cls)

model = PhoBERTPairABSA(base_model=BASE_MODEL, num_cls=4, dropout=0.3).to(device)
optimizer = AdamW([
    {"params": model.backbone.parameters(), "lr": LR_BACKBONE},
    {"params": model.classifier.parameters(), "lr": LR_HEAD},
], weight_decay=WEIGHT_DECAY)
scaler = GradScaler(enabled=(device.type == "cuda"))
scheduler = CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS, eta_min=LR_BACKBONE/10)


# Train

In [8]:
from sklearn.metrics import f1_score, precision_recall_fscore_support
from collections import Counter
from sklearn.utils.class_weight import compute_class_weight


model.train()
for epoch in range(1, NUM_EPOCHS + 1):
    # ====== Build TRAIN  ======
    neg_ps = 1 if epoch <= 4 else NEG_PER_SENT
    seed_ep = SEED + epoch

    train_pairs_df = build_pairs_df(
        base_df=train_df[["sentence","topic_id","sentiment_id"]],
        neg_per_sent=neg_ps,
        rng_seed=seed_ep,
        skip_hard_negatives=True 
    )
    # class weight
    labels_4 = train_pairs_df["label"].values
    counts = np.bincount(labels_4, minlength=4).astype(np.int64)
    w = compute_class_weight("balanced", classes=np.arange(4), y=labels_4).astype(np.float32)

    alpha = 0.18 if epoch <= 2 else (0.15 if epoch <= 4 else 0.12)
    w0_floor = np.max(w[1:]) * alpha
    w[0] = max(w[0], w0_floor)
    print(f"\n[Epoch {epoch}] label counts (none,neg,neu,pos) = {counts.tolist()} | "
          f"class_weight = {w.round(3).tolist()} | neg_per_sent={neg_ps}")

    criterion = nn.CrossEntropyLoss(
        weight=torch.tensor(w, dtype=torch.float32, device=device),
        label_smoothing=LABEL_SMOOTH
    )

    train_loader = make_loader(
        train_pairs_df,
        tokenizer,
        batch_size=BATCH_SIZE,
        max_len=MAX_LEN,
        shuffle=True
    )


    # ====== Train one epoch ======
    running_loss = 0.0
    y_true_all, y_pred_all = [], []

    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}",
                leave=False, dynamic_ncols=True, file=sys.stdout)

    for batch in pbar:
        input_ids      = batch["input_ids"].to(device, non_blocking=True)
        attention_mask = batch["attention_mask"].to(device, non_blocking=True)
        labels         = batch["labels"].to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with autocast(device_type="cuda", dtype=torch.float16, enabled=(device.type == "cuda")):
            logits = model(input_ids, attention_mask)
            loss   = criterion(logits, labels)

        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRAD_NORM)
        scaler.step(optimizer); scaler.update()

        running_loss += loss.item()
        preds = logits.argmax(dim=1)

        y_true_all.extend(labels.cpu().tolist())
        y_pred_all.extend(preds.cpu().tolist())

        pbar.set_postfix(loss=running_loss / max(1, pbar.n))

    print("üëâ Label distribution SEEN:", Counter(y_true_all))

    # === Train metrics ===
    y_true_all = np.array(y_true_all)
    y_pred_all = np.array(y_pred_all)

    acc4 = (y_pred_all == y_true_all).mean()
    mask_pos = (y_true_all != 0)
    acc_pos3 = (y_pred_all[mask_pos] == y_true_all[mask_pos]).mean() if mask_pos.any() else 0.0
    macro_f1_pos3 = f1_score(y_true_all[mask_pos], y_pred_all[mask_pos],
                             average="macro", labels=[1,2,3]) if mask_pos.any() else 0.0

    y_true_none = (y_true_all == 0).astype(int)
    y_pred_none = (y_pred_all == 0).astype(int)
    none_prec, none_rec, none_f1, _ = precision_recall_fscore_support(
        y_true_none, y_pred_none, average="binary", zero_division=0
    )

    avg_loss = running_loss / len(train_loader)

    print(
        f"\n‚úÖ Epoch {epoch}/{NUM_EPOCHS} | "
        f"TrainLoss={avg_loss:.4f} | Acc4={acc4:.4f} | "
        f"Acc_pos3={acc_pos3:.4f} | MacroF1_pos3={macro_f1_pos3:.4f} | "
        f"None(P/R/F1)={none_prec:.3f}/{none_rec:.3f}/{none_f1:.3f}"
    )

    scheduler.step()

print("\nüéâ Training completed.")



[Epoch 1] label counts (none,neg,neu,pos) = [20221, 9045, 1501, 9675] | class_weight = [1.2120000123977661, 1.1180000305175781, 6.736000061035156, 1.0449999570846558] | neg_per_sent=1


Epoch 1/10:   0%|          | 0/632 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 20221, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 1/10 | TrainLoss=1.0096 | Acc4=0.7619 | Acc_pos3=0.6430 | MacroF1_pos3=0.6318 | None(P/R/F1)=0.825/0.881/0.852

[Epoch 2] label counts (none,neg,neu,pos) = [20221, 9045, 1501, 9675] | class_weight = [1.2120000123977661, 1.1180000305175781, 6.736000061035156, 1.0449999570846558] | neg_per_sent=1


Epoch 2/10:   0%|          | 0/632 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 20221, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 2/10 | TrainLoss=0.8186 | Acc4=0.8717 | Acc_pos3=0.8193 | MacroF1_pos3=0.8095 | None(P/R/F1)=0.894/0.924/0.909

[Epoch 3] label counts (none,neg,neu,pos) = [20221, 9045, 1501, 9675] | class_weight = [1.0099999904632568, 1.1180000305175781, 6.736000061035156, 1.0449999570846558] | neg_per_sent=1


Epoch 3/10:   0%|          | 0/632 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 20221, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 3/10 | TrainLoss=0.7882 | Acc4=0.9001 | Acc_pos3=0.8750 | MacroF1_pos3=0.8558 | None(P/R/F1)=0.932/0.925/0.929

[Epoch 4] label counts (none,neg,neu,pos) = [20221, 9045, 1501, 9675] | class_weight = [1.0099999904632568, 1.1180000305175781, 6.736000061035156, 1.0449999570846558] | neg_per_sent=1


Epoch 4/10:   0%|          | 0/632 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 20221, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 4/10 | TrainLoss=0.7506 | Acc4=0.9194 | Acc_pos3=0.9024 | MacroF1_pos3=0.8892 | None(P/R/F1)=0.946/0.937/0.941

[Epoch 5] label counts (none,neg,neu,pos) = [40437, 9045, 1501, 9675] | class_weight = [1.2120000123977661, 1.6770000457763672, 10.102999687194824, 1.5670000314712524] | neg_per_sent=2


Epoch 5/10:   0%|          | 0/948 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 40437, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 5/10 | TrainLoss=0.8218 | Acc4=0.9327 | Acc_pos3=0.9016 | MacroF1_pos3=0.9015 | None(P/R/F1)=0.968/0.948/0.958

[Epoch 6] label counts (none,neg,neu,pos) = [40437, 9045, 1501, 9675] | class_weight = [1.2120000123977661, 1.6770000457763672, 10.102999687194824, 1.5670000314712524] | neg_per_sent=2


Epoch 6/10:   0%|          | 0/948 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 40437, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 6/10 | TrainLoss=0.8052 | Acc4=0.9405 | Acc_pos3=0.9114 | MacroF1_pos3=0.9126 | None(P/R/F1)=0.971/0.955/0.963

[Epoch 7] label counts (none,neg,neu,pos) = [40437, 9045, 1501, 9675] | class_weight = [1.2120000123977661, 1.6770000457763672, 10.102999687194824, 1.5670000314712524] | neg_per_sent=2


Epoch 7/10:   0%|          | 0/948 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 40437, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 7/10 | TrainLoss=0.7925 | Acc4=0.9465 | Acc_pos3=0.9184 | MacroF1_pos3=0.9236 | None(P/R/F1)=0.972/0.961/0.966

[Epoch 8] label counts (none,neg,neu,pos) = [40437, 9045, 1501, 9675] | class_weight = [1.2120000123977661, 1.6770000457763672, 10.102999687194824, 1.5670000314712524] | neg_per_sent=2


Epoch 8/10:   0%|          | 0/948 [00:00<?, ?it/s]

üëâ Label distribution SEEN: Counter({0: 40437, 3: 9675, 1: 9045, 2: 1501})

‚úÖ Epoch 8/10 | TrainLoss=0.7849 | Acc4=0.9491 | Acc_pos3=0.9232 | MacroF1_pos3=0.9288 | None(P/R/F1)=0.974/0.962/0.968

[Epoch 9] label counts (none,neg,neu,pos) = [40437, 9045, 1501, 9675] | class_weight = [1.2120000123977661, 1.6770000457763672, 10.102999687194824, 1.5670000314712524] | neg_per_sent=2


Epoch 9/10:   0%|          | 0/948 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Export Model


In [9]:
#Export
import os, torch

SAVE_DIR = "/content/phobert_pair_absa"
os.makedirs(SAVE_DIR, exist_ok=True)

model_bin_path = os.path.join(SAVE_DIR, "model.bin")
torch.save(model.state_dict(), model_bin_path)
print(f"‚úÖ Saved model weights to: {model_bin_path}")

tok_dir = os.path.join(SAVE_DIR, "tokenizer")
os.makedirs(tok_dir, exist_ok=True)
tokenizer.save_pretrained(tok_dir)
print(f"‚úÖ Saved tokenizer to: {tok_dir}")


‚úÖ Saved model weights to: /content/phobert_pair_absa/model.bin
‚úÖ Saved tokenizer to: /content/phobert_pair_absa/tokenizer


In [10]:
from huggingface_hub import HfApi, login
import os

MODEL_REPO = "Ptul2x5/Student_Feedback_Sentiment"
HF_TOKEN = "my-secret-token"

login(token=HF_TOKEN)
api = HfApi()

print("üì§ Uploading tokenizer...")
api.upload_folder(
    folder_path=os.path.join(SAVE_DIR, "tokenizer"),
    repo_id=MODEL_REPO,
    repo_type="model",
    commit_message="Upload tokenizer for inference"
)
print("‚úÖ Tokenizer uploaded successfully")

print("üì§ Uploading model.bin...")
api.upload_file(
    path_or_fileobj=os.path.join(SAVE_DIR, "model.bin"),
    path_in_repo="model.bin",
    repo_id=MODEL_REPO,
    repo_type="model",
    commit_message="Upload model weights (final model from training)"
)
print("‚úÖ Model weights uploaded successfully")

üì§ Uploading tokenizer...


No files have been modified since last commit. Skipping to prevent empty commit.


‚úÖ Tokenizer uploaded successfully
üì§ Uploading model.bin...


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...obert_pair_absa/model.bin:   0%|          |  733kB /  542MB            

‚úÖ Model weights uploaded successfully
