In [1]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Optimizer
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

df = pd.read_csv("dataset.csv")
MODEL_NAME = "bert-base-uncased"
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 30
LR = 2e-5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    import os
    os.environ["TF_ENABLE_ONEDNN_OPTS"]= "0"
except ImportError:
    print("Please check the environment.")
def check_input(df):
    print(df.isna().sum())
    print(df[df['feedback_text'].isna()])
    print(df.columns.tolist())
    print(df.head(10))

check_input(df=df)

  from .autonotebook import tqdm as notebook_tqdm
2025-11-10 22:12:02.067271: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-10 22:12:02.384318: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-10 22:12:08.203224: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


feedback_text    2
sentiment        0
topic            0
urgency          0
action           0
source           0
dtype: int64
    feedback_text sentiment    topic urgency action source
49            NaN   neutral  overall     low   none   real
139           NaN   neutral  overall     low   none   real
['feedback_text', 'sentiment', 'topic', 'urgency', 'action', 'source']
                                       feedback_text sentiment  \
0  We need to streamline the AI demos for all AI ...  negative   
1                                 Room size is small  negative   
2                 Noise outside the room in the hall  negative   
3              The lunch was so cold and food is dry  negative   
4                         No variety of food options  negative   
5                      Coffee machine is not working  negative   
6                         The content is so outdated  negative   
7                                          Excellent  positive   
8              The instructor i

In [2]:
# create data class
class FeedbackDataset(Dataset):
    def __init__(self, csv_file, tokenizer, rule_based_func=None):
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.rule_based_func = rule_based_func or (lambda x: torch.zeros(20))  # ÈªòËÆ§ËßÑÂàôÁâπÂæÅÂêëÈáèÈïøÂ∫¶20

        # ÁºñÁ†ÅÊ†áÁ≠æ
        self.sentiment_encoder = LabelEncoder()
        self.sentiment = torch.tensor(self.sentiment_encoder.fit_transform(self.data['sentiment']))
        
        self.urgency_encoder = LabelEncoder()
        self.urgency = torch.tensor(self.urgency_encoder.fit_transform(self.data['urgency']))

        self.topic_encoder = LabelEncoder()
        self.topics = torch.tensor(
            self.topic_encoder.fit_transform(self.data['topic']))

        self.action_encoder = LabelEncoder()
        self.action = torch.tensor(self.action_encoder.fit_transform(self.data['action']))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data.loc[idx, 'feedback_text']
        rule_vec = self.rule_based_func(text)  # rule-based binary vector

        inputs = self.tokenizer(
            text, truncation=True, padding='max_length', max_length=MAX_LEN, return_tensors="pt"
        )
        item = {key: val.squeeze(0) for key, val in inputs.items()}
        item['rule_vec'] = rule_vec
        item['sentiment'] = self.sentiment[idx]
        item['urgency'] = self.urgency[idx]
        item['topic'] = self.topics[idx]
        item['action'] = self.action[idx]
        return item


In [3]:
# rule for bert class
def rule_based_features(text):
    sentiment_words = ["excellent", "terrible", "good", "bad", "love", "hate"]
    urgency_words = ["immediately", "urgent", "asap", "delay"]
    topic_words = ["trainer", "venue", "content", "equipment"]
    all_words = sentiment_words + urgency_words + topic_words

    vector = torch.zeros(len(all_words))
    # print(text, type(text))
    words = text.lower().split()
    for i, w in enumerate(all_words):
        if w in words:
            vector[i] = 1.0
    return vector

In [4]:
class MultiTaskBERT(nn.Module):
    def __init__(self, model_name, rule_dim=20):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.rule_dim = rule_dim
        hidden_size = self.bert.config.hidden_size

        # ÂÜªÁªìÈô§ÊúÄÂêé‰∏§Â±ÇÂ§ñÁöÑTransformerÂ±Ç
        for name, param in self.bert.named_parameters():
            if "encoder.layer.10" not in name and "encoder.layer.11" not in name:
                param.requires_grad = False

        # ÂàÜÁ±ªÂ§¥
        self.dropout = nn.Dropout(0.2)
        self.sentiment_classifier = nn.Linear(hidden_size + rule_dim, 3)
        self.urgency_classifier = nn.Linear(hidden_size + rule_dim, 3)
        self.topic_classifier = nn.Linear(hidden_size + rule_dim, 7)  # Â§öÊ†áÁ≠æ
        self.action_classifier = nn.Linear(hidden_size + rule_dim, 6)

    def forward(self, input_ids, attention_mask, rule_vec):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0, :]
        x = torch.cat([cls_output, rule_vec], dim=1)
        x = self.dropout(x)
        return {
            "sentiment": self.sentiment_classifier(x),
            "urgency": self.urgency_classifier(x),
            "topic": self.topic_classifier(x),
            "action": self.action_classifier(x)
        }


In [5]:
# do training curve
from sklearn.metrics import f1_score, accuracy_score
def evaluate(model, dataloader):
    model.eval()
    preds, labels = [], []
    urg_preds, urg_labels = [], []
    sent_preds, sent_labels = [], []
    f1_scores = {}

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            rule_vec = batch['rule_vec'].to(DEVICE)
            sentiment = batch['sentiment'].to(DEVICE)
            urgency = batch['urgency'].to(DEVICE)

            outputs = model(input_ids, attention_mask, rule_vec)
            s_pred = torch.argmax(outputs['sentiment'], dim=1)
            u_pred = torch.argmax(outputs['urgency'], dim=1)

            sent_preds.extend(s_pred.cpu().numpy())
            sent_labels.extend(sentiment.cpu().numpy())
            urg_preds.extend(u_pred.cpu().numpy())
            urg_labels.extend(urgency.cpu().numpy())

    f1_scores['sentiment'] = f1_score(sent_labels, sent_preds, average='macro')
    f1_scores['urgency'] = f1_score(urg_labels, urg_preds, average='macro')
    acc_sent = accuracy_score(sent_labels, sent_preds)
    acc_urg = accuracy_score(urg_labels, urg_preds)

    print(f"üìä Validation | Sentiment: F1={f1_scores['sentiment']:.3f}, Acc={acc_sent:.3f} | "
          f"Urgency: F1={f1_scores['urgency']:.3f}, Acc={acc_urg:.3f}")

    return {
        "f1_sentiment": f1_scores['sentiment'],
        "f1_urgency": f1_scores['urgency'],
        "acc_sentiment": acc_sent,
        "acc_urgency": acc_urg
    }


In [6]:
# running code
def train_model(model, dataloader, optimizer, scheduler, loss_fn, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            rule_vec = batch['rule_vec'].to(DEVICE)
            sentiment = batch['sentiment'].to(DEVICE)
            urgency = batch['urgency'].to(DEVICE)
            topic = batch['topic'].to(DEVICE)
            action = batch['action'].to(DEVICE)

            outputs = model(input_ids, attention_mask, rule_vec)
            loss = (
                0.2 * loss_fn['ce'](outputs['sentiment'], sentiment)
                + 0.3 * loss_fn['ce'](outputs['urgency'], urgency)
                + 0.4 * loss_fn['ce'](outputs['topic'], topic)
                + loss_fn['ce'](outputs['action'], action)
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1} average loss: {total_loss / len(dataloader):.4f}")

In [7]:
# train - val loop
def train_model(model, train_loader, val_loader, optimizer, scheduler, loss_fn, num_epochs):
    train_losses, val_f1s = [], []

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            rule_vec = batch['rule_vec'].to(DEVICE)
            sentiment = batch['sentiment'].to(DEVICE)
            urgency = batch['urgency'].to(DEVICE)
            topic = batch['topic'].to(DEVICE)
            action = batch['action'].to(DEVICE)

            outputs = model(input_ids, attention_mask, rule_vec)
            loss = (
                0.25 * loss_fn['ce'](outputs['sentiment'], sentiment)
                + 0.25 * loss_fn['ce'](outputs['urgency'], urgency)
                + 0.35 * loss_fn['ce'](outputs['topic'], topic)
                + 0.15 * loss_fn['ce'](outputs['action'], action)
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        train_losses.append(avg_loss)

        print(f"‚úÖ Epoch {epoch+1} | Train Loss: {avg_loss:.4f}")

        # È™åËØÅÈò∂ÊÆµ
        val_metrics = evaluate(model, val_loader)
        val_f1s.append((val_metrics['f1_sentiment'] + val_metrics['f1_urgency']) / 2)

    return train_losses, val_f1s



In [8]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

df = pd.read_csv("dataset.csv")
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)
train_df.to_csv("train.csv", index=False)
val_df.to_csv("val.csv", index=False)

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
train_dataset = FeedbackDataset("train.csv", tokenizer, rule_based_features)
val_dataset = FeedbackDataset("val.csv", tokenizer, rule_based_features)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
model = MultiTaskBERT(MODEL_NAME, rule_dim=14).to(DEVICE)

optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

loss_fn = {"ce": nn.CrossEntropyLoss(), "bce": nn.BCEWithLogitsLoss()}
train_losses, val_f1s = train_model(model, train_loader, val_loader, optimizer, scheduler, loss_fn, EPOCHS)

# ÁªòÂà∂ËÆ≠ÁªÉÊçüÂ§±‰∏éÈ™åËØÅF1
plt.figure(figsize=(8,4))
plt.plot(train_losses, label='Training Loss', marker='o')
plt.plot(val_f1s, label='Validation F1 (avg)', marker='x')
plt.title("Training & Validation Progress")
plt.xlabel("Epoch")
plt.legend()
plt.grid(True)
plt.show()


Epoch 1/30:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 21/39 [00:27<00:23,  1.29s/it]


AttributeError: 'float' object has no attribute 'lower'