In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from collections import Counter
import pandas as pd
import numpy as np
import re
from underthesea import word_tokenize
import time # Added for timing epochs

# --- 1. Setup & Configuration ---
FILE_PATH = 'combined_cleaned_file.csv'
TEXT_COLUMN = 'Review'
SENTIMENT_COLUMN = 'sentiment'
ASPECT_COLUMN = 'aspect' # New column for aspect

# Hyperparameters (mostly same, adjusted VOCAB_SIZE slightly based on previous run)
VOCAB_SIZE = 3000 # Adjusted based on previous output (2797) - can increase if needed
MAX_LEN = 100
EMBEDDING_DIM = 128
NUM_FILTERS = 64
FILTER_SIZES = [2, 3, 4]
LSTM_HIDDEN_DIM = 128
N_LAYERS = 1
BIDIRECTIONAL = True
DROPOUT = 0.5
BATCH_SIZE = 64
LR = 0.001
EPOCHS = 10 # Can adjust as needed
# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Keep CPU for now, or switch back to CUDA if CPU run was successful
DEVICE = torch.device('cpu') # Forcing CPU as requested before
print(f"Using device: {DEVICE}")

# --- 2. Data Loading and Initial Cleaning ---
try:
    df = pd.read_csv(FILE_PATH)
    print("File loaded successfully.")
    print("Original Columns:", df.columns)

    # Check for required columns
    required_cols = [TEXT_COLUMN, SENTIMENT_COLUMN, ASPECT_COLUMN]
    if not all(col in df.columns for col in required_cols):
        missing = [col for col in required_cols if col not in df.columns]
        raise ValueError(f"Required columns {missing} not found in the file.")

    # Drop rows with NaN in essential columns AFTER checking they exist
    df = df.dropna(subset=required_cols)
    df = df[required_cols].copy() # Keep only necessary columns
    print(f"Data shape after selecting columns and dropping NaN: {df.shape}")

    # (Optional but recommended) Filter empty/whitespace reviews
    df = df[df[TEXT_COLUMN].str.strip().astype(bool)]
    print(f"Data shape after removing empty/whitespace reviews: {df.shape}")


except FileNotFoundError:
    print(f"Error: File not found at {FILE_PATH}")
    exit()
except Exception as e:
    print(f"Error loading or processing file: {e}")
    exit()

# --- 3. Text Preprocessing and Vocabulary Building ---
def preprocess_text(text):
    text = str(text).lower()
    # Add any other cleaning steps if needed (e.g., removing punctuation, numbers)
    # text = re.sub(r'[^\w\s]', '', text) # Example: remove punctuation
    return text

# Tách từ tiếng Việt
def tokenize_vi(text):
    try:
        # Ensure text is a string
        return word_tokenize(str(text), format="text").split()
    except Exception as e:
        # Handle potential errors during tokenization (e.g., empty strings after cleaning)
        # print(f"Warning: Tokenization failed for text: '{text}'. Error: {e}") # Optional warning
        return []

df[TEXT_COLUMN] = df[TEXT_COLUMN].apply(preprocess_text)
df['tokens'] = df[TEXT_COLUMN].apply(tokenize_vi)

# Build Vocabulary (based only on training data after split is better, but simpler for now)
word_counts = Counter(word for tokens in df['tokens'] for word in tokens)
vocab = [word for word, count in word_counts.most_common(VOCAB_SIZE - 2)] # Reserve 0 for PAD, 1 for UNK
word_to_ix = {word: i+2 for i, word in enumerate(vocab)}
word_to_ix['<PAD>'] = 0
word_to_ix['<UNK>'] = 1
actual_vocab_size = len(word_to_ix)
print(f"Actual vocabulary size: {actual_vocab_size}")

# --- 4. Data Encoding (Tokens to Indices, Labels to Indices) ---
def tokens_to_indices(tokens, word_to_ix, max_len):
    indices = [word_to_ix.get(token, word_to_ix['<UNK>']) for token in tokens]
    indices = indices[:max_len] # Truncate
    padded_indices = indices + [word_to_ix['<PAD>']] * (max_len - len(indices)) # Pad
    return padded_indices

df['indices'] = df['tokens'].apply(lambda x: tokens_to_indices(x, word_to_ix, MAX_LEN))

# Encode Sentiment Labels
unique_sentiments = df[SENTIMENT_COLUMN].unique()
sentiment_to_ix = {label: i for i, label in enumerate(unique_sentiments)}
ix_to_sentiment = {i: label for label, i in sentiment_to_ix.items()}
num_sentiment_classes = len(unique_sentiments)
print(f"Sentiment Labels: {sentiment_to_ix}")
print(f"Number of sentiment classes: {num_sentiment_classes}")
df['sentiment_encoded'] = df[SENTIMENT_COLUMN].map(sentiment_to_ix)

# Encode Aspect Labels
unique_aspects = df[ASPECT_COLUMN].unique()
aspect_to_ix = {label: i for i, label in enumerate(unique_aspects)}
ix_to_aspect = {i: label for label, i in aspect_to_ix.items()}
num_aspect_classes = len(unique_aspects)
print(f"Aspect Labels: {aspect_to_ix}")
print(f"Number of aspect classes: {num_aspect_classes}")
df['aspect_encoded'] = df[ASPECT_COLUMN].map(aspect_to_ix)


# --- 5. Data Splitting and DataLoader Creation ---
X = list(df['indices'])
y_sentiment = list(df['sentiment_encoded'])
y_aspect = list(df['aspect_encoded'])

# Split data - stratify by sentiment (or aspect, or combination if more complex needed)
X_train, X_val, y_sentiment_train, y_sentiment_val, y_aspect_train, y_aspect_val = train_test_split(
    X, y_sentiment, y_aspect, test_size=0.2, random_state=42, stratify=y_sentiment # Stratify by sentiment
)

print(f"Train size: {len(X_train)}, Validation size: {len(X_val)}")

# Create Dataset
class AspectSentimentDataset(Dataset):
    def __init__(self, indices, sentiment_labels, aspect_labels):
        self.indices = indices
        self.sentiment_labels = sentiment_labels
        self.aspect_labels = aspect_labels

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        return (
            torch.tensor(self.indices[idx], dtype=torch.long),
            torch.tensor(self.sentiment_labels[idx], dtype=torch.long),
            torch.tensor(self.aspect_labels[idx], dtype=torch.long)
        )

train_dataset = AspectSentimentDataset(X_train, y_sentiment_train, y_aspect_train)
val_dataset = AspectSentimentDataset(X_val, y_sentiment_val, y_aspect_val)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# --- 6. Model Definition (CNN_LSTM for Multi-Task Learning) ---
class CNN_LSTM_MTL(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_filters, filter_sizes,
                 lstm_hidden_dim, num_sentiment_classes, num_aspect_classes, # Two output dims
                 n_layers, bidirectional, dropout, pad_idx):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=embedding_dim,
                      out_channels=num_filters,
                      kernel_size=fs,
                      padding=(fs - 1) // 2) # Maintain length approx.
            for fs in filter_sizes
        ])

        lstm_input_size = num_filters # Input to LSTM is output of one Conv layer
        self.lstm = nn.LSTM(lstm_input_size,
                            lstm_hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            batch_first=True,
                            dropout=dropout if n_layers > 1 else 0)

        # Calculate input dimension for the fully connected layers
        fc_input_dim = lstm_hidden_dim * 2 if bidirectional else lstm_hidden_dim

        # Output layer for Sentiment Prediction
        self.fc_sentiment = nn.Linear(fc_input_dim, num_sentiment_classes)

        # Output layer for Aspect Prediction
        self.fc_aspect = nn.Linear(fc_input_dim, num_aspect_classes)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text = [batch size, seq len]
        embedded = self.dropout(self.embedding(text))
        # embedded = [batch size, seq len, emb dim]

        embedded = embedded.permute(0, 2, 1)
        # embedded = [batch size, emb dim, seq len]

        # Apply first convolution layer (as an example, could also combine features)
        conv_out = self.convs[0](embedded)
        # conv_out = [batch size, num filters, seq len]
        conved = torch.relu(conv_out)

        # Prepare for LSTM
        conved = conved.permute(0, 2, 1)
        # conved = [batch size, seq len, num filters]

        # LSTM layer
        lstm_output, (hidden, cell) = self.lstm(conved)
        # lstm_output = [batch size, seq len, hidden dim * num directions]
        # hidden = [num layers * num directions, batch size, hidden dim]

        # Get final hidden state (concat directions if bidirectional)
        if self.lstm.bidirectional:
            # Concatenate the final forward and backward hidden states
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=-1))
        else:
            hidden = self.dropout(hidden[-1,:,:])
        # hidden = [batch size, hidden dim * num directions]

        # --- Multi-Task Outputs ---
        output_sentiment = self.fc_sentiment(hidden) # Output for sentiment
        output_aspect = self.fc_aspect(hidden)       # Output for aspect
        # output_sentiment = [batch size, num_sentiment_classes]
        # output_aspect = [batch size, num_aspect_classes]

        return output_sentiment, output_aspect # Return both outputs

# --- 7. Initialization ---
PAD_IDX = word_to_ix['<PAD>']
model = CNN_LSTM_MTL(
    vocab_size=actual_vocab_size,
    embedding_dim=EMBEDDING_DIM,
    num_filters=NUM_FILTERS,
    filter_sizes=FILTER_SIZES,
    lstm_hidden_dim=LSTM_HIDDEN_DIM,
    num_sentiment_classes=num_sentiment_classes, # Pass sentiment classes
    num_aspect_classes=num_aspect_classes,       # Pass aspect classes
    n_layers=N_LAYERS,
    bidirectional=BIDIRECTIONAL,
    dropout=DROPOUT,
    pad_idx=PAD_IDX
).to(DEVICE)

print(model)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {count_parameters(model):,} trainable parameters')

# --- 8. Training Setup ---
optimizer = optim.Adam(model.parameters(), lr=LR)
# Use separate criteria if needed, but CrossEntropy works for both classification tasks
criterion_sentiment = nn.CrossEntropyLoss().to(DEVICE)
criterion_aspect = nn.CrossEntropyLoss().to(DEVICE)

def calculate_accuracy(preds, y):
    """Calculates accuracy for a single task"""
    top_pred = preds.argmax(1, keepdim=True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

# --- 9. Training and Evaluation Loops (Modified for MTL) ---
def train_epoch(model, iterator, optimizer, criterion_sentiment, criterion_aspect):
    epoch_loss = 0
    epoch_acc_sentiment = 0
    epoch_acc_aspect = 0
    model.train()

    for batch in iterator:
        text, sentiment_labels, aspect_labels = batch # Unpack all three
        text = text.to(DEVICE)
        sentiment_labels = sentiment_labels.to(DEVICE)
        aspect_labels = aspect_labels.to(DEVICE)

        optimizer.zero_grad()

        # Forward pass - returns two outputs
        predictions_sentiment, predictions_aspect = model(text)

        # Calculate losses for each task
        loss_sentiment = criterion_sentiment(predictions_sentiment, sentiment_labels)
        loss_aspect = criterion_aspect(predictions_aspect, aspect_labels)

        # Combine losses (simple sum, can use weighting alpha*loss_sent + (1-alpha)*loss_aspect if needed)
        total_loss = loss_sentiment + loss_aspect

        # Calculate accuracies
        acc_sentiment = calculate_accuracy(predictions_sentiment, sentiment_labels)
        acc_aspect = calculate_accuracy(predictions_aspect, aspect_labels)

        # Backpropagate the combined loss
        total_loss.backward()
        optimizer.step()

        epoch_loss += total_loss.item()
        epoch_acc_sentiment += acc_sentiment.item()
        epoch_acc_aspect += acc_aspect.item()

    return (epoch_loss / len(iterator),
            epoch_acc_sentiment / len(iterator),
            epoch_acc_aspect / len(iterator))

def evaluate_epoch(model, iterator, criterion_sentiment, criterion_aspect):
    epoch_loss = 0
    epoch_acc_sentiment = 0
    epoch_acc_aspect = 0
    model.eval()

    with torch.no_grad():
        for batch in iterator:
            text, sentiment_labels, aspect_labels = batch
            text = text.to(DEVICE)
            sentiment_labels = sentiment_labels.to(DEVICE)
            aspect_labels = aspect_labels.to(DEVICE)

            predictions_sentiment, predictions_aspect = model(text)

            loss_sentiment = criterion_sentiment(predictions_sentiment, sentiment_labels)
            loss_aspect = criterion_aspect(predictions_aspect, aspect_labels)
            total_loss = loss_sentiment + loss_aspect

            acc_sentiment = calculate_accuracy(predictions_sentiment, sentiment_labels)
            acc_aspect = calculate_accuracy(predictions_aspect, aspect_labels)

            epoch_loss += total_loss.item()
            epoch_acc_sentiment += acc_sentiment.item()
            epoch_acc_aspect += acc_aspect.item()

    return (epoch_loss / len(iterator),
            epoch_acc_sentiment / len(iterator),
            epoch_acc_aspect / len(iterator))

# --- 10. Main Training Loop ---
best_valid_loss = float('inf')

print("\nStarting Training...")
for epoch in range(EPOCHS):
    start_time = time.time()

    train_loss, train_acc_sent, train_acc_aspect = train_epoch(
        model, train_loader, optimizer, criterion_sentiment, criterion_aspect
    )
    valid_loss, valid_acc_sent, valid_acc_aspect = evaluate_epoch(
        model, val_loader, criterion_sentiment, criterion_aspect
    )

    end_time = time.time()
    epoch_mins, epoch_secs = divmod(end_time - start_time, 60)

    # Save the best model based on combined validation loss
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'cnn_lstm_mtl_model.pt')

    print(f'Epoch: {epoch+1:02} | Time: {int(epoch_mins)}m {int(epoch_secs)}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc Sent: {train_acc_sent*100:.2f}% | Train Acc Aspect: {train_acc_aspect*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc Sent: {valid_acc_sent*100:.2f}% |  Val. Acc Aspect: {valid_acc_aspect*100:.2f}%')

print("Training finished.")

# --- 11. Evaluation and Prediction (Modified for MTL) ---
# Load the best model
try:
    model.load_state_dict(torch.load('cnn_lstm_mtl_model.pt', map_location=DEVICE)) # Ensure map_location if loading between CPU/GPU
    print("\nLoaded best model weights.")
except FileNotFoundError:
    print("\nWarning: Model file 'cnn_lstm_mtl_model.pt' not found. Using model with weights from last epoch.")
except Exception as e:
     print(f"\nWarning: Could not load model weights. Error: {e}. Using model with weights from last epoch.")


# Final evaluation
final_val_loss, final_val_acc_sent, final_val_acc_aspect = evaluate_epoch(
    model, val_loader, criterion_sentiment, criterion_aspect
)
print(f'\nFinal Validation Loss: {final_val_loss:.3f}')
print(f'Final Validation Sentiment Accuracy: {final_val_acc_sent*100:.2f}%')
print(f'Final Validation Aspect Accuracy: {final_val_acc_aspect*100:.2f}%')

# Prediction function for both tasks
def predict_aspect_sentiment(sentence, model, word_to_ix, max_len, device, ix_to_sentiment, ix_to_aspect):
    model.eval()
    processed_text = preprocess_text(sentence)
    tokens = tokenize_vi(processed_text)
    indexed = tokens_to_indices(tokens, word_to_ix, max_len)
    tensor = torch.LongTensor(indexed).unsqueeze(0).to(device)

    with torch.no_grad():
        prediction_sentiment, prediction_aspect = model(tensor) # Get both predictions

    probs_sentiment = torch.softmax(prediction_sentiment, dim=1)
    probs_aspect = torch.softmax(prediction_aspect, dim=1)

    # Sentiment Prediction
    predicted_class_sentiment = prediction_sentiment.argmax(dim=1).item()
    predicted_label_sentiment = ix_to_sentiment[predicted_class_sentiment]
    confidence_sentiment = probs_sentiment.max().item()

    # Aspect Prediction
    predicted_class_aspect = prediction_aspect.argmax(dim=1).item()
    predicted_label_aspect = ix_to_aspect[predicted_class_aspect]
    confidence_aspect = probs_aspect.max().item()

    return predicted_label_sentiment, confidence_sentiment, predicted_label_aspect, confidence_aspect

# Example Predictions
print("\n--- Example Predictions ---")
test_sentences = [
    "Giảng viên dạy rất hay và nhiệt tình.",
    "Nội dung môn học quá cũ, không cập nhật.",
    "Tài liệu học tập đầy đủ.",
    "Phòng học quá nóng và thiếu ánh sáng.",
    "Thầy cô hỗ trợ sinh viên rất tốt ngoài giờ học."
]

for sentence in test_sentences:
    pred_sent, conf_sent, pred_aspect, conf_aspect = predict_aspect_sentiment(
        sentence, model, word_to_ix, MAX_LEN, DEVICE, ix_to_sentiment, ix_to_aspect
    )
    print(f'\nSentence:  "{sentence}"')
    print(f'Predicted: Aspect = {pred_aspect} (Conf: {conf_aspect:.4f}), Sentiment = {pred_sent} (Conf: {conf_sent:.4f})')

Using device: cpu
File loaded successfully.
Original Columns: Index(['id', 'Review', 'Sentence Component', 'aspect_text', 'aspect',
       'sentiment_text', 'sentiment'],
      dtype='object')
Data shape after selecting columns and dropping NaN: (7778, 3)
Data shape after removing empty/whitespace reviews: (7778, 3)
Actual vocabulary size: 2797
Sentiment Labels: {'Negative': 0, 'Positive': 1, 'Neutral': 2}
Number of sentiment classes: 3
Aspect Labels: {'Teaching quality': 0, 'Workload': 1, 'General review': 2, 'Course information': 3, 'Support from lecturers': 4, 'Test and evaluation': 5, 'Learning environment': 6, 'Organization and management': 7}
Number of aspect classes: 8
Train size: 6222, Validation size: 1556
CNN_LSTM_MTL(
  (embedding): Embedding(2797, 128, padding_idx=0)
  (convs): ModuleList(
    (0): Conv1d(128, 64, kernel_size=(2,), stride=(1,))
    (1): Conv1d(128, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (2): Conv1d(128, 64, kernel_size=(4,), stride=(1,), paddi

KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
# Added imports for evaluation metrics
from sklearn.metrics import classification_report, confusion_matrix
from collections import Counter
import pandas as pd
import numpy as np
import re
from underthesea import word_tokenize
import time

# --- 1. Setup & Configuration ---
FILE_PATH = 'combined_cleaned_file.csv'
TEXT_COLUMN = 'Review'
SENTIMENT_COLUMN = 'sentiment'
ASPECT_COLUMN = 'aspect'

# Hyperparameters
VOCAB_SIZE = 3000
MAX_LEN = 100
EMBEDDING_DIM = 128
NUM_FILTERS = 64
FILTER_SIZES = [2, 3, 4]
LSTM_HIDDEN_DIM = 128
N_LAYERS = 1
BIDIRECTIONAL = True
DROPOUT = 0.5
BATCH_SIZE = 64
LR = 0.001
EPOCHS = 20 # Increased epochs slightly, as early stopping might trigger
PATIENCE = 3 # Early stopping patience: stop after 3 epochs with no improvement
DEVICE = torch.device('cpu')
print(f"Using device: {DEVICE}")

# --- 2. Data Loading and Initial Cleaning ---
try:
    df = pd.read_csv(FILE_PATH)
    print("File loaded successfully.")
    required_cols = [TEXT_COLUMN, SENTIMENT_COLUMN, ASPECT_COLUMN]
    if not all(col in df.columns for col in required_cols):
        missing = [col for col in required_cols if col not in df.columns]
        raise ValueError(f"Required columns {missing} not found in the file.")
    df = df.dropna(subset=required_cols)
    df = df[required_cols].copy()
    df = df[df[TEXT_COLUMN].str.strip().astype(bool)]
    print(f"Data shape after loading and initial cleaning: {df.shape}")
except FileNotFoundError:
    print(f"Error: File not found at {FILE_PATH}")
    exit()
except Exception as e:
    print(f"Error loading or processing file: {e}")
    exit()

# --- 3. Text Preprocessing and Vocabulary Building ---
def preprocess_text(text):
    return str(text).lower()

def tokenize_vi(text):
    try:
        return word_tokenize(str(text), format="text").split()
    except Exception:
        return []

df[TEXT_COLUMN] = df[TEXT_COLUMN].apply(preprocess_text)
df['tokens'] = df[TEXT_COLUMN].apply(tokenize_vi)

word_counts = Counter(word for tokens in df['tokens'] for word in tokens)
vocab = [word for word, count in word_counts.most_common(VOCAB_SIZE - 2)]
word_to_ix = {word: i+2 for i, word in enumerate(vocab)}
word_to_ix['<PAD>'] = 0
word_to_ix['<UNK>'] = 1
actual_vocab_size = len(word_to_ix)
print(f"Actual vocabulary size: {actual_vocab_size}")

# --- 4. Data Encoding ---
def tokens_to_indices(tokens, word_to_ix, max_len):
    indices = [word_to_ix.get(token, word_to_ix['<UNK>']) for token in tokens]
    indices = indices[:max_len]
    return indices + [word_to_ix['<PAD>']] * (max_len - len(indices))

df['indices'] = df['tokens'].apply(lambda x: tokens_to_indices(x, word_to_ix, MAX_LEN))

unique_sentiments = sorted(df[SENTIMENT_COLUMN].unique()) # Sort for consistent mapping
sentiment_to_ix = {label: i for i, label in enumerate(unique_sentiments)}
ix_to_sentiment = {i: label for label, i in sentiment_to_ix.items()}
num_sentiment_classes = len(unique_sentiments)
print(f"Sentiment Labels: {sentiment_to_ix}")

unique_aspects = sorted(df[ASPECT_COLUMN].unique()) # Sort for consistent mapping
aspect_to_ix = {label: i for i, label in enumerate(unique_aspects)}
ix_to_aspect = {i: label for label, i in aspect_to_ix.items()}
num_aspect_classes = len(unique_aspects)
print(f"Aspect Labels: {aspect_to_ix}")

df['sentiment_encoded'] = df[SENTIMENT_COLUMN].map(sentiment_to_ix)
df['aspect_encoded'] = df[ASPECT_COLUMN].map(aspect_to_ix)

# --- 5. Data Splitting (Train/Validation/Test) ---
X = list(df['indices'])
y_sentiment = list(df['sentiment_encoded'])
y_aspect = list(df['aspect_encoded'])

# Combine labels for stratification during the first split
y_combined_for_stratify = [f"{s}_{a}" for s, a in zip(y_sentiment, y_aspect)]

# First split: Train+Validation (80%) vs Test (20%)
X_temp, X_test, y_sentiment_temp, y_sentiment_test, y_aspect_temp, y_aspect_test = train_test_split(
    X, y_sentiment, y_aspect, test_size=0.20, random_state=42, stratify=y_combined_for_stratify
)

# Combine labels again for stratification during the second split
y_combined_temp_for_stratify = [f"{s}_{a}" for s, a in zip(y_sentiment_temp, y_aspect_temp)]

# Second split: Train (80% of temp -> 64% of total) vs Validation (20% of temp -> 16% of total)
# test_size = 0.20 means 20% of the temp set (which is 80% of total) goes to validation -> 0.20 * 0.80 = 0.16
X_train, X_val, y_sentiment_train, y_sentiment_val, y_aspect_train, y_aspect_val = train_test_split(
    X_temp, y_sentiment_temp, y_aspect_temp, test_size=0.20, random_state=42, stratify=y_combined_temp_for_stratify
)

print(f"Train size: {len(X_train)}")
print(f"Validation size: {len(X_val)}")
print(f"Test size: {len(X_test)}")

# --- 6. Dataset and DataLoader Creation ---
class AspectSentimentDataset(Dataset):
    def __init__(self, indices, sentiment_labels, aspect_labels):
        self.indices = indices
        self.sentiment_labels = sentiment_labels
        self.aspect_labels = aspect_labels
    def __len__(self):
        return len(self.indices)
    def __getitem__(self, idx):
        return (torch.tensor(self.indices[idx], dtype=torch.long),
                torch.tensor(self.sentiment_labels[idx], dtype=torch.long),
                torch.tensor(self.aspect_labels[idx], dtype=torch.long))

train_dataset = AspectSentimentDataset(X_train, y_sentiment_train, y_aspect_train)
val_dataset = AspectSentimentDataset(X_val, y_sentiment_val, y_aspect_val)
test_dataset = AspectSentimentDataset(X_test, y_sentiment_test, y_aspect_test) # Test Dataset

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False) # Test DataLoader

# --- 7. Model Definition (CNN_LSTM_MTL) ---
# (Model definition remains the same as before)
class CNN_LSTM_MTL(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_filters, filter_sizes,
                 lstm_hidden_dim, num_sentiment_classes, num_aspect_classes,
                 n_layers, bidirectional, dropout, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=embedding_dim, out_channels=num_filters,
                      kernel_size=fs, padding=(fs - 1) // 2)
            for fs in filter_sizes
        ])
        lstm_input_size = num_filters
        self.lstm = nn.LSTM(lstm_input_size, lstm_hidden_dim, num_layers=n_layers,
                            bidirectional=bidirectional, batch_first=True,
                            dropout=dropout if n_layers > 1 else 0)
        fc_input_dim = lstm_hidden_dim * 2 if bidirectional else lstm_hidden_dim
        self.fc_sentiment = nn.Linear(fc_input_dim, num_sentiment_classes)
        self.fc_aspect = nn.Linear(fc_input_dim, num_aspect_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        embedded = self.dropout(self.embedding(text))
        embedded = embedded.permute(0, 2, 1)
        # Using only the first conv layer's output for simplicity
        conved = torch.relu(self.convs[0](embedded))
        conved = conved.permute(0, 2, 1)
        lstm_output, (hidden, cell) = self.lstm(conved)
        if self.lstm.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=-1))
        else:
            hidden = self.dropout(hidden[-1,:,:])
        output_sentiment = self.fc_sentiment(hidden)
        output_aspect = self.fc_aspect(hidden)
        return output_sentiment, output_aspect

# --- 8. Initialization ---
PAD_IDX = word_to_ix['<PAD>']
model = CNN_LSTM_MTL(
    vocab_size=actual_vocab_size, embedding_dim=EMBEDDING_DIM, num_filters=NUM_FILTERS,
    filter_sizes=FILTER_SIZES, lstm_hidden_dim=LSTM_HIDDEN_DIM,
    num_sentiment_classes=num_sentiment_classes, num_aspect_classes=num_aspect_classes,
    n_layers=N_LAYERS, bidirectional=BIDIRECTIONAL, dropout=DROPOUT, pad_idx=PAD_IDX
).to(DEVICE)

print(model)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {count_parameters(model):,} trainable parameters')

# --- 9. Training Setup ---
optimizer = optim.Adam(model.parameters(), lr=LR)
criterion_sentiment = nn.CrossEntropyLoss().to(DEVICE)
criterion_aspect = nn.CrossEntropyLoss().to(DEVICE)

def calculate_accuracy(preds, y):
    top_pred = preds.argmax(1, keepdim=True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    return correct.float() / y.shape[0]

# --- 10. Training and Evaluation Loops (Modified for Early Stopping) ---
# (train_epoch remains the same as before)
def train_epoch(model, iterator, optimizer, criterion_sentiment, criterion_aspect):
    epoch_loss = 0
    epoch_acc_sentiment = 0
    epoch_acc_aspect = 0
    model.train()
    for batch in iterator:
        text, sentiment_labels, aspect_labels = batch
        text, sentiment_labels, aspect_labels = text.to(DEVICE), sentiment_labels.to(DEVICE), aspect_labels.to(DEVICE)
        optimizer.zero_grad()
        predictions_sentiment, predictions_aspect = model(text)
        loss_sentiment = criterion_sentiment(predictions_sentiment, sentiment_labels)
        loss_aspect = criterion_aspect(predictions_aspect, aspect_labels)
        total_loss = loss_sentiment + loss_aspect
        acc_sentiment = calculate_accuracy(predictions_sentiment, sentiment_labels)
        acc_aspect = calculate_accuracy(predictions_aspect, aspect_labels)
        total_loss.backward()
        optimizer.step()
        epoch_loss += total_loss.item()
        epoch_acc_sentiment += acc_sentiment.item()
        epoch_acc_aspect += acc_aspect.item()
    return (epoch_loss / len(iterator), epoch_acc_sentiment / len(iterator), epoch_acc_aspect / len(iterator))

# (evaluate_epoch remains the same as before - used for validation)
def evaluate_epoch(model, iterator, criterion_sentiment, criterion_aspect):
    epoch_loss = 0
    epoch_acc_sentiment = 0
    epoch_acc_aspect = 0
    model.eval()
    with torch.no_grad():
        for batch in iterator:
            text, sentiment_labels, aspect_labels = batch
            text, sentiment_labels, aspect_labels = text.to(DEVICE), sentiment_labels.to(DEVICE), aspect_labels.to(DEVICE)
            predictions_sentiment, predictions_aspect = model(text)
            loss_sentiment = criterion_sentiment(predictions_sentiment, sentiment_labels)
            loss_aspect = criterion_aspect(predictions_aspect, aspect_labels)
            total_loss = loss_sentiment + loss_aspect
            acc_sentiment = calculate_accuracy(predictions_sentiment, sentiment_labels)
            acc_aspect = calculate_accuracy(predictions_aspect, aspect_labels)
            epoch_loss += total_loss.item()
            epoch_acc_sentiment += acc_sentiment.item()
            epoch_acc_aspect += acc_aspect.item()
    return (epoch_loss / len(iterator), epoch_acc_sentiment / len(iterator), epoch_acc_aspect / len(iterator))

# --- 11. Main Training Loop with Early Stopping ---
best_valid_loss = float('inf')
epochs_no_improve = 0 # Counter for early stopping
saved_model_path = 'cnn_lstm_mtl_best_model.pt' # Path to save best model

print("\nStarting Training...")
for epoch in range(EPOCHS):
    start_time = time.time()

    train_loss, train_acc_sent, train_acc_aspect = train_epoch(
        model, train_loader, optimizer, criterion_sentiment, criterion_aspect
    )
    valid_loss, valid_acc_sent, valid_acc_aspect = evaluate_epoch(
        model, val_loader, criterion_sentiment, criterion_aspect
    )

    end_time = time.time()
    epoch_mins, epoch_secs = divmod(end_time - start_time, 60)

    print(f'Epoch: {epoch+1:02} | Time: {int(epoch_mins)}m {int(epoch_secs)}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc Sent: {train_acc_sent*100:.2f}% | Train Acc Aspect: {train_acc_aspect*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc Sent: {valid_acc_sent*100:.2f}% |  Val. Acc Aspect: {valid_acc_aspect*100:.2f}%')

    # --- Early Stopping Logic ---
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), saved_model_path)
        epochs_no_improve = 0 # Reset counter
        print(f'\tValidation loss decreased ({best_valid_loss:.3f}). Saving model...')
    else:
        epochs_no_improve += 1
        print(f'\tValidation loss did not decrease. ({epochs_no_improve}/{PATIENCE})')
        if epochs_no_improve >= PATIENCE:
            print(f'\nEarly stopping triggered after {epoch + 1} epochs.')
            break # Exit training loop

if epochs_no_improve < PATIENCE:
    print("\nTraining finished after completing all epochs.")

# --- 12. Test Set Evaluation ---
print(f"\nLoading best model from '{saved_model_path}' for test evaluation...")
try:
    model.load_state_dict(torch.load(saved_model_path, map_location=DEVICE))
except FileNotFoundError:
    print(f"Error: Best model file '{saved_model_path}' not found. Evaluation cannot proceed.")
    exit()
except Exception as e:
     print(f"Error loading model weights: {e}. Evaluation cannot proceed.")
     exit()


def evaluate_on_test_set(model, iterator, criterion_sentiment, criterion_aspect):
    """Evaluates the model on the test set and collects predictions."""
    test_loss = 0
    test_acc_sentiment = 0
    test_acc_aspect = 0

    all_predictions_sentiment = []
    all_predictions_aspect = []
    all_true_sentiment = []
    all_true_aspect = []

    model.eval()
    with torch.no_grad():
        for batch in iterator:
            text, sentiment_labels, aspect_labels = batch
            text, sentiment_labels, aspect_labels = text.to(DEVICE), sentiment_labels.to(DEVICE), aspect_labels.to(DEVICE)

            predictions_sentiment, predictions_aspect = model(text)

            loss_sentiment = criterion_sentiment(predictions_sentiment, sentiment_labels)
            loss_aspect = criterion_aspect(predictions_aspect, aspect_labels)
            total_loss = loss_sentiment + loss_aspect

            acc_sentiment = calculate_accuracy(predictions_sentiment, sentiment_labels)
            acc_aspect = calculate_accuracy(predictions_aspect, aspect_labels)

            test_loss += total_loss.item()
            test_acc_sentiment += acc_sentiment.item()
            test_acc_aspect += acc_aspect.item()

            # Store predictions and true labels for detailed metrics
            all_predictions_sentiment.extend(predictions_sentiment.argmax(1).cpu().numpy())
            all_predictions_aspect.extend(predictions_aspect.argmax(1).cpu().numpy())
            all_true_sentiment.extend(sentiment_labels.cpu().numpy())
            all_true_aspect.extend(aspect_labels.cpu().numpy())

    avg_test_loss = test_loss / len(iterator)
    avg_test_acc_sentiment = test_acc_sentiment / len(iterator)
    avg_test_acc_aspect = test_acc_aspect / len(iterator)

    return (avg_test_loss, avg_test_acc_sentiment, avg_test_acc_aspect,
            all_true_sentiment, all_predictions_sentiment,
            all_true_aspect, all_predictions_aspect)

# Run evaluation on the test set
(test_loss, test_acc_sent, test_acc_aspect,
 true_sent, pred_sent,
 true_aspect, pred_aspect) = evaluate_on_test_set(model, test_loader, criterion_sentiment, criterion_aspect)

print(f'\n--- Test Set Evaluation Results ---')
print(f'Test Loss: {test_loss:.3f}')
print(f'Test Sentiment Accuracy: {test_acc_sent*100:.2f}%')
print(f'Test Aspect Accuracy: {test_acc_aspect*100:.2f}%')

# --- 13. Detailed Test Metrics (Classification Report & Confusion Matrix) ---

# Get label names from the mappings (ensure they are sorted correctly)
sentiment_label_names = [ix_to_sentiment[i] for i in range(num_sentiment_classes)]
aspect_label_names = [ix_to_aspect[i] for i in range(num_aspect_classes)]

print("\n--- Sentiment Classification Report (Test Set) ---")
print(classification_report(true_sent, pred_sent, target_names=sentiment_label_names, zero_division=0))

print("\n--- Sentiment Confusion Matrix (Test Set) ---")
cm_sentiment = confusion_matrix(true_sent, pred_sent)
print("Labels:", sentiment_label_names)
print(cm_sentiment)

print("\n--- Aspect Classification Report (Test Set) ---")
print(classification_report(true_aspect, pred_aspect, target_names=aspect_label_names, zero_division=0))

print("\n--- Aspect Confusion Matrix (Test Set) ---")
cm_aspect = confusion_matrix(true_aspect, pred_aspect)
# Try to make aspect CM labels clearer if many classes
if len(aspect_label_names) <= 10:
    print("Labels:", aspect_label_names)
    print(cm_aspect)
else:
     print("(Aspect labels omitted due to large number)")
     print(cm_aspect)


# --- 14. Example Predictions (Using the best loaded model) ---
# (Prediction function remains the same as before)
def predict_aspect_sentiment(sentence, model, word_to_ix, max_len, device, ix_to_sentiment, ix_to_aspect):
    model.eval()
    processed_text = preprocess_text(sentence)
    tokens = tokenize_vi(processed_text)
    indexed = tokens_to_indices(tokens, word_to_ix, max_len)
    tensor = torch.LongTensor(indexed).unsqueeze(0).to(device)
    with torch.no_grad():
        prediction_sentiment, prediction_aspect = model(tensor)
    probs_sentiment = torch.softmax(prediction_sentiment, dim=1)
    probs_aspect = torch.softmax(prediction_aspect, dim=1)
    predicted_class_sentiment = prediction_sentiment.argmax(dim=1).item()
    predicted_label_sentiment = ix_to_sentiment[predicted_class_sentiment]
    confidence_sentiment = probs_sentiment.max().item()
    predicted_class_aspect = prediction_aspect.argmax(dim=1).item()
    predicted_label_aspect = ix_to_aspect[predicted_class_aspect]
    confidence_aspect = probs_aspect.max().item()
    return predicted_label_sentiment, confidence_sentiment, predicted_label_aspect, confidence_aspect

print("\n--- Example Predictions (using best model) ---")
test_sentences = [
    "Giảng viên dạy rất hay và nhiệt tình.",
    "Nội dung môn học quá cũ, không cập nhật.",
    "Tài liệu học tập đầy đủ.",
    "Phòng học quá nóng và thiếu ánh sáng.",
    "Thầy cô hỗ trợ sinh viên rất tốt ngoài giờ học."
]
for sentence in test_sentences:
    pred_sent, conf_sent, pred_aspect, conf_aspect = predict_aspect_sentiment(
        sentence, model, word_to_ix, MAX_LEN, DEVICE, ix_to_sentiment, ix_to_aspect
    )
    print(f'\nSentence:  "{sentence}"')
    print(f'Predicted: Aspect = {pred_aspect} (Conf: {conf_aspect:.4f}), Sentiment = {pred_sent} (Conf: {conf_sent:.4f})')

Using device: cpu
File loaded successfully.
Data shape after loading and initial cleaning: (7778, 3)
Actual vocabulary size: 2797
Sentiment Labels: {'Negative': 0, 'Neutral': 1, 'Positive': 2}
Aspect Labels: {'Course information': 0, 'General review': 1, 'Learning environment': 2, 'Organization and management': 3, 'Support from lecturers': 4, 'Teaching quality': 5, 'Test and evaluation': 6, 'Workload': 7}
Train size: 4977
Validation size: 1245
Test size: 1556
CNN_LSTM_MTL(
  (embedding): Embedding(2797, 128, padding_idx=0)
  (convs): ModuleList(
    (0): Conv1d(128, 64, kernel_size=(2,), stride=(1,))
    (1): Conv1d(128, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (2): Conv1d(128, 64, kernel_size=(4,), stride=(1,), padding=(1,))
  )
  (lstm): LSTM(64, 128, batch_first=True, bidirectional=True)
  (fc_sentiment): Linear(in_features=256, out_features=3, bias=True)
  (fc_aspect): Linear(in_features=256, out_features=8, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)
The mo