In [3]:
import os

# Directory holding the three VSMEC CSV splits. Set the VSMEC_DATA_DIR
# environment variable to run the notebook on another machine; the default
# keeps the original location.
DATA_DIR = os.environ.get("VSMEC_DATA_DIR", "/home/manh/code/nlp/src/data/VSMEC")

train_path = f"{DATA_DIR}/train_VSMEC.csv"
test_path = f"{DATA_DIR}/test_VSMEC.csv"
val_path = f"{DATA_DIR}/val_VSMEC.csv"

In [4]:
from pyspark.sql import SparkSession

# Obtain the Spark session used by every DataFrame operation in this notebook.
spark = SparkSession.builder.appName("mo_hinh_toan").getOrCreate()

Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/11/29 16:39:41 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [None]:
# One set of CSV options shared by all three splits, so a quoted or
# multi-line sentence is parsed the same way in train, test and validation.
csv_options = dict(
    header=True,
    inferSchema=True,   # infer column types
    multiLine=True,     # a quoted cell may span several lines
    escape='"',         # quote character used as the escape inside quoted cells
    quote='"',          # quote character delimiting a cell
)

train_df = spark.read.csv(train_path, **csv_options)
test_df = spark.read.csv(test_path, **csv_options)
val_df = spark.read.csv(val_path, **csv_options)

In [None]:
# Show the number of rows per emotion in the test split.
test_df.groupby("Emotion").count().show()

In [None]:
import matplotlib.pyplot as plt

emotions = ['Anger', 'Surprise', 'Enjoyment', 'Other', 'Fear', 'Sadness', 'Disgust']
# Count the rows per emotion in the training split instead of hard-coding the
# numbers, so the chart cannot drift out of sync with the data.
emotion_counts = {
    row["Emotion"]: row["count"]
    for row in train_df.groupBy("Emotion").count().collect()
}
counts = [emotion_counts.get(emotion, 0) for emotion in emotions]
plt.figure(figsize=(8,5), facecolor='white')  # white background
plt.bar(emotions, counts, color=['#FF6B6B','#FFA500','#FFD93D','#C0C0C0','#A569BD','#3498DB','#2ECC71'])
plt.title('Phân phối cảm xúc trong VSMEC', fontsize=14)
plt.ylabel('Số lượng', fontsize=12)
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)  # horizontal grid makes the bar heights easier to read
plt.show()


In [None]:
# Peek at the first five parsed training rows.
train_df.head(5)

In [7]:
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType, StringType, IntegerType
from underthesea import word_tokenize

def tokenize(text):
    """Split a sentence into tokens with underthesea's ``word_tokenize``.

    The tokenizer is asked for its ``"text"`` format (a single string), which
    is then split on whitespace.

    Args:
        text: The sentence to tokenize. May be ``None`` when the source cell
            is null.

    Returns:
        A list of token strings; an empty list for ``None`` or empty input.
    """
    # Null cells reach a Spark UDF as None; return no tokens instead of
    # letting the tokenizer raise and abort the whole job.
    if not text:
        return []
    return word_tokenize(text, format="text").split()

# Wrap the tokenizer as a Spark UDF returning array<string>, then attach a
# "tokens" column to each split.
tokenize_udf = udf(tokenize, ArrayType(StringType()))
train_df = train_df.withColumn("tokens", tokenize_udf(train_df["Sentence"]))
test_df = test_df.withColumn("tokens", tokenize_udf(test_df["Sentence"]))
val_df = val_df.withColumn("tokens", tokenize_udf(val_df["Sentence"]))

In [None]:
# Peek at the training rows again, now including the "tokens" column.
train_df.head(5)

In [8]:
from pyspark.ml import pipeline
from pyspark.ml.feature import StringIndexer

# Learn the Emotion -> numeric "label" mapping from the training split only;
# `indexer` ends up holding the fitted model.
indexer = StringIndexer(inputCol="Emotion", outputCol="label").fit(train_df)


In [9]:
# Add the numeric "label" column to every split using the mapping fitted on train.
# NOTE(review): each line overwrites its own input DataFrame, so this cell is
# presumably not safe to run twice without re-running the cells above.
train_df = indexer.transform(train_df)
test_df = indexer.transform(test_df)
val_df = indexer.transform(val_df)

# TF-IDF + naive bayes

In [None]:
# remove stop word for tf-idf

In [None]:
from pyspark.ml.feature import  CountVectorizer , IDF

# Stage 1: term counts over "tokens" (vocabSize=5000, minDF=5) written to "spare_vec".
# Stage 2: IDF re-weighting of those counts (minDocFreq=5) written to "idf".
cv = CountVectorizer(inputCol="tokens", outputCol="spare_vec" , vocabSize=5000, minDF=5 )
idf = IDF(inputCol="spare_vec" , outputCol="idf" , minDocFreq=5)

In [None]:
from pyspark.ml import Pipeline
# Chain the two stages and fit them on the training split only.
# NOTE(review): the name `pipeline` rebinds the `pipeline` module imported
# from pyspark.ml in an earlier cell.
pipeline = Pipeline(stages= [cv, idf])
pipelineModel = pipeline.fit(train_df)

In [None]:
# Apply the fitted count + IDF pipeline to each split.
train_idf = pipelineModel.transform(train_df)
test_idf = pipelineModel.transform(test_df)
val_idf = pipelineModel.transform(val_df)

train naive bayes

In [None]:
from pyspark.ml.classification import NaiveBayes
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Multinomial Naive Bayes on the "idf" features, trained on the training split.
nb = NaiveBayes(featuresCol="idf" , labelCol="label" , modelType="multinomial")
nb_model = nb.fit(train_idf)

evaluate model

val dataset evaluation

In [None]:
from sklearn.metrics import f1_score
# Score the validation split with the trained Naive Bayes model.
predictions = nb_model.transform(val_idf)
evaluator = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction", metricName="accuracy")
accuracy = evaluator.evaluate(predictions)
print(f"Accuracy: {accuracy:.4f}")
# Macro F1 via scikit-learn. f1_score expects (y_true, y_pred) in that order,
# matching how the test-set cell calls it.
pdf = predictions.select("label", "prediction").toPandas()
macro_f1 = f1_score(pdf['label'], pdf['prediction'], average="macro")
print("macro_f1: "  ,   macro_f1 )

test evaluation

In [None]:
import numpy as np
tes_pred = nb_model.transform(test_idf)
# Collect label and prediction together, from the same DataFrame and in a
# single action, so the two arrays are row-aligned by construction.
test_rows = tes_pred.select("label", "prediction").collect()
y_pred = np.array([row["prediction"] for row in test_rows]).astype(int)
y_true = np.array([row["label"] for row in test_rows]).astype(int)


In [None]:
# Tính Accuracy
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import numpy as np

# Accuracy of the predictions against the true labels.
accuracy = accuracy_score(y_true, y_pred)

# F1 score with 'macro' averaging, so every class weighs the same
f1_macro = f1_score(y_true, y_pred, average='macro')

print(f"Accuracy cuối cùng: {accuracy:.4f}")
print(f"F1-Macro Score: {f1_macro:.4f}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Class names in label-index order: entry i of the fitted StringIndexer's
# `labels` is the emotion encoded as label i. A hand-written list would only
# be correct if it matched that order exactly.
class_names = list(indexer.labels)

# 1. Compute the confusion matrix. Passing `labels` keeps it square with one
#    row/column per class even when a class is absent from y_true and y_pred.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))

# 2. Figure size (large enough to read the class names)
plt.figure(figsize=(10, 8))

# 3. Draw the heatmap
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names
)

# 4. Title and axis labels
plt.title('Confusion Matrix: Phân loại Cảm xúc (Emotion Classification)')
plt.ylabel('Nhãn Thực tế (True Emotion)')
plt.xlabel('Nhãn Dự đoán (Predicted Emotion)')
plt.show()

# word2vec + dense

In [None]:
# Show two training rows before building the Word2Vec features.
train_df.show(2)

train model embedding word2vec

In [None]:
from gensim.models import Word2Vec
# Bring every tokenised training sentence to the driver and train a
# 100-dimensional Word2Vec model on them.
sentences = [row.tokens for row in train_df.select("tokens").collect()]
vocab = [[str(token) for token in sentence] for sentence in sentences]
w2v = Word2Vec(sentences=sentences, vector_size=100, window=5, min_count=1)

calculate average vector of sentences

In [None]:
from pyspark.sql.types import FloatType
import numpy as np
def embedding_ave(sentence):
    """Average the Word2Vec vectors of the tokens in one sentence.

    Tokens missing from the model's vocabulary contribute a zero vector, so
    they still count towards the mean.

    Args:
        sentence: List of token strings; may be empty or ``None``.

    Returns:
        A list of ``w2v.vector_size`` Python floats. An empty or ``None``
        sentence yields an all-zero vector.
    """
    dim = w2v.vector_size
    # np.mean over an empty list returns a scalar NaN, which cannot be
    # iterated into a list; give empty sentences a zero vector instead.
    if not sentence:
        return [0.0] * dim
    vecs = [
        w2v.wv[word] if word in w2v.wv.key_to_index else np.zeros(dim)
        for word in sentence
    ]
    return [float(value) for value in np.mean(vecs, axis=0)]
# Expose the sentence averager as a Spark UDF (array<float>) and add a
# "vector_avg" column to each split.
udf_emb = udf(embedding_ave, ArrayType(FloatType()))
train_df = train_df.withColumn("vector_avg", udf_emb(train_df["tokens"]))
test_df = test_df.withColumn("vector_avg", udf_emb(test_df["tokens"]))
val_df = val_df.withColumn('vector_avg', udf_emb(val_df["tokens"]))

In [None]:
# Show two training rows, now including the "vector_avg" column.
train_df.show(2)

In [None]:
import torch  # needed here: this class is instantiated before any later cell imports torch
from torch.utils.data import Dataset , DataLoader
class DenseVectorDataset(Dataset):
    """In-memory dataset pairing one fixed-size vector with one class label.

    Args:
        label: Sequence of numeric class labels, one per sample.
        vector: Sequence of equal-length numeric vectors, one per sample.
    """
    def __init__(self , label , vector) :
        # Labels become int64 and vectors float32.
        self.labels = torch.from_numpy(np.array(label)).long()
        self.vectors = torch.from_numpy(np.array(vector)).float()
    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)
    def __getitem__(self , idx):
        """Return ``(vector, label)`` for the sample at ``idx``."""
        return  self.vectors[idx] , self.labels[idx]

train loader

In [None]:
# Collect the labels and averaged vectors of the training split to the driver
# and wrap them in a shuffled DataLoader.
labels_train = [row.label for row in train_df.select("label").collect()]
vectors_train = [row.vector_avg for row in train_df.select("vector_avg").collect()]
train_loader = DataLoader(
    DenseVectorDataset(labels_train, vectors_train),
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

val loader

In [None]:
# Same as the training loader, built from the validation split.
labels = [row.label for row in val_df.select("label").collect()]
vectors = [row.vector_avg for row in val_df.select("vector_avg").collect()]
val_loader = DataLoader(
    DenseVectorDataset(labels, vectors),
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

test loader

In [None]:
# Same as the training loader, built from the test split (default worker count).
labels = [row.label for row in test_df.select("label").collect()]
vectors = [row.vector_avg for row in test_df.select("vector_avg").collect()]
test_loader = DataLoader(
    DenseVectorDataset(labels, vectors),
    batch_size=128,
    shuffle=True,
)

build model

In [None]:
import torch.nn as nn
import torch

class Dense(nn.Module):
    """Two-layer feed-forward classifier: Linear -> ReLU -> Dropout -> Linear.

    Args:
        embedding_dim: Size of each input vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output logits.
        drop_out: Dropout probability applied after the activation.
    """

    def __init__(self , embedding_dim , hidden_dim , output_dim , drop_out = 0.5):
        super().__init__()
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drop_out)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self , x):
        """Return the logits for a batch of input vectors ``x``."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.fc2(hidden)

Training model

In [None]:
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer that fit() below logs to.
writer = SummaryWriter(log_dir='runs/intent_classification')

def fit(model , train_loader , val_loader , optimizer , criterion , epochs = 100 , device  = "cuda"):
    """Train ``model`` and validate it after every epoch.

    Each epoch runs one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``val_loader``. Mean loss per batch and accuracy
    per sample are written to the module-level ``writer`` and printed.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, targets)``.
        epochs: Number of epochs to run.
        device: Device the model and every batch are moved to.
    """
    model.to(device)
    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        running_loss, n_correct, n_seen = 0.0, 0, 0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            logits = model(inputs)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()
            predicted = torch.max(logits, 1)[1]
            n_seen += targets.size(0)
            n_correct += (predicted == targets).sum().item()
        avg_train_loss = running_loss / len(train_loader)
        avg_train_acc = n_correct / n_seen

        # ---- validation pass (no gradients) ----
        model.eval()
        running_val_loss, n_val_correct, n_val_seen = 0.0, 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                logits = model(inputs)
                running_val_loss += criterion(logits, targets).item()
                predicted = torch.max(logits, 1)[1]
                n_val_correct += (targets == predicted).sum().item()
                n_val_seen += targets.size(0)
        avg_val_acc = n_val_correct / n_val_seen
        avg_val_loss = running_val_loss / len(val_loader)

        # ---- logging ----
        epoch_scalars = (
            ("Loss/Train", avg_train_loss),
            ("Accuracy/Train", avg_train_acc),
            ("Loss/Val", avg_val_loss),
            ("Accuracy/Val", avg_val_acc),
        )
        for tag, value in epoch_scalars:
            writer.add_scalar(tag, value, epoch)
        print(f"epoch {epoch} : loss/train: {avg_train_loss:.4f} , acc/train: {avg_train_acc:.4f} "f"loss/val: {avg_val_loss:.4f} acc/val:{avg_val_acc:.4f}")


In [None]:
from sklearn.utils.class_weight import  compute_class_weight
import numpy as np
# "balanced" class weights computed from the training labels, one weight per
# distinct label value in the sorted order returned by np.unique.
classes = np.unique(labels_train)
weights = compute_class_weight(
    class_weight='balanced' ,
    classes=classes,
    y=labels_train
)
print(weights)

In [None]:
# Class weights as a float32 tensor on the GPU, ready for the loss function.
class_weights = torch.tensor(weights, dtype=torch.float).to("cuda")

In [None]:
# Build the dense classifier (100-d input, 64 hidden units, 7 output logits),
# a class-weighted cross-entropy loss and an Adam optimizer, then train for
# 100 epochs.
model = Dense(embedding_dim=100 , hidden_dim=64 , output_dim=7 , drop_out=0.5)
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = Adam(model.parameters() , lr = 1e-3)
fit(model , train_loader , val_loader , optimizer , criterion , epochs =100)

test

In [None]:
import torch
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import numpy as np

def evaluate_metrics(model, loader, device):
    """Run ``model`` over ``loader`` and gather true labels and predictions.

    Args:
        model: Module mapping a batch of inputs to class logits.
        loader: Iterable of ``(inputs, targets)`` batches.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(labels, predictions)`` as two NumPy arrays in loader order.
    """
    model.eval()
    label_chunks, pred_chunks = [], []

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # The model returns logits; the predicted class is the index of
            # the largest one.
            logits = model(inputs)
            predicted = torch.max(logits, 1)[1]

            pred_chunks.extend(predicted.cpu().numpy())
            label_chunks.extend(targets.cpu().numpy())

    return np.array(label_chunks), np.array(pred_chunks)

# fit() has already moved the model to the device; if it has not been run,
# uncomment the two lines below first:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# 1. Collect the true labels and the predictions on the test loader
y_true, y_pred = evaluate_metrics(model, test_loader, device = "cuda")

In [None]:
# Compute accuracy
accuracy = accuracy_score(y_true, y_pred)

# Compute the F1 score ('macro' averaging so every class weighs the same)
f1_macro = f1_score(y_true, y_pred, average='macro')

print(f"Accuracy cuối cùng: {accuracy:.4f}")
print(f"F1-Macro Score: {f1_macro:.4f}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Class names in label-index order: entry i of the fitted StringIndexer's
# `labels` is the emotion encoded as label i. A hand-written list would only
# be correct if it matched that order exactly.
class_names = list(indexer.labels)

# 1. Compute the confusion matrix. Passing `labels` keeps it square with one
#    row/column per class even when a class is absent from y_true and y_pred.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))

# 2. Figure size (large enough to read the class names)
plt.figure(figsize=(10, 8))

# 3. Draw the heatmap
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names
)

# 4. Title and axis labels
plt.title('Confusion Matrix: Phân loại Cảm xúc (Emotion Classification)')
plt.ylabel('Nhãn Thực tế (True Emotion)')
plt.xlabel('Nhãn Dự đoán (Predicted Emotion)')
plt.show()

# embedding + lstm

In [None]:
# Show two training rows before building the LSTM inputs.
train_df.show(2)

In [10]:
# hashing
from pyspark.sql.types import IntegerType , ArrayType
from pyspark.sql.functions import  udf , lit
import torch
def hashing(tokens , vocab_size  , max_seq_length):
    """Encode tokens as a fixed-length sequence of hash-bucket ids.

    Every token is mapped to a bucket id in ``1..vocab_size``; id ``0`` is
    reserved for padding. An embedding table indexed by these ids therefore
    needs ``vocab_size + 1`` rows.

    Args:
        tokens: List of token strings; ``None`` is treated as an empty list.
        vocab_size: Number of hash buckets.
        max_seq_length: Length of the returned list. Longer inputs are
            truncated, shorter ones are right-padded with ``0``.

    Returns:
        A list of exactly ``max_seq_length`` ints.
    """
    # Imported locally so the function is self-contained when it is shipped
    # to Spark workers as a UDF.
    import zlib

    if tokens is None:
        tokens = []
    # CRC32 is used instead of the built-in hash(), whose value for strings is
    # salted per interpreter process unless PYTHONHASHSEED is fixed; a stable
    # hash gives the same token the same id in every process and every run.
    # The result of % with a positive modulus is never negative in Python, so
    # no sign correction is needed.
    arr = [
        zlib.crc32(str(token).encode("utf-8")) % vocab_size + 1
        for token in tokens[:max_seq_length]
    ]
    arr += [0] * (max_seq_length - len(arr))
    return arr
# Spark UDF wrapper around hashing(); the result column is array<int>.
hashing_udf = udf(hashing , ArrayType(IntegerType()))

In [11]:
# Add an "idx" column to each split: the tokens encoded by hashing() with
# vocab_size=3000 and max_seq_length=30.
train_df_lstm = train_df.withColumn("idx" , hashing_udf(train_df.tokens , lit(3000) , lit(30)))
val_df_lstm = val_df.withColumn('idx' , hashing_udf(val_df.tokens , lit(3000) , lit(30)))
test_df_lstm = test_df.withColumn('idx' , hashing_udf(test_df.tokens , lit(3000) , lit(30)))

In [None]:
# Peek at two encoded training rows.
train_df_lstm.head(2)

In [None]:
# Convert the encoded training split to pandas and write it to train.csv in
# the working directory (presumably for manual inspection).
csv  =train_df_lstm.toPandas()
csv.to_csv("train.csv")

In [None]:
# Debug: print which of 500 buckets the built-in hash() assigns to "ten".
print(hash("ten")%500)

In [None]:
# Peek at two encoded validation rows.
val_df_lstm.head(2)

data prepare

In [12]:
from torch.utils.data import DataLoader ,Dataset
class LSTMData(Dataset):
    """Dataset of padded index sequences with their labels and true lengths.

    Args:
        indexes: Equal-length integer sequences, where ``0`` marks padding.
        label: One numeric class label per sequence.
    """

    def __init__(self , indexes   , label):
        self.indexes = torch.tensor(indexes, dtype=torch.long)
        self.label = torch.tensor(label, dtype=torch.long)

    def __len__(self):
        """Return the number of sequences."""
        return len(self.indexes)

    def __getitem__(self, idx):
        """Return ``(sequence, label, number of non-zero entries)``."""
        sequence = self.indexes[idx]
        n_real_tokens = (sequence != 0).sum(dim=0)
        return sequence, self.label[idx], n_real_tokens


In [None]:
# Collect the encoded sequences and labels of the training split and wrap
# them in a shuffled DataLoader.
train_idx = [row.idx for row in train_df_lstm.select('idx').collect()]
train_label = [row.label for row in train_df_lstm.select("label").collect()]
dataset = LSTMData(train_idx, train_label)
train_loader_lstm = DataLoader(dataset, batch_size=128, num_workers=4, shuffle=True)

[Stage 10:>                                                         (0 + 1) / 1]

In [None]:
# Sanity check: print the index rows of the first four samples of one batch.
# The training loader is used because it is the only LSTM loader that already
# exists at this point in the notebook.
for batch in train_loader_lstm:
    print(batch[0][:4])
    break

In [None]:
# Sequences AND labels must both come from the validation split; pairing
# validation sequences with training labels would score against wrong targets.
val_idx = val_df_lstm.select("idx").rdd.map(lambda x : x.idx).collect()
val_label = val_df_lstm.select("label").rdd.map(lambda x : x.label).collect()
dataset = LSTMData(val_idx , val_label)
val_loader_lstm = DataLoader(dataset,batch_size=64 , num_workers = 4 , shuffle=True )

In [None]:
# Collect the encoded sequences and labels of the test split and wrap them
# in a DataLoader.
test_idx = [row.idx for row in test_df_lstm.select("idx").collect()]
test_label = [row.label for row in test_df_lstm.select("label").collect()]
dataset = LSTMData(test_idx, test_label)
test_loader_lstm = DataLoader(dataset, batch_size=128, num_workers=4, shuffle=True)

build LSTM model

In [None]:
from torch.nn.utils.rnn import pad_sequence , pack_padded_sequence
import torch.nn as nn
class LSTM_Intent(nn.Module):
    """Sequence classifier: Embedding -> LSTM -> BatchNorm -> Dropout -> Linear.

    Args:
        num_embedding: Number of rows in the embedding table; must be larger
            than the largest index that will be looked up.
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of output logits.
        n_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout applied before the final linear layer, and
            between LSTM layers when ``n_layers > 1``.
    """
    def __init__(self ,num_embedding , embedding_dim , hidden_dim , output_dim , n_layers , dropout_rate=0.5):
        super(LSTM_Intent, self).__init__()
        self.embedding = nn.Embedding(
            num_embeddings = num_embedding ,
            embedding_dim = embedding_dim ,
            padding_idx = 0
        )
        self.lstm = nn.LSTM(embedding_dim , hidden_dim , num_layers= n_layers , dropout=dropout_rate if n_layers > 1 else 0)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(in_features= hidden_dim , out_features= output_dim)
        self.batch_norm =nn.BatchNorm1d(hidden_dim)
    def forward(self , idx , length ):
        """Return class logits for a batch of padded index sequences.

        Args:
            idx: Integer tensor of token indices, batch dimension first.
            length: Tensor with the number of non-padding tokens per sequence.

        Returns:
            One row of ``output_dim`` logits per sequence.
        """
        out = self.embedding(idx)
        # pack_padded_sequence needs CPU lengths that are all >= 1. A sequence
        # made only of padding has length 0, so clamp it to 1 and let the LSTM
        # read a single padding step instead of raising.
        lengths = length.cpu().clamp(min=1)
        packed = pack_padded_sequence(out , lengths , batch_first=True ,enforce_sorted=False)
        out , (h_n , c_n) = self.lstm(packed )
        # Final hidden state of the last LSTM layer summarises each sequence.
        h = h_n[-1]
        h = self.batch_norm(h)
        h = self.dropout(h)
        out = self.fc(h)
        return out

train

In [None]:
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
import torch

def fit(model, train_loader, val_loader, optimizer, loss_fn, device="cuda", epochs=200):
    """Train a model that takes ``(x, length)`` and validate it every epoch.

    Each epoch runs one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``val_loader``; mean loss per batch and accuracy
    per sample are logged to TensorBoard under ``runs/LSTM`` and printed.

    NOTE(review): this definition reuses the name ``fit`` of the earlier
    two-tensor training function, so it replaces it once this cell has run.

    Args:
        model: Module called as ``model(x, length)`` returning class logits.
        train_loader: Iterable of ``(x, y, length)`` training batches.
        val_loader: Iterable of ``(x, y, length)`` validation batches.
        optimizer: Optimizer over ``model``'s parameters.
        loss_fn: Loss called as ``loss_fn(logits, y)``.
        device: Device the model, ``x`` and ``y`` are moved to.
        epochs: Number of epochs to run.
    """
    writer = SummaryWriter(log_dir='runs/LSTM')

    # 1. Move the model to the device ONCE, outside the loops
    model.to(device)

    for epoch in range(epochs):
        # --- TRAIN LOOP ---
        model.train() # training mode (matters for Dropout and BatchNorm)
        train_loss = 0.0
        acc = 0
        total = 0

        for x, y, length in train_loader:
            # 2. Only x and y are moved to the device; length is passed on
            # unchanged and the model is expected to handle it
            x, y = x.to(device), y.to(device)
            # length = length.to(device) # not needed if the model handles length on the CPU

            optimizer.zero_grad()
            out = model(x, length) # length is passed as produced by the loader
            loss = loss_fn(out, y)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(out, 1)
            acc += (y == predicted).sum().item()
            total += y.size(0)

        acc_avg = acc / total
        loss_avg = train_loss / len(train_loader)

        # --- VAL LOOP ---
        model.eval() # evaluation mode
        val_loss = 0.0
        val_acc = 0
        val_total = 0

        # 3. IMPORTANT: no_grad saves memory and time during validation
        with torch.no_grad():
            for x, y, length in val_loader:
                x, y = x.to(device), y.to(device)

                out = model(x, length)
                ValLoss = loss_fn(out, y)

                _, val_predicted = torch.max(out, 1)
                val_loss += ValLoss.item()
                val_acc += (val_predicted == y).sum().item()
                val_total += y.size(0)

        val_acc_avg = val_acc / val_total
        val_loss_avg = val_loss / len(val_loader)

        # Log to TensorBoard
        writer.add_scalar("Train/Loss", loss_avg, epoch)
        writer.add_scalar("Train/Accuracy", acc_avg, epoch)
        writer.add_scalar("Val/Loss", val_loss_avg, epoch)
        writer.add_scalar("Val/Accuracy", val_acc_avg, epoch)

        # Print progress for this epoch
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {loss_avg:.4f} | Val Loss: {val_loss_avg:.4f} | Val Acc: {val_acc_avg:.4f} | Train Acc: {acc_avg:.4f}")

    # 4. Close the writer
    writer.close()

In [None]:
from sklearn.utils.class_weight import  compute_class_weight
import numpy as np
# "balanced" class weights computed from the LSTM training labels, one weight
# per distinct label value in the sorted order returned by np.unique.
classes = np.unique(train_label)
weights = compute_class_weight(
    class_weight='balanced' ,
    classes=classes,
    y=train_label
)
print(weights)

In [None]:
# Class weights as a float32 tensor on the GPU, ready for the loss function.
class_weights = torch.tensor(weights, dtype=torch.float).to("cuda")

In [None]:
# hashing() was called with vocab_size=3000 and emits ids 1..3000, keeping 0
# for padding, so the embedding table needs 3000 + 1 rows; with only 3000 rows
# id 3000 is out of range.
model = LSTM_Intent(num_embedding=3000 + 1 , embedding_dim=128 , hidden_dim=64 , output_dim=7 , n_layers=2, dropout_rate=0.4)
# No ignore_index: the targets are emotion classes 0..6 and class 0 is a real
# emotion, not padding. Ignoring it would drop every class-0 sample from the loss.
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = Adam(model.parameters() , lr = 1e-3, weight_decay = 1e-3)
fit(model , train_loader_lstm , val_loader_lstm , optimizer  , criterion , epochs =100)

In [None]:
import torch
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import numpy as np

def evaluate_metrics(model, loader, device):
    """Run a length-aware ``model`` over ``loader`` and gather labels and predictions.

    Args:
        model: Module called as ``model(x, length)`` returning class logits.
        loader: Iterable of ``(x, y, length)`` batches.
        device: Device ``x`` and ``y`` are moved to; ``length`` is passed on
            unchanged.

    Returns:
        ``(labels, predictions)`` as two NumPy arrays in loader order.
    """
    model.eval()
    label_chunks, pred_chunks = [], []

    with torch.no_grad():
        for inputs, targets, lengths in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # The model returns logits; the predicted class is the index of
            # the largest one.
            logits = model(inputs, lengths)
            predicted = torch.max(logits, 1)[1]

            pred_chunks.extend(predicted.cpu().numpy())
            label_chunks.extend(targets.cpu().numpy())

    return np.array(label_chunks), np.array(pred_chunks)

# fit() moved the model to its default device, "cuda"; evaluate on the same
# device. `device` has to be assigned here because no earlier cell defines it.
device = "cuda"

# 1. Collect the true labels and the predictions on the test loader
y_true, y_pred = evaluate_metrics(model, test_loader_lstm, device)

In [None]:
# Compute accuracy
accuracy = accuracy_score(y_true, y_pred)

# Compute the F1 score ('macro' averaging so every class weighs the same)
f1_macro = f1_score(y_true, y_pred, average='macro')

print(f"Accuracy cuối cùng: {accuracy:.4f}")
print(f"F1-Macro Score: {f1_macro:.4f}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# The tick labels must follow the order in which the target labels were
# encoded. Entry i of the fitted StringIndexer's `labels` is the emotion
# encoded as label i, so take the names from there instead of a hand-written list.
class_names = list(indexer.labels)

# 1. Compute the confusion matrix. Passing `labels` keeps it square with one
#    row/column per class even when a class is absent from y_true and y_pred.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))

# 2. Figure size (large enough to read the class names)
plt.figure(figsize=(10, 8))

# 3. Draw the heatmap
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names
)

# 4. Title and axis labels
plt.title('Confusion Matrix: Phân loại Cảm xúc (Emotion Classification)')
plt.ylabel('Nhãn Thực tế (True Emotion)')
plt.xlabel('Nhãn Dự đoán (Predicted Emotion)')
plt.show()

In [2]:
import torch
# Display the name of CUDA device 0.
torch.cuda.get_device_name(0)


'NVIDIA GeForce GTX 1650'