# **Tiki Book Aspect-based Sentiment Analysis (ABSA)**



In [None]:
# Mount Google Drive at /content/drive; every dataset path below lives under it.
# The google.colab import means this cell only works inside Colab.
from google.colab import drive
drive.mount('/content/drive')

# Load Dataset

In [None]:
import pandas as pd
# Quick look at the raw file: one DataFrame row per physical line, with the
# whole (stripped) first line used as the single column name.
with open(r'/content/drive/MyDrive/nlp-vabsa-main/small_tiki_comment.txt', 'r', encoding='utf-8') as f:
    lines = list(f)

# Detach the header line; `lines` keeps only the data lines afterwards.
header = lines.pop(0).strip()
df = pd.DataFrame(lines, columns=[header])


In [None]:
df.shape

In [None]:
df.iloc[4].values

# Preprocessing

In [None]:
import pandas as pd
import re
import string
file_path = r"/content/drive/MyDrive/nlp-vabsa-main/small_tiki_comment.txt"
# Parse as comma-separated text with the Python engine; rows the parser cannot
# handle are skipped, and rows without review text are dropped afterwards.
read_options = dict(sep=",", quotechar='"', on_bad_lines='skip', engine='python')
df = pd.read_csv(file_path, **read_options).dropna(subset=["content"])

def normalize_money(sent):
    """Replace price-like tokens with the word 'giá'.

    A price-like token is a run of digits (optionally containing '.' or ',')
    immediately followed by a single k/m/b suffix, case-insensitively,
    e.g. "100k" or "1.5M".

    Args:
        sent: Text to normalise.

    Returns:
        The text with every price-like token replaced by 'giá'.
    """
    # The trailing \b stops the suffix from matching the first letter of a
    # longer unit: without it "5kg" became "giág" and "15mm" became "giám".
    return re.sub(r'[0-9]+[.,0-9]*[kmb]\b', 'giá', sent, flags=re.IGNORECASE)

def normalize_hastag(sent):
    """Collapse each hashtag (one or more '#' followed by word characters) into 'tag'."""
    hashtag = re.compile(r'#+\w+')
    return hashtag.sub('tag', sent)

def normalize_website(sent):
    """Replace URLs and bare domains with the literal token 'website'.

    Two passes are applied in order: first full http(s):// URLs, then
    scheme-less names ending in .com/.vn/.me with an optional path.
    """
    full_url = re.compile(
        r'http[s]?://(?:[a-zA-Z0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    bare_domain = re.compile(r'\w+(\.(com|vn|me))+((\/+([\.\w\_\-]+)?)+)?')

    without_urls = full_url.sub('website', sent)
    return bare_domain.sub('website', without_urls)

def nomalize_emoji(sent):
    """Delete every character that falls in the code-point ranges below.

    The ranges are deliberately broad: U+24C2..U+1F251 and U+10000..U+10FFFF
    cover far more than emoji (CJK ideographs are removed too, for example).
    Vietnamese letters lie below U+24C2 and are left untouched.
    """
    code_point_ranges = (
        "\U0001F600-\U0001F64F",
        "\U0001F300-\U0001F5FF",
        "\U0001F680-\U0001F6FF",
        "\U0001F1E0-\U0001F1FF",
        "\U00002702-\U000027B0",
        "\U000024C2-\U0001F251",
        "\U0001f926-\U0001f937",
        "\U00010000-\U0010ffff",
        "\u200d",
        "\u2640-\u2642",
        "\u2600-\u2B55",
        "\u23cf",
        "\u23e9",
        "\u231a",
        "\u3030",
        "\ufe0f",
        "\u2764",
    )
    # One character class containing all ranges; '+' removes whole runs at once.
    emoji_pattern = re.compile("[" + "".join(code_point_ranges) + "]+", flags=re.UNICODE)
    return emoji_pattern.sub('', sent)

def normalize_acronyms(sent):
    """Expand chat abbreviations, slang and a few English words into Vietnamese.

    Every key of ``replace_list`` is replaced by its value with ``str.replace``,
    one key after another in the order the table is written. Consequences of
    that approach, all visible in the code below:

    * Matching is plain case-sensitive substring search, so a key without
      surrounding spaces (e.g. 'j', 'sp', 'not') also fires inside longer words.
    * Keys written with surrounding spaces (e.g. ' ko ') only match when those
      spaces are present, i.e. not at the very start or end of ``sent``.
    * Later keys see the output of earlier replacements, so the order matters.

    Args:
        sent: Text to expand.

    Returns:
        The text after all replacements have been applied.
    """
    # NOTE(review): the 'ể' entry looks like a no-op; presumably its key and
    # value differ only in Unicode normalisation form - confirm before editing.
    # NOTE(review): ' tl ' appears twice with the same value (harmless), and
    # ' shopE ' comes after 'shop', so 'shop' has already been rewritten by the
    # time that key is tried.
    replace_list = {
        'ô kêi': ' ok ', 'okie': ' ok ', ' o kê ': ' ok ',
        'okey': ' ok ', 'ôkê': ' ok ', 'oki': ' ok ', ' oke ':  ' ok ',
        'okay':' ok ','okê':' ok ', ' tks ':' cám ơn ', 'thks':' cám ơn ',
        'thanks':' cám ơn ', 'ths':' cám ơn ', 'thank':' cám ơn ',
        '⭐':'star ', '*':'star ', '🌟':'star ', '🎉':' tích cực ',
        'kg ':' không ','not':' không ',' kh ':' không ','kô':' không ',
        'hok':' không ',' kp ':' không phải ',' ko ':' không ',' k ':' không ',
        'khong':' không ', 'he he':' tích cực ','hehe':' tích cực ',
        'hihi':' tích cực ', 'haha':' tích cực ', 'hjhj':' tích cực ',
        ' lol ':' tiêu cực ',' cc ':' tiêu cực ','cute':' dễ thương ',
        'huhu':' tiêu cực ', ' vs ':' với ', 'wa':' quá ', 'wá':' quá',
        'j':' gì ', 'sz ':' cỡ ', 'size':' cỡ ', 'đx ':' được ',
        'dk':' được ', 'dc':' được ', 'đk':' được ', 'đc':' được ',
        'authentic':' chuẩn chính hãng ','auth ':' chuẩn chính hãng ',
        'thick':' tích cực ', 'store':' cửa hàng ', 'shop':' cửa hàng ',
        'sp':' sản phẩm ', 'gud':' tốt ','god':' tốt ','wel done':' tốt ',
        'good':' tốt ', 'sấu':' xấu ','gut':' tốt ', ' tot ':' tốt ',
        ' nice ':' tốt ', 'perfect':'rất tốt', 'bt':' bình thường ',
        'time':' thời gian ', 'qá':' quá ', ' ship ':' giao hàng ',
        ' m ':' mình ', ' mik ':' mình ', 'ể':'ể', 'product':'sản phẩm',
        'quality':'chất lượng','chat':' chất ', 'excelent':'hoàn hảo',
        'bad':'tệ','fresh':' tươi ','sad':' tệ ', 'date':' hạn sử dụng ',
        'hsd':' hạn sử dụng ','quickly':' nhanh ', 'quick':' nhanh ',
        'fast':' nhanh ','delivery':' giao hàng ',' síp ':' giao hàng ',
        'beautiful':' đẹp tuyệt vời ', ' tl ':' trả lời ', ' r ':' rồi ',
        ' shopE ':' cửa hàng ',' order ':' đặt hàng ', 'chất lg':' chất lượng ',
        ' sd ':' sử dụng ',' dt ':' điện thoại ',' nt ':' nhắn tin ',
        ' tl ':' trả lời ',' sài ':' xài ','bjo':' bao giờ ','thik':' thích ',
        ' sop ':' cửa hàng ', ' fb ':' facebook ', ' face ':' facebook ',
        ' very ':' rất ','quả ng ':' quảng  ','dep':' đẹp ',' xau ':' xấu ',
        'delicious':' ngon ','hàg':' hàng ','qủa':' quả ','iu':' yêu ',
        'fake':' giả mạo ', 'trl':'trả lời', '><':' tích cực ',
        ' por ':' tệ ',' poor ':' tệ ', 'ib':' nhắn tin ', 'rep':' trả lời ',
        'fback':' feedback ','fedback':' feedback '
    }
    text = sent
    # Sequential, order-dependent substring replacement (see docstring).
    for k, v in replace_list.items():
        text = text.replace(k, v)
    return text

def normalize(sent):
    """Run the full review-normalisation pipeline on one string.

    Steps, in order: lowercase, price tokens -> 'giá', hashtags -> 'tag',
    URLs/domains -> 'website', abbreviation expansion, emoji removal,
    punctuation -> space, whitespace collapse.

    Args:
        sent: Raw review text.

    Returns:
        The normalised, lowercased, single-spaced text.
    """
    # Lowercase first: the abbreviation table and the bare-domain regex are
    # lowercase-only, so "OK", "Shop" or "Tiki.VN" were previously missed.
    result = sent.lower()
    result = normalize_money(result)
    result = normalize_hastag(result)
    result = normalize_website(result)
    # Collapse whitespace and pad with spaces so space-delimited keys such as
    # ' ko ' also match at the start/end of the text and across tabs/newlines.
    result = ' ' + re.sub(r'\s+', ' ', result) + ' '
    # Abbreviations run before emoji stripping; otherwise the table's emoji
    # keys ('⭐', '🌟', '🎉') are deleted before they can be expanded.
    result = normalize_acronyms(result)
    result = nomalize_emoji(result)
    result = result.translate(str.maketrans(string.punctuation, ' ' * len(string.punctuation)))
    result = re.sub(r'\s+', ' ', result).strip()
    return result

# Normalise every review, show a small before/after sample, then persist the
# table for the aspect-extraction stage.
df['content_normalized'] = df['content'].map(normalize)

preview = df[['content', 'content_normalized']].head()
print(preview)

df.to_csv('/content/drive/MyDrive/nlp-vabsa-main/small_tiki_comment_normalized.csv', index=False)


In [None]:
!pip install underthesea

In [None]:
!pip install -q vncorenlp sentence-transformers pandas

# Aspects Extraction

In [None]:
import os
# Local path of the VnCoreNLP jar; it is downloaded only when not already present.
vncorenlp_path = 'vncorenlp/VnCoreNLP-1.1.1.jar'
if not os.path.exists(vncorenlp_path):
    os.makedirs('vncorenlp', exist_ok=True)
    # Notebook shell magic: {vncorenlp_path} is interpolated from the Python variable.
    !wget -q -O {vncorenlp_path} https://github.com/vncorenlp/VnCoreNLP/releases/download/v1.1.1/VnCoreNLP-1.1.1.jar

from vncorenlp import VnCoreNLP
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import re

# Word segmenter: optional. On failure the name is still bound (to None) so
# that later code fails in a predictable way and can fall back to treating the
# whole text as one sentence, instead of hitting an unrelated NameError.
try:
    rdrsegmenter = VnCoreNLP(vncorenlp_path, annotators="wseg", max_heap_size='-Xmx1g', quiet=True)
    print("VnCoreNLP khởi tạo thành công.")
except Exception as e:
    rdrsegmenter = None
    print(f"Lỗi khi khởi tạo VnCoreNLP: {e}")

# Sentence encoder: mandatory. Everything below calls model.encode, so a load
# failure is reported and then re-raised rather than swallowed.
try:
    model = SentenceTransformer('VoVanPhuc/sup-SimCSE-VietNamese-phobert-base')
    print("Sentence-BERT model đã tải thành công.")
except Exception as e:
    print(f"Lỗi tải SentenceTransformer model: {e}")
    raise

# Aspect code -> short Vietnamese description. The descriptions (not the codes)
# are what gets embedded and compared against review sentences.
aspect_to_vietnamese = {
    "BOOK#GENERAL": "sách nói chung",
    "BOOK#PRICE": "giá sách",
    "BOOK#QUALITY": "chất lượng sách",
    "BOOK#CONTENT": "nội dung sách",
    "BOOK#FORMAT": "hình thức sách",
    "BOOK#READER_EXPERIENCE": "trải nghiệm người đọc",
    "BOOK#RECOMMENDATION": "khuyến nghị sách",
    "DELIVERY#SERVICE": "dịch vụ giao hàng",
    "SELLER#SERVICE": "dịch vụ người bán"
}

# Embeddings are computed once here; row i corresponds to the i-th dict entry.
aspect_texts = list(aspect_to_vietnamese.values())
aspect_embeddings = model.encode(aspect_texts, convert_to_tensor=True)

# PREPROCESSING & EXTRACTION FUNCTIONS
def clean_text(text):
    """Lowercase ``text`` and keep only whitelisted characters.

    Kept: ASCII letters and digits, lowercase Vietnamese letters, whitespace
    and the punctuation ``! . , ; ? : ' " ( )``. Everything else is deleted,
    then whitespace runs are collapsed to one space and the ends are trimmed.

    Args:
        text: Input string.

    Returns:
        The cleaned string.
    """
    import unicodedata  # local import so the function does not depend on cell order

    # Compose to NFC first: the whitelist lists precomposed Vietnamese letters,
    # so decomposed input (base letter + combining mark) would otherwise lose
    # its diacritics ("sách" typed as s-a-U+0301-c-h became "sach").
    text = unicodedata.normalize('NFC', text).lower()
    text = re.sub(r'[^a-zA-Z0-9\sáàảãạăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệíìỉĩịóòỏõọôốồổỗộơớờởỡợúùủũụưứừửữựýỳỷỹỵ!\.,;?:\'"\(\)\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def split_sentences(text):
    """Split ``text`` into sentences using the global ``rdrsegmenter``.

    Each sentence returned by ``rdrsegmenter.tokenize`` is a token list and is
    re-joined with single spaces.

    Args:
        text: Text to split.

    Returns:
        A list of sentence strings. If segmentation fails for any reason
        (including the segmenter not being available), the whole text is
        returned as a single-element list.
    """
    try:
        return [' '.join(sent) for sent in rdrsegmenter.tokenize(text)]
    except Exception:
        # Best-effort fallback; `Exception` (not a bare except) so that
        # KeyboardInterrupt/SystemExit still stop a long-running apply().
        return [text]

def extract_present_aspects_paragraph(paragraph, threshold=0.3):
    """Return the aspect codes whose description is similar to any sentence.

    The paragraph is cleaned with ``clean_text``, split with
    ``split_sentences``, and each sentence is embedded with the global
    ``model``. An aspect is reported when its cosine similarity with at least
    one sentence is ``>= threshold``.

    Args:
        paragraph: Review text; empty, ``None`` or NaN yields an empty list.
        threshold: Minimum cosine similarity for an aspect to count.

    Returns:
        A list of aspect codes (keys of ``aspect_to_vietnamese``), ordered as
        in that dict so the output is reproducible across runs.
    """
    if not paragraph or pd.isna(paragraph):
        return []

    # Built once per call; position i matches row i of aspect_embeddings.
    aspect_codes = list(aspect_to_vietnamese)
    sentences = split_sentences(clean_text(paragraph))
    matched_indices = set()

    for sentence in sentences:
        if not sentence.strip():
            continue
        try:
            sent_embedding = model.encode(sentence, convert_to_tensor=True)
            cosine_scores = util.cos_sim(sent_embedding, aspect_embeddings)[0]
            for i, score in enumerate(cosine_scores):
                if score >= threshold:
                    matched_indices.add(i)
        except Exception as e:
            # One bad sentence must not abort the whole paragraph.
            print(f"Lỗi xử lý câu: {sentence}, lỗi: {e}")
            continue

    # Sorting the indices replaces the arbitrary set iteration order that the
    # previous list(set) conversion produced.
    return [aspect_codes[i] for i in sorted(matched_indices)]


# LOAD DATA

input_path = '/content/drive/MyDrive/nlp-vabsa-main/small_tiki_comment_normalized.csv'
output_path = "/content/drive/MyDrive/nlp-vabsa-main/Tiki_books_aspect_extracted_vncorenlp.csv"

# Load the normalised reviews; any failure falls back to an empty frame so the
# rest of the cell can report the situation instead of crashing.
if not os.path.exists(input_path):
    print(f"Không tìm thấy file tại {input_path}")
    df = pd.DataFrame({'content': []})
else:
    try:
        df = pd.read_csv(input_path)
        print(f"Đã tải dữ liệu từ: {input_path}")
    except Exception as e:
        print(f"Lỗi đọc file: {e}")
        df = pd.DataFrame({'content': []})

if df.empty:
    print("Dữ liệu rỗng. Không thực hiện trích xuất.")
    print("Không lưu vì dataframe rỗng.")
    print("Không có dữ liệu hiển thị.")
else:
    print("Đang tiến hành trích xuất khía cạnh...")
    # Missing values are mapped to "no aspects" explicitly: wrapping them in
    # str() first would turn NaN into the literal text "nan" and run it
    # through the encoder as if it were a review.
    df['detected_aspects'] = df['content'].apply(
        lambda x: [] if pd.isna(x) else extract_present_aspects_paragraph(str(x))
    )
    print("Hoàn thành trích xuất.")

    try:
        df.to_csv(output_path, index=False)
        print(f"Đã lưu kết quả vào: {output_path}")
    except Exception as e:
        print(f"Lỗi khi lưu file: {e}")

    print(df[['content', 'detected_aspects']].head())


# Sentiment Labelling

In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import ast
import math

checkpoint = "mr4/phobert-base-vi-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# NOTE(review): the score_* columns written below assume the checkpoint's
# label ids are 0=negative, 1=neutral, 2=positive. That ordering is not
# established anywhere in this notebook - confirm it against the mapping
# printed here before trusting the labels.
print("id2label:", model.config.id2label)

file_path = "/content/drive/MyDrive/nlp-vabsa-main/Tiki_books_aspect_extracted_vncorenlp.csv"
df = pd.read_csv(file_path)

# The aspect lists were stored as their Python repr; parse them back safely.
df['detected_aspects'] = df['detected_aspects'].apply(
    lambda x: ast.literal_eval(str(x)) if pd.notna(x) and x != "nan" else []
)

# One model input per (review, aspect) pair; the three lists stay aligned.
combined_inputs = []
content_list = []
aspect_list = []

for _, row in df.iterrows():
    text = row['content']
    aspects = row['detected_aspects']
    if not aspects:
        continue
    for aspect in aspects:
        combined_inputs.append(f"Văn bản: {text} | Khía cạnh: {aspect}")
        content_list.append(text)
        aspect_list.append(aspect)

batch_size = 32
num_batches = math.ceil(len(combined_inputs) / batch_size)
results = []
print(f"Processing {len(combined_inputs)} inputs in {num_batches} batches of size {batch_size}")

for i in range(num_batches):
    start_index = i * batch_size
    end_index = min((i + 1) * batch_size, len(combined_inputs))
    batch_inputs_text = combined_inputs[start_index:end_index]
    batch_content = content_list[start_index:end_index]
    batch_aspect = aspect_list[start_index:end_index]

    # Explicit max_length: truncation=True alone relies on the tokenizer's
    # configured model_max_length, and PhoBERT-base accepts at most 256 tokens.
    inputs = tokenizer(
        batch_inputs_text,
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors="pt"
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

    for j, score in enumerate(probs):
        predicted_label = torch.argmax(score).item()
        results.append({
            "content": batch_content[j],
            "aspect": batch_aspect[j],
            "sentiment_label": predicted_label,
            "score_positive": score[2].item(),
            "score_neutral": score[1].item(),
            "score_negative": score[0].item()
        })

results_df = pd.DataFrame(results)
output_path = "/content/drive/MyDrive/nlp-vabsa-main/Tiki_books_aspect_sentiment_labeled.csv"
results_df.to_csv(output_path, index=False, encoding='utf-8-sig', float_format='%.4f')

print("Đã lưu kết quả sentiment (có nhãn dạng số) vào:", output_path)
# The closing parenthesis was missing here, which made the whole cell a SyntaxError.
print(results_df.head())

In [None]:
results_df.info()

In [None]:
import pandas as pd
df = pd.read_csv("/content/drive/MyDrive/nlp-vabsa-main/Tiki_books_aspect_sentiment_labeled.csv")

# Count rows per (aspect, sentiment). Columns are forced to exactly 0/1/2
# before renaming: assigning three names directly raised a length-mismatch
# error whenever one sentiment class did not occur in the data.
label_names = {0: '0: negative', 1: '1: neutral', 2: '2: positive'}
summary = (
    df.groupby(["aspect", "sentiment_label"]).size().unstack(fill_value=0)
    .reindex(columns=list(label_names), fill_value=0)
    .rename(columns=label_names)
)
print(summary)


In [None]:
import matplotlib.pyplot as plt
# Bar chart of how many labelled rows each aspect received.
aspect_counts = df['aspect'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
aspect_counts.plot(kind='bar', color='green', ax=ax)
ax.set_title("Aspect Distribution")
ax.set_xlabel("Aspect")
ax.set_ylabel("Count")
# Slanted, right-anchored tick labels keep the long aspect codes readable.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
# Stacked bars: one bar per aspect, one segment per sentiment column of `summary`.
summary.plot(kind="bar", stacked=True, figsize=(10, 6), colormap="viridis")
plt.title("Sentiments Distribution by Aspects")  # fixed typo: "Distibution"
plt.xlabel("Aspects")
plt.ylabel("Count")
plt.xticks(rotation=45, ha='right')
plt.legend(title="Sentiment")
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
# value_counts() orders by frequency, so a fixed colour list coloured slices
# by rank rather than by label. Sort by label and look the colour up per label.
sentiment_counts = df['sentiment_label'].value_counts().sort_index()

# NOTE(review): assumes 0=negative (red), 1=neutral (amber), 2=positive (green),
# matching the column names used for the summary table - confirm.
label_colors = {0: '#F44336', 1: '#FFC107', 2: '#4CAF50'}
slice_colors = [label_colors.get(int(label), '#9E9E9E') for label in sentiment_counts.index]

plt.figure(figsize=(7, 7))
plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=slice_colors)
plt.title("Sentiment Distribution")
plt.show()


# Training Preparation

In [None]:
# Reload the labelled (content, aspect, sentiment, scores) table and display it.
df = pd.read_csv('/content/drive/MyDrive/nlp-vabsa-main/Tiki_books_aspect_sentiment_labeled.csv')
df

In [None]:
# Training only needs text, aspect and label; drop the per-class score columns.
df_train = df.drop(columns=['score_positive', 'score_neutral', 'score_negative'])


In [None]:
# Persist the training table, then read it back and display it as a round-trip check.
df_train.to_csv('/content/drive/MyDrive/nlp-vabsa-main/train_data.csv', index=False)
df = pd.read_csv('/content/drive/MyDrive/nlp-vabsa-main/train_data.csv')
df

# Sentiment Classification using VinAI PhoBERT

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('/content/drive/MyDrive/nlp-vabsa-main/train_data.csv')
df = df.dropna(subset=["content", "aspect", "sentiment_label"])
df = df[df["sentiment_label"].isin([0, 1, 2])]
# If the column was read as float (e.g. because of missing values), cast it
# back so the labels become integer tensors, as the classification loss needs.
df["sentiment_label"] = df["sentiment_label"].astype(int)

# Same "text | aspect" prompt format that was used when the labels were produced.
df['text_with_aspect'] = df.apply(
    lambda row: f"Văn bản: {row['content']} | Khía cạnh: {row['aspect']}",
    axis=1
)
X = df['text_with_aspect']
y = df['sentiment_label']

# Stratified 80/20 split. The closing parenthesis of this call was missing,
# which made the whole cell a SyntaxError.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

train_texts = train_texts.tolist()
test_texts  = test_texts.tolist()
train_labels = train_labels.tolist()
test_labels  = test_labels.tolist()
print(f"Train samples: {len(train_texts)}, Test samples: {len(test_texts)}")
print("Class distribution in train:", pd.Series(train_labels).value_counts(normalize=True).to_dict())
print("Class distribution in test :", pd.Series(test_labels).value_counts(normalize=True).to_dict())


In [None]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False)

# Both splits are encoded with identical settings: truncate to 128 tokens and
# pad each split to its longest sequence.
# NOTE(review): the texts are passed as-is, without word segmentation - confirm
# this matches what the checkpoint expects.
encode_options = {"truncation": True, "padding": True, "max_length": 128}
train_encodings = tokenizer(train_texts, **encode_options)
test_encodings = tokenizer(test_texts, **encode_options)

In [None]:
import torch

class PhoBERTDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    ``encodings`` is a mapping from field name to a per-sample sequence and
    ``labels`` is a sequence aligned with it. Each item is a dict of tensors
    with one entry per encoding field plus a ``'labels'`` entry.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The label list defines the dataset size.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

train_dataset = PhoBERTDataset(train_encodings, train_labels)
test_dataset = PhoBERTDataset(test_encodings, test_labels)


In [None]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix
import numpy as np
import torch
import torch.nn.functional as F
import pandas as pd

def compute_metrics(eval_pred):
    """Compute evaluation metrics from a ``(logits, labels)`` pair.

    Args:
        eval_pred: Pair of raw class logits and the true integer labels.

    Returns:
        Dict with weighted ``accuracy``/``precision``/``recall``, one-vs-rest
        weighted ``roc_auc`` (0.0 when it cannot be computed) and the
        ``confusion_matrix`` as a nested list over classes 0, 1, 2.
    """
    logits, labels = eval_pred
    probs = F.softmax(torch.tensor(logits), dim=1).numpy()
    preds = np.argmax(probs, axis=1)

    accuracy = accuracy_score(labels, preds)
    # zero_division=0 returns the same value as the default but without a
    # warning when a class is never predicted.
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted', zero_division=0)

    try:
        roc_auc = roc_auc_score(labels, probs, multi_class='ovr', average='weighted')
    except ValueError:
        # Raised by scikit-learn when the AUC is undefined for this batch of
        # labels (e.g. a class is absent); other errors should surface.
        roc_auc = 0.0

    # Fixed label set keeps the matrix 3x3 even if a class is absent, so it
    # always lines up with the '0'/'1'/'2' tick labels used when plotting.
    conf_matrix = confusion_matrix(labels, preds, labels=[0, 1, 2])

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'roc_auc': roc_auc,
        'confusion_matrix': conf_matrix.tolist(),
    }

# Baseline run: vinai/phobert-base configured for 3 output labels, trained
# with the Trainer's standard loss.
model = AutoModelForSequenceClassification.from_pretrained("vinai/phobert-base", num_labels=3)

# Checkpoints are written once per epoch and only the two most recent are kept;
# report_to="none" disables external experiment loggers.
# NOTE(review): no evaluation strategy is set here, so metrics are presumably
# only computed when evaluate()/predict() is called explicitly - confirm.
training_args = TrainingArguments(
    output_dir='./results',
    run_name='classification_task',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=50,
    save_strategy="epoch",
    save_total_limit=2,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

In [None]:
# Run the trained model on the held-out split and tabulate, per sample, the
# true label, the arg-max prediction and the softmax probability of each class.
predictions = trainer.predict(test_dataset)

raw_logits = predictions.predictions
pred_labels = np.argmax(raw_logits, axis=1)
probs = F.softmax(torch.tensor(raw_logits), dim=1).numpy()

table_columns = {
    'true_label': predictions.label_ids,
    'pred_label': pred_labels,
}
for class_id in range(3):
    table_columns[f'prob_{class_id}'] = probs[:, class_id]
results_df = pd.DataFrame(table_columns)


print(results_df.head())


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Evaluate on the test split, print the scalar metrics as a one-row table and
# draw the confusion matrix as a heatmap.
eval_results = trainer.evaluate()

column_to_metric = {
    'Loss': 'eval_loss',
    'Accuracy': 'eval_accuracy',
    'Precision': 'eval_precision',
    'Recall': 'eval_recall',
    'ROC AUC': 'eval_roc_auc',
}
eval_df = pd.DataFrame([{column: eval_results[key] for column, key in column_to_metric.items()}])
print(eval_df)

class_ticks = ['0', '1', '2']
conf_matrix = np.array(eval_results['eval_confusion_matrix'])
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_ticks, yticklabels=class_ticks)
plt.title("Confusion Matrix")
plt.show()


# Handling Class Imbalance with Class Weights

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import torch

# Read the labels straight from the dataset's label list instead of indexing
# the dataset, which built a full dict of tensors per sample just for one int.
labels = np.array(train_dataset.labels)
classes = np.unique(labels)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=labels)
# Key by the actual class label; keying by position would mislabel the
# weights whenever the labels present are not exactly 0..k-1.
class_weights_dict = {int(label): weight for label, weight in zip(classes, class_weights)}
class_weights_dict

In [None]:
from transformers import Trainer
from torch.nn import CrossEntropyLoss
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments
import torch
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Read the labels straight from the dataset's label list instead of indexing
# the dataset, which built a full dict of tensors per sample just for one int.
labels = np.array(train_dataset.labels)
# 'balanced' weights, one per class present in the training labels, in
# ascending label order (the order np.unique returns).
class_weights_array = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
class_weights_tensor = torch.tensor(class_weights_array, dtype=torch.float)

def compute_metrics(eval_pred):
    """Compute evaluation metrics from a ``(logits, labels)`` pair.

    Args:
        eval_pred: Pair of raw class logits (optionally wrapped in a tuple,
            optionally 3-D) and the true integer labels.

    Returns:
        Dict with ``eval_``-prefixed weighted accuracy/precision/recall,
        one-vs-rest weighted ROC AUC (0.0 when it cannot be computed) and the
        confusion matrix as a nested list over classes 0, 1, 2.
    """
    logits, labels = eval_pred

    # Some models return a tuple of outputs; the logits come first.
    if isinstance(logits, tuple):
        logits = logits[0]

    # 3-D logits are averaged over axis 1 to get one score vector per sample.
    if logits.ndim == 3:
        logits = logits.mean(axis=1)

    probs = F.softmax(torch.tensor(logits), dim=1).numpy()
    preds = np.argmax(probs, axis=1)

    accuracy = accuracy_score(labels, preds)
    # zero_division=0 returns the same value as the default but without a
    # warning when a class is never predicted.
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted', zero_division=0)

    try:
        roc_auc = roc_auc_score(labels, probs, multi_class='ovr', average='weighted')
    except ValueError:
        # Raised by scikit-learn when the AUC is undefined for this batch of
        # labels (e.g. a class is absent); other errors should surface.
        roc_auc = 0.0

    # Fixed label set keeps the matrix 3x3 even if a class is absent, so it
    # always lines up with the '0'/'1'/'2' tick labels used when plotting.
    conf_matrix = confusion_matrix(labels, preds, labels=[0, 1, 2])
    return {
        'eval_accuracy': accuracy,
        'eval_precision': precision,
        'eval_recall': recall,
        'eval_roc_auc': roc_auc,
        'eval_confusion_matrix': conf_matrix.tolist(),
    }


# Reload vinai/phobert-base with 3 output labels; rebinding `model` here means
# the weighted run does not continue from the model trained above.
model = AutoModelForSequenceClassification.from_pretrained("vinai/phobert-base", num_labels=3)


class WeightedTrainer(Trainer):
    """Trainer whose loss is cross-entropy weighted per class.

    Args:
        class_weights: 1-D float tensor with one weight per class.
        *args, **kwargs: Forwarded unchanged to ``Trainer``.
    """

    def __init__(self, class_weights, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Put the weights on the GPU when one is available, otherwise the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.class_weights = class_weights.to(device)
        self.loss_fn = CrossEntropyLoss(weight=self.class_weights)

    # This method was indented by two spaces, which does not match any
    # enclosing block and made the class definition an IndentationError.
    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        """Return the weighted loss, plus the model outputs when requested.

        ``num_items_in_batch`` is accepted but not used by this loss.
        """
        # Labels are removed from `inputs` so the model does not compute its
        # own unweighted loss.
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss = self.loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss


# Weighted run: evaluates and checkpoints once per epoch.
# NOTE(review): warmup_steps=500 may exceed the total number of optimisation
# steps on a small dataset - confirm against the training-set size.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    eval_strategy="epoch",
    save_strategy="epoch",
    report_to="none",
)  # this closing parenthesis was missing, making the cell a SyntaxError

trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    class_weights=class_weights_tensor,
    compute_metrics=compute_metrics
)

trainer.train()

In [None]:
def compute_metrics(eval_pred):
    """Compute evaluation metrics from a ``(logits, labels)`` pair.

    NOTE(review): redefining this name after a trainer has been constructed
    does not change the function object that trainer already holds - confirm
    whether this copy is still needed.

    Args:
        eval_pred: Pair of raw class logits (optionally wrapped in a tuple,
            optionally 3-D) and the true integer labels.

    Returns:
        Dict with ``eval_``-prefixed weighted accuracy/precision/recall,
        one-vs-rest weighted ROC AUC (0.0 when it cannot be computed) and the
        confusion matrix as a nested list over classes 0, 1, 2.
    """
    logits, labels = eval_pred

    # Some models return a tuple of outputs; the logits come first.
    if isinstance(logits, tuple):
        logits = logits[0]

    # 3-D logits are averaged over axis 1 to get one score vector per sample.
    if logits.ndim == 3:
        logits = logits.mean(axis=1)

    probs = F.softmax(torch.tensor(logits), dim=1).numpy()
    preds = np.argmax(probs, axis=1)

    accuracy = accuracy_score(labels, preds)
    # zero_division=0 returns the same value as the default but without a
    # warning when a class is never predicted.
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted', zero_division=0)

    try:
        roc_auc = roc_auc_score(labels, probs, multi_class='ovr', average='weighted')
    except ValueError:
        # Raised by scikit-learn when the AUC is undefined for this batch of
        # labels (e.g. a class is absent); other errors should surface.
        roc_auc = 0.0

    # Fixed label set keeps the matrix 3x3 even if a class is absent, so it
    # always lines up with the '0'/'1'/'2' tick labels used when plotting.
    conf_matrix = confusion_matrix(labels, preds, labels=[0, 1, 2])
    return {
        'eval_accuracy': accuracy,
        'eval_precision': precision,
        'eval_recall': recall,
        'eval_roc_auc': roc_auc,
        'eval_confusion_matrix': conf_matrix.tolist(),
    }
# Evaluate the weighted model, print its scalar metrics as a one-row table and
# draw its confusion matrix.
eval_results = trainer.evaluate()

report_row = {}
for column, key in (
    ('Loss', 'eval_loss'),
    ('Accuracy', 'eval_accuracy'),
    ('Precision', 'eval_precision'),
    ('Recall', 'eval_recall'),
    ('ROC AUC', 'eval_roc_auc'),
):
    report_row[column] = eval_results[key]
eval_df = pd.DataFrame([report_row])
print(eval_df)

conf_matrix = np.array(eval_results['eval_confusion_matrix'])
tick_labels = ['0', '1', '2']
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=tick_labels, yticklabels=tick_labels)
plt.title("Confusion Matrix")
plt.show()