In [40]:
__file__ = "__init__.py"

In [41]:
import sys, os, json5
import numpy as np
import pandas as pd
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
from transformers import pipeline
from torch.utils.data import DataLoader, Dataset

sys.path.append(str(Path(__file__).resolve().parents[1]))

from utils.nlp_utils import tokenize, stem, bag_of_words
from utils.ncomp import rlst, srlst, clst, glst, rrlst, dtlst, sslst, blst
from handlers.rcm import searching
# from nlp.ChatModel import NeuralNetwork, ChatDataset

In [42]:
project_root = Path(__file__).resolve().parents[1]

paths = {
    "intents": os.path.abspath(f"{project_root}/intents"),
    "patterns": os.path.abspath(f"{project_root}/intents/patterns.json"),
    "responses": os.path.abspath(f"{project_root}/intents/responses.json"),
    "replymodel": os.path.abspath(f"{project_root}/models/replymodel.pth"),
}

In [43]:
intent_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Device set to use cuda:0


In [44]:
class CustomNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.3):
        super(CustomNN, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.ln1 = nn.LayerNorm(hidden_size)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.ln2 = nn.LayerNorm(hidden_size)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.l1(x)
        out = self.ln1(out)
        out = self.relu(out)
        out = self.dropout1(out)
        out = self.l2(out)
        out = self.ln2(out)
        out = self.relu(out)
        out = self.dropout2(out)
        out = self.l3(out)
        return out

    def optimize(self, pruning_amount=0.3):
        for _, module in self.named_modules():
            if isinstance(module, nn.Linear):
                prune.l1_unstructured(module, name='weight', amount=pruning_amount)
        self.half()
        scripted_model = torch.jit.script(self)
        return scripted_model
    
    def __getstate__(self):
        state = {
            'input_size': self.input_size,
            'hidden_size': self.hidden_size,
            'num_classes': self.num_classes,
            'dropout_rate': self.dropout_rate,
            'state_dict': self.state_dict()
        }
        return state

    def __setstate__(self, state):
        self.__init__(state['input_size'], state['hidden_size'], state['num_classes'], state['dropout_rate'])
        self.load_state_dict(state['state_dict'])
        

class DialogueDataset(Dataset):
    def __init__(self, X_train, Y_train):
        self.n_samples = len(X_train)
        self.x_data = X_train
        self.y_data = Y_train

    # Hỗ trợ việc truy cập bằng chỉ số để có thể lấy mẫu thứ i trong tập dữ liệu bằng cách dùng dataset[i].
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    # Chúng ta có thể gọi len(dataset) để trả về kích thước.
    def __len__(self):
        return self.n_samples

with open(paths["patterns"], "r", encoding="utf-8") as f:
    intents = json5.load(f)

all_words, tags, xy = [], [], []
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        word = tokenize(pattern)
        all_words.extend(word)
        xy.append((word, tag))
        
# Xóa các từ trùng lặp và sắp xếp chúng
all_words = sorted(set(all_words))
tags = sorted(set(tags))

# Tạo dữ liệu huấn luyện
X_train = []
Y_train = []
for (pattern_sentence, tag) in xy:
    # X: Túi từ cho mỗi câu mẫu (pattern_sentence).
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    # Y: PyTorch CrossEntropyLoss chỉ cần class labels, không cần one-hot.
    label = tags.index(tag)
    Y_train.append(label)
X_train = np.array(X_train)
Y_train = np.array(Y_train)

# Hyper-parameters - Các siêu tham số
num_epochs = 500 # Số lần lặp qua toàn bộ dữ liệu huấn luyện
batch_size = 8 # Số mẫu dữ liệu được xử lý cùng một lúc trong mỗi lần cập nhật trọng số.
learning_rate = 0.001 # Tốc độ học, điều chỉnh độ lớn của bước cập nhật trọng số.
input_size = len(X_train[0]) # Kích thước đầu vào của mô hình, bằng số lượng từ trong bộ từ vựng (all_words).
hidden_size = 8 # Kích thước của lớp ẩn (hidden layer) trong mô hình.
output_size = len(tags) # Kích thước đầu ra của mô hình, bằng số lượng nhãn (tags).

dataset = DialogueDataset(X_train, Y_train)
train_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0
    )

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Nếu có GPU thì sử dụng, nếu không thì sử dụng CPU
model = CustomNN(input_size, hidden_size, output_size).to(device)

# Loss function và optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Huấn luyện mô hình qua nhiều epoch.
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)

        # Forward pass - Lan truyền tiến là quá trình mô hình tính toán đầu ra (output) dựa trên dữ liệu đầu vào (input)
        outputs = model(words)
        loss = criterion(outputs, labels) # Tính toán loss giữa outputs và labels.

        # Backward and optimize - Lan truyền ngược và tối ưu hóa
        optimizer.zero_grad() # Xóa gradient từ trước đó, tránh việc gradient tích lũy.
        loss.backward() # thực hiện lan truyền ngược, tính toán gradient của loss đối với tất cả các tham số của mô hình bằng cách sử dụng quy tắc chuỗi (chain rule).
        optimizer.step() # cập nhật trọng số của mô hình dựa trên gradient đã tính toán.

    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'final loss: {loss.item():.4f}')

data = {
"model_state": model.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"all_words": all_words,
"tags": tags
}

torch.save(data, paths["replymodel"]) 
print(f'training complete. file saved to {paths["replymodel"]}')

Epoch [100/500], Loss: 0.5783
Epoch [200/500], Loss: 0.6499
Epoch [300/500], Loss: 0.5844
Epoch [400/500], Loss: 1.2764
Epoch [500/500], Loss: 0.7132
final loss: 0.7132
training complete. file saved to C:\Users\trtie\OneDrive - camann\Documents\GitHub - Repository\Astorine\src\chatbot\models\replymodel.pth


In [None]:
data = torch.load(paths["replymodel"])
model = CustomNN(data["input_size"], data["hidden_size"], data["output_size"]).to(device)
model.load_state_dict(data["model_state"])
model.eval()
all_words = data["all_words"]
tags = data["tags"]

def predict_intent(user_input):
    words = tokenize(user_input)
    bag = bag_of_words(words, all_words)
    bag = torch.from_numpy(bag).float().to(device)
    bag = bag.unsqueeze(0)  # Thêm batch dimension: [input_size] -> [1, input_size]
    with torch.no_grad():
        output = model(bag)  # output sẽ có kích thước [1, num_classes]
        _, predicted = torch.max(output, dim=1)  # predicted sẽ là [1]
        tag = tags[predicted.item()]
        probs = torch.softmax(output, dim=1)
        prob = probs[0][predicted.item()]
        if prob.item() > 0.75:  # Ngưỡng tin cậy
            return tag
        return "unknown"

sessions = {}

class ChatbotSession:
    def __init__(self):
        self.current_flow = "guided"  # 'guided' hoặc 'search'
        self.context = None           # ví dụ: "Need_help", "collecting_criteria", v.v.
        self.criteria = {}            # lưu trữ các tiêu chí: brand, gpu, cpu, ...
        self.previous_flow = None     # lưu flow tạm thời khi trả lời FAQ

def get_session(user_id):
    if user_id not in sessions:
        sessions[user_id] = ChatbotSession()
    return sessions[user_id]

def is_faq_question(text: str) -> bool:
    faq_triggers = ["what is", "explain", "how does"]
    return any(text.lower().startswith(trigger) for trigger in faq_triggers)

def is_direct_search_query(text: str) -> bool:
    # Nếu câu có nhiều từ và chứa các từ khóa chỉ ra yêu cầu tìm kiếm laptop
    return len(text.split()) > 15 and any(term in text.lower() for term in ["suggest", "recommend", "find", "laptop"])

def is_faq_question(text: str) -> bool:
    faq_triggers = ["what is", "explain", "how does"]
    return any(text.lower().startswith(trigger) for trigger in faq_triggers)

def get_faq_answer(query: str) -> str:
    faq_dict = {
        "what is gpu": "GPU (Graphics Processing Unit).",
        "what is cpu": "CPU (Central Processing Unit).",
        "what is ram": "RAM (Random Access Memory)."
    }
    for key in faq_dict:
        if key in query.lower():
            return faq_dict[key]
    return "I don't have the answer to that question."

def update_session_criteria(session: ChatbotSession, user_input: str):
    lower_input = user_input.lower()
    if "brand" in lower_input or any(brand in lower_input for brand in blst()):
        session.criteria["brand"] = user_input
    else:
        session.criteria["brand"] = None
    if "gpu" in lower_input or any(x in lower_input for x in glst()):
        session.criteria["gpu"] = user_input
    else:
        session.criteria["gpu"] = None
    if "cpu" in lower_input or any(x in lower_input for x in clst()):
        session.criteria["cpu"] = user_input
    if "ram" in lower_input or any(x in lower_input for x in rlst()):
        session.criteria["ram"] = user_input
    else:
        session.criteria["ram"] = None
    if "resolution" in lower_input or any(x in lower_input for x in srlst()):
        session.criteria["resolution"] = user_input
    else:
        session.criteria["resolution"] = None
    if "refresh" in lower_input or any(x in lower_input for x in rrlst()):
        session.criteria["refresh rate"] = user_input
    else:
        session.criteria["refresh rate"] = None
    if "display" in lower_input or any(x in lower_input for x in dtlst()):
        session.criteria["display type"] = user_input
    else:
        session.criteria["display type"] = None
    if "screen" in lower_input or any(x in lower_input for x in sslst()):
        session.criteria["screen size"] = user_input
    else:
        session.criteria["screen size"] = None

def get_next_question(session: ChatbotSession) -> str:
    questions = {
        "brand": "Of course! Let's start with the brand. Any preferences?",
        "gpu": "Noted! Which GPU are you aiming for?",
        "cpu": "Cool! Which processor do you prefer?",
        "ram": "Alright! What's your RAM requirement?",
        "resolution": "What screen resolution do you want?",
        "refresh rate": "What is the desired screen refresh rate?",
        "display type": "What type of screen do you prefer (IPS, OLED, ...)?",
        "screen size": "Great choice! What screen size are you looking for?"
    }
    for key in questions:
        if key not in session.criteria:
            return questions[key]
    return "🔍 Searching for the best options..."

def process_user_input(user_id: str, user_input: str):
    session = get_session(user_id)
    intent = predict_intent(user_input)

    # Xử lý các ý định cụ thể
    if intent == "search" or is_direct_search_query(user_input):
        session.current_flow = "search"
        return searching(user_input)
    
    elif intent == "faq" or is_faq_question(user_input):
        prev_flow = session.current_flow
        faq_answer = get_faq_answer(user_input)
        session.current_flow = prev_flow
        return faq_answer
    
    elif intent == "start_guided":
        session.current_flow = "guided"
        session.context = "collecting_brand"
        return "Let's start with the brand. Any preferences?"
    
    elif session.current_flow == "guided":
        update_session_criteria(session, user_input)
        next_q = get_next_question(session)
        if next_q == "🔍 Searching for the best options...":
            return searching(session.criteria)
        return next_q
    
    return "I'm sorry, I don't understand that question."

# Sau khi huấn luyện và lưu mô hình
data = torch.load(paths["replymodel"])
model = CustomNN(data["input_size"], data["hidden_size"], data["output_size"]).to(device)
model.load_state_dict(data["model_state"])
model.eval()
all_words = data["all_words"]
tags = data["tags"]

user_id = "user_123"
print("Chatbot: halo")

while True:
    user_input = input("User: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Chatbot: thanks for chatting!")
        break
    response = process_user_input(user_id, user_input)
    print("Chatbot:", response)

Chatbot: halo
Chatbot: Cool! Which processor do you prefer?
Chatbot: Cool! Which processor do you prefer?
Chatbot: Cool! Which processor do you prefer?
Chatbot: Cool! Which processor do you prefer?
Chatbot: Cool! Which processor do you prefer?
Chatbot: Cool! Which processor do you prefer?
Chatbot: Cool! Which processor do you prefer?
Chatbot: thanks for chatting!


In [None]:
# def is_direct_search_query(text: str) -> bool:
#     # Nếu câu có nhiều từ và chứa các từ khóa chỉ ra yêu cầu tìm kiếm laptop
#     return len(text.split()) > 15 and any(term in text.lower() for term in ["suggest", "recommend", "find", "laptop"])

# def is_faq_question(text: str) -> bool:
#     faq_triggers = ["what is", "explain", "how does"]
#     return any(text.lower().startswith(trigger) for trigger in faq_triggers)
# def get_faq_answer(query: str) -> str:
#     faq_dict = {
#         "what is gpu": "GPU (Graphics Processing Unit).",
#         "what is cpu": "CPU (Central Processing Unit).",
#         "what is ram": "RAM (Random Access Memory)."
#     }
#     for key in faq_dict:
#         if key in query.lower():
#             return faq_dict[key]
#     return "I don't have the answer to that question."

# def update_session_criteria(session: ChatbotSession, user_input: str):
#     lower_input = user_input.lower()
#     if "brand" in lower_input or any(brand in lower_input for brand in blst()):
#         session.criteria["brand"] = user_input
#     else:
#         session.criteria["brand"] = None
#     if "gpu" in lower_input or any(x in lower_input for x in glst()):
#         session.criteria["gpu"] = user_input
#     else:
#         session.criteria["gpu"] = None
#     if "cpu" in lower_input or any(x in lower_input for x in clst()):
#         session.criteria["cpu"] = user_input
#     if "ram" in lower_input or any(x in lower_input for x in rlst()):
#         session.criteria["ram"] = user_input
#     else:
#         session.criteria["ram"] = None
#     if "resolution" in lower_input or any(x in lower_input for x in srlst()):
#         session.criteria["resolution"] = user_input
#     else:
#         session.criteria["resolution"] = None
#     if "refresh" in lower_input or any(x in lower_input for x in rrlst()):
#         session.criteria["refresh rate"] = user_input
#     else:
#         session.criteria["refresh rate"] = None
#     if "display" in lower_input or any(x in lower_input for x in dtlst()):
#         session.criteria["display type"] = user_input
#     else:
#         session.criteria["display type"] = None
#     if "screen" in lower_input or any(x in lower_input for x in sslst()):
#         session.criteria["screen size"] = user_input
#     else:
#         session.criteria["screen size"] = None

# def get_next_question(session: ChatbotSession) -> str:
#     questions = {
#         "brand": "Of course! Let's start with the brand. Any preferences?",
#         "gpu": "Noted! Which GPU are you aiming for?",
#         "cpu": "Cool! Which processor do you prefer?",
#         "ram": "Alright! What's your RAM requirement?",
#         "resolution": "What screen resolution do you want?",
#         "refresh rate": "What is the desired screen refresh rate?",
#         "display type": "What type of screen do you prefer (IPS, OLED, ...)?",
#         "screen size": "Great choice! What screen size are you looking for?"
#     }
#     for key in questions:
#         if key not in session.criteria:
#             return questions[key]
#     return "🔍 Searching for the best options..."

# def process_user_input(user_id: str, user_input: str):
#     session = get_session(user_id)
    
#     # Nếu câu nhập dài và chứa thông tin mô tả chi tiết, chuyển sang flow tìm kiếm
#     if is_direct_search_query(user_input):
#         session.current_flow = "search"
#         return searching(user_input)
    
#     # Nếu câu hỏi dạng FAQ (ví dụ: "what is gpu?")
#     if is_faq_question(user_input):
#         prev_flow = session.current_flow
#         faq_answer = get_faq_answer(user_input)
#         session.current_flow = prev_flow
#         return faq_answer
    
#     # Nếu đang trong guided flow, cập nhật tiêu chí và hỏi câu tiếp theo
#     if session.current_flow == "guided":
#         update_session_criteria(session, user_input)
#         next_q = get_next_question(session)
#         return next_q
    
#     return "I'm sorry, I don't understand that question."

In [None]:
# user_id = "user_123"
# print("Chatbot: halo")

# while True:
#     user_input = input("User: ")
#     if user_input.lower() in ["exit", "quit"]:
#         print("Chatbot: thanks for chatting!")
#         break
#     response = process_user_input(user_id, user_input)
#     print("Chatbot:", response)

Chatbot: halo
Chatbot: Of course! Let's start with the brand. Any preferences?
Chatbot: Noted! Which GPU are you aiming for?
Chatbot: Cool! Which processor do you prefer?
Chatbot: Cool! Which processor do you prefer?
Chatbot: Alright! What's your RAM requirement?
Chatbot: What screen resolution do you want?
Chatbot: What screen resolution do you want?
Chatbot: What screen resolution do you want?
Chatbot: What screen resolution do you want?
Chatbot: What screen resolution do you want?
Chatbot: What is the desired screen refresh rate?
Chatbot: What is the desired screen refresh rate?
Chatbot: What type of screen do you prefer (IPS, OLED, ...)?
Chatbot: 🔍 Searching for the best options...
Chatbot: 🔍 Searching for the best options...
Chatbot: 🔍 Searching for the best options...
Chatbot: 🔍 Searching for the best options...
Chatbot: 🔍 Searching for the best options...
Chatbot: 🔍 Searching for the best options...
Chatbot: 🔍 Searching for the best options...
Chatbot: 🔍 Searching for the best o