In [None]:
import sys
import os
import spacy
import torch
import numpy as np
from functools import lru_cache
from typing import List
from concurrent.futures import ThreadPoolExecutor
import random
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.utils.prune as prune
from tqdm.auto import tqdm

In [None]:
# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Shared spaCy pipeline: the transformer model when CUDA is available, the small
# model otherwise. If the chosen model is not installed (OSError) the small model
# is tried; if that load fails too, its OSError propagates. Any other exception
# is reported and terminates the interpreter.
try:
    nlp = spacy.load("en_core_web_trf" if torch.cuda.is_available() else "en_core_web_sm")
except OSError:
    print("Could not load en_core_web_trf or en_core_web_sm. Falling back to en_core_web_sm.")
    nlp = spacy.load("en_core_web_sm")
except Exception as e:
    print(f"Error loading spaCy models: {e}")
    sys.exit(1)

# Pool of pipeline instances used by tokenize_parallel (one worker thread per entry).
# It always contains the shared `nlp`; additional copies are loaded on top of it:
# up to 3 (bounded by the CPU count) with CUDA, otherwise one per CPU.
# NOTE(review): every entry is a full model load, so memory grows with the pool size.
nlp_pool = [nlp]
try:
    if torch.cuda.is_available():
        for _ in range(min(3, os.cpu_count() or 1)):
            try:
                nlp_pool.append(spacy.load("en_core_web_trf"))
            except OSError:
                # Transformer model unavailable for this slot: use the small model instead.
                nlp_pool.append(spacy.load("en_core_web_sm"))
    else:
        for _ in range(os.cpu_count() or 1):
            nlp_pool.append(spacy.load("en_core_web_sm"))
except OSError:
    # Best effort: keep whatever instances were loaded before the failure.
    pass

def process_documents(texts: List[str]) -> List:
    """Parse every text with the shared spaCy pipeline and return the resulting docs.

    Args:
        texts: Raw strings to parse.

    Returns:
        One parsed document per input text, in input order.
    """
    parsed_stream = nlp.pipe(texts, batch_size=64)
    return [parsed for parsed in parsed_stream]

@lru_cache(maxsize=10000)
def lemmatize_cached(word: str) -> str:
    """Return the lower-cased lemma of ``word``, memoised across calls.

    Only the first token produced by the shared spaCy pipeline is used.

    Args:
        word: The text to lemmatize.

    Returns:
        The lower-cased lemma of the first token, or ``word.lower()`` when the
        pipeline produces no tokens (e.g. for an empty string).
    """
    doc = nlp(word)
    if len(doc) == 0:
        # An empty Doc has no doc[0]; fall back to the input instead of raising IndexError.
        return word.lower()
    return doc[0].lemma_.lower()

# Word -> lemma lookup consulted before the lemmatizer; pre-filled below.
COMMON_LEMMAS = {}
def populate_common_lemmas():
    """Pre-compute lemmas for frequent English words and store them in COMMON_LEMMAS."""
    frequent_words = (
        "the", "be", "to", "of", "and", "a", "in", "that", "have", "I",
        "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
        "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
    )
    # Entries are inserted one by one, in the order listed above.
    COMMON_LEMMAS.update((entry, lemmatize_cached(entry)) for entry in frequent_words)
populate_common_lemmas()

def tokenize_parallel(sentences: List[str]) -> List[List[str]]:
    """Tokenize sentences concurrently, spreading them round-robin over ``nlp_pool``.

    Args:
        sentences: The sentences to tokenize.

    Returns:
        One list of token texts per sentence, in input order.
    """
    pool_size = len(nlp_pool)

    def _token_texts(job):
        pipeline, text = job
        return [piece.text for piece in pipeline(text)]

    with ThreadPoolExecutor(max_workers=pool_size) as executor:
        # Sentence k is handled by pipeline k modulo the pool size.
        jobs = [
            (nlp_pool[position % pool_size], text)
            for position, text in enumerate(sentences)
        ]
        tokenized = list(executor.map(_token_texts, jobs))
    return tokenized

def tokenize(sentence: str) -> list:
    """Split one sentence into token texts using the shared spaCy pipeline."""
    token_texts = []
    for token in nlp(sentence):
        token_texts.append(token.text)
    return token_texts

def stem(word: str) -> str:
    """Return the lemma of ``word``: COMMON_LEMMAS first, the cached lemmatizer otherwise."""
    try:
        return COMMON_LEMMAS[word]
    except KeyError:
        return lemmatize_cached(word)

def batch_process_stems(words: List[str]) -> List[str]:
    """Lemmatize a list of words, reusing and extending the COMMON_LEMMAS cache.

    Words already present in COMMON_LEMMAS are answered from the cache. The
    remaining distinct words are sent through ``nlp.pipe`` once each and their
    lemmas are stored in COMMON_LEMMAS for later calls.

    Args:
        words: The words to lemmatize.

    Returns:
        The lower-cased lemmas, aligned index-for-index with ``words``.
    """
    # Pre-sized output so every lemma is written straight to its position
    # (the previous list.insert approach was quadratic in the input length).
    results: List[str] = [""] * len(words)
    # Uncached word -> every position it occupies, so duplicates are parsed once.
    pending = {}
    for position, word in enumerate(words):
        if word in COMMON_LEMMAS:
            results[position] = COMMON_LEMMAS[word]
        else:
            pending.setdefault(word, []).append(position)

    if pending:
        unique_words = list(pending)
        for word, doc in zip(unique_words, nlp.pipe(unique_words, batch_size=64)):
            # An empty Doc (e.g. for "") has no first token; fall back to the word itself.
            lemma = doc[0].lemma_.lower() if len(doc) > 0 else word.lower()
            COMMON_LEMMAS[word] = lemma
            for position in pending[word]:
                results[position] = lemma

    return results

def create_bow_matrix(tokenized_sentences: List[List[str]], all_words: List[str]) -> np.ndarray:
    """Build a binary bag-of-words matrix for a batch of tokenized sentences.

    Args:
        tokenized_sentences: One token list per sentence.
        all_words: The vocabulary; column ``k`` corresponds to ``all_words[k]``.

    Returns:
        A float32 array of shape ``(len(tokenized_sentences), len(all_words))``
        holding 1.0 where a sentence contains a lemma found in the vocabulary.
    """
    column_of = dict(zip(all_words, range(len(all_words))))
    bow = np.zeros((len(tokenized_sentences), len(all_words)), dtype=np.float32)
    for row, tokens in enumerate(tokenized_sentences):
        for lemma in set(batch_process_stems(tokens)):
            column = column_of.get(lemma)
            if column is not None:
                bow[row, column] = 1.0
    return bow

def bag_of_words(tokenized_sentence: list, all_words: list) -> np.ndarray:
    """Encode one tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Tokens of the sentence; they are lemmatized here.
        all_words: The vocabulary; position ``k`` of the result corresponds to
            ``all_words[k]``.

    Returns:
        A float32 vector of length ``len(all_words)`` with 1.0 at the position
        of every vocabulary lemma present in the sentence.
    """
    lemmas = set(batch_process_stems(tokenized_sentence))
    # The index is rebuilt on every call. The previous function-attribute cache
    # was only invalidated when the vocabulary *length* changed, so a different
    # vocabulary of the same size was silently encoded with stale positions.
    word_to_idx = {word: i for i, word in enumerate(all_words)}
    bag_cpu = np.zeros(len(all_words), dtype=np.float32)
    for lemma in lemmas:
        idx = word_to_idx.get(lemma)
        if idx is not None:
            bag_cpu[idx] = 1.0

    return bag_cpu

In [None]:
class CustomNN(nn.Module):
    """Four-layer feed-forward classifier with LayerNorm, ReLU and dropout.

    Layer widths are ``input_size -> hidden_size -> 2 * hidden_size ->
    hidden_size -> num_classes``. ``forward`` returns raw logits.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the first and third hidden layers.
        num_classes: Number of output logits.
        dropout_rate: Dropout probability applied after each hidden block.
    """

    def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.3):
        super().__init__()
        # Kept so that __getstate__/__setstate__ can rebuild the architecture.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.dropout_rate = dropout_rate
        self.l1 = nn.Linear(input_size, hidden_size)
        self.ln1 = nn.LayerNorm(hidden_size)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.l2 = nn.Linear(hidden_size, hidden_size * 2)
        self.ln2 = nn.LayerNorm(hidden_size * 2)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.l3 = nn.Linear(hidden_size * 2, hidden_size)
        self.ln3 = nn.LayerNorm(hidden_size)
        self.dropout3 = nn.Dropout(dropout_rate)
        self.l4 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal initialise every Linear weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Compute class logits for ``x`` (last dimension must be ``input_size``)."""
        out = self.l1(x)
        out = self.ln1(out)
        out = self.relu(out)
        out = self.dropout1(out)
        identity = out
        out = self.l2(out)
        out = self.ln2(out)
        out = self.relu(out)
        out = self.dropout2(out)
        out = self.l3(out)
        out = self.ln3(out)
        out = self.relu(out)
        # Skip connection around the l2/l3 blocks, only taken when
        # input_size == hidden_size.
        # NOTE(review): `identity` and `out` are both hidden_size wide regardless of
        # input_size; the condition is kept as-is so trained checkpoints behave the same.
        if hasattr(self, 'input_size') and self.input_size == self.hidden_size:
            out = out + identity
        out = self.dropout3(out)

        out = self.l4(out)
        return out

    def optimize(self, pruning_amount=0.3):
        """Prune, half-precision convert and (if possible) TorchScript the model.

        Every Linear layer loses the ``pruning_amount`` fraction of its weights
        with the smallest magnitude. The pruning is made permanent, so each
        layer keeps a plain ``weight`` parameter and ``state_dict()`` keys stay
        unchanged (no ``weight_orig`` / ``weight_mask`` entries).

        Note that ``self`` is modified in place: it is pruned and converted to
        float16.

        Args:
            pruning_amount: Fraction (or absolute number) of weights to zero in
                each Linear layer, as accepted by ``prune.l1_unstructured``.

        Returns:
            The scripted half-precision model, or the half-precision eager
            model when scripting fails.
        """
        linear_layers = [m for m in self.modules() if isinstance(m, nn.Linear)]
        # Fold in any pruning applied earlier so the layers start from a plain weight.
        for layer in linear_layers:
            if hasattr(layer, 'weight_orig'):
                prune.remove(layer, 'weight')
        for layer in linear_layers:
            prune.l1_unstructured(layer, name='weight', amount=pruning_amount)
            # Make the mask permanent: drops the forward pre-hook and the extra
            # weight_orig/weight_mask tensors that would otherwise change the
            # state_dict keys and break __setstate__/load_state_dict.
            prune.remove(layer, 'weight')
        half_model = self.half()
        try:
            scripted_model = torch.jit.script(half_model)
            return scripted_model
        except Exception as e:
            print(f"Error during scripting: {e}")
            return half_model

    def __getstate__(self):
        """Return the constructor arguments plus the state dict for pickling."""
        state = {
            'input_size': self.input_size,
            'hidden_size': self.hidden_size,
            'num_classes': self.num_classes,
            'dropout_rate': self.dropout_rate,
            'state_dict': self.state_dict()
        }
        return state

    def __setstate__(self, state):
        """Rebuild the module from a ``__getstate__`` payload and load its weights."""
        self.__init__(state['input_size'], state['hidden_size'], state['num_classes'], state['dropout_rate'])
        self.load_state_dict(state['state_dict'])

class DialogueDataset(Dataset):
    """In-memory dataset of feature vectors and integer class labels.

    Args:
        X_train: Sequence (or array) of equally sized numeric feature vectors.
        Y_train: Sequence (or array) of integer labels, one per feature vector.

    Raises:
        ValueError: If ``X_train`` and ``Y_train`` differ in length.
    """

    def __init__(self, X_train, Y_train):
        if len(X_train) != len(Y_train):
            raise ValueError(
                f"X_train and Y_train must have the same length "
                f"(got {len(X_train)} and {len(Y_train)})"
            )
        self.n_samples = len(X_train)
        # np.array copies the inputs, so the tensors never alias caller data.
        # Explicit dtypes replace the legacy FloatTensor/LongTensor constructors;
        # the labels were previously converted twice.
        self.x_data = torch.as_tensor(np.array(X_train), dtype=torch.float32)
        self.y_data = torch.as_tensor(np.array(Y_train), dtype=torch.long)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

In [None]:
class chatbottraining:
    """Train an intent classifier from an intents dictionary and save a checkpoint.

    Constructing an instance runs the whole pipeline: the patterns in
    ``training_data`` are tokenized, lemmatized, augmented and encoded as
    bag-of-words vectors; the data is split into training and validation
    parts; a ``CustomNN`` is trained and the result is written to ``path``.

    Args:
        path: Destination of the checkpoint written with ``torch.save``.
        training_data: Dictionary with an ``"intents"`` list; intents that have
            both a ``"tag"`` and a ``"patterns"`` entry are used.
        hidden_size: Hidden width passed to ``CustomNN``.
        batch_size: Mini-batch size of both data loaders.
        dropout_rate: Dropout probability passed to ``CustomNN``.
        learning_rate: Initial AdamW learning rate.
        num_epochs: Maximum number of epochs.
        validation_split: Fraction of samples held out for validation, in ``[0, 1)``.
        patience: Number of consecutive non-improving epochs that stops training.
        use_cuda: Train on the GPU when CUDA is available.

    Raises:
        ValueError: If ``validation_split`` is outside ``[0, 1)`` or the
            training data yields no samples.
    """

    def __init__(
        self,
        path: str,
        training_data: dict,
        hidden_size: int = 64,
        batch_size: int = 32,
        dropout_rate: float = 0.2,
        learning_rate: float = 3e-4,
        num_epochs: int = 200,
        validation_split: float = 0.2,
        patience: int = 10,
        use_cuda: bool = True
    ) -> None:
        if not 0.0 <= validation_split < 1.0:
            raise ValueError(f"validation_split must be in [0, 1), got {validation_split}")

        self.intents_data = training_data
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.validation_split = validation_split
        self.patience = patience
        self.use_cuda = use_cuda and torch.cuda.is_available()

        X_train, Y_train = self._prepare_dataset()
        if not X_train:
            raise ValueError("training_data contains no intents with both 'tag' and 'patterns'")
        self.input_size = len(X_train[0])
        self.output_size = len(self.tags)

        # Fixed-seed shuffle so the train/validation split is repeatable.
        dataset_size = len(X_train)
        indices = list(range(dataset_size))
        val_split = int(np.floor(self.validation_split * dataset_size))
        np.random.seed(42)
        np.random.shuffle(indices)

        train_indices, val_indices = indices[val_split:], indices[:val_split]
        train_dataset = DialogueDataset(
            [X_train[i] for i in train_indices],
            [Y_train[i] for i in train_indices],
        )
        val_dataset = DialogueDataset(
            [X_train[i] for i in val_indices],
            [Y_train[i] for i in val_indices],
        )
        num_workers = 0

        self.train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=num_workers,
            pin_memory=self.use_cuda
        )

        self.val_loader = DataLoader(
            dataset=val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=self.use_cuda
        )

        self._training(path)

    def _prepare_dataset(self):
        """Turn the intents into bag-of-words vectors and integer labels.

        Sets ``self.tags`` (distinct tags in first-seen order) and
        ``self.all_words`` (sorted distinct lemmas, punctuation removed).

        Returns:
            ``(X_train, Y_train)``: a list of float32 bag-of-words vectors and
            the list of matching indices into ``self.tags``.
        """
        self.all_words, self.tags, xy = [], [], []
        ignore_words = ['?', '!', '.', ',']

        patterns_list = []
        tags_list = []
        intents = self.intents_data['intents']

        total_steps = 100
        with tqdm(total=total_steps, desc="Preparing the training dataset") as main_bar:
            with tqdm(total=len(intents), desc="Tokenizing patterns in parallel", leave=False) as sub_bar1:
                for intent in intents:
                    if "tag" in intent and "patterns" in intent:
                        tag = intent['tag']
                        # A tag listed twice must not create a second, unused output class.
                        if tag not in self.tags:
                            self.tags.append(tag)
                        for pattern in intent['patterns']:
                            patterns_list.append(pattern.lower())
                            tags_list.append(tag)
                    sub_bar1.update(1)

                # Tokenize once, after all patterns are collected. Doing this inside
                # the loop re-tokenized the accumulated list for every intent and
                # appended earlier intents' patterns to the dataset again each time.
                all_tokenized_patterns = tokenize_parallel(patterns_list)
                for tokenized_pattern, tag in zip(all_tokenized_patterns, tags_list):
                    self.all_words.extend(tokenized_pattern)
                    xy.append((tokenized_pattern, tag))
                main_bar.update(25)

            with tqdm(total=100, desc="Processing word stems", leave=False) as sub_bar2:
                filtered_words = [word for word in self.all_words if word not in ignore_words]
                sub_bar2.update(25)

                stemmed_words = batch_process_stems(filtered_words)
                sub_bar2.update(50)

                self.all_words = sorted(set(stemmed_words))
                sub_bar2.update(25)
                main_bar.update(25)

            with tqdm(total=len(xy), desc="Augmenting dataset", leave=False) as sub_bar3:
                augmented_xy = []
                for pattern_words, tag in xy:
                    augmented_xy.append((pattern_words, tag))
                    # Patterns longer than three tokens get two extra variants:
                    # one with a random token removed and one with two tokens swapped.
                    if len(pattern_words) > 3:
                        dropout_words = pattern_words.copy()
                        drop_idx = np.random.randint(0, len(pattern_words))
                        dropout_words.pop(drop_idx)
                        augmented_xy.append((dropout_words, tag))

                        shuffled_words = pattern_words.copy()
                        i, j = np.random.choice(len(shuffled_words), 2, replace=False)
                        shuffled_words[i], shuffled_words[j] = shuffled_words[j], shuffled_words[i]
                        augmented_xy.append((shuffled_words, tag))
                    sub_bar3.update(1)
                main_bar.update(25)

            xy = augmented_xy
            tag_to_idx = {tag: idx for idx, tag in enumerate(self.tags)}
            with tqdm(total=len(xy), desc="Creating bag-of-words vectors", leave=False) as sub_bar4:
                X_train = []
                Y_train = []
                batch_size = 100
                for start in range(0, len(xy), batch_size):
                    batch_xy = xy[start:start + batch_size]
                    batch_sentences = [pattern for pattern, _ in batch_xy]
                    batch_bow = create_bow_matrix(batch_sentences, self.all_words)
                    for row, (_, tag) in enumerate(batch_xy):
                        X_train.append(batch_bow[row])
                        Y_train.append(tag_to_idx[tag])
                    sub_bar4.update(len(batch_xy))
                main_bar.update(25)
        return X_train, Y_train

    def _training(self, path: str) -> None:
        """Train the model with early stopping and save the best weights to ``path``.

        Uses AdamW, ``ReduceLROnPlateau`` and gradient clipping; on CUDA the
        forward pass runs under autocast with a gradient scaler. The weights
        with the lowest monitored loss are restored before saving. The
        monitored loss is the validation loss, or the training loss when the
        validation split is empty.

        Args:
            path: Destination of the checkpoint.

        Raises:
            RuntimeError: Re-raised when it occurs while training on the CPU.
                On CUDA a RuntimeError triggers one retry on the CPU instead.
        """
        device = torch.device('cuda' if self.use_cuda else 'cpu')
        model = CustomNN(
            self.input_size,
            self.hidden_size,
            self.output_size,
            self.dropout_rate
        ).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=self.learning_rate,
            weight_decay=1e-4
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=0.5,
            patience=5
        )
        scaler = torch.amp.GradScaler('cuda') if self.use_cuda else None
        best_val_loss = float('inf')
        early_stop_counter = 0
        best_model_state = None
        # With an empty validation set the averaged validation loss would be a
        # constant 0, which would defeat the scheduler and early stopping.
        has_validation = len(self.val_loader) > 0

        device_info = {}
        if self.use_cuda:
            current_device = torch.cuda.current_device()
            device_info['Device'] = f"GPU: {torch.cuda.get_device_name(current_device)}"
            device_info['VRAM'] = f"{torch.cuda.get_device_properties(current_device).total_memory / 1e9:.2f} GB"
        else:
            device_info['Device'] = "CPU"

        with tqdm(total=self.num_epochs, desc="Training") as epoch_bar:
            try:
                for epoch in range(self.num_epochs):
                    # ---- training pass ----
                    model.train()
                    epoch_loss = 0.0
                    batch_count = 0
                    correct = 0
                    total = 0
                    train_loader_iter = tqdm(self.train_loader, desc=f"Epoch {epoch+1}/{self.num_epochs} [Train]", leave=False)

                    for (words, labels) in train_loader_iter:
                        words = words.to(device, dtype=torch.float)
                        labels = labels.to(device, dtype=torch.long)

                        if self.use_cuda:
                            with torch.amp.autocast('cuda'):
                                outputs = model(words)
                                loss = criterion(outputs, labels)

                            optimizer.zero_grad()
                            scaler.scale(loss).backward()
                            # Unscale first so clipping sees the true gradient norm.
                            scaler.unscale_(optimizer)
                            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

                            scaler.step(optimizer)
                            scaler.update()
                        else:
                            outputs = model(words)
                            loss = criterion(outputs, labels)

                            optimizer.zero_grad()
                            loss.backward()
                            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                            optimizer.step()
                        epoch_loss += loss.item()
                        batch_count += 1
                        predicted = outputs.detach().argmax(dim=1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                        train_acc = correct / total if total > 0 else 0
                        train_loader_iter.set_postfix({
                            'loss': f"{loss.item():.4f}",
                            'acc': f"{train_acc:.4f}"
                        })

                    train_loss = epoch_loss / batch_count if batch_count > 0 else 0
                    train_acc = correct / total if total > 0 else 0

                    # ---- validation pass ----
                    model.eval()
                    val_loss = 0.0
                    val_batch_count = 0
                    val_correct = 0
                    val_total = 0

                    val_loader_iter = tqdm(self.val_loader, desc=f"Epoch {epoch+1}/{self.num_epochs} [Val]", leave=False)

                    with torch.no_grad():
                        for (words, labels) in val_loader_iter:
                            words = words.to(device, dtype=torch.float)
                            labels = labels.to(device, dtype=torch.long)

                            if self.use_cuda:
                                with torch.amp.autocast('cuda'):
                                    outputs = model(words)
                                    loss = criterion(outputs, labels)
                            else:
                                outputs = model(words)
                                loss = criterion(outputs, labels)

                            val_loss += loss.item()
                            val_batch_count += 1
                            predicted = outputs.argmax(dim=1)
                            val_total += labels.size(0)
                            val_correct += (predicted == labels).sum().item()
                            val_acc = val_correct / val_total if val_total > 0 else 0
                            val_loader_iter.set_postfix({
                                'loss': f"{loss.item():.4f}",
                                'acc': f"{val_acc:.4f}"
                            })

                    avg_val_loss = val_loss / val_batch_count if val_batch_count > 0 else 0
                    val_acc = val_correct / val_total if val_total > 0 else 0
                    monitored_loss = avg_val_loss if has_validation else train_loss
                    scheduler.step(monitored_loss)

                    epoch_bar.update(1)
                    epoch_bar.set_description(f"Epoch {epoch+1}/{self.num_epochs}")
                    epoch_bar.set_postfix({
                        **device_info,
                        'Train Loss': f"{train_loss:.4f}",
                        'Train Acc': f"{train_acc:.4f}",
                        'Val Loss': f"{avg_val_loss:.4f}",
                        'Val Acc': f"{val_acc:.4f}"
                    })

                    # Record this epoch BEFORE deciding whether to stop, so a break
                    # below can never discard the epoch that triggered it.
                    if monitored_loss < best_val_loss:
                        best_val_loss = monitored_loss
                        early_stop_counter = 0
                        # state_dict().copy() is shallow and would keep aliasing the
                        # live parameters; clone each tensor for a real snapshot.
                        best_model_state = {
                            name: tensor.detach().clone()
                            for name, tensor in model.state_dict().items()
                        }
                    else:
                        early_stop_counter += 1

                    if train_loss < 0.01 and avg_val_loss < 0.1:
                        print(f"Reached desired performance at epoch {epoch+1}")
                        break

                    if early_stop_counter >= self.patience:
                        print(f"Early stopping at epoch {epoch+1} with validation loss {best_val_loss:.4f}")
                        break

                if best_model_state is not None:
                    model.load_state_dict(best_model_state)

                print(f'Final validation loss: {best_val_loss:.4f}')
                checkpoint = {
                    "model_state": model.state_dict(),
                    "input_size": self.input_size,
                    "hidden_size": self.hidden_size,
                    "output_size": self.output_size,
                    "all_words": self.all_words,
                    "tags": self.tags
                }
                torch.save(checkpoint, path)
                print(f"Model saved to {path}")

            except RuntimeError as e:
                print(f"RuntimeError occurred: {str(e)}")
                print(f"Current device: {device}")
                if self.use_cuda:
                    print(f"CUDA available: {torch.cuda.is_available()}")
                    print(f"Current CUDA device: {torch.cuda.current_device()}")
                    print("Attempting to fall back to CPU...")
                    # Retry the whole training once on the CPU with a fresh model.
                    self.use_cuda = False
                    self._training(path)
                else:
                    raise

In [None]:
class chatbot():
    """Intent-classifying chatbot backed by a ``CustomNN`` checkpoint.

    The class attributes below are the default hyper-parameters; they are used
    whenever the matching constructor argument is ``None``.

    Args:
        path: Checkpoint file to load (and to write when training).
        training_data: Intents dictionary with an ``"intents"`` list; also the
            source of the canned responses.
        train: Train a new model and save it to ``path`` before loading.
        fine_tune: Run ``_fine_tune_model`` before loading.
        hidden_size, batch_size, dropout_rate, learning_rate, num_epochs,
        validation_split, patience: Training hyper-parameters.
        use_cuda: Use the GPU when CUDA is available.

    Raises:
        RuntimeError: If the checkpoint at ``path`` cannot be loaded.
    """

    hidden_size: int = 64
    batch_size: int = 32
    dropout_rate: float = 0.2
    learning_rate: float = 3e-4
    num_epochs: int = 200
    validation_split: float = 0.2
    patience: int = 10
    use_cuda: bool = True

    def __init__(
        self,
        path: str,
        training_data: dict,
        train: bool = False,
        fine_tune: bool = False,
        hidden_size: int = None,
        batch_size: int = None,
        dropout_rate: float = None,
        learning_rate: float = None,
        num_epochs: int = None,
        validation_split: float = None,
        patience: int = None,
        use_cuda: bool = True
        ) -> None:

        self.hidden_size = hidden_size if hidden_size is not None else chatbot.hidden_size
        self.batch_size = batch_size if batch_size is not None else chatbot.batch_size
        self.dropout_rate = dropout_rate if dropout_rate is not None else chatbot.dropout_rate
        self.learning_rate = learning_rate if learning_rate is not None else chatbot.learning_rate
        self.num_epochs = num_epochs if num_epochs is not None else chatbot.num_epochs
        self.validation_split = validation_split if validation_split is not None else chatbot.validation_split
        self.patience = patience if patience is not None else chatbot.patience
        self.use_cuda = use_cuda if use_cuda is not None else chatbot.use_cuda

        if train and training_data and path:
            # Forward the configured values; validation_split and patience used
            # to be hard-coded here and use_cuda was not passed at all.
            chatbottraining(
                path=path,
                training_data=training_data,
                hidden_size=self.hidden_size,
                batch_size=self.batch_size,
                dropout_rate=self.dropout_rate,
                learning_rate=self.learning_rate,
                num_epochs=self.num_epochs,
                validation_split=self.validation_split,
                patience=self.patience,
                use_cuda=self.use_cuda
            )

        if fine_tune and training_data and path:
            self._fine_tune_model(path, training_data)

        # Honour use_cuda for inference as well.
        self.device = torch.device('cuda' if self.use_cuda and torch.cuda.is_available() else 'cpu')
        self.intents_data = training_data
        intents = (training_data or {}).get("intents", [])
        self.responses_dict = {
            intent["tag"]: intent["responses"]
            for intent in intents
            if "tag" in intent and "responses" in intent
        }
        self._load_model(path = path)

    def _fine_tune_model(self, path, training_data):
        """Retrain with a tenth of the learning rate and half the epochs.

        Only ``hidden_size`` is taken from the existing checkpoint; the
        checkpoint at ``path`` is overwritten by the new run.

        NOTE(review): ``chatbottraining`` builds a fresh ``CustomNN``, so the
        previous weights are not used as a starting point.
        """
        # Only metadata is read, so the tensors can stay on the CPU.
        checkpoint = torch.load(path, map_location=torch.device('cpu'))
        chatbottraining(
            path=path,
            training_data=training_data,
            hidden_size=checkpoint["hidden_size"],
            batch_size=self.batch_size,
            dropout_rate=self.dropout_rate,
            learning_rate=self.learning_rate / 10,
            num_epochs=self.num_epochs // 2,
            validation_split=self.validation_split,
            patience=self.patience,
            use_cuda=self.use_cuda
        )

    def _load_model(self, path: str) -> None:
        """Load the trained model, vocabulary and tags from ``path``.

        Raises:
            RuntimeError: If anything goes wrong; the original exception is
                attached as the cause.
        """
        try:
            checkpoint = torch.load(path, map_location=self.device)
            self.model = CustomNN(
                checkpoint["input_size"],
                checkpoint["hidden_size"],
                checkpoint["output_size"],
                dropout_rate=self.dropout_rate
            ).to(self.device)
            self.model.load_state_dict(checkpoint["model_state"])
            self.model.eval()
            self.all_words = checkpoint["all_words"]
            self.tags = checkpoint["tags"]
        except Exception as e:
            print(f"Error loading model: {e}")
            raise RuntimeError(f"Failed to load model from {path}: {str(e)}") from e

    def predict_intent(self, query: str = None) -> str:
        """Return the predicted intent tag for ``query`` or ``"unknown"``.

        The query is lower-cased and its tokens are lemmatized once (inside
        ``bag_of_words``), mirroring how the training patterns are encoded.
        A prediction is accepted when its probability exceeds 0.6 if it leads
        the runner-up by more than 0.3, and 0.75 otherwise.

        Args:
            query: The user's message; empty or ``None`` yields ``"unknown"``.

        Returns:
            A tag from the checkpoint, or ``"unknown"``.
        """
        if not query:
            return "unknown"

        words = tokenize(query.lower())
        bag = bag_of_words(words, self.all_words)
        bag = torch.from_numpy(bag).float().to(self.device).unsqueeze(0)

        with torch.no_grad():
            output = self.model(bag)
            probs = torch.softmax(output, dim=1)

        # A single-class model has no runner-up; asking topk for 2 would raise.
        top_probs, top_indices = torch.topk(probs, min(2, probs.size(1)), dim=1)
        tag_idx = top_indices[0][0].item()
        prob = top_probs[0][0].item()
        runner_up = top_probs[0][1].item() if top_probs.size(1) > 1 else 0.0
        top_prob_diff = prob - runner_up
        confidence_threshold = 0.6 if top_prob_diff > 0.3 else 0.75

        if prob > confidence_threshold:
            return self.tags[tag_idx]
        return "unknown"

    def get_response(self, tag: str):
        """Pick a response for ``tag``, favouring longer responses.

        With several responses, each is chosen with probability proportional
        to its length (a rough proxy for specificity).

        Args:
            tag: The predicted intent tag.

        Returns:
            One of the configured responses, or a fixed apology when the tag
            is unknown or has no responses.
        """
        responses = self.responses_dict.get(tag)
        if not responses:
            return "I'm sorry, I don't understand that."
        if len(responses) == 1:
            return responses[0]

        response_weights = [len(r) for r in responses]
        total_weight = sum(response_weights)
        if total_weight == 0:
            # All responses are empty: no meaningful weighting is possible.
            return random.choice(responses)
        probabilities = [w / total_weight for w in response_weights]
        # Sample an index so the original object is returned, not a NumPy scalar.
        chosen = np.random.choice(len(responses), p=probabilities)
        return responses[int(chosen)]
        
def reply(query: str, path: str, training_data: dict) -> str:
    """Answer ``query`` with a freshly loaded chatbot and prefix the bot's name.

    Args:
        query: The user's message.
        path: Checkpoint file handed to ``chatbot``.
        training_data: Intents dictionary handed to ``chatbot``.

    Returns:
        The chosen response, prefixed with the bot's name and a colon.
    """
    bot = chatbot(path=path, training_data=training_data)
    predicted_tag = bot.predict_intent(query)
    answer = bot.get_response(predicted_tag)
    return "{}: {}".format("Astorine", answer)

In [None]:
import os
from astorine import reply, chatbot
from nlp.helper.ibuilder import igenerate_lite
from nlp.extractor import extract
from handlers.rcm import searching
from IPython.display import clear_output  
from decimal import Decimal
from pathlib import Path

In [None]:
# Locations relative to the current working directory. The path components are
# passed separately so os.path.join inserts the right separator on every
# platform (the previous literal backslashes only worked on Windows).
model_dir = os.path.join(os.getcwd(), "models", "chatbotmodel.pth")
intents_dir = os.path.join(os.getcwd(), "intents", "intents_lite.json")
# NOTE(review): presumably generates the intents and, with save=True, writes
# them to intents_dir — confirm against nlp.helper.ibuilder.igenerate_lite.
data = igenerate_lite(save=True, save_dir=intents_dir)

In [None]:
# In-memory registry of conversation state, keyed by user id (filled by get_session).
sessions = {}

# Criteria names, in the order they are joined into the search string and
# prompted for during the guided flow.
required_fields = [
    "brand", "gpu", "cpu", "ram", "resolution", "refresh rate", 
    "display type", "screen size", "use_for", "price"
]

class ChatbotSession:
    """Mutable conversation state kept for a single user."""

    def __init__(self):
        # Active conversation mode: "guided", "search" or "faq" ("Nothing" until one starts).
        self.current_flow = "Nothing"
        # What the bot is currently asking about, e.g. "price" when asking for the price.
        self.context = None
        # Collected search criteria: everything starts unset, except the price
        # range, which starts as a zero-to-zero placeholder.
        collected = dict.fromkeys((
            "brand",
            "gpu",
            "cpu",
            "ram",
            "resolution",
            "refresh rate",
            "display type",
            "screen size",
            "use_for",
        ))
        collected["price"] = {'min': Decimal('0'), 'max': Decimal('0')}
        self.criteria = collected
        self.previous_flow = None

def get_session(user_id):
    """Return the session stored for ``user_id``, creating it on first use."""
    try:
        return sessions[user_id]
    except KeyError:
        fresh_session = ChatbotSession()
        sessions[user_id] = fresh_session
        return fresh_session

In [None]:
def _price_is_unset(price) -> bool:
    """Return True when ``price`` is missing or still the zero-to-zero placeholder."""
    return price is None or price == "" or price == {'min': Decimal('0'), 'max': Decimal('0')}


def _search_and_format(session) -> str:
    """Search with the session's criteria and render the results as a reply."""
    criteria_string = " | ".join(str(session.criteria.get(field) or "") for field in required_fields)
    results = searching(criteria_string)
    if results:
        formatted_results = "\n".join(f"{i+1}. {laptop}" for i, laptop in enumerate(results))
        return f"Here are the laptops I found:\n{formatted_results}\nI think they are good for you."
    return "I couldn't find any laptops matching your criteria."


def update_session(session: ChatbotSession, user_input: str):
    """Advance the conversation by one user message and return the bot's reply.

    Criteria extracted from the message are merged into the session, the
    message's intent is looked up, and then the first matching rule decides
    the reply: help request, enough criteria to search, pending price
    question, FAQ during the guided flow, stated purpose, guided prompting,
    and finally the intent's own response.

    Args:
        session: The user's conversation state; it is updated in place.
        user_input: The raw message.

    Returns:
        The reply text.
    """
    extracted = extract(user_input)
    for key, value in extracted.items():
        # A price is only accepted when it was asked for or a search is already running.
        if value is not None and (key != "price" or session.context == "price" or session.current_flow == "search"):
            session.criteria[key] = value

    # NOTE(review): this expects a `reply` that takes one argument and returns a
    # mapping with "tag" and "response" — presumably the one imported from
    # `astorine`; confirm which `reply` is in scope.
    tag_response = reply(user_input)
    tag = tag_response.get("tag")
    response = tag_response.get("response")

    if tag == "help":
        session.current_flow = "guided"
        session.context = None
        return response

    # Price and purpose do not count towards the "enough to search" threshold.
    collected_criteria = sum(
        1
        for key, value in session.criteria.items()
        if key not in ["price", "use_for"] and value is not None and value != ""
    )

    if collected_criteria >= 3:
        session.current_flow = "search"
        if _price_is_unset(session.criteria.get("price")) or tag == "use_for":
            session.context = "price"
            return "Please specify your price range for the laptop."
        return _search_and_format(session)

    if session.context == "price":
        if not _price_is_unset(extracted.get("price")):
            session.criteria["price"] = extracted["price"]
            answer = _search_and_format(session)
            session.current_flow = "search"
            return answer
        return "Please provide your desired price range."

    faq_tags = ["gpu_question", "cpu_question", "ram_question"]
    if session.current_flow == "guided" and tag in faq_tags:
        # Answer the question, then steer back to the criterion it was about.
        return f"{response} So, what {tag.split('_')[0]} do you prefer?"

    if tag == "use_for":
        # Fall back to the raw message when no purpose was extracted (missing or None).
        session.criteria["use_for"] = extracted.get("use_for") or user_input
        session.context = "price"
        return "Please specify your price range for the laptop."

    if session.current_flow == "guided":
        for field in required_fields:
            value = session.criteria.get(field)
            if field == "price":
                # The price starts as a zero placeholder rather than None, so it
                # needs its own check; the prompt for it used to be unreachable.
                if _price_is_unset(value):
                    session.context = "price"
                    return "Please specify your price range for the laptop."
            elif value is None or value == "":
                if field == "use_for":
                    return "What will you use the laptop for?"
                return f"Please tell me your preferred {field}."

        answer = _search_and_format(session)
        session.current_flow = "search"
        return answer

    return response

def chatbot_handle(user_id: str, user_input: str):
    """Route one message from ``user_id`` through that user's session and return the reply."""
    return update_session(get_session(user_id), user_input)

110

In [None]:
# Demo conversation: both messages use the same user id, so the second call
# continues the session created by the first.
user_id = "userdeptraisiucapvutru"

print(chatbot_handle(user_id, "recommend me a laptop have rtx 4060, intel core i9 12th, 32gb ram, 17 inch display"))
print(chatbot_handle(user_id, "price range is $1000 to $2000"))