In [86]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import List, Tuple
import time
import json
import os


class OperatorAssistant:
    """
    Suggests canned operator replies that are semantically close to a client query.

    Phrases are embedded with a SentenceTransformer model and stored in a flat
    FAISS L2 index. Every method keeps the three stores aligned: position ``i``
    of ``knowledge_base``, row ``i`` of ``embeddings`` and vector ``i`` of
    ``index`` always describe the same phrase.
    """

    def __init__(self, model_name: str = './paraphrase-multilingual-MiniLM-L12-v2'):
        """
        Initialize the operator assistant with an embedding model.

        Args:
            model_name: model name or local path handed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)
        self.knowledge_base = []   # phrases, in index order
        self.embeddings = None     # (n_phrases, dimension) matrix, or None before init
        self.index = None          # FAISS index over the embeddings, or None before init
        self.dimension = 0         # embedding width; 0 until the first vectors arrive

    @staticmethod
    def _to_matrix(vectors) -> np.ndarray:
        """Return *vectors* as a C-contiguous float32 array, the layout FAISS requires."""
        return np.ascontiguousarray(vectors, dtype=np.float32)

    def initialize_knowledge_base(self, phrases: List[str]) -> None:
        """
        Replace the knowledge base with *phrases* and build a fresh FAISS index.

        Args:
            phrases: non-empty list of reply phrases. The list is copied, so
                later additions never modify the caller's object.

        Raises:
            ValueError: if *phrases* is empty (the current state is left untouched).
        """
        if not phrases:
            raise ValueError("–ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π –Ω–µ –º–æ–∂–µ—Ç –±—ã—Ç—å –ø—É—Å—Ç–æ–π")

        # Encode the phrases into embeddings
        start_time = time.time()
        embeddings = self._to_matrix(self.model.encode(phrases, show_progress_bar=True))
        dimension = embeddings.shape[1]

        # Create and fill the FAISS index
        index = faiss.IndexFlatL2(dimension)
        index.add(embeddings)

        # Commit only after every step succeeded so a failure cannot leave
        # phrases, embeddings and index out of step with each other.
        self.knowledge_base = list(phrases)
        self.embeddings = embeddings
        self.dimension = dimension
        self.index = index

        print(f"‚úÖ –ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–∞: {len(phrases)} —Ñ—Ä–∞–∑, "
              f"—Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å {self.dimension}, –≤—Ä–µ–º—è –æ–±—Ä–∞–±–æ—Ç–∫–∏: {time.time() - start_time:.2f} —Å–µ–∫")

    def find_similar_responses(self, query: str, top_k: int = 3) -> Tuple[List[str], List[float]]:
        """
        Find the stored phrases closest to a client query.

        Args:
            query: client text; must contain non-whitespace characters.
            top_k: maximum number of suggestions to return (at least 1).

        Returns:
            ``(responses, similarities)``, best match first. Each similarity is
            ``1 / (1 + d)`` where ``d`` is the distance reported by the index,
            so it lies in (0, 1] and is 1.0 for an exact match. Fewer than
            *top_k* items are returned when the index holds fewer vectors.

        Raises:
            RuntimeError: if the knowledge base has not been initialized.
            ValueError: if *query* is blank or *top_k* is less than 1.
        """
        if self.index is None or self.embeddings is None:
            raise RuntimeError("–ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π –Ω–µ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–∞")

        if not query.strip():
            raise ValueError("–ó–∞–ø—Ä–æ—Å –Ω–µ –º–æ–∂–µ—Ç –±—ã—Ç—å –ø—É—Å—Ç—ã–º")

        if top_k < 1:
            raise ValueError("top_k must be a positive integer")

        # Never request more neighbours than there are stored vectors: FAISS
        # pads missing results with index -1, which as a Python list index
        # would silently return the LAST phrase.
        k = min(top_k, self.index.ntotal)
        if k == 0:
            return [], []

        # Encode the query
        query_embedding = self._to_matrix(self.model.encode([query]))

        # Search the index
        distances, indices = self.index.search(query_embedding, k)

        # Convert distances to similarity scores
        responses = []
        similarities = []
        for dist, idx in zip(distances[0], indices[0]):
            if idx < 0:  # defensive: padding entry, no real neighbour
                continue
            responses.append(self.knowledge_base[idx])
            # float() turns the numpy scalar into a plain Python float
            similarities.append(float(1 - (dist / (1 + dist))))

        return responses, similarities

    def add_to_knowledge_base(self, new_phrases: List[str]) -> None:
        """
        Append phrases to the knowledge base and to the index.

        Works on an uninitialized assistant too: the index is then created
        from the first batch.

        Args:
            new_phrases: phrases to add; an empty list is a no-op.

        Raises:
            ValueError: if the new embeddings have a different width than the
                existing ones.
        """
        if not new_phrases:
            return

        # Encode the new phrases
        new_embeddings = self._to_matrix(self.model.encode(new_phrases))

        if self.index is None:
            # First batch: create the index and start the embedding matrix.
            self.dimension = new_embeddings.shape[1]
            self.index = faiss.IndexFlatL2(self.dimension)
            self.embeddings = new_embeddings
        else:
            # Dimension check
            if new_embeddings.shape[1] != self.dimension:
                raise ValueError("–†–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å –Ω–æ–≤—ã—Ö —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤ –Ω–µ —Å–æ–≤–ø–∞–¥–∞–µ—Ç —Å —Å—É—â–µ—Å—Ç–≤—É—é—â–µ–π")
            self.embeddings = np.vstack([self.embeddings, new_embeddings])

        # The vectors must reach the index in BOTH branches; otherwise phrases
        # exist in knowledge_base that the index can never return.
        self.index.add(new_embeddings)

        self.knowledge_base.extend(new_phrases)
        print(f"‚úÖ –î–æ–±–∞–≤–ª–µ–Ω–æ {len(new_phrases)} –Ω–æ–≤—ã—Ö —Ñ—Ä–∞–∑ –≤ –±–∞–∑—É –∑–Ω–∞–Ω–∏–π")

    def save_knowledge_base(self, save_dir: str = "knowledge_base") -> None:
        """
        Save the phrases, embeddings and index into a directory.

        Writes ``phrases.json`` always, ``embeddings.npy`` when embeddings
        exist and ``index.faiss`` when an index exists.

        Args:
            save_dir: target directory; created if missing.
        """
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(save_dir, exist_ok=True)

        # Save the phrases
        with open(os.path.join(save_dir, "phrases.json"), "w", encoding="utf-8") as f:
            json.dump(self.knowledge_base, f, ensure_ascii=False, indent=2)

        # Save the embeddings
        if self.embeddings is not None:
            np.save(os.path.join(save_dir, "embeddings.npy"), self.embeddings)

        # Save the FAISS index
        if self.index is not None:
            faiss.write_index(self.index, os.path.join(save_dir, "index.faiss"))

        print(f"‚úÖ –ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞ –≤ –¥–∏—Ä–µ–∫—Ç–æ—Ä–∏—é: {save_dir}")

    def load_knowledge_base(self, save_dir: str = "knowledge_base") -> bool:
        """
        Load the phrases, embeddings and index from a directory.

        The current state is replaced only when all three files load and
        agree with each other; on any failure it is left exactly as it was.

        Args:
            save_dir: directory previously written by ``save_knowledge_base``.

        Returns:
            True on success, False if a file is missing, unreadable or the
            files are inconsistent (a warning is printed in those cases).
        """
        phrases_path = os.path.join(save_dir, "phrases.json")
        embeddings_path = os.path.join(save_dir, "embeddings.npy")
        index_path = os.path.join(save_dir, "index.faiss")

        if not all(os.path.exists(p) for p in (phrases_path, embeddings_path, index_path)):
            print(f"‚ö† –§–∞–π–ª—ã –±–∞–∑—ã –∑–Ω–∞–Ω–∏–π –Ω–µ –Ω–∞–π–¥–µ–Ω—ã –≤ {save_dir}")
            return False

        try:
            # Load the phrases
            with open(phrases_path, "r", encoding="utf-8") as f:
                phrases = json.load(f)

            # Load the embeddings
            embeddings = np.load(embeddings_path)
            dimension = embeddings.shape[1]

            # Load the index
            index = faiss.read_index(index_path)

            # The three files are written separately, so verify they still
            # describe the same set of vectors before trusting them.
            if not (len(phrases) == embeddings.shape[0] == index.ntotal) or index.d != dimension:
                raise ValueError(
                    f"inconsistent knowledge base files: {len(phrases)} phrases, "
                    f"embeddings {embeddings.shape}, index {index.ntotal}x{index.d}"
                )
        except Exception as e:
            # Deliberately broad: the contract is "return False on any load
            # failure" (I/O, JSON, NumPy and FAISS errors alike).
            print(f"‚ö† –û—à–∏–±–∫–∞ –ø—Ä–∏ –∑–∞–≥—Ä—É–∑–∫–µ –±–∞–∑—ã –∑–Ω–∞–Ω–∏–π: {str(e)}")
            return False

        self.knowledge_base = phrases
        self.embeddings = embeddings
        self.dimension = dimension
        self.index = index

        print(f"‚úÖ –ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π –∑–∞–≥—Ä—É–∂–µ–Ω–∞: {len(self.knowledge_base)} —Ñ—Ä–∞–∑, "
              f"—Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å {self.dimension}")
        return True

        
        
# –ü—Ä–∏–º–µ—Ä –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è
if __name__ == "__main__":
    # Demo: build the initial knowledge base of canned operator replies
    # (grouped by topic below) and index it.
    knowledge_base = [
    # Greetings and standard phrases
    "–î–æ–±—Ä—ã–π –¥–µ–Ω—å! –ß–µ–º –º–æ–≥—É –ø–æ–º–æ—á—å?",
    "–ó–¥—Ä–∞–≤—Å—Ç–≤—É–π—Ç–µ! –°–ª—É—à–∞—é –≤–∞—Å.",
    "–†–∞–¥—ã –≤–∞—Å —Å–ª—ã—à–∞—Ç—å! –ö–∞–∫–æ–π —É –≤–∞—Å –≤–æ–ø—Ä–æ—Å?",
    "–ü—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ–º –≤ –Ω–∞—à–µ–º —Å–µ—Ä–≤–∏—Å–µ! –ß–µ–º –º–æ–≥—É –±—ã—Ç—å –ø–æ–ª–µ–∑–µ–Ω?",
    
    # Clarifying questions
    "–ü–æ–∂–∞–ª—É–π—Å—Ç–∞, —É—Ç–æ—á–Ω–∏—Ç–µ –Ω–æ–º–µ—Ä –¥–æ–≥–æ–≤–æ—Ä–∞.",
    "–î–ª—è –ø–æ–º–æ—â–∏ –º–Ω–µ –Ω—É–∂–Ω–æ —É–∑–Ω–∞—Ç—å –≤–∞—à –∏–¥–µ–Ω—Ç–∏—Ñ–∏–∫–∞—Ç–æ—Ä –∫–ª–∏–µ–Ω—Ç–∞.",
    "–ù–µ –º–æ–≥–ª–∏ –±—ã –≤—ã —É—Ç–æ—á–Ω–∏—Ç—å –¥–µ—Ç–∞–ª–∏ –≤–∞—à–µ–≥–æ –∑–∞–ø—Ä–æ—Å–∞?",
    "–ö–∞–∫–æ–π –∏–º–µ–Ω–Ω–æ —Å–µ—Ä–≤–∏—Å –≤–∞—Å –∏–Ω—Ç–µ—Ä–µ—Å—É–µ—Ç?",
    "–í—ã –æ–±—Ä–∞—â–∞–µ—Ç–µ—Å—å –ø–æ –≤–æ–ø—Ä–æ—Å—É –∫—Ä–µ–¥–∏—Ç–∞, –¥–µ–ø–æ–∑–∏—Ç–∞ –∏–ª–∏ –¥—Ä—É–≥–æ–≥–æ –ø—Ä–æ–¥—É–∫—Ç–∞?",
    
    # Payments
    "–í—ã –º–æ–∂–µ—Ç–µ –æ–ø–ª–∞—Ç–∏—Ç—å —á–µ—Ä–µ–∑ –º–æ–±–∏–ª—å–Ω–æ–µ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ –∏–ª–∏ —Ç–µ—Ä–º–∏–Ω–∞–ª.",
    "–î–ª—è –æ–ø–ª–∞—Ç—ã –¥–æ—Å—Ç—É–ø–Ω—ã –æ–Ω–ª–∞–π–Ω-–±–∞–Ω–∫–∏–Ω–≥, –º–æ–±–∏–ª—å–Ω–æ–µ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ –∏ –ø–ª–∞—Ç–µ–∂–Ω—ã–µ —Ç–µ—Ä–º–∏–Ω–∞–ª—ã.",
    "–û–ø–ª–∞—Ç–∏—Ç—å –º–æ–∂–Ω–æ —á–µ—Ä–µ–∑ –ª–∏—á–Ω—ã–π –∫–∞–±–∏–Ω–µ—Ç –Ω–∞ –Ω–∞—à–µ–º —Å–∞–π—Ç–µ.",
    "–†–µ–∫–≤–∏–∑–∏—Ç—ã –¥–ª—è –æ–ø–ª–∞—Ç—ã –±—É–¥—É—Ç –æ—Ç–ø—Ä–∞–≤–ª–µ–Ω—ã –≤–∞–º –≤ SMS.",
    "–ê–≤—Ç–æ–ø–ª–∞—Ç–µ–∂ –º–æ–∂–Ω–æ –Ω–∞—Å—Ç—Ä–æ–∏—Ç—å –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.",
    "–ü—Ä–∏ –æ–ø–ª–∞—Ç–µ —á–µ—Ä–µ–∑ —Ç–µ—Ä–º–∏–Ω–∞–ª –∫–æ–º–∏—Å—Å–∏—è –Ω–µ –≤–∑–∏–º–∞–µ—Ç—Å—è.",
    
    # Loans and debts
    "–ú—ã –º–æ–∂–µ–º –ø—Ä–µ–¥–ª–æ–∂–∏—Ç—å –≤–∞–º —Ä–µ—Å—Ç—Ä—É–∫—Ç—É—Ä–∏–∑–∞—Ü–∏—é –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç–∏.",
    "–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –æ —Ç–µ–∫—É—â–µ–π –∑–∞–¥–æ–ª–∂–µ–Ω–Ω–æ—Å—Ç–∏ –º–æ–∂–Ω–æ —É–∑–Ω–∞—Ç—å –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.",
    "–ì—Ä–∞—Ñ–∏–∫ –ø–ª–∞—Ç–µ–∂–µ–π –±—ã–ª –æ—Ç–ø—Ä–∞–≤–ª–µ–Ω –≤–∞–º –Ω–∞ email –ø—Ä–∏ –æ—Ñ–æ—Ä–º–ª–µ–Ω–∏–∏ –∫—Ä–µ–¥–∏—Ç–∞.",
    "–í—ã –º–æ–∂–µ—Ç–µ –ø–æ–¥–∞—Ç—å –∑–∞—è–≤–∫—É –Ω–∞ —É–≤–µ–ª–∏—á–µ–Ω–∏–µ –∫—Ä–µ–¥–∏—Ç–Ω–æ–≥–æ –ª–∏–º–∏—Ç–∞.",
    "–î–æ—Å—Ä–æ—á–Ω–æ–µ –ø–æ–≥–∞—à–µ–Ω–∏–µ –∫—Ä–µ–¥–∏—Ç–∞ –¥–æ—Å—Ç—É–ø–Ω–æ –±–µ–∑ –∫–æ–º–∏—Å—Å–∏–π.",
    
    # Security and verification
    "–î–ª—è –±–µ–∑–æ–ø–∞—Å–Ω–æ—Å—Ç–∏ –º–Ω–µ –Ω—É–∂–Ω–æ –ø—Ä–æ–≤–µ—Å—Ç–∏ –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏—é.",
    "–û–∂–∏–¥–∞–π—Ç–µ –°–ú–° —Å –∫–æ–¥–æ–º –ø–æ–¥—Ç–≤–µ—Ä–∂–¥–µ–Ω–∏—è.",
    "–ö —Å–æ–∂–∞–ª–µ–Ω–∏—é, —è –Ω–µ –º–æ–≥—É —Ä–∞–∑–≥–ª–∞—à–∞—Ç—å —ç—Ç—É –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –±–µ–∑ –≤–µ—Ä–∏—Ñ–∏–∫–∞—Ü–∏–∏.",
    "–ü–æ–∂–∞–ª—É–π—Å—Ç–∞, –Ω–∞–∑–æ–≤–∏—Ç–µ –∫–æ–¥–æ–≤–æ–µ —Å–ª–æ–≤–æ –∏–∑ –¥–æ–≥–æ–≤–æ—Ä–∞.",
    "–î–æ—Å—Ç—É–ø –∫ —ç—Ç–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –≤–æ–∑–º–æ–∂–µ–Ω —Ç–æ–ª—å–∫–æ –ø–æ—Å–ª–µ –ø–æ–¥—Ç–≤–µ—Ä–∂–¥–µ–Ω–∏—è –ª–∏—á–Ω–æ—Å—Ç–∏.",
    
    # Technical problems
    "–ü—Ä–∏–Ω–æ—Å–∏–º –∏–∑–≤–∏–Ω–µ–Ω–∏—è –∑–∞ —Ç–µ—Ö–Ω–∏—á–µ—Å–∫–∏–µ –Ω–µ–ø–æ–ª–∞–¥–∫–∏. –ú—ã —É–∂–µ —Ä–∞–±–æ—Ç–∞–µ–º –Ω–∞–¥ —Ä–µ—à–µ–Ω–∏–µ–º.",
    "–ü–æ–ø—Ä–æ–±—É–π—Ç–µ –æ–±–Ω–æ–≤–∏—Ç—å —Å—Ç—Ä–∞–Ω–∏—Ü—É –∏–ª–∏ –∑–∞–π—Ç–∏ –ø–æ–∑–∂–µ.",
    "–î–ª—è —Ä–µ—à–µ–Ω–∏—è –ø—Ä–æ–±–ª–µ–º—ã –ø–æ–ø—Ä–æ–±—É–π—Ç–µ –æ—á–∏—Å—Ç–∏—Ç—å –∫—ç—à –±—Ä–∞—É–∑–µ—Ä–∞.",
    "–ù–∞—à–∏ —Ç–µ—Ö–Ω–∏—á–µ—Å–∫–∏–µ —Å–ø–µ—Ü–∏–∞–ª–∏—Å—Ç—ã —É–∂–µ –≤ –∫—É—Ä—Å–µ –ø—Ä–æ–±–ª–µ–º—ã.",
    
    # Handing over to a specialist
    "–ü–µ—Ä–µ–≤–æ–∂—É –≤–∞—Å –Ω–∞ —Å–ø–µ—Ü–∏–∞–ª–∏—Å—Ç–∞ –ø–æ –¥–∞–Ω–Ω–æ–º—É –≤–æ–ø—Ä–æ—Å—É.",
    "–ú–æ–π –∫–æ–ª–ª–µ–≥–∞ –ª—É—á—à–µ –ø–æ–º–æ–∂–µ—Ç –≤–∞–º —Å —ç—Ç–∏–º –≤–æ–ø—Ä–æ—Å–æ–º.",
    "–°–æ–µ–¥–∏–Ω—è—é —Å –æ—Ç–¥–µ–ª–æ–º –ø–æ —Ä–∞–±–æ—Ç–µ —Å –∫–ª–∏–µ–Ω—Ç–∞–º–∏.",
    "–≠—Ç–æ—Ç –≤–æ–ø—Ä–æ—Å —Ç—Ä–µ–±—É–µ—Ç —É—Ç–æ—á–Ω–µ–Ω–∏—è, –ø–µ—Ä–µ–≤–æ–∂—É –Ω–∞ –ø—Ä–æ—Ñ–∏–ª—å–Ω–æ–≥–æ —Å–ø–µ—Ü–∏–∞–ª–∏—Å—Ç–∞.",
    
    # Ending the conversation
    "–°–ø–∞—Å–∏–±–æ –∑–∞ –æ–±—Ä–∞—â–µ–Ω–∏–µ, —Ö–æ—Ä–æ—à–µ–≥–æ –¥–Ω—è!",
    "–ë–ª–∞–≥–æ–¥–∞—Ä–∏–º –∑–∞ –≤–∞—à –∑–≤–æ–Ω–æ–∫!",
    "–ï—Å–ª–∏ —É –≤–∞—Å –±—É–¥—É—Ç –µ—â–µ –≤–æ–ø—Ä–æ—Å—ã - –º—ã –≤—Å–µ–≥–¥–∞ –Ω–∞ —Å–≤—è–∑–∏!",
    "–ñ–µ–ª–∞–µ–º –≤–∞–º –ø—Ä–∏—è—Ç–Ω–æ–≥–æ –¥–Ω—è!",
    "–í—Å–µ–≥–æ –¥–æ–±—Ä–æ–≥–æ!",
    
    # Product information
    "–£ –Ω–∞—Å –µ—Å—Ç—å —Å–ø–µ—Ü–∏–∞–ª—å–Ω—ã–µ —É—Å–ª–æ–≤–∏—è –¥–ª—è –Ω–æ–≤—ã—Ö –∫–ª–∏–µ–Ω—Ç–æ–≤.",
    "–ü–æ–¥—Ä–æ–±–Ω–µ–µ –æ —Ç–∞—Ä–∏—Ñ–∞—Ö –º–æ–∂–Ω–æ —É–∑–Ω–∞—Ç—å –Ω–∞ –Ω–∞—à–µ–º —Å–∞–π—Ç–µ.",
    "–ê–∫—Ç—É–∞–ª—å–Ω—ã–µ –∞–∫—Ü–∏–∏ –æ—Ç—Ä–∞–∂–µ–Ω—ã –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.",
    "–í—ã –º–æ–∂–µ—Ç–µ –æ—Ñ–æ—Ä–º–∏—Ç—å –ø—Ä–æ–¥—É–∫—Ç –æ–Ω–ª–∞–π–Ω –±–µ–∑ –≤–∏–∑–∏—Ç–∞ –≤ –æ—Ñ–∏—Å.",
    
    # Complaint handling
    "–í–∞—à–∞ –∂–∞–ª–æ–±–∞ –∑–∞—Ä–µ–≥–∏—Å—Ç—Ä–∏—Ä–æ–≤–∞–Ω–∞ –ø–æ–¥ –Ω–æ–º–µ—Ä–æ–º #XXXX.",
    "–ú—ã —Ä–∞—Å—Å–º–æ—Ç—Ä–∏–º –≤–∞—à–µ –æ–±—Ä–∞—â–µ–Ω–∏–µ –≤ —Ç–µ—á–µ–Ω–∏–µ 3 —Ä–∞–±–æ—á–∏—Ö –¥–Ω–µ–π.",
    "–ü—Ä–∏–Ω–æ—Å–∏–º –∏–∑–≤–∏–Ω–µ–Ω–∏—è –∑–∞ –¥–æ—Å—Ç–∞–≤–ª–µ–Ω–Ω—ã–µ –Ω–µ—É–¥–æ–±—Å—Ç–≤–∞.",
    "–í–∞—à–µ –æ–±—Ä–∞—â–µ–Ω–∏–µ –±—É–¥–µ—Ç –ø–µ—Ä–µ–¥–∞–Ω–æ –≤ —Å–ª—É–∂–±—É –∫–æ–Ω—Ç—Ä–æ–ª—è –∫–∞—á–µ—Å—Ç–≤–∞.",
    
    # Cards and accounts
    "–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –æ –≤–∞—à–µ–π –∫–∞—Ä—Ç–µ –¥–æ—Å—Ç—É–ø–Ω–∞ –≤ –º–æ–±–∏–ª—å–Ω–æ–º –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–∏.",
    "–í—ã –º–æ–∂–µ—Ç–µ –∑–∞–∫–∞–∑–∞—Ç—å –ø–µ—Ä–µ–≤—ã–ø—É—Å–∫ –∫–∞—Ä—Ç—ã –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.",
    "–õ–∏–º–∏—Ç—ã –ø–æ –∫–∞—Ä—Ç–µ –º–æ–∂–Ω–æ —É–≤–µ–ª–∏—á–∏—Ç—å, –ø—Ä–µ–¥–æ—Å—Ç–∞–≤–∏–≤ —Å–ø—Ä–∞–≤–∫—É –æ –¥–æ—Ö–æ–¥–∞—Ö.",
    "–í—ã–ø–∏—Å–∫—É –ø–æ —Å—á–µ—Ç—É –º–æ–∂–Ω–æ –ø–æ–ª—É—á–∏—Ç—å –≤ –æ—Ç–¥–µ–ª–µ–Ω–∏–∏ –∏–ª–∏ –æ–Ω–ª–∞–π–Ω.",
    
    # Mobile app
    "–ú–æ–±–∏–ª—å–Ω–æ–µ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ –¥–æ—Å—Ç—É–ø–Ω–æ –≤ AppStore –∏ Google Play.",
    "–í –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–∏ –¥–æ—Å—Ç—É–ø–Ω—ã –≤—Å–µ –æ—Å–Ω–æ–≤–Ω—ã–µ –æ–ø–µ—Ä–∞—Ü–∏–∏.",
    "–í—ã –º–æ–∂–µ—Ç–µ –Ω–∞—Å—Ç—Ä–æ–∏—Ç—å —É–≤–µ–¥–æ–º–ª–µ–Ω–∏—è –≤ –Ω–∞—Å—Ç—Ä–æ–π–∫–∞—Ö –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è.",
    "–î–ª—è –≤—Ö–æ–¥–∞ –≤ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ –∏—Å–ø–æ–ª—å–∑—É–π—Ç–µ —Ç–µ –∂–µ –¥–∞–Ω–Ω—ã–µ, —á—Ç–æ –∏ –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.",
    
    # Currency operations
    "–ê–∫—Ç—É–∞–ª—å–Ω—ã–µ –∫—É—Ä—Å—ã –≤–∞–ª—é—Ç –æ—Ç–æ–±—Ä–∞–∂–∞—é—Ç—Å—è –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.",
    "–í—ã –º–æ–∂–µ—Ç–µ –∑–∞—Ä–µ–∑–µ—Ä–≤–∏—Ä–æ–≤–∞—Ç—å –≤–∞–ª—é—Ç—É –æ–Ω–ª–∞–π–Ω –∏ –∑–∞–±—Ä–∞—Ç—å –≤ –æ—Ç–¥–µ–ª–µ–Ω–∏–∏.",
    "–õ–∏–º–∏—Ç –Ω–∞ –≤–∞–ª—é—Ç–Ω—ã–µ –æ–ø–µ—Ä–∞—Ü–∏–∏ —Å–æ—Å—Ç–∞–≤–ª—è–µ—Ç $10,000 –≤ –º–µ—Å—è—Ü.",
    
    # Insurance products
    "–ú—ã –º–æ–∂–µ–º –ø—Ä–µ–¥–ª–æ–∂–∏—Ç—å –≤–∞–º –Ω–µ—Å–∫–æ–ª—å–∫–æ –≤–∞—Ä–∏–∞–Ω—Ç–æ–≤ —Å—Ç—Ä–∞—Ö–æ–≤–∞–Ω–∏—è.",
    "–°—Ç—Ä–∞—Ö–æ–≤–æ–π –ø–æ–ª–∏—Å –º–æ–∂–Ω–æ –æ—Ñ–æ—Ä–º–∏—Ç—å –æ–Ω–ª–∞–π–Ω –∑–∞ 5 –º–∏–Ω—É—Ç.",
    "–£ –Ω–∞—Å –µ—Å—Ç—å —Å–ø–µ—Ü–∏–∞–ª—å–Ω—ã–µ —É—Å–ª–æ–≤–∏—è –ø–æ —Å—Ç—Ä–∞—Ö–æ–≤–∞–Ω–∏—é –¥–ª—è –¥–µ—Ä–∂–∞—Ç–µ–ª–µ–π –∫–∞—Ä—Ç.",
    
    # Investments
    "–ù–∞—à–∏ –∫–æ–Ω—Å—É–ª—å—Ç–∞–Ω—Ç—ã –ø–æ–º–æ–≥—É—Ç –≤–∞–º –ø–æ–¥–æ–±—Ä–∞—Ç—å –∏–Ω–≤–µ—Å—Ç–∏—Ü–∏–æ–Ω–Ω—ã–π –ø—Ä–æ–¥—É–∫—Ç.",
    "–í—ã –º–æ–∂–µ—Ç–µ –Ω–∞—á–∞—Ç—å –∏–Ω–≤–µ—Å—Ç–∏—Ä–æ–≤–∞—Ç—å —Å —Å—É–º–º—ã –æ—Ç 1000 —Ä—É–±–ª–µ–π.",
    "–î–æ—Ö–æ–¥–Ω–æ—Å—Ç—å –∑–∞ –ø—Ä–æ—à–ª—ã–π –≥–æ–¥ —Å–æ—Å—Ç–∞–≤–∏–ª–∞ 7.2% –≥–æ–¥–æ–≤—ã—Ö."
]
    
    # Create the assistant (loads the embedding model) and index the phrases
    assistant = OperatorAssistant()
    assistant.initialize_knowledge_base(knowledge_base)
    


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:00<00:00,  4.53it/s]

‚úÖ –ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–∞: 63 —Ñ—Ä–∞–∑, —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å 384, –≤—Ä–µ–º—è –æ–±—Ä–∞–±–æ—Ç–∫–∏: 0.45 —Å–µ–∫





In [87]:
    # –ü—Ä–∏–º–µ—Ä –∑–∞–ø—Ä–æ—Å–∞
# Sample client queries used to exercise the search
queries = [
        "–ö–∞–∫ —è –º–æ–≥—É –æ–ø–ª–∞—Ç–∏—Ç—å –∫—Ä–µ–¥–∏—Ç?",
        "–ú–Ω–µ –Ω—É–∂–Ω–∞ –ø–æ–º–æ—â—å —Å –¥–æ–≥–æ–≤–æ—Ä–æ–º",
        "–ß—Ç–æ –¥–µ–ª–∞—Ç—å –µ—Å–ª–∏ –ø—Ä–∏—à–ª–æ —Å–º—Å?",
        "–î–æ–±—Ä—ã–π –¥–µ–Ω—å, —Ö–æ—á—É —Ä–µ—Å—Ç—Ä—É–∫—Ç—É—Ä–∏–∑–∏—Ä–æ–≤–∞—Ç—å –¥–æ–ª–≥"
    ]
    
# For each query print the top suggestions (default top_k) with their scores
for query in queries:
    print(f"\nüë® –ö–ª–∏–µ–Ω—Ç: '{query}'")
    responses, similarities = assistant.find_similar_responses(query)
        
    print("üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:")
    # Number the suggestions from 1 for display
    for i, (resp, sim) in enumerate(zip(responses, similarities), 1):
        print(f"{i}. [{sim:.2f}] {resp}")


üë® –ö–ª–∏–µ–Ω—Ç: '–ö–∞–∫ —è –º–æ–≥—É –æ–ø–ª–∞—Ç–∏—Ç—å –∫—Ä–µ–¥–∏—Ç?'
üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:
1. [0.06] –ì—Ä–∞—Ñ–∏–∫ –ø–ª–∞—Ç–µ–∂–µ–π –±—ã–ª –æ—Ç–ø—Ä–∞–≤–ª–µ–Ω –≤–∞–º –Ω–∞ email –ø—Ä–∏ –æ—Ñ–æ—Ä–º–ª–µ–Ω–∏–∏ –∫—Ä–µ–¥–∏—Ç–∞.
2. [0.06] –î–æ—Å—Ä–æ—á–Ω–æ–µ –ø–æ–≥–∞—à–µ–Ω–∏–µ –∫—Ä–µ–¥–∏—Ç–∞ –¥–æ—Å—Ç—É–ø–Ω–æ –±–µ–∑ –∫–æ–º–∏—Å—Å–∏–π.
3. [0.05] –í—ã –æ–±—Ä–∞—â–∞–µ—Ç–µ—Å—å –ø–æ –≤–æ–ø—Ä–æ—Å—É –∫—Ä–µ–¥–∏—Ç–∞, –¥–µ–ø–æ–∑–∏—Ç–∞ –∏–ª–∏ –¥—Ä—É–≥–æ–≥–æ –ø—Ä–æ–¥—É–∫—Ç–∞?

üë® –ö–ª–∏–µ–Ω—Ç: '–ú–Ω–µ –Ω—É–∂–Ω–∞ –ø–æ–º–æ—â—å —Å –¥–æ–≥–æ–≤–æ—Ä–æ–º'
üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:
1. [0.11] –ü–æ–∂–∞–ª—É–π—Å—Ç–∞, –Ω–∞–∑–æ–≤–∏—Ç–µ –∫–æ–¥–æ–≤–æ–µ —Å–ª–æ–≤–æ –∏–∑ –¥–æ–≥–æ–≤–æ—Ä–∞.
2. [0.08] –ü–æ–∂–∞–ª—É–π—Å—Ç–∞, —É—Ç–æ—á–Ω–∏—Ç–µ –Ω–æ–º–µ—Ä –¥–æ–≥–æ–≤–æ—Ä–∞.
3. [0.08] –ü—Ä–∏–≤–µ—Ç—Å—Ç–≤—É–µ–º –≤ –Ω–∞—à–µ–º —Å–µ—Ä–≤–∏—Å–µ! –ß–µ–º –º–æ–≥—É –±—ã—Ç—å –ø–æ–ª–µ–∑–µ–Ω?

üë® –ö–ª–∏–µ–Ω—Ç: '–ß—Ç–æ –¥–µ–ª–∞—Ç—å –µ—Å–ª–∏ –ø—Ä–∏—à–ª–æ —Å–º—Å?'
üîç 

In [73]:
# Start from a fresh assistant built on the initial phrase list
assistant = OperatorAssistant()
assistant.initialize_knowledge_base(knowledge_base)  # Initialize with the starting knowledge base

# New phrases to add
new_phrases = [
    # New greetings
    "–î–æ–±—Ä–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏ —Å—É—Ç–æ–∫! –ß–µ–º –≤–∞–º –ø–æ–º–æ—á—å?",
    "–ü—Ä–∏–≤–µ—Ç! –Ø –≤–∞—à –≤–∏—Ä—Ç—É–∞–ª—å–Ω—ã–π –ø–æ–º–æ—â–Ω–∏–∫, –∑–∞–¥–∞–≤–∞–π—Ç–µ –≤–æ–ø—Ä–æ—Å.",
    
    # New payment-related replies
    "–í—ã –º–æ–∂–µ—Ç–µ –Ω–∞—Å—Ç—Ä–æ–∏—Ç—å –∞–≤—Ç–æ–ø–ª–∞—Ç–µ–∂ –≤ —Ä–∞–∑–¥–µ–ª–µ '–†–µ–≥—É–ª—è—Ä–Ω—ã–µ –ø–ª–∞—Ç–µ–∂–∏' –º–æ–±–∏–ª—å–Ω–æ–≥–æ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è.",
    "–ü—Ä–∏ –æ–ø–ª–∞—Ç–µ —á–µ—Ä–µ–∑ –Ω–∞—à —Å–∞–π—Ç –¥–µ–π—Å—Ç–≤—É–µ—Ç cashback 1%.",
    
    # New scenarios
    "–î–ª—è –≤–æ—Å—Å—Ç–∞–Ω–æ–≤–ª–µ–Ω–∏—è –¥–æ—Å—Ç—É–ø–∞ –∫ –ª–∏—á–Ω–æ–º—É –∫–∞–±–∏–Ω–µ—Ç—É –Ω–∞–∂–º–∏—Ç–µ '–ó–∞–±—ã–ª–∏ –ø–∞—Ä–æ–ª—å' –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü–µ –≤—Ö–æ–¥–∞.",
    "–í—ã –º–æ–∂–µ—Ç–µ –∑–∞–∫–∞–∑–∞—Ç—å –≤—ã–ø–∏—Å–∫—É –ø–æ email –≤ –Ω–∞—Å—Ç—Ä–æ–π–∫–∞—Ö –ø—Ä–æ—Ñ–∏–ª—è.",
    "–î–ª—è –æ—Ñ–æ—Ä–º–ª–µ–Ω–∏—è –∫—Ä–µ–¥–∏—Ç–Ω–æ–π –∫–∞—Ä—Ç—ã –ø–æ—Ç—Ä–µ–±—É–µ—Ç—Å—è –ø–∞—Å–ø–æ—Ä—Ç –∏ –ò–ù–ù.",
    
    # Replies to frequent complaints
    "–ü—Ä–∏–Ω–æ—Å–∏–º –∏–∑–≤–∏–Ω–µ–Ω–∏—è –∑–∞ –¥–æ–ª–≥–æ–µ –æ–∂–∏–¥–∞–Ω–∏–µ, –≤–∞—à –∑–∞–ø—Ä–æ—Å –¥–µ–π—Å—Ç–≤–∏—Ç–µ–ª—å–Ω–æ –≤–∞–∂–µ–Ω –¥–ª—è –Ω–∞—Å.",
    "–ú—ã —Ü–µ–Ω–∏–º –≤–∞—à–µ —Ç–µ—Ä–ø–µ–Ω–∏–µ –∏ –æ–±—è–∑–∞—Ç–µ–ª—å–Ω–æ —Ä–µ—à–∏–º –≤–∞—à –≤–æ–ø—Ä–æ—Å."
]

# Add the new phrases to the knowledge base
assistant.add_to_knowledge_base(new_phrases)

# Check that a query can now retrieve one of the newly added phrases
test_query = "–ö–∞–∫ –≤–æ—Å—Å—Ç–∞–Ω–æ–≤–∏—Ç—å –¥–æ—Å—Ç—É–ø –∫ –ª–∏—á–Ω–æ–º—É –∫–∞–±–∏–Ω–µ—Ç—É?"
responses, similarities = assistant.find_similar_responses(test_query)

print(f"\nüë® –ö–ª–∏–µ–Ω—Ç: '{test_query}'")
print("üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:")
# Number the suggestions from 1 for display
for i, (resp, sim) in enumerate(zip(responses, similarities), 1):
    print(f"{i}. [{sim:.2f}] {resp}")

Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:00<00:00,  4.27it/s]


‚úÖ –ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω–∞: 63 —Ñ—Ä–∞–∑, —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å 384, –≤—Ä–µ–º—è –æ–±—Ä–∞–±–æ—Ç–∫–∏: 0.48 —Å–µ–∫
‚úÖ –î–æ–±–∞–≤–ª–µ–Ω–æ 9 –Ω–æ–≤—ã—Ö —Ñ—Ä–∞–∑ –≤ –±–∞–∑—É –∑–Ω–∞–Ω–∏–π

üë® –ö–ª–∏–µ–Ω—Ç: '–ö–∞–∫ –≤–æ—Å—Å—Ç–∞–Ω–æ–≤–∏—Ç—å –¥–æ—Å—Ç—É–ø –∫ –ª–∏—á–Ω–æ–º—É –∫–∞–±–∏–Ω–µ—Ç—É?'
üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:
1. [0.10] –î–ª—è –≤–æ—Å—Å—Ç–∞–Ω–æ–≤–ª–µ–Ω–∏—è –¥–æ—Å—Ç—É–ø–∞ –∫ –ª–∏—á–Ω–æ–º—É –∫–∞–±–∏–Ω–µ—Ç—É –Ω–∞–∂–º–∏—Ç–µ '–ó–∞–±—ã–ª–∏ –ø–∞—Ä–æ–ª—å' –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü–µ –≤—Ö–æ–¥–∞.
2. [0.07] –í—ã –º–æ–∂–µ—Ç–µ –∑–∞–∫–∞–∑–∞—Ç—å –ø–µ—Ä–µ–≤—ã–ø—É—Å–∫ –∫–∞—Ä—Ç—ã –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.
3. [0.07] –ê–∫—Ç—É–∞–ª—å–Ω—ã–µ –∞–∫—Ü–∏–∏ –æ—Ç—Ä–∞–∂–µ–Ω—ã –≤ –ª–∏—á–Ω–æ–º –∫–∞–±–∏–Ω–µ—Ç–µ.


In [74]:
# –î–æ–±–∞–≤–ª—è–µ–º –Ω–æ–≤—É—é –∫–∞—Ç–µ–≥–æ—Ä–∏—é - –æ—Ç–≤–µ—Ç—ã –æ –∫—ç—à–±—ç–∫–µ –∏ –±–æ–Ω—É—Å–∞—Ö
# A new category of replies: cashback and bonuses
cashback_phrases = [
    "–†–∞–∑–º–µ—Ä –∫—ç—à–±—ç–∫–∞ –∑–∞–≤–∏—Å–∏—Ç –æ—Ç —Ç–∏–ø–∞ –≤–∞—à–µ–π –∫–∞—Ä—Ç—ã.",
    "–ë–æ–Ω—É—Å–Ω—ã–µ –±–∞–ª–ª—ã –Ω–∞—á–∏—Å–ª—è—é—Ç—Å—è –≤ –∫–æ–Ω—Ü–µ –∫–∞–∂–¥–æ–≥–æ –º–µ—Å—è—Ü–∞.",
    "–í—ã –º–æ–∂–µ—Ç–µ –ø–æ—Ç—Ä–∞—Ç–∏—Ç—å –Ω–∞–∫–æ–ø–ª–µ–Ω–Ω—ã–µ –±–∞–ª–ª—ã –≤ –Ω–∞—à–µ–º –º–∞—Ä–∫–µ—Ç–ø–ª–µ–π—Å–µ.",
    "–ö—ç—à–±—ç–∫ –¥–æ 5% –¥–µ–π—Å—Ç–≤—É–µ—Ç –Ω–∞ –∫–∞—Ç–µ–≥–æ—Ä–∏—é '–°—É–ø–µ—Ä–º–∞—Ä–∫–µ—Ç—ã'.",
    "–ü–æ–¥—Ä–æ–±–Ω—ã–µ —É—Å–ª–æ–≤–∏—è –±–æ–Ω—É—Å–Ω–æ–π –ø—Ä–æ–≥—Ä–∞–º–º—ã –≤ —Ä–∞–∑–¥–µ–ª–µ '–ü—Ä–µ–∏–º—É—â–µ—Å—Ç–≤–∞' –º–æ–±–∏–ª—å–Ω–æ–≥–æ –ø—Ä–∏–ª–æ–∂–µ–Ω–∏—è."
]

# Uses the `assistant` object created in an earlier cell
assistant.add_to_knowledge_base(cashback_phrases)

# Check the new category, asking for only the two best matches
test_query = "–ö–∞–∫ —Ä–∞–±–æ—Ç–∞–µ—Ç –∫—ç—à–±—ç–∫?"
responses, similarities = assistant.find_similar_responses(test_query, top_k=2)

print(f"\nüë® –ö–ª–∏–µ–Ω—Ç: '{test_query}'")
print("üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:")
# Number the suggestions from 1 for display
for i, (resp, sim) in enumerate(zip(responses, similarities), 1):
    print(f"{i}. [{sim:.2f}] {resp}")

‚úÖ –î–æ–±–∞–≤–ª–µ–Ω–æ 5 –Ω–æ–≤—ã—Ö —Ñ—Ä–∞–∑ –≤ –±–∞–∑—É –∑–Ω–∞–Ω–∏–π

üë® –ö–ª–∏–µ–Ω—Ç: '–ö–∞–∫ —Ä–∞–±–æ—Ç–∞–µ—Ç –∫—ç—à–±—ç–∫?'
üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:
1. [0.07] –î–ª—è —Ä–µ—à–µ–Ω–∏—è –ø—Ä–æ–±–ª–µ–º—ã –ø–æ–ø—Ä–æ–±—É–π—Ç–µ –æ—á–∏—Å—Ç–∏—Ç—å –∫—ç—à –±—Ä–∞—É–∑–µ—Ä–∞.
2. [0.06] –†–∞–∑–º–µ—Ä –∫—ç—à–±—ç–∫–∞ –∑–∞–≤–∏—Å–∏—Ç –æ—Ç —Ç–∏–ø–∞ –≤–∞—à–µ–π –∫–∞—Ä—Ç—ã.


In [75]:
# Persist phrases, embeddings and the FAISS index to the default "knowledge_base" directory
assistant.save_knowledge_base()

‚úÖ –ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞ –≤ –¥–∏—Ä–µ–∫—Ç–æ—Ä–∏—é: knowledge_base


In [34]:
# Single ad-hoc query against the current `assistant`, top three suggestions
test_query = "–ö–∞–∫ —è –º–æ–≥—É –æ–ø–ª–∞—Ç–∏—Ç—å –µ–∂–µ–º–µ—Å—è—á–Ω—ã–π –ø–ª–∞—Ç–µ–∂ –ø–æ –∫—Ä–µ–¥–∏—Ç—É?"
responses, similarities = assistant.find_similar_responses(test_query, top_k=3)

print(f"\nüë® –ö–ª–∏–µ–Ω—Ç: '{test_query}'")
print("üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:")
# Number the suggestions from 1 for display
for i, (resp, sim) in enumerate(zip(responses, similarities), 1):
    print(f"{i}. [{sim:.2f}] {resp}")


üë® –ö–ª–∏–µ–Ω—Ç: '–ö–∞–∫ —è –º–æ–≥—É –æ–ø–ª–∞—Ç–∏—Ç—å –µ–∂–µ–º–µ—Å—è—á–Ω—ã–π –ø–ª–∞—Ç–µ–∂ –ø–æ –∫—Ä–µ–¥–∏—Ç—É?'
üîç –ü–æ–¥—Ö–æ–¥—è—â–∏–µ –ø–æ–¥—Å–∫–∞–∑–∫–∏ –æ–ø–µ—Ä–∞—Ç–æ—Ä—É:
1. [0.07] –ì—Ä–∞—Ñ–∏–∫ –ø–ª–∞—Ç–µ–∂–µ–π –±—ã–ª –æ—Ç–ø—Ä–∞–≤–ª–µ–Ω –≤–∞–º –Ω–∞ email –ø—Ä–∏ –æ—Ñ–æ—Ä–º–ª–µ–Ω–∏–∏ –∫—Ä–µ–¥–∏—Ç–∞.
2. [0.06] –î–æ—Å—Ä–æ—á–Ω–æ–µ –ø–æ–≥–∞—à–µ–Ω–∏–µ –∫—Ä–µ–¥–∏—Ç–∞ –¥–æ—Å—Ç—É–ø–Ω–æ –±–µ–∑ –∫–æ–º–∏—Å—Å–∏–π.
3. [0.05] –ë–æ–Ω—É—Å–Ω—ã–µ –±–∞–ª–ª—ã –Ω–∞—á–∏—Å–ª—è—é—Ç—Å—è –≤ –∫–æ–Ω—Ü–µ –∫–∞–∂–¥–æ–≥–æ –º–µ—Å—è—Ü–∞.


In [76]:
# –ü—Ä–∏–º–µ—Ä –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è –∏ –∑–∞–≥—Ä—É–∑–∫–∏
if __name__ == "__main__":
    # Create the assistant
    assistant = OperatorAssistant()
    
    # Try to load a previously saved knowledge base
    if not assistant.load_knowledge_base():
        # Loading failed: build a new one from the in-memory phrase list
        assistant.initialize_knowledge_base(knowledge_base)
        # Save it for future runs
        assistant.save_knowledge_base()
    
    # Exercise the search
    test_queries = [
        "–ö–∞–∫ –º–Ω–µ –æ–ø–ª–∞—Ç–∏—Ç—å –∫—Ä–µ–¥–∏—Ç?",
        "–ö–∞–∫–∏–µ —É –≤–∞—Å –µ—Å—Ç—å –∏–Ω–≤–µ—Å—Ç–∏—Ü–∏–æ–Ω–Ω—ã–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è?",
        "–£ –º–µ–Ω—è –ø—Ä–æ–±–ª–µ–º–∞ —Å –º–æ–±–∏–ª—å–Ω—ã–º –ø—Ä–∏–ª–æ–∂–µ–Ω–∏–µ–º"
    ]
    
    for query in test_queries:
        print(f"\nüîç –ó–∞–ø—Ä–æ—Å: '{query}'")
        responses, scores = assistant.find_similar_responses(query)
        for resp, score in zip(responses, scores):
            print(f"‚Üí {resp} (—Å—Ö–æ–∂–µ—Å—Ç—å: {score:.2f})")
    
    # Add new phrases and save the updated knowledge base
    new_phrases = [
        "–î–ª—è —É—Ç–æ—á–Ω–µ–Ω–∏—è –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –ø–æ –≤–∫–ª–∞–¥–∞–º –Ω–∞–∂–º–∏—Ç–µ 1",
        "–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é –ø–æ –∏–ø–æ—Ç–µ–∫–µ –º–æ–∂–Ω–æ –ø–æ–ª—É—á–∏—Ç—å –≤ —Ä–∞–∑–¥–µ–ª–µ '–ö—Ä–µ–¥–∏—Ç—ã'"
    ]
    assistant.add_to_knowledge_base(new_phrases)
    assistant.save_knowledge_base()

‚úÖ –ë–∞–∑–∞ –∑–Ω–∞–Ω–∏–π –∑–∞–≥—Ä—É–∂–µ–Ω–∞: 77 —Ñ—Ä–∞–∑, —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å 384

üîç –ó–∞–ø—Ä–æ—Å: '–ö–∞–∫ –º–Ω–µ –æ–ø–ª–∞—Ç–∏—Ç—å –∫—Ä–µ–¥–∏—Ç?'
‚Üí –ì—Ä–∞—Ñ–∏–∫ –ø–ª–∞—Ç–µ–∂–µ–π –±—ã–ª –æ—Ç–ø—Ä–∞–≤–ª–µ–Ω –≤–∞–º –Ω–∞ email –ø—Ä–∏ –æ—Ñ–æ—Ä–º–ª–µ–Ω–∏–∏ –∫—Ä–µ–¥–∏—Ç–∞. (—Å—Ö–æ–∂–µ—Å—Ç—å: 0.06)
‚Üí –î–æ—Å—Ä–æ—á–Ω–æ–µ –ø–æ–≥–∞—à–µ–Ω–∏–µ –∫—Ä–µ–¥–∏—Ç–∞ –¥–æ—Å—Ç—É–ø–Ω–æ –±–µ–∑ –∫–æ–º–∏—Å—Å–∏–π. (—Å—Ö–æ–∂–µ—Å—Ç—å: 0.06)
‚Üí –í—ã –æ–±—Ä–∞—â–∞–µ—Ç–µ—Å—å –ø–æ –≤–æ–ø—Ä–æ—Å—É –∫—Ä–µ–¥–∏—Ç–∞, –¥–µ–ø–æ–∑–∏—Ç–∞ –∏–ª–∏ –¥—Ä—É–≥–æ–≥–æ –ø—Ä–æ–¥—É–∫—Ç–∞? (—Å—Ö–æ–∂–µ—Å—Ç—å: 0.05)

üîç –ó–∞–ø—Ä–æ—Å: '–ö–∞–∫–∏–µ —É –≤–∞—Å –µ—Å—Ç—å –∏–Ω–≤–µ—Å—Ç–∏—Ü–∏–æ–Ω–Ω—ã–µ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è?'
‚Üí –ù–∞—à–∏ –∫–æ–Ω—Å—É–ª—å—Ç–∞–Ω—Ç—ã –ø–æ–º–æ–≥—É—Ç –≤–∞–º –ø–æ–¥–æ–±—Ä–∞—Ç—å –∏–Ω–≤–µ—Å—Ç–∏—Ü–∏–æ–Ω–Ω—ã–π –ø—Ä–æ–¥—É–∫—Ç. (—Å—Ö–æ–∂–µ—Å—Ç—å: 0.09)
‚Üí –í—ã –æ–±—Ä–∞—â–∞–µ—Ç–µ—Å—å –ø–æ –≤–æ–ø—Ä–æ—Å—É –∫—Ä–µ–¥–∏—Ç–∞, –¥–µ–ø–æ–∑–∏—Ç–∞ –∏–ª–∏ –¥—Ä—É–≥–æ–≥–æ –ø—Ä–æ–¥—É–∫—Ç–∞? (—Å—Ö–æ–∂–µ—Å—Ç—å: 0.07)
‚Üí –ú—ã –º–æ–∂–µ–º –ø

In [77]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# –ó–∞–≥—Ä—É–∂–∞–µ–º –º–æ–¥–µ–ª—å
# Load the same embedding model the assistant uses by default
model = SentenceTransformer('./paraphrase-multilingual-MiniLM-L12-v2')

# Two texts to compare (named word1/word2, but each holds a full sentence)
word1 = "–ö–∞–∫ —è –º–æ–≥—É –æ–ø–ª–∞—Ç–∏—Ç—å –∫—Ä–µ–¥–∏—Ç?"
word2 = "–ì—Ä–∞—Ñ–∏–∫ –ø–ª–∞—Ç–µ–∂–µ–π –±—ã–ª –æ—Ç–ø—Ä–∞–≤–ª–µ–Ω –≤–∞–º –Ω–∞ email –ø—Ä–∏ –æ—Ñ–æ—Ä–º–ª–µ–Ω–∏–∏ –∫—Ä–µ–¥–∏—Ç–∞."

# Compute the embeddings
embedding1 = model.encode(word1)
embedding2 = model.encode(word2)

# Compute cosine similarity, then express it as a distance (1 - similarity)
similarity = cosine_similarity([embedding1], [embedding2])[0][0]
distance = 1 - similarity

print(f"–ö–æ—Å–∏–Ω—É—Å–Ω–æ–µ —Ä–∞—Å—Å—Ç–æ—è–Ω–∏–µ –º–µ–∂–¥—É '{word1}' –∏ '{word2}': {distance:.4f}")

–ö–æ—Å–∏–Ω—É—Å–Ω–æ–µ —Ä–∞—Å—Å—Ç–æ—è–Ω–∏–µ –º–µ–∂–¥—É '–ö–∞–∫ —è –º–æ–≥—É –æ–ø–ª–∞—Ç–∏—Ç—å –∫—Ä–µ–¥–∏—Ç?' –∏ '–ì—Ä–∞—Ñ–∏–∫ –ø–ª–∞—Ç–µ–∂–µ–π –±—ã–ª –æ—Ç–ø—Ä–∞–≤–ª–µ–Ω –≤–∞–º –Ω–∞ email –ø—Ä–∏ –æ—Ñ–æ—Ä–º–ª–µ–Ω–∏–∏ –∫—Ä–µ–¥–∏—Ç–∞.': 0.4681
