In [22]:
# Блок 2. Импорты и настройка OpenAI-ключа
from pathlib import Path
import os, shutil, re, time, json
from typing import Optional, List, Any, Dict
from enum import Enum

from dotenv import load_dotenv
from rapidfuzz import fuzz

from pydantic import BaseModel, ValidationError
import chromadb
from chromadb.config import Settings

from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document, BaseRetriever
from langchain_community.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.memory import ConversationBufferMemory
from langchain.agents import Tool, AgentType, initialize_agent
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser, EnumOutputParser
from langchain_core.exceptions import OutputParserException

load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

persist_dir = str(Path.home() / "chroma_asic_idx")
if os.path.exists(persist_dir):
    shutil.rmtree(persist_dir)


In [23]:
product_texts = [
    """
    Bitmain Antminer S19 Pro 110 TH/s
    SHA-256 • 110 TH/s • 3250 Вт • 29,5 J/TH
    Цена 199 000 ₽ • Гарантия 12 мес.
    """,
    """
    MicroBT Whatsminer M30S++ 112 TH/s
    SHA-256 • 112 TH/s • 3472 Вт • 31 J/TH
    Цена 128 000 ₽ • Гарантия 3 мес.
    """,
    """
    iPollo V1 Mini ETC 300 MH/s (Wi-Fi)
    EtHash ETC • 300 MH/s • 240 Вт
    Цена 38 500 ₽ • Гарантия 6 мес.
    """,
]
documents = [Document(page_content=t.strip()) for t in product_texts]


In [15]:
splitter = CharacterTextSplitter(separator="\n", chunk_size=800, chunk_overlap=150)
chunks = splitter.split_documents(documents)
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(
    chunks,
    embedding=embeddings,
    collection_name="asic_store",
    persist_directory=persist_dir,
    client_settings=Settings(anonymized_telemetry=False),
)


In [16]:
bm25 = BM25Retriever.from_documents(documents, k=3)

class FuzzyRetriever(BaseRetriever):
    model_config = {"extra": "allow"}
    _docs: List[Document] = bm25.docs
    _k: int = 3

    def _get_relevant_documents(self, query, **_):
        ranked = sorted(
            self._docs,
            key=lambda d: fuzz.partial_ratio(query.lower(), d.page_content.lower()),
            reverse=True,
        )
        return ranked[: self._k]

    async def _aget_relevant_documents(self, query, **_):
        return self._get_relevant_documents(query)

hybrid_retriever = EnsembleRetriever(
    retrievers=[
        vectorstore.as_retriever(search_kwargs={"k": 8}),
        bm25,
        FuzzyRetriever(),
    ],
    weights=[0.5, 0.35, 0.15],
)


In [17]:
class ClientCard(BaseModel):
    name: Optional[str] = None
    telegram: Optional[str] = None
    phone: Optional[str] = None
    location: Optional[str] = None
    entity_type: Optional[str] = None
    experience: Optional[int] = None
    rigs_owned: Optional[int] = None
    rigs_plan: Optional[int] = None
    electricity_price: Optional[float] = None
    host_choice: Optional[str] = None
    free_power: Optional[int] = None
    budget: Optional[int] = None
    financial_level: Optional[int] = None
    knowledge: Optional[int] = None
    stage_closed: Optional[bool] = None

card_parser = PydanticOutputParser(pydantic_object=ClientCard)
card_prompt = PromptTemplate(
    template=(
        "Обнови JSON-карту клиента по новой реплике.\n"
        "Текущий JSON: {cur}\n\n"
        "Реплика: \"{utt}\"\n\n"
        "{fmt}"
    ),
    input_variables=["cur", "utt", "fmt"],
)
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.7)
card_chain = LLMChain(llm=llm, prompt=card_prompt, output_parser=card_parser)

def update_card(card: ClientCard, utt: str, retry: int = 2) -> ClientCard:
    for _ in range(retry):
        try:
            raw = card_chain.invoke({
                "cur": card.model_dump_json(),
                "utt": utt,
                "fmt": card_parser.get_format_instructions(),
            })["text"]
            if isinstance(raw, ClientCard):
                return raw
            return (
                ClientCard(**raw)
                if isinstance(raw, dict)
                else ClientCard.parse_raw(raw)
            )
        except (OutputParserException, ValidationError):
            time.sleep(0.2)
    return card


In [18]:
class Intent(str, Enum):
    list   = "list"
    detail = "detail"
    budget = "budget"

intent_parser = EnumOutputParser(enum=Intent)
intent_prompt = PromptTemplate(
    template=(
        "Категоризуй запрос клиента одним словом: list, detail или budget.\n"
        "Запрос: \"{q}\"\n\n"
        "{fmt}"
    ),
    input_variables=["q", "fmt"],
)
intent_chain = LLMChain(
    llm=llm,
    prompt=intent_prompt,
    output_parser=intent_parser,
)


In [19]:
def product_info(q: str) -> str:
    intent = intent_chain.invoke({
        "q": q,
        "fmt": intent_parser.get_format_instructions(),
    })["text"].strip()

    if intent == "list":
        return "\n".join(f"• {d.page_content.splitlines()[0]}" for d in documents)

    if intent == "budget":
        m = re.search(r"(\d[\d\s]{3,})", q)
        if not m:
            return "Пожалуйста, уточните бюджет в рублях."
        budget_val = int(m.group(1).replace(" ", ""))
        fits = [
            d for d in documents
            if (p := re.search(r"Цена\s+(\d[\d\s]+)", d.page_content))
            and int(p.group(1).replace(" ", "")) <= budget_val
        ]
        return (
            "Подходит:\n" +
            "\n".join(f"• {d.page_content.splitlines()[0]}" for d in fits)
            if fits else "Нет моделей в этом бюджете."
        )

    ql = q.lower()
    for d in documents:
        title = d.page_content.splitlines()[0].lower()
        if any(tok in ql for tok in re.split(r"\W+", title) if len(tok) > 2):
            return d.page_content

    docs = hybrid_retriever.invoke(q)
    if not docs:
        return "Информация не найдена."
    ctx = "\n---\n".join(d.page_content for d in docs[:2])
    return llm.invoke(
        f"Используя только этот контекст, ответь фактами:\n{ctx}\n\nВопрос: {q}\nОтвет:"
    )

product_tool = Tool(
    name="product_info",
    func=product_info,
    description="Инструмент для уточнения характеристик майнеров",
)


In [20]:
class FieldCondition(BaseModel):
    field: str
    question: str
    condition_field: Optional[str]    = None
    condition_values: Optional[List[Any]] = None

class PitchRule(BaseModel):
    keywords: List[str]
    text:     str

class ScriptStage(BaseModel):
    id:     int
    name:   Optional[str]            = None
    fields: List[FieldCondition]     = []
    pitch:  Optional[PitchRule]      = None

class ScriptConfig(BaseModel):
    stages:            List[ScriptStage]
    override_keywords: Dict[str, int]

def load_script(path: str) -> ScriptConfig:
    raw = Path(path).read_text(encoding="utf-8")
    try:
        return ScriptConfig.parse_raw(raw)
    except ValidationError as e:
        raise RuntimeError(f"Ошибка в конфиge {path}: {e}")

script_cfg = load_script("sales_script.json")

def determine_stage(card: ClientCard, last_utt: str) -> int:
    for kw, sid in script_cfg.override_keywords.items():
        if kw in last_utt.lower():
            return sid
    for stage in script_cfg.stages:
        for cond in stage.fields:
            val = getattr(card, cond.field)
            if val is None:
                if cond.condition_field:
                    cf = getattr(card, cond.condition_field)
                    if cf and cf.lower() in [v.lower() for v in cond.condition_values]:
                        return stage.id
                    else:
                        continue
                return stage.id
    return script_cfg.stages[-1].id

def next_question(card: ClientCard, stage_id: int, last_utt: str) -> Optional[str]:
    stage = next(s for s in script_cfg.stages if s.id == stage_id)
    if stage.pitch:
        for kw in stage.pitch.keywords:
            if kw in last_utt.lower():
                return stage.pitch.text
    for cond in stage.fields:
        val = getattr(card, cond.field)
        if val is None:
            if cond.condition_field:
                cf = getattr(card, cond.condition_field)
                if cf and cf.lower() in [v.lower() for v in cond.condition_values]:
                    return cond.question
            else:
                return cond.question
    return None

from langchain.memory import ConversationBufferMemory

def build_agent(card: ClientCard, memory: ConversationBufferMemory):
    last = memory.buffer[-1].content if memory.buffer else ""
    stage = determine_stage(card, last)
    qtext = next_question(card, stage, last) or ""
    prefix = f"""
Ты — эксперт по промышленному майнингу.
Скрипт: sales_script.json, стадия: {stage}.

Задача: **задать ОДИН** вопрос или сделать питч:
{qtext}

Если нужно точное описание модели — TOOL: product_info <вопрос>
Перефразируй Observation, не копируй дословно.
"""
    return initialize_agent(
        tools=[product_tool],
        llm=llm,
        agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=memory,
        verbose=True,
        agent_kwargs={"prefix": prefix},
    )


/var/folders/dx/2j_jz8k12tn9dvfk_ny7sxr40000gn/T/ipykernel_46330/2394851482.py:25: PydanticDeprecatedSince20: The `parse_raw` method is deprecated; if your data is JSON use `model_validate_json`, otherwise load the data then use `model_validate` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  return ScriptConfig.parse_raw(raw)


In [21]:
card = ClientCard()
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

print("🟢 Готово! Напишите 'exit' для вывода карточки.")
while True:
    user = input("\nВы: ")
    if user.lower() in {"exit", "quit"}:
        snapshot = card.model_dump(exclude_none=True)
        print("\n📇 Карточка клиента:")
        print(json.dumps(snapshot, indent=2, ensure_ascii=False))
        break

    print(f"👤 Клиент: {user}")
    card = update_card(card, user)
    memory.chat_memory.add_user_message(user)

    last_user = memory.buffer[-1].content
    stage = determine_stage(card, last_user)
    print(f"[Стадия: {stage}]")

    agent = build_agent(card, memory)
    reply = agent.invoke({"input": user})["output"]
    print(f"🤖 Продавец: {reply}")

    card = update_card(card, reply)
    memory.chat_memory.add_ai_message(reply)


🟢 Готово! Напишите 'exit' для вывода карточки.
👤 Клиент: Добрый день
[Стадия: 1]


[1m> Entering new AgentExecutor chain...[0m


  return initialize_agent(


[32;1m[1;3m```
Thought: Do I need to use a tool? No
AI: Здравствуйте! Как к вам можно обращаться?
```[0m

[1m> Finished chain.[0m
🤖 Продавец: Здравствуйте! Как к вам можно обращаться?
```
👤 Клиент: Иван
[Стадия: 1]


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```
Thought: Do I need to use a tool? No
AI: Приятно познакомиться, Иван! Где вы планируете размещать оборудование для майнинга?
```[0m

[1m> Finished chain.[0m
🤖 Продавец: Приятно познакомиться, Иван! Где вы планируете размещать оборудование для майнинга?
```

📇 Карточка клиента:
{
  "name": "Иван"
}
