In [57]:
from pathlib import Path
import os, shutil, re, time, json
from typing import Optional, List
from enum import Enum

from dotenv import load_dotenv
from rapidfuzz import fuzz

from pydantic import BaseModel, Field, ValidationError
from langchain.chat_models         import ChatOpenAI
from langchain.embeddings          import OpenAIEmbeddings
from langchain.text_splitter       import CharacterTextSplitter
from langchain.schema              import Document, BaseRetriever
from langchain_community.vectorstores import Chroma
from langchain.retrievers          import BM25Retriever, EnsembleRetriever
from langchain.memory              import ConversationBufferMemory
from langchain.agents              import Tool, AgentType, initialize_agent
from langchain.prompts             import PromptTemplate
from langchain.chains              import LLMChain
from langchain.output_parsers      import PydanticOutputParser, EnumOutputParser
from langchain_core.exceptions     import OutputParserException

import chromadb
from chromadb.config import Settings

In [58]:
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()

# The previous self-assignment `os.environ[k] = os.getenv(k)` was a no-op when
# the key existed and raised an opaque "TypeError: str expected, not NoneType"
# when it did not. Fail early with an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

# The Chroma index lives under the user's home directory and is rebuilt from
# scratch on every run, so any index left over from a previous run is removed.
persist_dir = str(Path.home() / "chroma_asic_idx")
if os.path.exists(persist_dir):
    shutil.rmtree(persist_dir)

In [59]:
# Hard-coded product catalogue: one free-text description per product.
# product_info() uses the first line of each text as the product title and
# extracts the price with a regex, so the line layout must stay stable.
# NOTE(review): the non-ASCII text below looks mis-encoded (mojibake) — confirm
# against the original notebook before relying on these literals.
product_texts = [
    """
Bitmain Antminer S19 Pro 110 TH/s
SHA-256 ‚Ä¢ 110 TH/s ‚Ä¢ 3250 –í—Ç ‚Ä¢ 29,5 J/TH
–¶–µ–Ω–∞ 199 000 ‚ÇΩ ‚Ä¢ –ì–∞—Ä–∞–Ω—Ç–∏—è 12 –º–µ—Å.
    """,
    """
MicroBT Whatsminer M30S++ 112 TH/s
SHA-256 ‚Ä¢ 112 TH/s ‚Ä¢ 3472 –í—Ç ‚Ä¢ 31 J/TH
–¶–µ–Ω–∞ 128 000 ‚ÇΩ ‚Ä¢ –ì–∞—Ä–∞–Ω—Ç–∏—è 3 –º–µ—Å.
    """,
    """
iPollo V1 Mini ETC 300 MH/s (Wi-Fi)
EtHash ETC ‚Ä¢ 300 MH/s ‚Ä¢ 240 –í—Ç
–¶–µ–Ω–∞ 38 500 ‚ÇΩ ‚Ä¢ –ì–∞—Ä–∞–Ω—Ç–∏—è 6 –º–µ—Å.
    """,
]
# Wrap each description, stripped of surrounding whitespace, in a Document.
documents = [Document(page_content=t.strip()) for t in product_texts]

In [60]:
# Cut the catalogue into newline-delimited chunks, then embed and index them
# in a persistent Chroma collection stored at `persist_dir`.
splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=150,
)
chunks = splitter.split_documents(documents)

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=persist_dir,
    collection_name="asic_store",
    client_settings=Settings(anonymized_telemetry=False),
)

In [70]:
# Keyword (BM25) retriever over the un-split product documents; returns up to 3 hits.
bm25 = BM25Retriever.from_documents(
    documents,
    k=3,
)

class FuzzyRetriever(BaseRetriever):
    """Retriever that ranks the BM25 corpus by fuzzy partial-match similarity.

    The candidate documents are taken from ``bm25.docs`` when the class is
    defined; each query returns the ``_k`` best-scoring documents.
    """

    model_config = {"extra": "allow"}
    _docs: List[Document] = bm25.docs
    _k: int = 3

    def _get_relevant_documents(self, query, **_):
        """Return the top ``_k`` documents for ``query`` (case-insensitive)."""
        needle = query.lower()

        def similarity(doc):
            # rapidfuzz partial ratio between the query and the document text
            return fuzz.partial_ratio(needle, doc.page_content.lower())

        best_first = sorted(self._docs, key=similarity, reverse=True)
        return best_first[: self._k]

    async def _aget_relevant_documents(self, query, **_):
        """Async entry point; simply delegates to the synchronous ranking."""
        return self._get_relevant_documents(query)

# Weighted ensemble of the three retrievers, in order: dense vector search,
# BM25 keyword search, and fuzzy string matching.
hybrid_retriever = EnsembleRetriever(
    weights=[0.5, 0.35, 0.15],
    retrievers=[
        vectorstore.as_retriever(search_kwargs=dict(k=8)),
        bm25,
        FuzzyRetriever(),
    ],
)

In [77]:
# Structured profile of a prospective client, filled in incrementally from the
# dialogue. Every field is optional and defaults to None ("not known yet").
# NOTE(review): documented with comments rather than a docstring on purpose —
# a pydantic class docstring presumably ends up in the JSON schema that
# card_parser's format instructions put into the LLM prompt; confirm before
# adding one.
class ClientCard(BaseModel):
    # How to address the client (asked at stage 1 by next_question).
    name:              Optional[str]  = None
    # Contact handles; next_question asks for either one at stage 4.
    telegram:          Optional[str]  = None
    phone:             Optional[str]  = None
    # Where the client plans to place the equipment (asked at stage 1).
    location:          Optional[str]  = None
    # Not referenced by the visible code — meaning to be confirmed.
    entity_type:       Optional[str]  = None
    # Not referenced by the visible code — unit (years?) to be confirmed.
    experience:        Optional[int]  = None
    # Number of ASIC miners the client already owns (stage 2).
    rigs_owned:        Optional[int]  = None
    # Number of devices the client wants to buy (stage 2).
    rigs_plan:         Optional[int]  = None
    # Electricity cost at the client's own site; only asked for own hosting.
    electricity_price: Optional[float]= None
    # Own site vs. provider hosting; next_question compares the lower-cased
    # value against a fixed literal to detect the "own site" case.
    host_choice:       Optional[str]  = None
    # Power (kW) needed on the provider's hosting; only asked when not self-hosting.
    free_power:        Optional[int]  = None
    # Purchase budget (stage 2).
    budget:            Optional[int]  = None
    # The three fields below are not referenced by the visible code — TODO confirm semantics.
    financial_level:   Optional[int]  = None
    knowledge:         Optional[int]  = None
    stage_closed:      Optional[bool] = None

# Chat model shared by every chain and by the agent in this notebook.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.7)

# Parser that turns the model's JSON answer into a ClientCard instance.
card_parser = PydanticOutputParser(pydantic_object=ClientCard)

# Prompt: current card JSON + new utterance + the parser's format instructions.
card_prompt = PromptTemplate(
    input_variables=["cur", "utt", "fmt"],
    template=(
        "–û–±–Ω–æ–≤–∏ JSON-–∫–∞—Ä—Ç—É –∫–ª–∏–µ–Ω—Ç–∞ –ø–æ –Ω–æ–≤–æ–π —Ä–µ–ø–ª–∏–∫–µ.\n"
        "–¢–µ–∫—É—â–∏–π JSON: {cur}\n\n"
        "–†–µ–ø–ª–∏–∫–∞: \"{utt}\"\n\n"
        "{fmt}"
    ),
)

card_chain = LLMChain(
    prompt=card_prompt,
    llm=llm,
    output_parser=card_parser,
)

def update_card(card: ClientCard, utt: str, retry: int = 2) -> ClientCard:
    """Ask the LLM to fold a new utterance into the client card.

    Args:
        card: The current client card. It is never mutated.
        utt: The latest utterance from the dialogue.
        retry: Maximum number of LLM attempts before giving up.

    Returns:
        The card produced by ``card_chain``; or ``card`` itself when the
        utterance is blank or every attempt fails to parse or validate.
    """
    # A blank utterance carries no facts: skip the LLM round-trip entirely
    # rather than paying for a call that can only echo or corrupt the card.
    if not utt or not utt.strip():
        return card

    for attempt in range(retry):
        try:
            raw = card_chain.invoke({
                "cur": card.model_dump_json(),
                "utt": utt,
                "fmt": card_parser.get_format_instructions(),
            })["text"]
            # Depending on the chain/parser wiring the result may already be
            # a parsed model, a plain dict, or a raw JSON string.
            if isinstance(raw, ClientCard):
                return raw
            if isinstance(raw, dict):
                return ClientCard(**raw)
            # pydantic v2 API, consistent with model_dump_json() above
            # (parse_raw is the deprecated v1 spelling).
            return ClientCard.model_validate_json(raw)
        except (OutputParserException, ValidationError):
            # Brief pause between attempts; no point sleeping after the last one.
            if attempt + 1 < retry:
                time.sleep(0.2)
    return card

In [78]:
from enum import Enum
from langchain.output_parsers import EnumOutputParser
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

class Intent(str, Enum):
    """Request categories; each member is also a plain string equal to its name."""

    list = "list"
    detail = "detail"
    budget = "budget"

# Parser that maps the model's one-word answer onto an Intent member.
intent_parser = EnumOutputParser(enum=Intent)

# Prompt: the client request plus the parser's format instructions.
intent_prompt = PromptTemplate(
    input_variables=["q", "fmt"],
    template=(
        "–ö–∞—Ç–µ–≥–æ—Ä–∏–∑—É–π –∑–∞–ø—Ä–æ—Å –∫–ª–∏–µ–Ω—Ç–∞ –æ–¥–Ω–∏–º —Å–ª–æ–≤–æ–º: list, detail –∏–ª–∏ budget.\n"
        "–ó–∞–ø—Ä–æ—Å: \"{q}\"\n\n"
        "{fmt}"
    ),
)

intent_chain = LLMChain(prompt=intent_prompt, output_parser=intent_parser, llm=llm)


In [79]:
def _to_int(digits: str) -> int:
    """Parse a digit group such as ``"199 000 "`` into an int, dropping all whitespace."""
    # Amounts may be grouped with tabs or non-breaking spaces, which
    # str.replace(" ", "") would leave in place and int() would reject.
    return int(re.sub(r"\s+", "", digits))


def product_info(q: str) -> str:
    """Answer a catalogue question for the agent's ``product_info`` tool.

    The request is first classified by ``intent_chain``:

    * ``list``   - bullet list of all product titles;
    * ``budget`` - titles of products whose price does not exceed the first
      number with four or more digits found in ``q``;
    * otherwise  - the full text of the first product whose title shares a
      token (longer than two characters) with the query, falling back to
      retrieval plus an LLM answer restricted to the retrieved context.

    Args:
        q: The client's question as free text.

    Returns:
        A plain-text answer.
    """
    try:
        intent = intent_chain.invoke({
            "q": q,
            "fmt": intent_parser.get_format_instructions(),
        })["text"].strip()
    except OutputParserException:
        # The classifier answered with something outside list/detail/budget;
        # treat it as a detail lookup instead of crashing the tool call.
        intent = "detail"

    if intent == "list":
        return "\n".join(f"‚Ä¢ {d.page_content.splitlines()[0]}" for d in documents)

    if intent == "budget":
        m = re.search(r"(\d[\d\s]{3,})", q)
        if not m:
            return "–ü–æ–∂–∞–ª—É–π—Å—Ç–∞, —É—Ç–æ—á–Ω–∏—Ç–µ –±—é–¥–∂–µ—Ç –≤ —Ä—É–±–ª—è—Ö."
        budget = _to_int(m.group(1))
        fits = [
            d for d in documents
            if (p := re.search(r"–¶–µ–Ω–∞\s+(\d[\d\s]+)", d.page_content))
            and _to_int(p.group(1)) <= budget
        ]
        return (
            "–ü–æ–¥—Ö–æ–¥–∏—Ç:\n" +
            "\n".join(f"‚Ä¢ {d.page_content.splitlines()[0]}" for d in fits)
            if fits else "–ù–µ—Ç –º–æ–¥–µ–ª–µ–π –≤ —ç—Ç–æ–º –±—é–¥–∂–µ—Ç–µ."
        )

    # Detail lookup: the first product whose title shares a token with the query.
    ql = q.lower()
    for d in documents:
        title = d.page_content.splitlines()[0].lower()
        if any(tok in ql for tok in re.split(r"\W+", title) if len(tok) > 2):
            return d.page_content

    # fallback: semantic search
    docs = hybrid_retriever.invoke(q)
    if not docs:
        return "–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –Ω–µ –Ω–∞–π–¥–µ–Ω–∞."
    ctx = "\n---\n".join(d.page_content for d in docs[:2])
    answer = llm.invoke(
        f"–ò—Å–ø–æ–ª—å–∑—É—è —Ç–æ–ª—å–∫–æ —ç—Ç–æ—Ç –∫–æ–Ω—Ç–µ–∫—Å—Ç, –æ—Ç–≤–µ—Ç—å —Ñ–∞–∫—Ç–∞–º–∏:\n{ctx}\n\n–í–æ–ø—Ä–æ—Å: {q}\n–û—Ç–≤–µ—Ç:"
    )
    # A chat model returns a message object, not text; the tool contract and
    # the `-> str` annotation require the message content.
    return getattr(answer, "content", answer)


In [80]:
# Prompt that asks the model to classify the dialogue into a stage from 1 to 4,
# given the client card as JSON ({card_json}) and the conversation so far
# ({chat_history}). The template text is runtime data and is kept verbatim,
# including trailing spaces on some lines.
stage_prompt = PromptTemplate.from_template(
    """–û–ø—Ä–µ–¥–µ–ª–∏ —Å—Ç–∞–¥–∏—é 1‚Äì4.

1 ‚Äì –Ω—É–∂–µ–Ω name –ò–õ–ò location  
2 ‚Äì –≤—ã—è–≤–ª–µ–Ω–∏–µ –ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç–µ–π (rigs_owned, rigs_plan, host_choice, electricity_price, free_power, budget)  
3 ‚Äì –ø—Ä–µ–∑–µ–Ω—Ç–∞—Ü–∏—è —Ä–µ—à–µ–Ω–∏—è  
4 ‚Äì –∑–∞–∫—Ä—ã—Ç–∏–µ —Å–¥–µ–ª–∫–∏ (—Å–±–æ—Ä –∫–æ–Ω—Ç–∞–∫—Ç–∞ –∏ –≤—Ä–µ–º–µ–Ω–∏)

–ü—Ä–∞–≤–∏–ª–∞:
- –°–ª–æ–≤–∞ ‚Äú–∫—É–ø–∏—Ç—å‚Äù, ‚Äú—Å–≤—è–∑–∞—Ç—å—Å—è‚Äù, ‚Äú–æ—Ñ–æ—Ä–º–∏—Ç—å‚Äù, ‚Äú—Ö–æ—á—É‚Äù, ‚Äú—Ö–æ—Ç–µ–ª‚Äù ‚Üí —Å—Ç–∞–¥–∏—è 4.
- –§—Ä–∞–∑—ã ‚Äú–Ω–µ –∏–Ω—Ç–µ—Ä–µ—Å—É–µ—Ç‚Äù, ‚Äú–ø—Ä–æ—Å—Ç–æ —Å–º–æ—Ç—Ä—é‚Äù –Ω–∞ —ç—Ç–∞–ø–µ 2 ‚Üí —Å–Ω–∞—á–∞–ª–∞ –∫—Ä–∞—Ç–∫–∏–π pitch.
- –°—Ä–∞–∑—É –ø–æ—Å–ª–µ pitch –∑–∞–¥–∞—ë–º –ø–µ—Ä–≤—ã–π –≤–æ–ø—Ä–æ—Å –ø–æ –±–ª–æ–∫—É 2.

–ö–∞—Ä—Ç–∞ (JSON): {card_json}
–ò—Å—Ç–æ—Ä–∏—è: {chat_history}

–û—Ç–≤–µ—Ç—å –û–î–ù–û–ô —Ü–∏—Ñ—Ä–æ–π 1‚Äì4."""
)
# No output parser is attached: the chain's "text" output is the model's raw
# answer, which the dialogue loop strips and checks against "1".."4".
stage_chain = LLMChain(llm=llm, prompt=stage_prompt)

def next_question(card: ClientCard, stage: str) -> Optional[str]:
    if stage == "1":
        if not card.name:
            return "–ö–∞–∫ –º–Ω–µ –∫ –≤–∞–º –æ–±—Ä–∞—â–∞—Ç—å—Å—è?"
        if not card.location:
            return "–ì–¥–µ –≤—ã –ø–ª–∞–Ω–∏—Ä—É–µ—Ç–µ —Ä–∞–∑–º–µ—â–∞—Ç—å –æ–±–æ—Ä—É–¥–æ–≤–∞–Ω–∏–µ?"
    if stage == "2":
        if card.rigs_owned is None:
            return "–°–∫–æ–ª—å–∫–æ ASIC-–º–∞–π–Ω–µ—Ä–æ–≤ —É –≤–∞—Å —É–∂–µ –µ—Å—Ç—å?"
        if card.rigs_plan is None:
            return "–°–∫–æ–ª—å–∫–æ —É—Å—Ç—Ä–æ–π—Å—Ç–≤ –≤—ã —Ö–æ—Ç–µ–ª–∏ –±—ã –ø—Ä–∏–æ–±—Ä–µ—Å—Ç–∏?"
        if not card.host_choice:
            return "–†–∞–∑–º–µ—â–∞—Ç—å –ø–ª–∞–Ω–∏—Ä—É–µ—Ç–µ —É —Å–µ–±—è –∏–ª–∏ –Ω–∞ –Ω–∞—à–µ–º —Ö–æ—Å—Ç–∏–Ω–≥–µ?"
        if card.host_choice and card.host_choice.lower() == "—Å–≤–æ–π" and card.electricity_price is None:
            return "–ö–∞–∫–∞—è —Å—Ç–æ–∏–º–æ—Å—Ç—å —ç–ª–µ–∫—Ç—Ä–æ—ç–Ω–µ—Ä–≥–∏–∏ —É –≤–∞—Å –Ω–∞ –ø–ª–æ—â–∞–¥–∫–µ (‚ÇΩ/–∫–í—Ç‚ãÖ—á)?"
        if card.host_choice and card.host_choice.lower() != "—Å–≤–æ–π" and card.free_power is None:
            return "–°–∫–æ–ª—å–∫–æ —Å–≤–æ–±–æ–¥–Ω—ã—Ö –∫–í—Ç –≤–∞–º –ø–æ—Ç—Ä–µ–±—É–µ—Ç—Å—è –Ω–∞ –Ω–∞—à–µ–º —Ö–æ—Å—Ç–∏–Ω–≥–µ?"
        if card.budget is None:
            return "–ö–∞–∫–æ–π –±—é–¥–∂–µ—Ç –≤—ã –∑–∞–∫–ª–∞–¥—ã–≤–∞–µ—Ç–µ –Ω–∞ –ø–æ–∫—É–ø–∫—É?"
    if stage == "4":
        if not card.phone and not card.telegram:
            return "–ü–æ–∂–∞–ª—É–π—Å—Ç–∞, –æ—Å—Ç–∞–≤—å—Ç–µ —Ç–µ–ª–µ—Ñ–æ–Ω –∏–ª–∏ Telegram –¥–ª—è —Å–æ–∑–≤–æ–Ω–∞."
        return "–ö–æ–≥–¥–∞ –≤–∞–º –±—É–¥–µ—Ç —É–¥–æ–±–Ω–æ —Å–æ–∑–≤–æ–Ω–∏—Ç—å—Å—è —Å –º–µ–Ω–µ–¥–∂–µ—Ä–æ–º?"
    return None

def build_agent(card: ClientCard, stage: str, memory):
    """Build a conversational ReAct agent primed for the current sales stage.

    Args:
        card: Current client card, used to pick the next scripted question.
        stage: Stage identifier (``"1"``..``"4"``). It is overridden to
            ``"4"`` when the latest message contains a purchase-intent keyword.
        memory: Conversation memory whose ``buffer`` is a list of messages;
            its last entry is treated as the client's latest message.

    Returns:
        The agent executor created by ``initialize_agent``, wired to ``memory``
        and to the product lookup tool.
    """
    last = memory.buffer[-1].content.lower() if memory.buffer else ""
    pitch = ""
    if stage == "2" and re.search(r"–Ω–µ –∏–Ω—Ç–µ—Ä–µ—Å—É–µ—Ç|–ø—Ä–æ—Å—Ç–æ —Å–º–æ—Ç—Ä—é", last):
        pitch = (
            "–ù–∞—à–µ —Ä–µ—à–µ–Ω–∏–µ —Å–Ω–∏–∂–∞–µ—Ç –∑–∞—Ç—Ä–∞—Ç—ã –Ω–∞ –æ—Ö–ª–∞–∂–¥–µ–Ω–∏–µ –¥–æ 30 %\n"
            "–∏ –ø–æ–≤—ã—à–∞–µ—Ç –Ω–∞–¥—ë–∂–Ω–æ—Å—Ç—å –≤–∞—à–µ–π —Ñ–µ—Ä–º—ã.\n"
        )
    if re.search(r"\b(–∫—É–ø–∏—Ç—å|—Å–≤—è–∑–∞—Ç—å—Å—è|–æ—Ñ–æ—Ä–º–∏—Ç—å|—Ö–æ—á—É|—Ö–æ—Ç–µ–ª)\b", last):
        stage = "4"

    # The pitch has its own line in the prefix below. Previously it was also
    # substituted for the question (`pitch or next_question(...)`), so the
    # follow-up question that the stage rules require right after a pitch was
    # never asked and the pitch text appeared twice.
    q = next_question(card, stage) or ""

    prefix = f"""
–¢—ã ‚Äî —ç–∫—Å–ø–µ—Ä—Ç –ø–æ –ø—Ä–æ–º—ã—à–ª–µ–Ω–Ω–æ–º—É –º–∞–π–Ω–∏–Ω–≥—É.
–≠—Ç–∞–ø: {stage}.

{('–ü–∏—Ç—á: ' + pitch) if pitch else ''}
–ó–∞–¥–∞—á–∞: **–∑–∞–¥–∞—Ç—å –û–î–ò–ù** –≤–æ–ø—Ä–æ—Å –∏–ª–∏ —Å–æ–±—Ä–∞—Ç—å –∫–æ–Ω—Ç–∞–∫—Ç (—ç—Ç–∞–ø 4):
{q}

–ï—Å–ª–∏ –Ω—É–∂–Ω–æ —Ç–æ—á–Ω–æ–µ –æ–ø–∏—Å–∞–Ω–∏–µ –º–æ–¥–µ–ª–∏ ‚Äî TOOL: product_info <–≤–æ–ø—Ä–æ—Å>
–ü–µ—Ä–µ—Ñ—Ä–∞–∑–∏—Ä—É–π Observation, –Ω–µ –∫–æ–ø–∏—Ä—É–π –¥–æ—Å–ª–æ–≤–Ω–æ.
"""
    # `product_tool` is not defined in any visible cell, so a fresh kernel
    # would hit NameError here. Use it when it exists; otherwise wrap
    # product_info under the tool name the prefix above advertises.
    try:
        tools = [product_tool]
    except NameError:
        tools = [
            Tool(
                name="product_info",
                func=product_info,
                description=(
                    "Answers questions about the ASIC miner catalogue: lists models, "
                    "returns a model's specs, or filters models by a budget in rubles. "
                    "Input: the customer's question."
                ),
            )
        ]

    return initialize_agent(
        tools=tools,
        llm=llm,
        agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=memory,
        verbose=True,
        agent_kwargs={"prefix": prefix},
    )


In [81]:
# Interactive sales dialogue. Each turn: update the client card from the
# user's message, classify the sales stage, build a stage-specific agent, and
# print its reply. Typing "exit" or "quit" prints the collected card and stops.
card = ClientCard()
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

print("üü¢ –ì–æ—Ç–æ–≤–æ! –ù–∞–ø–∏—à–∏—Ç–µ 'exit' –¥–ª—è –≤—ã–≤–æ–¥–∞ –∫–∞—Ä—Ç–æ—á–∫–∏.")
while True:
    try:
        user = input("\n–í—ã: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel: finish cleanly and still
        # show what was collected instead of losing the card.
        user = "exit"

    if user.lower() in {"exit", "quit"}:
        snapshot = card.model_dump(exclude_none=True)
        print("\nüìá –ö–∞—Ä—Ç–æ—á–∫–∞ –∫–ª–∏–µ–Ω—Ç–∞:")
        print(json.dumps(snapshot, indent=2, ensure_ascii=False))
        break
    if not user:
        # Nothing was typed; do not spend LLM calls on an empty turn.
        continue

    print(f"üë§ –ö–ª–∏–µ–Ω—Ç: {user}")
    card = update_card(card, user)

    # Stage detection and build_agent() both read the latest user message
    # from memory, so expose it there provisionally.
    memory.chat_memory.add_user_message(user)

    stage = stage_chain.invoke({
        "card_json":   card.model_dump_json(),
        "chat_history": memory.buffer,
    })["text"].strip()
    if stage not in {"1","2","3","4"}:
        stage = "1"
    print(f"[–°—Ç–∞–¥–∏—è: {stage}]")

    agent = build_agent(card, stage, memory)

    # The executor owns `memory` and records both the input and its output
    # when invoke() returns. Remove the provisional copy first and do not add
    # the reply manually, otherwise every turn is stored twice in the history.
    memory.chat_memory.messages.pop()
    reply = agent.invoke({"input": user})["output"]
    print(f"ü§ñ –ü—Ä–æ–¥–∞–≤–µ—Ü: {reply}")

    # The reply is also run through card extraction, as in the original flow.
    card = update_card(card, reply)

üü¢ –ì–æ—Ç–æ–≤–æ! –ù–∞–ø–∏—à–∏—Ç–µ 'exit' –¥–ª—è –≤—ã–≤–æ–¥–∞ –∫–∞—Ä—Ç–æ—á–∫–∏.
üë§ –ö–ª–∏–µ–Ω—Ç: –î–æ–±—Ä—ã–π –¥–µ–Ω—å
[–°—Ç–∞–¥–∏—è: 1]


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```
Thought: Do I need to use a tool? No
AI: –î–æ–±—Ä—ã–π –¥–µ–Ω—å! –ö–∞–∫ –º–Ω–µ –∫ –≤–∞–º –æ–±—Ä–∞—â–∞—Ç—å—Å—è?
```[0m

[1m> Finished chain.[0m
ü§ñ –ü—Ä–æ–¥–∞–≤–µ—Ü: –î–æ–±—Ä—ã–π –¥–µ–Ω—å! –ö–∞–∫ –º–Ω–µ –∫ –≤–∞–º –æ–±—Ä–∞—â–∞—Ç—å—Å—è?
```
üë§ –ö–ª–∏–µ–Ω—Ç: –ò–≤–∞–Ω
[–°—Ç–∞–¥–∏—è: 1]


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```
Thought: Do I need to use a tool? No
AI: –ü—Ä–∏—è—Ç–Ω–æ –ø–æ–∑–Ω–∞–∫–æ–º–∏—Ç—å—Å—è, –ò–≤–∞–Ω! –ì–¥–µ –≤—ã –ø–ª–∞–Ω–∏—Ä—É–µ—Ç–µ —É—Å—Ç–∞–Ω–∞–≤–ª–∏–≤–∞—Ç—å –æ–±–æ—Ä—É–¥–æ–≤–∞–Ω–∏–µ –¥–ª—è –º–∞–π–Ω–∏–Ω–≥–∞?
```[0m

[1m> Finished chain.[0m
ü§ñ –ü—Ä–æ–¥–∞–≤–µ—Ü: –ü—Ä–∏—è—Ç–Ω–æ –ø–æ–∑–Ω–∞–∫–æ–º–∏—Ç—å—Å—è, –ò–≤–∞–Ω! –ì–¥–µ –≤—ã –ø–ª–∞–Ω–∏—Ä—É–µ—Ç–µ —É—Å—Ç–∞–Ω–∞–≤–ª–∏–≤–∞—Ç—å –æ–±–æ—Ä—É–¥–æ–≤–∞–Ω–∏–µ –¥–ª—è –º–∞–π–Ω–∏–Ω–≥–∞?
```
üë§ –ö–ª–∏–µ–