In [42]:
from pathlib import Path
import os, shutil, json, re, time
from typing import Optional, List

from dotenv import load_dotenv
from rapidfuzz import fuzz

from pydantic import BaseModel, Field, ValidationError

from langchain.llms              import OpenAI
from langchain.embeddings        import OpenAIEmbeddings
from langchain.text_splitter     import CharacterTextSplitter
from langchain.schema            import Document, BaseRetriever
from langchain_community.vectorstores import Chroma
from langchain.retrievers        import BM25Retriever, EnsembleRetriever
from langchain.memory            import ConversationBufferMemory
from langchain.agents            import Tool, AgentType, initialize_agent
from langchain.prompts           import PromptTemplate
from langchain.chains            import LLMChain
from langchain.output_parsers    import PydanticOutputParser
from langchain_core.exceptions   import OutputParserException     
import chromadb
from chromadb.config import Settings


In [43]:
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

persist_dir = str(Path.home() / "chroma_asic_idx")
if os.path.exists(persist_dir):
    shutil.rmtree(persist_dir)


In [44]:
product_texts = [
    # ‚Äî‚Äî‚Äî S19 Pro ‚Äî‚Äî‚Äî
    """
Bitmain Antminer S19 Pro 110 TH/s
–ê–ª–≥–æ—Ä–∏—Ç–º SHA-256 (Bitcoin/BCH)
110 TH/s ¬±3 % ‚Ä¢ 3250 –í—Ç ¬±5 % ‚Ä¢ 29,5 J/TH
–®—É–º 75 –¥–ë ‚Ä¢ 400√ó195√ó290 –º–º, 13,2 –∫–≥
–¶–µ–Ω–∞ 199 000 ‚ÇΩ  (—Å–∫–∏–¥–∫–∞ –æ—Ç 3 —à—Ç)
–ì–∞—Ä–∞–Ω—Ç–∏—è 12 –º–µ—Å. –æ—Ç Bitmain
–î–æ—Å—Ç–∞–≤–∫–∞ –°–î–≠–ö –†–§ / —Å–∞–º–æ–≤—ã–≤–æ–∑ (–ú–æ—Å–∫–≤–∞)
    """,
    # ‚Äî‚Äî‚Äî M30S++ ‚Äî‚Äî‚Äî
    """
MicroBT Whatsminer M30S++ 112 TH/s
SHA-256 ‚Ä¢ 112 TH/s ¬±2 % ‚Ä¢ 3472 –í—Ç ‚Ä¢ 31 J/TH
–°–æ—Å—Ç–æ—è–Ω–∏–µ –ë–£ 2023, 1000 —á ‚Ä¢ –≥–∞—Ä–∞–Ω—Ç–∏—è 3 –º–µ—Å
–¶–µ–Ω–∞ 128 000 ‚ÇΩ ‚Ä¢ —Å–∫–∏–¥–∫–∞ 5 % ‚â• 5 —à—Ç
–û–ø–ª–∞—Ç–∞ BTC, –°–±–µ—Ä, Tinkoff
–î–æ—Å—Ç–∞–≤–∫–∞ Boxberry, –ü–≠–ö
    """,
    # ‚Äî‚Äî‚Äî iPollo V1 Mini ‚Äî‚Äî‚Äî
    """
iPollo V1 Mini ETC 300 MH/s (Wi-Fi)
EtHash ETC ‚Ä¢ 300 MH/s ¬±10 % ‚Ä¢ 240 –í—Ç
–®—É–º 50 –¥–ë ‚Ä¢ 178√ó143√ó90 –º–º, 2,1 –∫–≥
–¶–µ–Ω–∞ 38 500 ‚ÇΩ ‚Ä¢ –≥–∞—Ä–∞–Ω—Ç–∏—è 6 –º–µ—Å iPollo
–û–ø–ª–∞—Ç–∞ USDT (TRC-20), –∫–∞—Ä—Ç–∞ –†–§
–î–æ—Å—Ç–∞–≤–∫–∞ EMS, Boxberry ‚Äî –≤ –¥–µ–Ω—å –æ–ø–ª–∞—Ç—ã
    """,
]
documents = [Document(page_content=t.strip()) for t in product_texts]


In [45]:
splitter  = CharacterTextSplitter(separator="\n", chunk_size=800, chunk_overlap=150)
chunks    = splitter.split_documents(documents)

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vectorstore = Chroma.from_documents(
    chunks,
    embedding         = embeddings,
    collection_name   = "asic_store",
    persist_directory = persist_dir,
    client_settings   = Settings(anonymized_telemetry=False),
)

In [66]:
bm25 = BM25Retriever.from_documents(documents, k=3)

class FuzzyRetriever(BaseRetriever):
    model_config = {"extra": "allow"}
    _docs: List[Document] = bm25.docs
    _k:   int             = 3
    def _get_relevant_documents(self, query, *, run_manager=None, **kw):
        ranked = sorted(
            self._docs,
            key=lambda d: fuzz.partial_ratio(query.lower(), d.page_content.lower()),
            reverse=True,
        )
        return ranked[: self._k]
    async def _aget_relevant_documents(self, query, *, run_manager=None, **kw):
        return self._get_relevant_documents(query)

hybrid_retriever = EnsembleRetriever(
    retrievers=[
        vectorstore.as_retriever(search_kwargs={"k": 8}),
        bm25,
        FuzzyRetriever(),
    ],
    weights=[0.5, 0.35, 0.15],
)


In [67]:
class CustomerCard(BaseModel):
    name:        Optional[str] = Field(None, description="–ò–º—è –∫–ª–∏–µ–Ω—Ç–∞")
    telegram:    Optional[str] = Field(None, description="–¢–µ–ª–µ–≥—Ä–∞–º @...")
    phone:       Optional[str] = Field(None, description="–¢–µ–ª–µ—Ñ–æ–Ω")

    location:    Optional[str] = Field(None, description="–ì–æ—Ä–æ–¥/—Ä–µ–≥–∏–æ–Ω")
    entity_type: Optional[str] = Field(None, description="–§–∏–∑/–Æ—Ä –ª–∏—Ü–æ, –∫–æ–º–ø–∞–Ω–∏—è")
    mining_years:Optional[int] = Field(None, description="–û–ø—ã—Ç –º–∞–π–Ω–∏–Ω–≥–∞, –≥–æ–¥—ã")
    rigs_owned:  Optional[int] = Field(None, description="–°–∫–æ–ª—å–∫–æ –∞—Å–∏–∫–æ–≤ —É–∂–µ –µ—Å—Ç—å")
    rigs_plan:   Optional[int] = Field(None, description="–°–∫–æ–ª—å–∫–æ –ø–ª–∞–Ω–∏—Ä—É–µ—Ç –∫—É–ø–∏—Ç—å")

    has_asic:    Optional[bool]  = None
    electricity_price: Optional[float] = Field(None, description="‚ÇΩ/–∫–í—Ç‚ãÖ—á")
    budget:      Optional[int]  = Field(None, description="–ë—é–¥–∂–µ—Ç ‚ÇΩ")
    host_choice: Optional[str]  = Field(None, description="–†–∞–∑–º–µ—â–µ–Ω–∏–µ: —Å–≤–æ–π —Ö–æ—Å—Ç–∏–Ω–≥ / –Ω–∞—à")
    free_power:  Optional[int]  = Field(None, description="–°–≤–æ–±–æ–¥–Ω—ã–µ –∫–í—Ç")

    knowledge_lvl:        Optional[int] = Field(None, description="1-10 –æ—Ü–µ–Ω–∫–∞ –∑–Ω–∞–Ω–∏–π")
    financial_potential:  Optional[int] = Field(None, description="1-10 –ø–æ—Ç–µ–Ω—Ü–∏–∞–ª")
    stage_closed:         Optional[bool] = None  # —Å–¥–µ–ª–∫–∞ –∑–∞–∫—Ä—ã—Ç–∞?

card_parser = PydanticOutputParser(pydantic_object=CustomerCard)
card_prompt  = PromptTemplate(
    template=(
        "–ó–∞–ø–æ–ª–Ω–∏ –∫–∞—Ä—Ç–æ—á–∫—É –∫–ª–∏–µ–Ω—Ç–∞ –ø–æ –Ω–æ–≤–æ–π —Ä–µ–ø–ª–∏–∫–µ.\n"
        "–¢–µ–∫—É—â–∏–π JSON: {current_json}\n\n"
        "–†–µ–ø–ª–∏–∫–∞: \"{utterance}\"\n\n"
        "–û—Ç–≤–µ—Ç—å –¢–û–õ–¨–ö–û JSON:\n{fmt}"
    ),
    input_variables=["current_json", "utterance", "fmt"],
)
card_chain = LLMChain(llm=llm, prompt=card_prompt, output_parser=card_parser)

client_card = CustomerCard()

def update_card(cur_json: str, utterance: str, retry=2) -> CustomerCard:
    for _ in range(retry):
        try:
            raw = card_chain.invoke({
                "current_json": cur_json,
                "utterance":    utterance,
                "fmt": card_parser.get_format_instructions(),
            })["text"]
            if isinstance(raw, CustomerCard):
                return raw
            return CustomerCard(**raw) if isinstance(raw, dict) else CustomerCard.parse_raw(raw)
        except (OutputParserException, ValidationError):
            time.sleep(0.3)
    return CustomerCard.parse_raw(cur_json)

def classify_financial(card: CustomerCard) -> None:
    if card.financial_potential:
        return
    rigs = (card.rigs_plan or 0) + (card.rigs_owned or 0)
    if rigs >= 3000:  fp = 10
    elif rigs >= 1000: fp = 9
    elif rigs >= 500:  fp = 8
    elif rigs >= 100:  fp = 7
    elif rigs >= 50:   fp = 6
    elif rigs >= 20:   fp = 5
    elif rigs >= 10:   fp = 4
    elif rigs >= 7:    fp = 3
    elif rigs >= 4:    fp = 2
    elif rigs >= 1:    fp = 1
    else:              fp = None
    card.financial_potential = fp


In [69]:
from enum import Enum
from langchain.output_parsers import EnumOutputParser

class Intent(str, Enum):
    list   = "list"   
    detail = "detail"  
    budget = "budget" 

intent_parser = EnumOutputParser(enum=Intent)

intent_prompt = PromptTemplate(
    template=(
        "–ö –∫–∞–∫–æ–π –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ –æ—Ç–Ω–æ—Å–∏—Ç—Å—è –∑–∞–ø—Ä–æ—Å –∫–ª–∏–µ–Ω—Ç–∞?\n"
        "–í–∞—Ä–∏–∞–Ω—Ç—ã: list | detail | budget\n\n"
        "–ó–∞–ø—Ä–æ—Å: \"{q}\"\n\n"
        "{format_instr}"         
    ),
    input_variables=["q", "format_instr"],
)

intent_chain = LLMChain(
    llm=llm,
    prompt=intent_prompt,
    output_parser=intent_parser,
)

In [71]:
def product_info(question: str) -> str:
    intent = intent_chain.invoke({
        "q": question,
        "format_instr": intent_parser.get_format_instructions(),  
    })["text"].strip()

    if intent == "list":
        return "–í –Ω–∞–ª–∏—á–∏–∏:\n" + "\n".join(d.page_content.splitlines()[0] for d in documents)

    if intent == "budget":
        m = re.search(r"(\d[\d\s]{3,})", question)
        if not m:
            return "–£—Ç–æ—á–Ω–∏—Ç–µ —Ü–∏—Ñ—Ä—É –±—é–¥–∂–µ—Ç–∞ –≤ —Ä—É–±–ª—è—Ö."
        budget = int(m.group(1).replace(" ", ""))
        fits = [d for d in documents
                if (p := re.search(r"–¶–µ–Ω–∞\s+(\d[\d\s]+)", d.page_content))
                and int(p.group(1).replace(" ", "")) <= budget]
        return "–ü–æ–¥—Ö–æ–¥–∏—Ç:\n" + "\n".join(d.page_content.splitlines()[0] for d in fits) if fits else "–ó–∞ —É–∫–∞–∑–∞–Ω–Ω—ã–π –±—é–¥–∂–µ—Ç –º–æ–¥–µ–ª–µ–π –Ω–µ—Ç."

    docs = hybrid_retriever.invoke(question)
    if not docs:
        return "–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –Ω–µ –Ω–∞–π–¥–µ–Ω–∞."
    ctx = "\n---\n".join(d.page_content for d in docs)
    return llm.invoke(f"–û—Ç–≤–µ—Ç—å —Ñ–∞–∫—Ç–∞–º–∏:\n{ctx}\n\n–í–æ–ø—Ä–æ—Å: {question}\n–û—Ç–≤–µ—Ç:")


In [72]:
stage_prompt = PromptTemplate.from_template(
    """–í—ã–±–µ—Ä–∏ —Å—Ç–∞–¥–∏—é: 1-–∏–º—è/–ª–æ–∫–∞—Ü–∏—è 2-–ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç–∏ 3-–ø—Ä–µ–∑–µ–Ω—Ç–∞—Ü–∏—è 4-–∑–∞–∫—Ä—ã—Ç–∏–µ.
–ö–∞—Ä—Ç–∞: {facts}
–ò—Å—Ç–æ—Ä–∏—è: {chat_history}
–¢–æ–ª—å–∫–æ —Ü–∏—Ñ—Ä–∞:""")
stage_chain = LLMChain(llm=llm, prompt=stage_prompt)

def missing(card: CustomerCard, st: str) -> str:
    need=[]
    if st=="1":
        if not card.name: need.append("–∏–º—è")
        if not card.location: need.append("–ª–æ–∫–∞—Ü–∏—è/—Ä–∞–∑–º–µ—â–µ–Ω–∏–µ")
    elif st=="2":
        if card.has_asic is None: need.append("–µ—Å—Ç—å –ª–∏ ASIC-—ã")
        if card.has_asic and card.electricity_price is None: need.append("—Ü–µ–Ω–∞ —ç–ª–µ–∫—Ç—Ä–∏—á–µ—Å—Ç–≤–∞")
        if (card.has_asic is False) and card.budget is None: need.append("–±—é–¥–∂–µ—Ç –∏–ª–∏ –∏–Ω—Ç–µ—Ä–µ—Å—É—é—â–∏–µ –º–æ–¥–µ–ª–∏")
    elif st=="4" and card.contact is None:
        need.append("–∫–æ–Ω—Ç–∞–∫—Ç –∏ —É–¥–æ–±–Ω–æ–µ –≤—Ä–µ–º—è –∑–≤–æ–Ω–∫–∞")
    return ", ".join(need)

def build_agent(card: CustomerCard, st: str, memory):
    ask = missing(card, st)
    prefix = f"""
–¢—ã ‚Äî –¥—Ä—É–∂–µ–ª—é–±–Ω—ã–π –ø—Ä–æ–¥–∞–≤–µ—Ü ASIC-–º–∞–π–Ω–µ—Ä–æ–≤.

–¢–µ–∫—É—â–∏–π —ç—Ç–∞–ø: {st}.
{('–í—ã—è—Å–Ω–∏: '+ask) if ask else ''}

* –í–æ–ø—Ä–æ—Å—ã –æ —Ç–æ–≤–∞—Ä–∞—Ö –∑–∞–¥–∞–≤–∞–π —á–µ—Ä–µ–∑ TOOL: product_info <–≤–æ–ø—Ä–æ—Å>.
* Observation –º–æ–∂–Ω–æ –ø–µ—Ä–µ—Å–∫–∞–∑—ã–≤–∞—Ç—å —Å–≤–æ–∏–º–∏ —Å–ª–æ–≤–∞–º–∏ –ª–∏–±–æ —á–∞—Å—Ç–∏—á–Ω–æ,
    —Ç–æ–ª—å–∫–æ –µ—Å–ª–∏ –æ–Ω–æ –¥–æ–±–∞–≤–ª—è–µ—Ç –ø–æ–ª—å–∑—É –∫–ª–∏–µ–Ω—Ç—É.
* –ù–µ –ø–æ–∫–∞–∑—ã–≤–∞–π —Å–∏—Å—Ç–µ–º–Ω—ã—Ö —Ç–µ–≥–æ–≤ TOOL / Observation.
* –ó–∞–¥–∞–≤–∞–π –æ—Ç–∫—Ä—ã—Ç—ã–µ –≤–æ–ø—Ä–æ—Å—ã –∏–∑ —Å–∫—Ä–∏–ø—Ç–∞ (–æ–ø—ã—Ç, –ø—Ä–æ–±–ª–µ–º—ã, —Ö–æ—Å—Ç–∏–Ω–≥, –±—é–¥–∂–µ—Ç).
"""
    return initialize_agent(
        tools=[product_tool],
        llm=llm,
        agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=memory,
        verbose=True,
        agent_kwargs={"prefix": prefix},
    )


In [73]:
client_card = CustomerCard()
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

print("üü¢ –ì–æ—Ç–æ–≤!  'exit' ‚Äî –≤—ã–≤–µ—Å—Ç–∏ –∫–∞—Ä—Ç–æ—á–∫—É.")
while True:
    user = input("\n–í—ã: ")
    if user.lower() in {"exit", "quit"}:
        print("\nüìá –ö–∞—Ä—Ç–æ—á–∫–∞ –∫–ª–∏–µ–Ω—Ç–∞:")
        print(json.dumps(client_card.dict(exclude_none=True, ensure_ascii=False), indent=2))
        break

    print(f"üë§ –ö–ª–∏–µ–Ω—Ç: {user}")

    client_card = update_card(client_card.model_dump_json(), user)
    classify_financial(client_card)
    memory.chat_memory.add_user_message(user)

    # ‚ûú —Å—Ç–∞–¥–∏—è
    stage = stage_chain.invoke({
        "facts": client_card.model_dump_json(),
        "chat_history": memory.buffer,
    })["text"].strip()
    if stage not in {"1","2","3","4"}: stage="1"
    print(f"[–°—Ç–∞–¥–∏—è: {stage}]")

    agent = build_agent(client_card, stage, memory)

    reply = agent.invoke({"input": user})["output"]
    print(f"ü§ñ –ü—Ä–æ–¥–∞–≤–µ—Ü: {reply}")
    
    client_card = update_card(client_card.model_dump_json(), reply)
    classify_financial(client_card)
    memory.chat_memory.add_ai_message(reply)


üü¢ –ì–æ—Ç–æ–≤!  'exit' ‚Äî –≤—ã–≤–µ—Å—Ç–∏ –∫–∞—Ä—Ç–æ—á–∫—É.
üë§ –ö–ª–∏–µ–Ω—Ç: –ø—Ä–∏–≤–µ—Ç


/var/folders/dx/2j_jz8k12tn9dvfk_ny7sxr40000gn/T/ipykernel_44325/1260107577.py:56: PydanticDeprecatedSince20: The `parse_raw` method is deprecated; if your data is JSON use `model_validate_json`, otherwise load the data then use `model_validate` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  return CustomerCard.parse_raw(cur_json)


[–°—Ç–∞–¥–∏—è: 1]


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
AI: –ü—Ä–∏–≤–µ—Ç! –Ø –º–æ–≥—É –ø–æ–º–æ—á—å —Ç–µ–±–µ —Å –≤—ã–±–æ—Ä–æ–º ASIC-–º–∞–π–Ω–µ—Ä–∞. –ö–∞–∫–æ–π —É —Ç–µ–±—è –±—é–¥–∂–µ—Ç?[0m

[1m> Finished chain.[0m
ü§ñ –ü—Ä–æ–¥–∞–≤–µ—Ü: –ü—Ä–∏–≤–µ—Ç! –Ø –º–æ–≥—É –ø–æ–º–æ—á—å —Ç–µ–±–µ —Å –≤—ã–±–æ—Ä–æ–º ASIC-–º–∞–π–Ω–µ—Ä–∞. –ö–∞–∫–æ–π —É —Ç–µ–±—è –±—é–¥–∂–µ—Ç?
üë§ –ö–ª–∏–µ–Ω—Ç: –ø–æ–∫–∞ –¥–∞–≤–∞–π –±–µ–∑ –±—é–¥–∂–µ—Ç–∞
[–°—Ç–∞–¥–∏—è: 4]


AttributeError: 'CustomerCard' object has no attribute 'contact'