In [64]:
from pathlib import Path
import os, shutil, re
from dotenv import load_dotenv

from langchain.text_splitter            import CharacterTextSplitter
from langchain.embeddings               import OpenAIEmbeddings
from langchain_community.vectorstores   import Chroma
from langchain.llms                     import OpenAI
from langchain.schema                   import Document
from langchain.memory                   import ConversationBufferMemory
from langchain.agents                   import Tool, AgentType, initialize_agent
from langchain.prompts                  import PromptTemplate
from langchain.chains                   import LLMChain
from langchain.retrievers               import BM25Retriever, EnsembleRetriever

import chromadb
from chromadb.config import Settings
from rapidfuzz import fuzz               


In [65]:
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

persist_dir = str(Path.home() / "chroma_asic_idx")
if os.path.exists(persist_dir):
    shutil.rmtree(persist_dir)           # —á–∏—Å—Ç—ã–π –±–∏–ª–¥


In [66]:
product_texts = [
    # ‚Äî‚Äî‚Äî S19 Pro ‚Äî‚Äî‚Äî
    """
Bitmain Antminer S19 Pro 110 TH/s
–ê–ª–≥–æ—Ä–∏—Ç–º SHA-256 (Bitcoin/BCH)
110 TH/s ¬±3 % ‚Ä¢ 3250 –í—Ç ¬±5 % ‚Ä¢ 29,5 J/TH
–®—É–º 75 –¥–ë ‚Ä¢ 400√ó195√ó290 –º–º, 13,2 –∫–≥
–¶–µ–Ω–∞ 199 000 ‚ÇΩ  (—Å–∫–∏–¥–∫–∞ –æ—Ç 3 —à—Ç)
–ì–∞—Ä–∞–Ω—Ç–∏—è 12 –º–µ—Å. –æ—Ç Bitmain
–î–æ—Å—Ç–∞–≤–∫–∞ –°–î–≠–ö –†–§ / —Å–∞–º–æ–≤—ã–≤–æ–∑ (–ú–æ—Å–∫–≤–∞)
    """,
    # ‚Äî‚Äî‚Äî M30S++ ‚Äî‚Äî‚Äî
    """
MicroBT Whatsminer M30S++ 112 TH/s
SHA-256 ‚Ä¢ 112 TH/s ¬±2 % ‚Ä¢ 3472 –í—Ç ‚Ä¢ 31 J/TH
–°–æ—Å—Ç–æ—è–Ω–∏–µ –ë–£ 2023, 1000 —á ‚Ä¢ –≥–∞—Ä–∞–Ω—Ç–∏—è 3 –º–µ—Å
–¶–µ–Ω–∞ 128 000 ‚ÇΩ ‚Ä¢ —Å–∫–∏–¥–∫–∞ 5 % ‚â• 5 —à—Ç
–û–ø–ª–∞—Ç–∞ BTC, –°–±–µ—Ä, Tinkoff
–î–æ—Å—Ç–∞–≤–∫–∞ Boxberry, –ü–≠–ö
    """,
    # ‚Äî‚Äî‚Äî iPollo V1 Mini ‚Äî‚Äî‚Äî
    """
iPollo V1 Mini ETC 300 MH/s (Wi-Fi)
EtHash ETC ‚Ä¢ 300 MH/s ¬±10 % ‚Ä¢ 240 –í—Ç
–®—É–º 50 –¥–ë ‚Ä¢ 178√ó143√ó90 –º–º, 2,1 –∫–≥
–¶–µ–Ω–∞ 38 500 ‚ÇΩ ‚Ä¢ –≥–∞—Ä–∞–Ω—Ç–∏—è 6 –º–µ—Å iPollo
–û–ø–ª–∞—Ç–∞ USDT (TRC-20), –∫–∞—Ä—Ç–∞ –†–§
–î–æ—Å—Ç–∞–≤–∫–∞ EMS, Boxberry ‚Äî –≤ –¥–µ–Ω—å –æ–ø–ª–∞—Ç—ã
    """,
]
documents = [Document(page_content=txt.strip()) for txt in product_texts]


In [67]:
splitter = CharacterTextSplitter(
    separator="\n", chunk_size=800, chunk_overlap=150
)
chunks = splitter.split_documents(documents)
print("–ß–∞–Ω–∫–æ–≤:", len(chunks))

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vectorstore = Chroma.from_documents(
    chunks,
    embedding        = embeddings,
    collection_name  = "asic_store",
    persist_directory= persist_dir,
    client_settings  = Settings(anonymized_telemetry=False),
)
print("–ó–∞–ø–∏—Å–∞–Ω–æ –≤ Chroma:", vectorstore._collection.count())


–ß–∞–Ω–∫–æ–≤: 3
–ó–∞–ø–∏—Å–∞–Ω–æ –≤ Chroma: 15


In [68]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import BaseRetriever, Document
from rapidfuzz import fuzz
from pydantic import PrivateAttr

bm25 = BM25Retriever.from_documents(documents, k=3)

class FuzzyRetriever(BaseRetriever):
    """–ü—Ä–æ—Å—Ç–æ–π retriever, –∏—â—É—â–∏–π –ø–æ –Ω–µ—á–µ—Ç–∫–æ–º—É —Å–æ–≤–ø–∞–¥–µ–Ω–∏—é substrings."""
    k: int = 3
    _docs: list[Document] = PrivateAttr(default_factory=list)

    def __init__(self, docs, k: int = 3, **kwargs):
        super().__init__(k=k, **kwargs)
        self._docs = docs

    def _get_relevant_documents(self, query: str, *, run_manager=None, **kwargs):
        scored = sorted(
            self._docs,
            key=lambda d: fuzz.partial_ratio(query.lower(), d.page_content.lower()),
            reverse=True,
        )
        return scored[: self.k]

    async def _aget_relevant_documents(self, query: str, *, run_manager=None, **kwargs):
        return self._get_relevant_documents(query)

fuzzy_retr = FuzzyRetriever(docs=documents, k=3)

vec_retr = vectorstore.as_retriever(search_kwargs={"k": 8})

hybrid_retriever = EnsembleRetriever(
    retrievers=[vec_retr, bm25, fuzzy_retr],
    weights=[0.5, 0.35, 0.15],
)


In [69]:
llm = OpenAI(temperature=0.0)


In [70]:
def product_info(question: str) -> str:
    q = re.sub(r"[^\w\s+]", " ", question.lower()).strip()
    docs = hybrid_retriever.get_relevant_documents(q)
    if not docs:
        return "–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –Ω–µ –Ω–∞–π–¥–µ–Ω–∞."

    context = "\n---\n".join(d.page_content for d in docs)
    prompt = (
        "–¢—ã –∫–æ–Ω—Å—É–ª—å—Ç–∞–Ω—Ç –ø–æ ASIC-–º–∞–π–Ω–µ—Ä–∞–º.\n"
        "–û—Ç–≤–µ—á–∞–π –¢–û–õ–¨–ö–û —Ñ–∞–∫—Ç–∞–º–∏ –∏–∑ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞ –Ω–∏–∂–µ.\n"
        "–û—Ç–≤–µ—á–∞–π —Ç–æ–ª—å–∫–æ –Ω–∞ —Ä—É—Å—Å–∫–æ–º"
        "–ï—Å–ª–∏ –æ—Ç–≤–µ—Ç–∞ –Ω–µ—Ç ‚Äî —Å–∫–∞–∂–∏: ¬´–ò–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è –Ω–µ –Ω–∞–π–¥–µ–Ω–∞.¬ª\n\n"
        f"–ö–æ–Ω—Ç–µ–∫—Å—Ç:\n{context}\n\n–í–æ–ø—Ä–æ—Å: {question}\n–û—Ç–≤–µ—Ç:"
    )
    return llm.invoke(prompt).strip()

product_tool = Tool(
    name        = "product_info",
    func        = product_info,
    description = "–í–æ–∑–≤—Ä–∞—â–∞–µ—Ç —Ñ–∞–∫—Ç—ã –æ –º–∞–π–Ω–µ—Ä–∞—Ö (—Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏, —Ü–µ–Ω–∞, –Ω–∞–ª–∏—á–∏–µ)",
)


In [71]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

agent = initialize_agent(
    tools   = [product_tool],
    llm     = llm,
    agent   = AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
    memory  = memory,
    verbose = True,
)


In [72]:
print("üü¢ –ì–æ—Ç–æ–≤!  –ü–∏—à–∏—Ç–µ –≤–æ–ø—Ä–æ—Å—ã, 'exit' ‚Äî –≤—ã–π—Ç–∏.")
while True:
    user = input("\n–í—ã: ")
    if user.lower() in {"exit", "quit"}: break
    print("ü§ñ:", agent.invoke({"input": user})["output"])


üü¢ –ì–æ—Ç–æ–≤!  –ü–∏—à–∏—Ç–µ –≤–æ–ø—Ä–æ—Å—ã, 'exit' ‚Äî –≤—ã–π—Ç–∏.


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: product_info
Action Input: models[0m
Observation: [36;1m[1;3miPollo V1 Mini ETC 300 MH/s (Wi-Fi), Bitmain Antminer S19 Pro 110 TH/s, MicroBT Whatsminer M30S++ 112 TH/s[0m
Thought:[32;1m[1;3m Do I need to use a tool? No
AI: –Ø –ø—Ä–æ–¥–∞—é —Ä–∞–∑–ª–∏—á–Ω—ã–µ –º–æ–¥–µ–ª–∏ –∞—Å–∏–∫–æ–≤, –≤–∫–ª—é—á–∞—è iPollo V1 Mini ETC 300 MH/s (Wi-Fi), Bitmain Antminer S19 Pro 110 TH/s –∏ MicroBT Whatsminer M30S++ 112 TH/s. –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –≤–∞—Å –∏–Ω—Ç–µ—Ä–µ—Å—É–µ—Ç?[0m

[1m> Finished chain.[0m
ü§ñ: –Ø –ø—Ä–æ–¥–∞—é —Ä–∞–∑–ª–∏—á–Ω—ã–µ –º–æ–¥–µ–ª–∏ –∞—Å–∏–∫–æ–≤, –≤–∫–ª—é—á–∞—è iPollo V1 Mini ETC 300 MH/s (Wi-Fi), Bitmain Antminer S19 Pro 110 TH/s –∏ MicroBT Whatsminer M30S++ 112 TH/s. –ö–∞–∫–∞—è –º–æ–¥–µ–ª—å –≤–∞—Å –∏–Ω—Ç–µ—Ä–µ—Å—É–µ—Ç?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I n