# Customised data

In [36]:
def make_rag_chain(model, retriever, rag_prompt = None):
    """Build a retrieval-augmented generation chain.

    Args:
        model: Runnable placed at the end of the chain; it receives the
            rendered prompt.
        retriever: Runnable that receives the question text; its output is
            passed to ``format_docs`` to build the prompt context.
        rag_prompt: Prompt to fill with ``context`` and ``question``. When
            omitted (or falsy), ``"rlm/rag-prompt"`` is pulled from the
            LangChain hub.

    Returns:
        The composed chain ``{context, question} | rag_prompt | model``.
    """
    # We will use a prompt template from langchain hub.
    if not rag_prompt:
        rag_prompt = hub.pull("rlm/rag-prompt")

    # Normalise the chain input once per slot. Both "context" and "question"
    # go through get_question so that a dict ({"question": ...}) or a message
    # input reaches the prompt as plain question text; passing the raw input
    # through would render the whole dict/message object into the prompt.
    rag_chain = (
            {
                "context": RunnableLambda(get_question) | retriever | format_docs,
                "question": RunnableLambda(get_question),
            }
            | rag_prompt
            | model
    )

    return rag_chain


def get_question(input):
    """Extract the question text from the supported RAG chain input shapes.

    Args:
        input: A string, a dict containing a ``'question'`` key, or a
            LangChain ``BaseMessage``.

    Returns:
        ``None`` for any falsy input (``None``, ``""``, ``{}``), the string
        itself, the dict's ``'question'`` value, or the message's ``content``.

    Raises:
        TypeError: If the input is none of the supported shapes.
    """
    if not input:
        return None
    if isinstance(input, str):
        return input
    if isinstance(input, dict) and 'question' in input:
        return input['question']

    # BaseMessage is not imported by any cell visible in this notebook, so the
    # check below would raise NameError. Import it here, lazily, so the
    # str/dict paths above carry no extra import cost.
    from langchain_core.messages import BaseMessage

    if isinstance(input, BaseMessage):
        return input.content

    # TypeError is a subclass of Exception, so existing handlers still match.
    raise TypeError("string or dict with 'question' key expected as RAG chain input.")

In [1]:
import pandas as pd
import os

from langchain_core.documents.base import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain



# Set up the Hugging Face Hub API token.
# SECURITY: credentials must never be written into the notebook. The token is
# expected in the environment before the kernel starts. Any token that was
# ever hardcoded in this cell must be treated as leaked and revoked.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; export it before starting the kernel."
    )

# Define the repository ID for the Gemma 2b model
repo_id = "google/gemma-2b-it"

# NOTE(review): the recorded output of this cell warns that `max_length` was
# transferred to model_kwargs — confirm this is the intended generation limit.
# NOTE(review): temperature=1.5 is kept as-is; confirm such a high value is
# wanted for question answering.
llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=1024, temperature=1.5
)


# Spreadsheet location. Defaults to the original path; set OPTYMIZE_XLSX_PATH
# to run the notebook on another machine.
data_path = os.environ.get("OPTYMIZE_XLSX_PATH", "/home/fish/Documents/optymize.xlsx")
df = pd.read_excel(data_path)

# Flatten every cell of the sheet into one list of texts. Empty cells come
# back as NaN and are dropped; values are cast to str because page_content
# holds text.
data_list = [str(cell) for cell in df.values.ravel().tolist() if pd.notna(cell)]

# One Document per cell (only page_content is needed).
document_list = [Document(page_content=content) for content in data_list]

# The recorded output shows chunks longer than chunk_size (up to 1257
# characters), so 500 is not a hard upper bound here.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(document_list)

# Embed the chunks and index them in a Chroma vector store.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma.from_documents(docs, embedding_function)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
  from .autonotebook import tqdm as notebook_tqdm


Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.


Created a chunk of size 635, which is longer than the specified 500
Created a chunk of size 868, which is longer than the specified 500
Created a chunk of size 606, which is longer than the specified 500
Created a chunk of size 1257, which is longer than the specified 500
Created a chunk of size 533, which is longer than the specified 500


Token is valid (permission: write).
Your token has been saved to /home/fish/.cache/huggingface/token
Login successful


In [2]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.chains import RetrievalQA

# MMR retriever over the vector store: fetch_k=20 candidates are fetched and
# k=4 documents are returned per query.
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4, "fetch_k": 20},
)

# RAG prompt pulled from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

# Inputs for the prompt: the retrieved documents rendered as text, plus the
# chain input passed through unchanged as the question.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Baseline chain: fill the prompt, then call the LLM.
rag_chain = rag_inputs | prompt | llm

In [37]:
# Rebuild rag_chain with make_rag_chain and append a parser so the chain ends
# in StrOutputParser. This rebinds the name `rag_chain`.
output_parser = StrOutputParser()
rag_chain = make_rag_chain(model=llm, retriever=retriever) | output_parser

In [3]:
# Sample queries sent to the RAG chain one at a time. The strings are kept
# verbatim (including spelling) because they are the actual model input.
questions = [
    "what is Optymize?",
    "how can i deposite coin on Optymize?",
    "what is Optymize's twitter?",
    "what is gOPZ tokens?",
    "what is Optymize tokenomics?",
    "what is Optymize details tokenomics?",
    "Optymize Vault Model – How does it works?, detail explaination",
    "Optymize Vault Model – How does it works?",
]

for query in questions:
    # Print the question before invoking the chain so it is shown even if
    # the call is slow or fails.
    print("\n--- QUESTION: ", query)
    answer = rag_chain.invoke(query)
    print("* Ans:\n", answer)


--- QUESTION:  what is Optymize?
* Ans:
  Optymize allows users to stake their tokens to earn return in the form of OPZ (Optimizable Z tokens) plus an opportunity to earn common ownership rewards via gOPZ. This means the number of Optymize Vaults that can be created is limited by capital and user stake rates to ensure the network remains sustainable.

--- QUESTION:  how can i deposite coin on Optymize?
* Ans:
  Deposit by staking your tokens into the Optymize Vault, bearing the risk of a Security Incident. Each token can be deposited for the purpose of earning both Opz revenues and tiered APY rewards related to liquidity pool activities on the exchange.

--- QUESTION:  what is Optymize's twitter?
* Ans:
  Optymize's Twitter handle is @Optymize_xyz.

--- QUESTION:  what is gOPZ tokens?
* Ans:
  The gOPZ tokens can be earned by staking OPZ tokens with us. The length of lockup depends on the user’s preference, but it generally starts from 60 days and can go as far as 24 months.

--- QUES

In [61]:
# Plain LLM call (no retrieval) with a step-by-step prompt.
question = (
    "The Optymize Protocol is a first-of-its-kind multi-blockchain solution "
    "that combines both yield enhancement and risk mitigation for crypto assets.\n"
    "Explain more on the keywords here"
)

template = (
    "Question: {question}\n"
    "Answer: Let's think step by step."
)
# NOTE: this rebinds `prompt`, which an earlier cell bound to the hub RAG prompt.
prompt = PromptTemplate.from_template(template)

llm_chain = LLMChain(llm=llm, prompt=prompt)
# Last expression of the cell, so the result dict is displayed.
llm_chain.invoke(question)

{'question': 'The Optymize Protocol is a first-of-its-kind multi-blockchain solution that combines both yield enhancement and risk mitigation for crypto assets.\nExplain more on the keywords here',
 'text': '\n\n- **Multi-blockchain**: It highlights that the protocol operates across multiple blockchains, opening up opportunities for broader reach and cross-asset market participation.\n\n- **Yield Enhancement**: This means finding and securing opportunities to earn additional income from crypto assets, often through staking or lending processes.\n\n- **Risk Mitigation**: It suggests strategies and measures taken to minimize potential financial losses and protect capital against market volatility.\n\n- ** crypto assets**: This refers to various cryptocurrencies and other crypto-based assets, such as NFTs or DeFi protocols.'}

In [60]:
# Batch call: generate() takes a list of input dicts, here a single one built
# from `question`.
qs = [dict(question=question)]
res = llm_chain.generate(qs)
generations = res.generations
print(generations)

[[Generation(text='\n\n- **Multi-blockchain**: It highlights that the protocol operates across multiple blockchains, opening up opportunities for broader reach and cross-asset market participation.\n\n- **Yield Enhancement**: This means finding and securing opportunities to earn additional income from crypto assets, often through staking or lending processes.\n\n- **Risk Mitigation**: It suggests strategies and measures taken to minimize potential financial losses and protect capital against market volatility.\n\n- ** crypto assets**: This refers to various cryptocurrencies and other crypto-based assets, such as NFTs or DeFi protocols.')]]
