# Load libraries

In [174]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser,JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate,PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langgraph.graph import END, StateGraph
from langchain_community.embeddings import OllamaEmbeddings
import nomic
from langchain_nomic.embeddings import NomicEmbeddings
from chromadb.errors import InvalidDimensionException
from langchain_community.tools.tavily_search import TavilySearchResults

# Login for all used APIs

In [175]:
import os
import getpass


def _secret_from_env_or_prompt(name, prompt):
    """
    Return the secret held in environment variable `name`.

    When the variable is unset or empty, the user is prompted for it with
    hidden input and the value is stored in os.environ for later cells.
    """
    if not os.environ.get(name):
        os.environ[name] = getpass.getpass(prompt)
    return os.environ[name]


# Secrets come from the environment or an interactive prompt; real keys must never be saved in the notebook.
# NOTE(review): the LangSmith key that used to be hardcoded in this cell was stored in plain text and should be revoked.
nomic.cli.login(token=_secret_from_env_or_prompt("NOMIC_API_KEY", "Nomic API key: "))
_secret_from_env_or_prompt("TAVILY_API_KEY", "Tavily API key: ")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
_secret_from_env_or_prompt("LANGCHAIN_API_KEY", "LangSmith API key: ")

# Read & process data from web

In [176]:
#fetch data
# Pages to index. Every entry needs its own trailing comma: two adjacent string
# literals without one are silently concatenated into a single (invalid) URL.
urls = [
    "https://deriv.com/",
    "https://deriv.com/trade-types/cfds/",
    "https://deriv.com/trade-types/options/digital-options/up-and-down/",
    "https://deriv.com/trade-types/options/digital-options/digits/",
    "https://deriv.com/trade-types/options/digital-options/in-out/",
    "https://deriv.com/trade-types/options/digital-options/reset-call-reset-put/",
    "https://deriv.com/trade-types/options/digital-options/high-low-ticks/",
    "https://deriv.com/trade-types/options/digital-options/touch-no-touch/",
    "https://deriv.com/trade-types/options/digital-options/asians/",
    "https://deriv.com/trade-types/options/digital-options/only-ups-only-downs/",
    "https://deriv.com/trade-types/options/digital-options/lookbacks/",
    "https://deriv.com/trade-types/options/accumulator-options/",
    "https://deriv.com/trade-types/options/vanilla-options/",
    "https://deriv.com/trade-types/options/turbo-options/",
    "https://deriv.com/trade-types/multiplier/",
    "https://deriv.com/dmt5/",
    "https://deriv.com/derivx/",
    "https://deriv.com/deriv-ctrader/",
    "https://deriv.com/dtrader/",
    "https://deriv.com/deriv-go/",
    "https://deriv.com/dbot/",
    "https://deriv.com/markets/forex/",
    "https://deriv.com/markets/synthetic/",
    "https://deriv.com/markets/stock/",
    "https://deriv.com/markets/exchange-traded-funds/",
    "https://deriv.com/markets/cryptocurrencies/",
    "https://deriv.com/markets/commodities/",
    "https://deriv.com/who-we-are/",
    "https://deriv.com/why-choose-us/",
    "https://deriv.com/partners/",
]


In [177]:
### Index
# Load each page; every loader call returns a list of documents for one URL.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
# Flatten the per-URL lists into a single list of documents.
docs_list = [page for loaded_pages in docs for page in loaded_pages]

# Split into token-based chunks of 200 tokens with no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=200,
)
doc_splits = text_splitter.split_documents(docs_list)

In [178]:
nom_emb = NomicEmbeddings(model="nomic-embed-text-v1")

# Build the index; if Chroma reports a dimension mismatch, drop the default
# collection and build it once more.
vectorstore = None
try:
    vectorstore = Chroma.from_documents(documents=doc_splits, embedding=nom_emb)
except InvalidDimensionException:
    Chroma().delete_collection()
if vectorstore is None:
    vectorstore = Chroma.from_documents(documents=doc_splits, embedding=nom_emb)

# Retriever interface over the vector store, used by the rest of the notebook
retriever = vectorstore.as_retriever()

# Retrieval Grader

In [179]:
### Retrieval Grader
# Name of the Ollama model; the later chain cells in this notebook reuse this variable.
local_llm="llama3"

# LLM: JSON output format requested, temperature 0
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Grading prompt in Llama 3 chat-template form. It takes {document} and {question}
# and asks for a JSON object with a single key 'score' holding 'yes' or 'no'.
# NOTE(review): the prompt text contains the typo 'premable'; it is runtime text and is left untouched here.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a professional friendly customer support agent with the task of grading 
    and assessing the relevance of a retrieved document to a user question. \n
    keep in mind that in deriv: Trade types are CFD, Options, Multipliers. CFDs (Contract for differences) Trade with leverage, unbeatable spreads, and fast execution on the widest range of markets. Options Trade diverse vanilla and exotic options across platforms and markets without risking more than your initial stake. Multipliers Trade on global financial markets and multiply your potential profit without losing more than your stake. Trading platforms are Deriv MT5, Deriv X,Deriv cTrader,SmartTrader, Deriv Trader,Deriv GO,Deriv Bot,Binary Bot. Trading assets and markets are : Forex,Derived indices,Stocks & indices,Commodities,Cryptocurrencies,Exchange-traded funds (ETFs)
    If the document contains keywords related to the user question and answers their question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous and useless retrievals to users. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "document"],
)

# Chain: fill the prompt, call the model, parse the reply as JSON
retrieval_grader = prompt | llm | JsonOutputParser()

In [180]:
#Test the LLM unit
question = "cfd"
docs = retriever.invoke(question)
print(docs)
# Grade the text of one retrieved chunk, which is the payload shape grade_documents()
# sends, instead of the repr of the whole list of Document objects.
retrieval_grader.invoke({"document": docs[0].page_content, "question": question})


[Document(page_content='complex derivative products that carry a significant risk of potential loss. CFDs are complex instruments with a high risk of losing money rapidly due to leverage. You should consider whether you understand how these products work and whether you can afford to take the high risk of losing your money.', metadata={'description': 'Award-winning online broker, rated excellent on Trustpilot. Sign up for a demo trading account to trade forex, stocks, and other financial markets.', 'language': 'No language found.', 'source': 'https://deriv.com/', 'title': 'Trade forex, stocks, crypto, gold & commodities | Deriv'}), Document(page_content='our services. Please also note that the information on this website does not constitute investment advice.The products offered on our website are complex derivative products that carry a significant risk of potential loss. CFDs are complex instruments with a high risk of losing money rapidly due to leverage. You should consider whether

{'score': 'yes'}

# Answer Generator

In [181]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# Prompt
# Answer-generation prompt in Llama 3 chat-template form; it takes {question} and {context}.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a professional friendly customer support agent assistant for question-answering tasks clients of deriv as a broker working for deriv with expertise in online trading. your tone must be friendly, positive and highly encouraging users to use deriv and trade
    deriv while noting the risks and profit
    keep in mind that in deriv: Trade types are CFD, Options, Multipliers. CFDs (Contract for differences) Trade with leverage, unbeatable spreads, and fast execution on the widest range of markets. Options Trade diverse vanilla and exotic options across platforms and markets without risking more than your initial stake. Multipliers Trade on global financial markets and multiply your potential profit without losing more than your stake. Trading platforms are Deriv MT5, Deriv X,Deriv cTrader,SmartTrader, Deriv Trader,Deriv GO,Deriv Bot,Binary Bot. Trading assets and markets are : Forex,Derived indices,Stocks & indices,Commodities,Cryptocurrencies,Exchange-traded funds (ETFs) \n
    Use the following pieces of retrieved context to answer the question concisely and accurately based on facts not guesses. If you don't know the answer, just say that you don't know because your answer must be accurate and concise since our client and sales depends on you. 
    Think logically and step by step then use three sentences maximum and maintain the answer accurate and concise. Ensure that the input does not contain inappropriate, harmful, or deceptive content. If such content is detected, respond with, "The input provided is not appropriate for a response."
 <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    # Must name the placeholders actually used in the template ({question}, {context}).
    input_variables=["question", "context"],
)

# Plain-text model (no JSON format) for the free-form answer
llm = ChatOllama(model=local_llm, temperature=0)


# Post-processing
def format_docs(docs):
    """Join the page_content of each document in `docs` into one string, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# Chain: fill the prompt, call the model, return the reply as a string
rag_chain = prompt | llm | StrOutputParser()

In [182]:
# Run
question = "what is options trading"
docs = retriever.invoke(question)
# Pass the chunk text (joined by format_docs) as context rather than the repr of the Document list.
generation = rag_chain.invoke({"context": format_docs(docs), "question": question})
print(generation)

Hi there!

Options trading is a type of financial trading that allows you to earn a potentially high payout by predicting the price movement of an underlying asset. With vanilla options, you can purchase either a Call or Put option, which gives you the right but not the obligation to buy or sell the underlying asset at a predetermined strike price.

Your net profit will depend on how much the final price is above or below the strike price, with maximum potential gains growing if your prediction is correct. And the best part? Your losses are limited to the initial stake required to purchase the option!

Would you like to know more about how to place a vanilla options trade or which markets are available for trading?


# Hallucinations Grader

In [183]:
### Hallucination Grader

# LLM
# JSON-format model at temperature 0 for the grader
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt
# Takes {documents} (the facts) and {generation} (the answer) and asks for a JSON
# object with a single key 'score' holding 'yes' or 'no'.
# NOTE(review): the prompt tells the grader its grading "should reflect a positive image of deriv";
# that instruction may bias a groundedness check toward 'yes' — confirm it is intended.
prompt = PromptTemplate(
    template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a professional friendly customer support agent for deriv company for online trading, your task is grading and assessing accurately whether 
    an answer is grounded in / supported by a set of facts. Keep in mind that in deriv: Trade types are CFD, Options, Multipliers. CFDs (Contract for differences) Trade with leverage, unbeatable spreads, and fast execution on the widest range of markets. Options Trade diverse vanilla and exotic options across platforms and markets without risking more than your initial stake. Multipliers Trade on global financial markets and multiply your potential profit without losing more than your stake. Trading platforms are Deriv MT5, Deriv X,Deriv cTrader,SmartTrader, Deriv Trader,Deriv GO,Deriv Bot,Binary Bot. Trading assets and markets are : Forex,Derived indices,Stocks & indices,Commodities,Cryptocurrencies,Exchange-traded funds (ETFs) \n
    Think logically, slowly, critically, step by step and your grading should reflect a positive image of deriv. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)

# Chain: fill the prompt, call the model, parse the reply as JSON
hallucination_grader = prompt | llm | JsonOutputParser()
# Quick check using `docs` and `generation` left over from the earlier generation cell
hallucination_grader.invoke({"documents": docs, "generation": generation})

{'score': 'yes'}

# Answer Grader

In [184]:
# LLM
# JSON-format model at temperature 0 for the grader
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt
# Takes {generation} (the answer) and {question} and asks for a JSON object with a
# single key 'score' holding 'yes' or 'no'.
# NOTE(review): the prompt tells the grader its grading "should reflect a positive image of deriv";
# that instruction may bias the usefulness check toward 'yes' — confirm it is intended.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a professional friendly customer support agent for deriv company for online trading, your task is assessing whether an 
    answer is useful to resolve a question. Keep in mind that in deriv: Trade types are CFD, Options, Multipliers. CFDs (Contract for differences) Trade with leverage, unbeatable spreads, and fast execution on the widest range of markets. Options Trade diverse vanilla and exotic options across platforms and markets without risking more than your initial stake. Multipliers Trade on global financial markets and multiply your potential profit without losing more than your stake. Trading platforms are Deriv MT5, Deriv X,Deriv cTrader,SmartTrader, Deriv Trader,Deriv GO,Deriv Bot,Binary Bot. Trading assets and markets are : Forex,Derived indices,Stocks & indices,Commodities,Cryptocurrencies,Exchange-traded funds (ETFs) \n
    Think logically, slowly, critically, step by step and your grading should reflect a positive image of deriv. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation. 
     <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "question"],
)

# Chain: fill the prompt, call the model, parse the reply as JSON
answer_grader = prompt | llm | JsonOutputParser()
# Quick check using `question` and `generation` left over from the earlier generation cell
answer_grader.invoke({"question": question, "generation": generation})

{'score': 'yes'}

# Router Agent

In [221]:
### Router

from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

# LLM
# JSON-format model at temperature 0 for the router
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Routing prompt: takes {question} and asks for a JSON object with a single key
# 'datasource' holding 'web_search' or 'vectorstore'. The topics named for the
# vectorstore must match what was indexed above (the deriv.com pages).
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an expert at routing a 
    user question to a vectorstore or web search. Use the vectorstore for questions on Deriv, its trade types 
    (CFDs, options, multipliers), its trading platforms, and its markets and assets. You do not need to be stringent with the keywords 
    in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search' 
    or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and 
    no premable or explanation. Question to route: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question"],
)

# Chain: fill the prompt, call the model, parse the reply as JSON
question_router = prompt | llm | JsonOutputParser()
question = "cfd"
# retriever.invoke() is the call used everywhere else in this notebook; get_relevant_documents() is deprecated.
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(question_router.invoke({"question": question}))

'---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---'{'datasource': 'vectorstore'}


# Search API - Tavily

In [222]:
### Search

# Limit the search to three results. The tool's limit field is `max_results`;
# NOTE(review): `k` is not a declared field of TavilySearchResults and appears to be ignored — confirm against the installed version.
web_search_tool = TavilySearchResults(max_results=3)

# Build Graph

In [223]:
from typing_extensions import TypedDict
from typing import List
from langchain_core.documents import Document

## Graph state

In [224]:
class GraphState(TypedDict):
    """
    State dictionary passed between the nodes of the graph.

    Attributes:
        question: the user question being answered
        generation: answer text produced by the LLM
        web_search: "Yes"/"No" flag set by grade_documents to request a web search
        documents: documents gathered so far (retrieved and/or from web search)
    """

    question: str
    generation: str
    web_search: str
    # NOTE(review): the nodes in this notebook put Document objects in this list
    # (they read .page_content), not plain strings — consider List[Document].
    documents: List[str]

## Nodes

### Document Retrieval

In [225]:
def retrieve(state):
    """
    Look up documents for the current question in the vector store.

    Args:
        state (dict): The current graph state; must contain "question"

    Returns:
        state (dict): The question together with a "documents" entry holding
            whatever the retriever returned for it
    """
    print("---RETRIEVE---")
    user_question = state["question"]

    # Single retriever call; the result is passed through untouched.
    return {
        "documents": retriever.invoke(user_question),
        "question": user_question,
    }

### Answer Generator

In [226]:
def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state; must contain "question" and "documents"

    Returns:
        state (dict): The question and documents unchanged, plus a "generation"
            key that contains the LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]

    # RAG generation
    # Give the model the chunk text joined by format_docs() rather than the repr
    # of the Document objects (which would also include their metadata).
    context = format_docs(documents)
    generation = rag_chain.invoke({"context": context, "question": question})
    return {"documents": documents, "question": question, "generation": generation}

### Document Grader

In [227]:
def grade_documents(state):
    """
    Grade each retrieved document for relevance to the question.

    Documents graded "yes" are kept; as soon as any document is graded
    otherwise, the web_search flag is raised so a web search runs next.

    Args:
        state (dict): The current graph state; must contain "question" and "documents"

    Returns:
        state (dict): The relevant documents only, the question, and the
            "web_search" flag ("Yes" or "No")
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    candidates = state["documents"]

    relevant_docs = []
    needs_web_search = False
    for candidate in candidates:
        verdict = retrieval_grader.invoke(
            {"question": question, "document": candidate.page_content}
        )["score"]
        if verdict.lower() != "yes":
            # Irrelevant: leave it out and request a web search.
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            needs_web_search = True
            continue
        print("---GRADE: DOCUMENT RELEVANT---")
        relevant_docs.append(candidate)

    return {
        "documents": relevant_docs,
        "question": question,
        "web_search": "Yes" if needs_web_search else "No",
    }

### Web Search

In [228]:
def web_search(state):
    """
    Web search based on the question

    Args:
        state (dict): The current graph state; must contain "question".
            "documents" may be missing or None (e.g. when the router sends
            the question straight to web search).

    Returns:
        state (dict): "documents" — the prior documents (if any) followed by one
            Document holding the newline-joined search result contents — and
            the unchanged "question"
    """
    question = state["question"]
    # .get(): the key may not exist yet when this is the first node to run.
    documents = state.get("documents")

    # Web search
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)

    # Build a new list instead of appending in place, so the list object held
    # by the incoming state is not mutated.
    if documents is not None:
        documents = [*documents, web_results]
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}

### Routing

In [229]:
### Conditional edge
def route_question(state):
    """
    Route question to web search or RAG.

    Args:
        state (dict): The current graph state; must contain "question"

    Returns:
        str: Next node to call — "websearch" or "vectorstore". Any router
            output other than 'web_search' falls back to "vectorstore".
    """

    print("---ROUTE QUESTION---")
    question = state["question"]
    print(question)
    source = question_router.invoke({"question": question})

    # Normalise the model's reply: tolerate a missing key, stray whitespace and case differences.
    datasource = ""
    if isinstance(source, dict):
        datasource = str(source.get("datasource", "")).strip().lower()

    if datasource == "web_search":
        return "websearch"
    if datasource != "vectorstore":
        # Returning None here would leave the graph without a next node. The RAG
        # path is the safer default because irrelevant documents still trigger
        # a web search later on.
        print("---UNEXPECTED ROUTER OUTPUT, DEFAULTING TO RAG---")
    print("---ROUTE QUESTION TO RAG---")
    return "vectorstore"

### Web search or LLM route

In [230]:
def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Args:
        state (dict): The current graph state; only "web_search" is read

    Returns:
        str: Binary decision for next node to call ("websearch" or "generate")
    """

    print("---ASSESS GRADED DOCUMENTS---")

    if state["web_search"] == "Yes":
        # grade_documents raised the flag because at least one retrieved
        # document was graded irrelevant: add web results before generating.
        return "websearch"

    # Every retrieved document was graded relevant, so generate answer
    print("---DECISION: GENERATE---")
    return "generate"

### Answer Grader

In [231]:
def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state; must contain "question",
            "documents" and "generation"

    Returns:
        str: Decision for next node to call — "useful", "not useful" or
            "not supported"
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )
    # Compare case-insensitively, as grade_documents does: the model may answer "Yes".
    grounded = str(score["score"]).strip().lower() == "yes"

    # Check hallucination
    if not grounded:
        # print (not pprint): pprint is only imported in a later cell and would
        # also wrap the banner in quotes.
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check question-answering
    print("---GRADE GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question, "generation": generation})
    if str(score["score"]).strip().lower() == "yes":
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"

In [232]:
workflow = StateGraph(GraphState)

# Register each node function under the name the edges refer to.
for node_name, node_fn in (
    ("websearch", web_search),  # web search
    ("retrieve", retrieve),  # retrieve
    ("grade_documents", grade_documents),  # grade documents
    ("generate", generate),  # generate
):
    workflow.add_node(node_name, node_fn)

In [233]:
# Build graph
# Entry point: route_question picks the first node. The mapping translates its
# return value ("websearch" / "vectorstore") into a node name.
workflow.set_conditional_entry_point(
    route_question,
    {
        "websearch": "websearch",
        "vectorstore": "retrieve",
    },
)

# Retrieved documents are always graded next.
workflow.add_edge("retrieve", "grade_documents")
# After grading, decide_to_generate chooses between a web search and generation.
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "websearch": "websearch",
        "generate": "generate",
    },
)
# Web search results always feed into generation.
workflow.add_edge("websearch", "generate")
# After generation, the answer is graded: an unsupported answer is regenerated,
# a useful one ends the run, and one that is not useful triggers a web search.
# NOTE(review): "not supported" loops back to "generate" with the same inputs;
# nothing in this cell bounds the number of retries — confirm a limit applies elsewhere.
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "websearch",
    },
)

# Use the DerivianBuddy bot

In [235]:
# Compile
app = workflow.compile()

# Test
from pprint import pprint

inputs = {"question": "what is deriv"}

# Keep the state emitted by the last node explicitly rather than relying on a
# loop variable still being bound after the loop (it is not if nothing streams).
final_state = None
for output in app.stream(inputs):
    for node_name, node_state in output.items():
        final_state = node_state

if final_state is not None and "generation" in final_state:
    pprint(final_state["generation"])
else:
    print("No answer was generated.")

---ROUTE QUESTION---
what is deriv
---ROUTE QUESTION TO RAG---
---RETRIEVE---
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
('Deriv is a leading online trading platform that offers a wide range of '
 'financial instruments, including CFDs, Options, and Multipliers. With Deriv, '
 'you can trade on various markets such as Forex, indices, stocks, '
 'commodities, cryptocurrencies, and ETFs using our user-friendly platforms '
 'like MT5, X, cTrader, SmartTrader, Trader, GO, Bot, and Binary Bot.')
