In [1]:
!pip install --upgrade --quiet  \
    langchain-pinecone \
    langchain-openai \
    langchain \
    langchain-community \
    pinecone-notebooks \
    pypdf \
    langchain-groq

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m770.0 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.6/974.6 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.8/321.8 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.9/215.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m327.4/327.4 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━

Import necessary libraries and modules

In [2]:
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from google.colab import userdata

Load and split the PDF document

In [4]:
# Read the source PDF into LangChain documents.
file_path = "/content/Business-news.pdf"
loader = PyPDFLoader(file_path)
documents = loader.load()

# Chunk the documents: 800-character chunks with a 200-character overlap.
# The separator list starts with the dashed rule (with and without a trailing
# newline) and ends with the empty string; separators are not kept in the chunks.
text_splitter = RecursiveCharacterTextSplitter(
    separators=[
        "--------------------------------------------------------------------------------\n",
        "--------------------------------------------------------------------------------",
        "\n\n",
        "\n",
        " ",
        "",
    ],
    chunk_size=800,
    chunk_overlap=200,
    keep_separator=False,
)
docs = text_splitter.split_documents(documents)

Initialize embeddings

In [5]:
# OpenAI embedding client; the API key is read from the Colab `userdata` store.
embeddings = OpenAIEmbeddings(
    api_key=userdata.get('OPENAI_API_KEY'),
    model="text-embedding-3-small",
)

Import Pinecone library and initialize connection

In [6]:
from pinecone import Pinecone, ServerlessSpec

# Pinecone client; the API key is read from the Colab `userdata` store.
pc = Pinecone(api_key=userdata.get('PINECONE_API_KEY'))

# Create the Pinecone index on first use and wait (bounded) until it is ready.
import time

index_name = "bussiness-db"
INDEX_READY_MAX_POLLS = 300  # one poll per second, so roughly five minutes

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=1536,  # presumably the vector size of the embedding model in use — confirm
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll the index status, but give up after a fixed number of attempts so a
    # stuck index surfaces as an error instead of hanging the notebook forever.
    for _ in range(INDEX_READY_MAX_POLLS):
        if pc.describe_index(index_name).status["ready"]:
            break
        time.sleep(1)
    else:
        raise TimeoutError(
            f"Pinecone index {index_name!r} was not ready after "
            f"{INDEX_READY_MAX_POLLS} polls"
        )

index = pc.Index(index_name)

Create Pinecone vector store

In [7]:
from langchain_pinecone import PineconeVectorStore

# Wrap the Pinecone index as a LangChain vector store. The name is taken from
# `index_name` (assigned where the index is created) so the two cells cannot
# drift apart if the index is ever renamed.
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
    pinecone_api_key=userdata.get('PINECONE_API_KEY')
)

Add documents to the vector store (run this only once)

In [8]:
# Ingest the chunks only while the index is still empty, so that re-running the
# notebook does not store a second copy of every chunk.
# NOTE(review): a partially completed ingest also leaves the index non-empty and
# is skipped here — clear the index before retrying in that case. Index stats may
# lag briefly behind recent writes.
if index.describe_index_stats().total_vector_count == 0:
    inserted_ids = vectorstore.add_documents(docs)
else:
    inserted_ids = []  # index already populated by an earlier run
inserted_ids

['a6660a0c-8bc0-4101-8f38-8f45c59150d6',
 '297158ce-04b2-4ef1-a4b2-946b378f89c1',
 '00fbd335-ca24-4892-a9fd-5b9a2d16495f',
 '68ac9a0b-d563-4382-994f-708d4a3f5f32',
 'c8f95c97-911a-41a5-8e85-ffdaf593e38f',
 '5d4899c7-2e79-4788-a4dd-65a84d59131c',
 '32d2b2c4-4d04-4318-8da6-ff3ff930eb4b',
 'd6d08e0d-ba86-4853-a79b-2452be214ff6',
 'eec7d653-5e79-473f-ab94-42cbe013e869',
 'bb8c27fd-8b92-4fea-bfec-1937b13c5e74',
 '1720aef5-a545-4f65-bec3-5fe0ad4ef54e',
 '4cba26d6-c543-4844-8d69-2a770ecd68f8',
 '78331ca3-6a49-44c0-9bac-2a2b6faace9f',
 '466b6bad-8b73-4562-8501-2dff22345e11',
 'c16d3be8-212b-4a5f-9b3a-ce09743e444c',
 'bdf9a0ca-98da-49d0-9c09-e2935d58122a',
 'b79e92e1-8eef-4efa-a15e-24bc704a44e8',
 '9aacc9f9-9d90-4035-b1be-cd6f169826d3',
 'a722b2b3-c02d-4abe-9916-2f8046aa2285',
 '284460b4-6248-4a77-8011-e89cda3dc2db',
 '81d17fb1-6170-45b7-a23d-bedc47002755',
 'b27b10c2-aac6-47bd-8895-c8b709db2e87',
 '10c976bb-5883-4c0e-ba54-a51b89a5309f',
 '6f144473-bcea-4587-9ced-c139bd471972',
 'd0a34a2c-e97f-

Initialize the Retriever

In [9]:
# Retriever over the vector store, configured with k=7.
# NOTE(review): the question-answering chain defined further down builds its own
# retriever with k=3 and does not reference this one — confirm which is intended.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=7))

Define the RAG chain for question-answering

In [10]:
# Import necessary libraries and modules
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq

from google.colab import userdata

# Groq chat model shared by the classification and answer chains.
llm = ChatGroq(
    model="llama3-70b-8192",
    temperature=0,
    groq_api_key=userdata.get('GROQ_API_KEY'),
)

# Classification chain: prompt -> LLM -> plain string. The prompt asks for a
# one-word label, Relevant or Irrelevant, for the supplied question.
classification_prompt = PromptTemplate.from_template(
    """Given the user question below, classify it as either Relevant or Irrelevant to business, economics, and finance domain.

Do not respond with more than one word.

<question>
{question}
</question>

Classification:"""
)
classification_chain = classification_prompt | llm | StrOutputParser()

# Answer prompt for the business chain: takes `question` and `context` and asks
# for a short answer based on the context. The template text is runtime input to
# the model and is kept verbatim.
_answer_template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you cant answer using the context, dont hellucinate just say the question is out of the context, nothing more. Use three sentences maximum and keep the answer concise.
        Question: {question}
        Context: {context}
        Answer:"""
prompt = PromptTemplate.from_template(_answer_template)

def _format_docs(retrieved):
    """Join the text of the retrieved documents into one context string.

    Args:
        retrieved: iterable of documents exposing a `page_content` attribute.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved)


# Answer chain: retrieve 3 chunks for the question, render them as plain text,
# fill the prompt, call the LLM and return its reply as a string. Formatting the
# documents keeps the prompt's context free of document object/metadata noise.
bussiness_chain = (
    {
        "context": vectorstore.as_retriever(search_kwargs={'k': 3}) | _format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Function to route the query
def route(info):
    """Answer a question if it is classified as in-domain, otherwise refuse.

    The classifier's reply is normalised before it is compared: only the first
    word counts, with surrounding punctuation removed and case ignored, so
    replies such as "Relevant.", " relevant" or "RELEVANT!" are all accepted.

    Args:
        info: mapping with a "question" key holding the user's question.

    Returns:
        The answer produced by `bussiness_chain` for relevant questions, or a
        fixed apology string for anything else (including an empty reply).
    """
    question = info["question"]
    classification_response = classification_chain.invoke({"question": question})

    # Normalise the label; an empty reply yields "" and falls through to the refusal.
    words = classification_response.split()
    label = words[0].strip(".,:;!?\"'").lower() if words else ""

    if label == "relevant":
        return bussiness_chain.invoke(question)
    return "I'm sorry, but this question goes beyond the information I have available. If you have any other question that you'd like help with, please let me know!"

# Pipeline entry point: forward only the "question" field to `route`, which runs
# the classification itself. (`route` reads nothing but info["question"], so no
# other key is built here.)
full_chain = {"question": lambda x: x["question"]} | RunnableLambda(route)

# Run one question through the pipeline and print the result
def handle_query(query):
    """Invoke `full_chain` with the question and print its reply.

    Args:
        query: the user's question text.

    Returns:
        None; the reply is written to stdout prefixed with "Answer: ".
    """
    answer = full_chain.invoke({"question": query})
    print("Answer: {}".format(answer))


Red-Team Testing

In [11]:
# Questions about the business-news material (expected to be answered)
related_questions = [
    "What were Lufthansa's net profits in 2004?",
    "What challenges did Lufthansa face in 2003?",
    "How did Winn-Dixie plan to restructure after filing for bankruptcy?",
    "What economic growth rating did the Cleveland area receive in the US Federal Reserve's Beige Book report?",
    "What significant change did General Motors announce regarding Cadillac production in Sweden?",
    "What was the decision of the Bank of England's Monetary Policy Committee regarding interest rates?",
    "What factors contributed to Japan's industrial output growth in January?",
    "What charges did Platon Lebedev deny in court?",
    "What measures did China implement to control economic expansion in 2005?",
    "What was the value of the deal in which Verizon won the takeover battle for MCI?",
]

# Questions from other domains (expected to be refused)
unrelated_questions = [
    "How do black holes form?",
    "What are the benefits of a ketogenic diet?",
    "Who wrote the play 'Hamlet'?",
    "What is the capital city of Australia?",
    "How does the human immune system work?",
    "What are the primary causes of climate change?",
    "How do quantum computers differ from classical computers?",
    "What are the main ingredients in a traditional Italian pizza?",
    "Who discovered the theory of relativity?",
    "What is the process of photosynthesis in plants?",
]

# Run both batches through the pipeline: a heading per batch, then each
# question, its answer and a blank line.
for heading, batch in (
    ("Related Questions:", related_questions),
    ("Unrelated Questions:", unrelated_questions),
):
    print(heading)
    for question in batch:
        print(f"Question: {question}")
        handle_query(question)
        print()

Related Questions:
Question: What were Lufthansa's net profits in 2004?
Answer: Lufthansa's net profits in 2004 were 400m euros ($527.61m; £274.73m). This is compared to a loss of 984m euros in 2003.

Question: What challenges did Lufthansa face in 2003?
Answer: Lufthansa faced challenges in 2003 due to tough competition, a dip in demand following the Iraq war and the SARS virus, and troubles at its US catering business.

Question: How did Winn-Dixie plan to restructure after filing for bankruptcy?
Answer: Winn-Dixie planned to restructure after filing for bankruptcy by achieving significant cost reductions, improving merchandising and customer service in all locations, and generating a sense of excitement in the stores. This would enable the company to successfully turn itself around. The company's new chief executive, Peter Lynch, would lead this effort.

Question: What economic growth rating did the Cleveland area receive in the US Federal Reserve's Beige Book report?
Answer: The Cl

User Question Answer Interface

In [13]:
def ask_questions():
    """Run an interactive question-answer loop on stdin/stdout.

    Each entered line is stripped of surrounding whitespace. Blank lines are
    ignored, "quit" or "exit" (any case) ends the session, and anything else is
    passed to `handle_query`. The loop also ends cleanly when input is closed
    (EOFError) or interrupted (KeyboardInterrupt) instead of raising.

    Returns:
        None.
    """
    print("You can start asking your questions. Type 'quit' or 'exit' to stop.")
    while True:
        try:
            query = input("Enter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the session was interrupted: leave quietly.
            print("Exiting the question-answer session.")
            break
        if not query:
            # Nothing typed; do not spend an LLM call on an empty question.
            continue
        if query.lower() in ("quit", "exit"):
            print("Exiting the question-answer session.")
            break
        handle_query(query)

# Start the interactive session. This call blocks on keyboard input until the
# user types 'quit' or 'exit'.
ask_questions()

You can start asking your questions. Type 'quit' or 'exit' to stop.
Enter your question: What were Lufthansa's net profits in 2004?
Answer: Lufthansa's net profits in 2004 were 400m euros ($527.61m; £274.73m). This is compared to a loss of 984m euros in 2003.
Enter your question: What challenges did Lufthansa face in 2003?
Answer: Lufthansa faced challenges in 2003 due to tough competition, a dip in demand following the Iraq war and the SARS virus, and troubles at its US catering business.
Enter your question: Who invented the bulb?
Answer: I'm sorry, but this question goes beyond the information I have available. If you have any other question that you'd like help with, please let me know!
Enter your question: quit
Exiting the question-answer session.
