In [1]:
import getpass
import os
import pickle
import time

import langchain
from groq import Groq
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.globals import set_debug
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq

In [34]:
# Resolve the Groq API key without hardcoding it in the notebook.
# Prefer the value already exported in the environment; prompt (input hidden)
# only when it is missing or still holds the literal placeholder "GROQ_API_KEY".
key = os.environ.get("GROQ_API_KEY", "").strip()
if not key or key == "GROQ_API_KEY":
    key = getpass.getpass("Enter your Groq API key: ").strip()
    if not key:
        raise ValueError("A Groq API key is required to run this notebook.")

# Export the key for the rest of the session. ChatGroq is constructed below
# without an explicit key, so it presumably relies on this variable.
os.environ["GROQ_API_KEY"] = key

In [35]:
# Articles to ingest as the knowledge base for the question-answering chain.
article_urls = [
    "https://www.british-business-bank.co.uk/business-guidance/guidance-articles/business-essential/navigating-competition-exclusions-in-procurement-act",
    "https://www.british-business-bank.co.uk/business-guidance/guidance-articles/business-essentials/capital-allowances-scaling-plant-and-machinery",
]

# Groq-hosted chat model; answers are capped at 500 tokens.
llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.7, max_tokens=500)

# Fetch the pages and parse them into LangChain documents.
loader = UnstructuredURLLoader(urls=article_urls)
data = loader.load()

# Number of documents loaded.
len(data)

2

In [36]:
# Cut the loaded pages into overlapping chunks so each piece is small enough
# to embed and retrieve on its own; the 200 overlap keeps context across cuts.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(data)

# Number of chunks produced.
len(docs)

24

In [37]:
# Display one chunk to sanity-check the split: its text and its source-URL metadata.
docs[2]

Document(metadata={'source': 'https://www.british-business-bank.co.uk/business-guidance/guidance-articles/business-essential/navigating-competition-exclusions-in-procurement-act'}, page_content='There are two key terms in the Procurement Act that businesses need to understand which will prevent them from taking part in any public sector procurement process:\n\nexclusion: this is where a supplier is not permitted to participate in a procurement, or be awarded a public contract, following an assessment of exclusion grounds by a contracting authority\n\ndebarment: occurs when a supplier is placed on a central debarment list by a Minister of the Crown which prevents the supplier from participating in any procurements or being awarded public contracts for up to five years.\n\nWhy exclusion and debarment matter to businesses\n\nBusinesses that take part in illegal cartel activity such as bid-rigging, price fixing, or market sharing risk being excluded from public procurement unless they can 

In [42]:
# Sentence-transformers model used to embed each chunk.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Embed every chunk and build the FAISS index from the resulting vectors.
vector_index = FAISS.from_documents(documents=docs, embedding=embedding_model)

print("FAISS Index Created Successfully!")

FAISS Index Created Successfully!


In [44]:
# Persist the in-memory index to disk as a pickle.
# Caution: only unpickle this file if it comes from a trusted source, since
# loading a pickle can execute arbitrary code.
file_path = "vector_index.pkl"
with open(file_path, mode="wb") as index_file:
    pickle.dump(vector_index, file=index_file)

In [46]:
# Expose the FAISS index as a retriever, then wire it together with the LLM
# into a question-answering chain that also reports its sources.
retriever = vector_index.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=retriever,
)

# Show the assembled chain.
chain



In [48]:
query = "How to report concerns to the CMA"

# Enable LangChain's global debug tracing through the supported setter
# (assigning `langchain.debug` directly is deprecated).
set_debug(True)

# Run the chain via `invoke` (calling the chain object directly is deprecated).
# `return_only_outputs=True` omits the input question from the returned dict.
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "How to report concerns to the CMA"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "‘Self-Cleaning’\n\nBusinesses can avoid exclusion if they can demonstrate that the circumstances leading to the exclusion ground are not continuing or are unlikely to occur again.\n\nThis might include, for example, showing they have been granted leniency, or admitted wrongdoing and agreed to pay a penalty under the CMA’s settlement procedure.\n\nHow to report concerns to the CMA\n\nIf you witness or suspect another business breaking the law you can:\n\ncall 020 3738 6888\n\nemail [email p

Token indices sequence length is longer than the specified maximum sequence length for this model (1934 > 1024). Running this sequence through the model will result in indexing errors


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How to report concerns to the CMA",
  "summaries": "Content: If you witness or suspect another business breaking the law you can:\ncall 020 3738 6888\nemail [email protected].\nSource: https://www.british-business-bank.co.uk/business-guidance/guidance-articles/business-essential/navigating-competition-exclusions-in-procurement-act\n\nContent: There is no relevant text in the provided portion of the document that answers the question \"How to report concerns to the CMA\". The text only mentions that more information can be found on the CMA's overview page, but it does not provide instructions on how to report concerns.\nSource: https://www.british-business-bank.co.uk/business-guidance/guidance-articles/business-essential/navigating-competition-exclusions-in-procurement-act\n\nContent: If a supplier realises it has inf

{'answer': 'To report concerns to the CMA, you can call 020 3738 6888 or email [email protected].\n',
 'sources': 'https://www.british-business-bank.co.uk/business-guidance/guidance-articles/business-essential/navigating-competition-exclusions-in-procurement-act'}