In [1]:
# Populate the process environment before anything reads it; the Model section
# later fetches GROQ_API_KEY with os.getenv().
# NOTE(review): load_dotenv() presumably reads a local .env file - confirm one exists next to the notebook.
from dotenv import load_dotenv
load_dotenv()

True

Loading the data

In [None]:
from langchain_groq import ChatGroq
# The loaders live in langchain_community; the old `langchain.document_loaders`
# path is a deprecated re-export.
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader

# Folder (relative to this notebook) that holds the source PDFs.
data_folder = "../data"

# Load every *.pdf in data_folder with PyPDFLoader.
# NOTE(review): PyPDFLoader presumably yields one Document per PDF page, so
# len(documents) would be a page count rather than a file count - confirm.
loader = DirectoryLoader(data_folder, glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()


In [11]:
print(f"Loaded {len(documents)} documents.")

# Preview the first 500 characters of the first document. Guarded so that an
# empty data folder produces a readable message instead of an IndexError.
if documents:
    print(documents[0].page_content[:500])
else:
    print("No documents were loaded - check data_folder and the glob pattern.")

Loaded 106 documents.
2021
Code of Business 
Conduct and Ethics


In [9]:
# Number of loaded Document objects (the same count the preview cell prints).
len(documents)

106

Chunking

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks are capped at 1500 characters; consecutive chunks share 200 characters
# so text cut at a boundary still appears whole in one of them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
)

In [13]:
# Split every loaded document into overlapping chunks for embedding.
chunks = text_splitter.split_documents(documents)

In [14]:
# Number of chunks produced by the splitter.
len(chunks)

221

Embedding

In [19]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model shared by index construction and query-time search.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Smoke test: embed one sentence, then inspect the vector length and its first values.
text = "Goldman Sachs Code of Business Conduct and Ethics"
vector = embeddings.embed_query(text)

print("Embedding Vector Size:", len(vector))
print("Sample Vector:", vector[:5])


Embedding Vector Size: 768
Sample Vector: [0.024842817336320877, 0.08980801701545715, 0.01351351197808981, 0.01878681778907776, -0.03634510189294815]


In [26]:
# FAISS lives in langchain_community; the old `langchain.vectorstores` path is
# a deprecated re-export.
from langchain_community.vectorstores import FAISS

# Embed every chunk with `embeddings` and build a FAISS index over the vectors.
faiss_index = FAISS.from_documents(chunks, embeddings)

In [27]:
# Save FAISS index
# Written to the relative path "faiss_index"; the next cell reloads it from the
# same path with FAISS.load_local.
faiss_index.save_local("faiss_index")

print("FAISS index saved successfully!")


FAISS index saved successfully!


In [29]:
# Reload the index that save_local("faiss_index") wrote.
# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based
# loading. Acceptable here only because this notebook created the file itself;
# never enable it for an index obtained from an untrusted source.
faiss_index = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Sanity-check retrieval: fetch the 3 chunks most similar to a sample query.
query = "Goldman Sachs business ethics policies"
results = faiss_index.similarity_search(query, k=3)

# Show the first 500 characters of each hit, numbered from 1.
for rank, doc in enumerate(results, start=1):
    print(f"\n🔹 Result {rank}:\n{doc.page_content[:500]}...\n---")



🔹 Result 1:
20011456.3.BUSINESS
GOLDMAN SACHS BDC, INC.
CODE OF BUSINESS CONDUCT AND ETHICS
Preamble
Pursuant to Section 406 of the Sarbanes-Oxley Act of 2002, the Securities and Exchange Commission (the
“SEC”) has adopted rules requiring annual disclosure of an investment company’s code of ethics applicable
to its principal executive, principal financial and principal accounting officers (the “Covered Officers”).
Pursuant to Section 303A.10 of the NYSE Listed Company Manual, the New York Stock Exchange, LL...
---

🔹 Result 2:
Goldman Sachs 3
04Our Shared Commitment to Integrity
05Our Core Values
06Raising Concerns and Reporting Issues
16Our Firm
25Special Goldman Sachs  
Bank-Related Considerations
18Anti-Bribery and Anti-Corruption
17Three Lines of Defense
19Anti-Money Laundering
20Sanctions
21Privacy and Data Protection
23Personal Trading
24Anti-Tying
24Recordkeeping and Reporting
23Antitrust
22Artificial Intelligence
35Human Rights, Sustainability, 
and Inclusive Growth
36Charitab

Model

In [55]:
import os

# Groq API key taken from the environment; None when the variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [56]:
from langchain_groq import ChatGroq

# Chat model used by every chain in this notebook, authenticated with GROQ_API_KEY.
llm = ChatGroq(
    api_key=GROQ_API_KEY,
    model="llama-3.3-70b-versatile",
)

In [57]:
# Example: ask the bare LLM (no retrieval) so its answer can be compared with
# the retrieval-augmented answers produced later in the notebook.
# The prompt's spelling was corrected ("goldman scahes polocies").
response = llm.invoke("What are Goldman Sachs' policies?")

In [58]:
# Text of the model's reply to the example prompt.
response.content

"Goldman Sachs is a multinational investment bank and financial services company. As such, it has various policies in place to guide its operations, manage risk, and ensure compliance with regulatory requirements. Here are some of the key policies at Goldman Sachs:\n\n1. **Code of Conduct**: Goldman Sachs has a Code of Conduct that outlines the company's expectations for employee behavior and decision-making. The code emphasizes the importance of integrity, honesty, and respect for others.\n2. **Risk Management Policy**: Goldman Sachs has a risk management policy that aims to identify, assess, and mitigate potential risks to the company. This includes credit risk, market risk, operational risk, and reputational risk.\n3. **Compliance Policy**: Goldman Sachs has a compliance policy that ensures the company adheres to all relevant laws, regulations, and industry standards. This includes policies related to anti-money laundering, know-your-customer, and securities trading.\n4. **Diversity

In [67]:
from langchain.prompts import PromptTemplate

# Prompt used by the RetrievalQA chain built below. It declares two
# placeholders, {context} and {question}. The template tells the model to
# answer only from the supplied context and gives a fixed fallback sentence
# for when the context does not contain the answer.
rag_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""You are a helpful AI support assistant for Goldman Sachs, providing accurate and reliable information 
based only on the retrieved document excerpts below. Always ensure compliance, transparency, and professionalism. 
Do not make up answers. If the answer is not found in the provided context, state that clearly.

---
Context:
{context}
---

User Query: {question}

Guidelines:
1. Use only the context provided. Do NOT generate unsupported information.
2. Be concise and professional in your response.
3. If the answer is unclear or unavailable in the context, respond with:
   "I'm sorry, but I couldn't find relevant information in the retrieved documents."
4. Maintain a neutral and compliant tone, as this is an official support system.

Answer:
"""
)

print("✅ RAG Support Bot PromptTemplate Ready!")


✅ RAG Support Bot PromptTemplate Ready!


In [68]:
retriever = faiss_index.as_retriever(search_kwargs={"k": 10})  # Retrieve the 10 most similar chunks per query


In [69]:
from langchain.chains import RetrievalQA

# Retrieval and generation in one chain: `retriever` fetches the chunks, the
# "stuff" strategy places all of them into the prompt's {context}, and `llm`
# writes the answer using `rag_prompt`.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": rag_prompt},
)


In [70]:
# ✅ Test the RAG Pipeline
query = "What are the compliance policies for ethical trading?"
# Chain.run() is deprecated. invoke() takes the chain's input dict ("query" for
# RetrievalQA) and returns a dict whose "result" entry is the answer text.
response = qa_chain.invoke({"query": query})["result"]

print("\n🔹 AI Support Response:\n", response)



🔹 AI Support Response:
 The provided context discusses the Code of Business Conduct and Ethics, which includes guidelines for ethical trading. According to the context, Covered Persons may not individually engage in certain transactions, such as the purchase or sale of securities or other property, with the Company. The compliance programs and procedures of the Company and its investment adviser and distributor are designed to prevent, or identify and correct, violations of these provisions.

Additionally, the context mentions that Goldman Sachs is committed to ensuring compliance with relevant market conduct laws and has implemented policies and procedures to mitigate market conduct risks, including conduct and supervision, confidentiality and communication, market manipulation, collusion, conflicts of interest, and inappropriate sales practices.

However, specific compliance policies for ethical trading are not explicitly outlined in the provided context. Therefore, I must respond w

Reranking

In [54]:
from sentence_transformers import CrossEncoder
# NOTE(review): the next cell repeats this import and constructs the same
# model again, so one of the two loads is redundant.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")



In [71]:
from sentence_transformers import CrossEncoder
from langchain.chains.question_answering import load_qa_chain
from langchain.schema import Document
from typing import List

# Step 1: Load Reranker Model
# Cross-encoder that scores (query, passage) pairs; rerank_documents() below
# reads this module-level name.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

# Step 2: Function to Re-Rank Documents
def rerank_documents(query: str, retrieved_docs: List[Document]) -> List[Document]:
    """Re-rank retrieved documents using a Cross-Encoder.

    Args:
        query: The user question every document is scored against.
        retrieved_docs: Candidate documents from the first-stage retriever.

    Returns:
        A new list holding the same documents ordered by descending
        cross-encoder score. An empty input yields an empty list.
    """
    # Materialise once so a one-shot iterable is not exhausted before zipping.
    docs = list(retrieved_docs)

    # Nothing to score: return early rather than handing the model an empty batch.
    if not docs:
        return []

    pairs = [(query, doc.page_content) for doc in docs]
    scores = reranker.predict(pairs)

    # Sort on the score only, so Document objects are never compared. sorted()
    # is stable, so equally scored documents keep their retrieval order.
    ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in ranked]

# Step 3: Query and Retrieve Documents (Initial Retrieval)
query = "What are the compliance policies for ethical trading?"
# `retriever` is the FAISS retriever created earlier in the notebook.
# invoke() replaces the deprecated get_relevant_documents().
retrieved_docs = retriever.invoke(query)

# Step 4: Re-Rank Retrieved Documents
reranked_docs = rerank_documents(query, retrieved_docs)

# Step 5: No new model is created here - the chain reuses the `llm` built in the Model section.

# Step 6: Load QA Chain Without Using a Retriever
# NOTE(review): this rebinds `qa_chain`, which previously held the RetrievalQA
# chain; re-running the earlier RAG test cell after this one will use this chain.
qa_chain = load_qa_chain(llm, chain_type="stuff")

# Step 7: Generate Answer Using Only Reranked Documents
# Chain.run() is deprecated; invoke() returns a dict and the answer text is
# stored under "output_text".
final_answer = qa_chain.invoke(
    {"input_documents": reranked_docs, "question": query}
)["output_text"]

# Step 8: Print Results
print("\n🔹 Top Re-Ranked Documents:\n")
for i, doc in enumerate(reranked_docs[:5]):  # Show top 5 results
    print(f"📜 Document {i+1}:\n{doc.page_content[:500]}...\n{'-'*80}")

print("\n💡 AI Answer:", final_answer)



🔹 Top Re-Ranked Documents:

📜 Document 1:
Covered Persons may not individually engage in certain transactions (such as the purchase or sale of
securities or other property) with the Company. The compliance programs and procedures of the Company
and its investment adviser and distributor (as applicable) are designed to prevent, or identify and correct,
violations of these provisions. This Code does not, and is not intended to, repeat or replace these programs
and procedures....
--------------------------------------------------------------------------------
📜 Document 2:
Company;
•compliance with applicable laws and governmental rules and regulations;
•the prompt internal reporting of violations of this Code to an appropriate person or persons
identified herein; and
•accountability for adherence to this Code.
Each Covered Person owes a duty to the Company to adhere to a high standard of business ethics, and
should be sensitive to situations that may give rise to actual as well as appar

In [72]:
# Answer text generated from the re-ranked documents.
final_answer

'The compliance policies for ethical trading, as per the provided context, include:\n\n1. **Compliance with applicable laws and regulations**: The Company and its Covered Persons must comply with all applicable laws, rules, and regulations governing their businesses.\n2. **Conflict of interest provisions**: Covered Persons are prohibited from individually engaging in certain transactions, such as the purchase or sale of securities or other property, with the Company.\n3. **Market conduct risk policies**: The firm has implemented policies and procedures to mitigate market conduct risks, including conduct and supervision, confidentiality and communication, market manipulation, collusion, conflicts of interest, and inappropriate sales practices.\n4. **Reporting and accountability**: Covered Persons must report any violations of the Code, and the Company will investigate and enforce the Code through its Secretary and Board of Directors.\n5. **Internal reporting of violations**: Covered Per

Hybrid search (dense FAISS + sparse BM25)

In [75]:
# ✅ Step 1: Build the two first-stage retrievers and fuse them (FAISS + BM25 hybrid)
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever

# Dense side: embedding similarity over the FAISS index, 10 hits per query.
retriever_dense = faiss_index.as_retriever(search_kwargs={"k": 10})

# Sparse side: BM25 keyword matching over the same chunks. No k is passed, so
# the library default applies.
bm25_retriever = BM25Retriever.from_documents(chunks)

# Combine both result lists, weighting the two retrievers equally.
hybrid_retriever = EnsembleRetriever(
    retrievers=[retriever_dense, bm25_retriever],
    weights=[0.5, 0.5],
)



In [76]:
from sentence_transformers import CrossEncoder
from langchain.chains.question_answering import load_qa_chain
from langchain.schema import Document
from typing import List

# Step 1: Load Reranker Model
# NOTE(review): the Reranking section already constructs this same CrossEncoder;
# repeating it here only keeps the cell runnable on its own.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

# Step 2: Function to Re-Rank Documents
def rerank_documents(query: str, retrieved_docs: List[Document]) -> List[Document]:
    """Re-rank retrieved documents using a Cross-Encoder.

    NOTE(review): this repeats the definition from the Reranking section and
    silently replaces it when the cell runs; keep the two in sync or drop one.

    Args:
        query: The user question every document is scored against.
        retrieved_docs: Candidate documents from the first-stage retriever.

    Returns:
        A new list holding the same documents ordered by descending
        cross-encoder score. An empty input yields an empty list.
    """
    # Materialise once so a one-shot iterable is not exhausted before zipping.
    docs = list(retrieved_docs)

    # Nothing to score: return early rather than handing the model an empty batch.
    if not docs:
        return []

    pairs = [(query, doc.page_content) for doc in docs]
    scores = reranker.predict(pairs)

    # Sort on the score only, so Document objects are never compared. sorted()
    # is stable, so equally scored documents keep their retrieval order.
    ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in ranked]

# Step 3: Query and Retrieve Documents (Initial Retrieval)
query = "What are the compliance policies for ethical trading?"
# Uses the FAISS + BM25 `hybrid_retriever` built in the previous cell.
# invoke() replaces the deprecated get_relevant_documents().
retrieved_docs = hybrid_retriever.invoke(query)

# Step 4: Re-Rank Retrieved Documents
reranked_docs = rerank_documents(query, retrieved_docs)

# Step 5: No new model is created here - the chain reuses the `llm` built in the Model section.

# Step 6: Load QA Chain Without Using a Retriever
# NOTE(review): this rebinds `qa_chain` again; earlier cells that call it will
# use this chain if they are re-run afterwards.
qa_chain = load_qa_chain(llm, chain_type="stuff")

# Step 7: Generate Answer Using Only Reranked Documents
# Chain.run() is deprecated; invoke() returns a dict and the answer text is
# stored under "output_text".
final_answer = qa_chain.invoke(
    {"input_documents": reranked_docs, "question": query}
)["output_text"]

# Step 8: Print Results
print("\n🔹 Top Re-Ranked Documents:\n")
for i, doc in enumerate(reranked_docs[:5]):  # Show top 5 results
    print(f"📜 Document {i+1}:\n{doc.page_content[:500]}...\n{'-'*80}")

print("\n💡 AI Answer:", final_answer)



🔹 Top Re-Ranked Documents:

📜 Document 1:
Covered Persons may not individually engage in certain transactions (such as the purchase or sale of
securities or other property) with the Company. The compliance programs and procedures of the Company
and its investment adviser and distributor (as applicable) are designed to prevent, or identify and correct,
violations of these provisions. This Code does not, and is not intended to, repeat or replace these programs
and procedures....
--------------------------------------------------------------------------------
📜 Document 2:
Company;
•compliance with applicable laws and governmental rules and regulations;
•the prompt internal reporting of violations of this Code to an appropriate person or persons
identified herein; and
•accountability for adherence to this Code.
Each Covered Person owes a duty to the Company to adhere to a high standard of business ethics, and
should be sensitive to situations that may give rise to actual as well as appar

In [77]:
# Answer text generated from the hybrid-retrieved, re-ranked documents.
final_answer

"The compliance policies for ethical trading at Goldman Sachs include:\n\n1. **Market Conduct Risk Management**: The firm has implemented policies and procedures to mitigate market conduct risks, such as conduct and supervision, confidentiality and communication, market manipulation, collusion, conflicts of interest, and inappropriate sales practices.\n2. **Firmwide Policy on Cooperation with the Firm**: Employees are expected to fully and honestly cooperate with the firm, its outside counsel, regulators, auditors, and other parties to uphold the firm's integrity and reputation.\n3. **Code of Business Conduct and Ethics**: The code promotes honest and ethical conduct, including the ethical handling of actual or apparent conflicts of interest between personal and professional relationships.\n4. **Compliance with applicable laws and regulations**: Employees are expected to comply with all applicable laws, rules, and regulations, including those related to trading and market conduct.\n5. 