# INIT

In [2]:
%%capture
!pip install langfuse langchain langchain_core langchain_community faiss-cpu openai langchain_openai sentence_transformers langchainhub langchain_cohere cohere google-search-results

In [3]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, BaseOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Optional, Dict, Any, Tuple, Union, Type, Callable
import re
import types
from langchain import hub
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain.utilities import SerpAPIWrapper
from langchain.tools import BaseTool, StructuredTool, tool
from langchain.agents import create_react_agent
from langchain.agents import AgentExecutor
from langchain.schema import BaseMessage, HumanMessage, AIMessage
from langchain.memory import ChatMessageHistory
from dataclasses import dataclass
from langchain.memory.entity import BaseEntityStore
import requests
import json
from langfuse.callback import CallbackHandler
from pydantic import BaseModel


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
import os
import getpass

# Prompt for the OpenAI key without echoing it and publish it through the environment.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Open AI API Key:")

Open AI API Key:··········


In [5]:
# Prompt for the Cohere key without echoing it and publish it through the environment.
os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")

Cohere API Key:··········


In [6]:
# Prompt for the SerpAPI key without echoing it and publish it through the environment.
os.environ["SERPAPI_API_KEY"] = getpass.getpass("SerpAPI Key:")

SerpAPI Key:··········


In [7]:
# Prompt for the ProxyCurl key; the proxyCurl class reads it back from this environment variable.
os.environ["PROXYCURL_API_KEY"] = getpass.getpass("proxyCurl API Key:")

proxyCurl API Key:··········


In [8]:
# LangFuse Tracing API keys
# Both values are read interactively and kept in module variables (not the environment);
# they are passed to CallbackHandler in the tracing-init cell.
lf_public_key = getpass.getpass("LangFuse Public API Key:")
lf_secret_key = getpass.getpass("LangFuse Secret API Key:")

LangFuse Public API Key:··········
LangFuse Secret API Key:··········


In [9]:
# LangFuse Tracing init
# The handler is later supplied through config={"callbacks": [...]} when the RAG chain is invoked.

langfuse_handler = CallbackHandler(
    public_key=lf_public_key,
    secret_key=lf_secret_key,
    host="https://cloud.langfuse.com",
    trace_name="ReactWithReflexion_Demo"
)

Constants

In [10]:
# Name of the embedding model handed to HuggingFaceBgeEmbeddings.
MODEL_NAME = "BAAI/bge-base-en-v1.5"
# Chat model instance passed to the RAG constructor.
LLM = ChatOpenAI(model = "gpt-3.5-turbo")
# Splitter settings used by makeChunks (length is measured with len, i.e. characters).
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
# Directory the FAISS index is saved to after creation and after every document add.
VECTOR_STORE_PATH = "RAG/faiss_index"
# User-Agent header sent by WebBaseLoader when downloading filings.
USER_AGENT = 'SM-ResearchBot/1.0 (sm12345@gmail.com)'
# top_n and model name given to the Cohere reranker in the execution section.
TOP_N = 12
RERANKER_MODEL = "rerank-english-v3.0"
# (company name, filing URL) pairs ingested by addPageContent.
TEN_K_URL = [
    ("Tesla", "https://www.sec.gov/Archives/edgar/data/1318605/000162828024002390/tsla-20231231.htm"),
    ("General Motors", "https://www.sec.gov/Archives/edgar/data/1467858/000146785824000031/gm-20231231.htm")
]

Local embeddings model

In [11]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Local embedding model shared by the FAISS vector store.
# NOTE(review): the device is hard-coded to 'cuda'; presumably this fails on a CPU-only
# runtime — confirm, or select the device dynamically.
embeddings = HuggingFaceBgeEmbeddings(
    model_name = MODEL_NAME,
    model_kwargs = {'device': 'cuda'},
    encode_kwargs = {'normalize_embeddings': True}
)

  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Preprocessing

In [12]:
class Preprocessing:
    """Stateless holder for the document loading and chunking helpers.

    The helper methods are defined as plain functions in later cells and
    attached to this class afterwards.
    """

    def __init__(self) -> None:
        """Create an instance; there is no per-instance state to set up."""
        return None

In [13]:
def loadDocumentFromWeb(self, url: str) -> List[Document]:
    """Download `url` with WebBaseLoader and return the loaded documents.

    The request carries the module-level USER_AGENT as its User-Agent header.
    Any exception is reported on stdout and converted into an empty list, so
    callers never see a raised error from this method.
    """
    try:
        loader = WebBaseLoader(url, header_template={'User-Agent': USER_AGENT})
        pages = loader.load()
    except Exception as e:
        print(f"Failed to load document from {url}: {str(e)}")
        return []
    return pages

In [14]:
def makeChunks(self, elements: List[Document]) -> List[Document]:
    """Split `elements` into chunks using the notebook-wide splitter settings.

    Chunk length is measured with `len`; size and overlap come from the
    CHUNK_SIZE and CHUNK_OVERLAP constants.
    """
    splitterOptions = {
        "chunk_size": CHUNK_SIZE,
        "chunk_overlap": CHUNK_OVERLAP,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitterOptions).transform_documents(elements)

In [15]:
def processCompanies(self, urlsAndCompanies: List[Tuple[str, str]]) -> Tuple[List[Document], List[Tuple[str, str]]]:
    """Load and chunk every filing and capture the tail of each one.

    Args:
        urlsAndCompanies: pairs unpacked as (company, url).

    Returns:
        A tuple (chunks, tails): `chunks` is the concatenation of the chunked
        documents of every company; `tails` holds, for each company whose
        download produced documents, (company, last 1000 characters of the
        first loaded document).
    """
    chunks = []
    tails = []
    for entry in urlsAndCompanies:
        companyName, link = entry
        loaded = self.loadDocumentFromWeb(link)
        if loaded:
            # Only the first loaded document contributes the tail snippet.
            tails.append((companyName, loaded[0].page_content[-1000:]))
        chunks.extend(self.makeChunks(loaded))
    return chunks, tails

In [16]:
# Attach the functions defined in the preceding cells as methods of Preprocessing.
Preprocessing.loadDocumentFromWeb = loadDocumentFromWeb
Preprocessing.makeChunks = makeChunks
Preprocessing.processCompanies = processCompanies

# Basic RAG framework

In [17]:
@dataclass
class Director:
    """A company director together with the background data gathered for them.

    Attributes:
        name: director name as extracted from the filing text.
        linkedin_handle: value returned by the LinkedIn search for this name.
        education: institute names; starts empty and is filled on demand.
        past_work: company names the director worked at, or None when not
            fetched yet.
    """
    name: str
    linkedin_handle: str
    education: List[str]
    # Declared (with a default, so three-argument construction keeps working)
    # because the background tool stores `past_work` on directors; the store
    # serializes every attribute and rebuilds the object with Director(**data),
    # which rejects keys that are not declared fields.
    past_work: Optional[List[str]] = None

In [18]:
@dataclass
class Company:
    """A company known to the RAG system.

    Attributes:
        name: company name, also used as the key in the entity store.
        snippet: tail of the company's filing text, later used for director
            name extraction.
        directors: Director records, or None until they have been looked up.
    """
    name: str
    snippet: str
    directors: Optional[List[Director]] = None

In [19]:
class Companies(BaseEntityStore):
    """Local Entity store for Company objects."""

    # Placeholder methods: they only make the class definable here. The working
    # implementations are written as plain functions in the following cells and
    # assigned onto the class afterwards.
    def get(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """Placeholder; returns None until the real implementation is attached."""

    def set(self, key: str, value: Optional[str]) -> None:
        """Placeholder; does nothing until the real implementation is attached."""

    def delete(self, key: str) -> None:
        """Placeholder; does nothing until the real implementation is attached."""

    def exists(self, key: str) -> bool:
        """Placeholder; returns None until the real implementation is attached."""

    def clear(self) -> None:
        """Placeholder; does nothing until the real implementation is attached."""

    # Alternative declaration kept for reference:
    #store: Dict[str, str] = Field(default_factory=dict)
    # Maps a company key to its stored string (JSON text when written via set_company).
    store: Dict[str, str] = {}


In [20]:
def get(self, key: str, default: Optional[str] = None) -> Optional[str]:
    """Return the string stored under `key`, or `default` when the key is absent.

    A key that is present with a stored None yields None.
    """
    return self.store.get(key, default)

In [21]:
def set(self, key: str, value: Optional[str]) -> None:
    """Store `value` (which may be None) under `key`, replacing any previous entry.

    NOTE: this function is defined at module level under the name `set`, so
    once this cell has run the builtin `set` type is shadowed for every later
    cell that refers to it by that name.
    """
    self.store[key] = value

In [22]:
def delete(self, key: str) -> None:
    """Remove `key` from the store; a missing key is silently ignored."""
    self.store.pop(key, None)

In [23]:
def exists(self, key: str) -> bool:
    """Report whether `key` currently has an entry in the store."""
    isKnown = key in self.store
    return isKnown

In [24]:
def clear(self) -> None:
    """Empty the store in place (the same dict object is kept)."""
    entries = self.store
    entries.clear()

In [25]:
def get_company(self, key: str) -> Optional[Company]:
    """Rebuild the Company stored under `key` from its JSON text.

    Returns None when nothing is stored for `key`. A non-empty 'directors'
    list in the JSON is turned into Director objects; an empty or missing
    value is passed through unchanged.
    """
    raw = self.get(key)
    if raw is None:
        return None

    fields = json.loads(raw)
    directorRecords = fields.pop('directors', None)
    if directorRecords:
        directors = [Director(**record) for record in directorRecords]
    else:
        directors = directorRecords
    return Company(**fields, directors=directors)

In [26]:
def set_company(self, key: str, company: Company) -> None:
    """Serialize `company` to JSON and store it under `key`.

    The company's attribute dict is copied; when the company has directors,
    each one is replaced by its own attribute dict so the result is plain JSON.
    """
    payload = dict(company.__dict__)
    if company.directors:
        payload['directors'] = [vars(member) for member in company.directors]
    self.set(key, json.dumps(payload))

In [27]:
def update_company(self, key: str, **kwargs) -> None:
    """Overwrite attributes of the stored company and save it again.

    Only keyword names that already exist as attributes on the company are
    applied; unknown names are ignored. Nothing happens when no company is
    stored under `key`.
    """
    company = self.get_company(key)
    if company is None:
        return

    for attr in kwargs:
        if hasattr(company, attr):
            setattr(company, attr, kwargs[attr])
    self.set_company(key, company)

In [28]:
# Replace the placeholder methods on Companies with the implementations from the
# preceding cells and add the Company-level helpers.
Companies.get = get
Companies.set = set
Companies.delete = delete
Companies.exists = exists
Companies.clear = clear
Companies.get_company = get_company
Companies.set_company = set_company
Companies.update_company = update_company

In [29]:
class RAG:
    """Retrieval system over 10-K filings plus an entity store of companies.

    Most methods are defined as plain functions in later cells and attached to
    this class afterwards.
    """

    def __init__(self, llm, embeddings) -> None:
        """Wire up the collaborators.

        Args:
            llm: chat model used to answer questions in the RAG chain.
            embeddings: embedding model used to build the vector store.

        A failure while constructing a collaborator is reported on stdout and
        not re-raised.
        """
        # Defined up front so the "not initialized" checks elsewhere see None
        # instead of hitting AttributeError before createVectorStore has run.
        self.vectorStore = None
        self.retriever = None
        try:
            self.llm = llm
            self.embeddings = embeddings
            self.companyStore = Companies()
            self.preprocessor = Preprocessing()
            self.reactWithReflexion = REACTwithReflexion(self)
        except Exception as e:
            print(f"Failed to initialize RAG: {str(e)}")

In [30]:
def createVectorStore(self) -> None:
    """Create the FAISS index, persist it and expose a retriever.

    The index is seeded with a single empty text, saved to VECTOR_STORE_PATH,
    and wrapped in a retriever that returns 12 results per search. Any failure
    is printed rather than raised.
    """
    try:
        index = FAISS.from_texts([""], self.embeddings)
        self.vectorStore = index
        index.save_local(VECTOR_STORE_PATH)
        self.retriever = index.as_retriever(search_kwargs={"k": 12})
    except Exception as e:
        print(f"Failed to create vector store: {str(e)}")

In [31]:
def addDocsToVectorstore(self,documents: List[Document]) -> None:
    """Add `documents` to the vector store and persist the index.

    When the vector store has not been created yet, a hint is printed and the
    call returns without doing anything.
    """
    # getattr covers instances on which the attribute was never assigned.
    vectorStore = getattr(self, "vectorStore", None)
    if vectorStore is None:
        print("Vector store not initialized. Call createVectorStore first.")
        return
    vectorStore.add_documents(documents)
    vectorStore.save_local(VECTOR_STORE_PATH)

In [32]:
def addPageContent(self, urlsAndCompanies: List[Tuple[str, str]]) -> None:
    """Ingest filings: index their chunks and register each company.

    Args:
        urlsAndCompanies: pairs forwarded to the preprocessor, which unpacks
            them as (company, url).

    For every company that produced a snippet, an existing store entry gets
    its snippet refreshed (other stored attributes are kept); otherwise a new
    Company is stored. Afterwards the agent executor is rebuilt. Any failure
    is printed rather than raised.
    """
    try:
        preprocessedData, companyData = self.preprocessor.processCompanies(urlsAndCompanies)
        self.addDocsToVectorstore(preprocessedData)
        for company_name, snippet in companyData:
            if self.companyStore.exists(company_name):
                # update_company only accepts attribute updates as keywords;
                # passing a Company positionally raises TypeError.
                self.companyStore.update_company(company_name, snippet=snippet)
            else:
                self.companyStore.set_company(company_name, Company(name=company_name, snippet=snippet))
        self.reactWithReflexion.updateChain()
    except Exception as e:
        print(f"Failed to add page content to vector store: {str(e)}")

In [33]:
def getCompany(self, company_name: str):
    """Return the stored Company for `company_name`, or None if there is none."""
    companyStore = self.companyStore
    return companyStore.get_company(company_name)

In [34]:
def getAvailableCompanies(self) -> str:
    """Return the names of all stored companies, separated by blank lines."""
    companyNames = []
    for _name in self.companyStore.store:
        companyNames.append(self.companyStore.get_company(_name).name)
    return "\n\n".join(companyNames)

In [35]:
def getCompanySnippet(self, company_name: str) -> Optional[str]:
    """Return the stored snippet for `company_name`, or None for an unknown company."""
    company = self.companyStore.get_company(company_name)
    return None if company is None else company.snippet

In [36]:
def getCompanyDirectors(self, company_name: str):
    """Return the stored directors for `company_name`, or None for an unknown company."""
    company = self.companyStore.get_company(company_name)
    return None if company is None else company.directors


In [37]:
def updateCompany(self, company_name: str, directors: List[Director] = None, **changes) -> None:
    """Apply changes to a stored company and write it back.

    Args:
        company_name: key of the company in the store.
        directors: replacement director list; left untouched when None.
        **changes: attribute updates; names the company does not already have
            are ignored.

    Nothing happens when the company is not in the store.
    """
    company = self.companyStore.get_company(company_name)
    if company is None:
        return

    if directors is not None:
        company.directors = directors
    for attribute in changes:
        if hasattr(company, attribute):
            setattr(company, attribute, changes[attribute])
    self.companyStore.set_company(company_name, company)


In [38]:
# Attach the functions defined in the preceding cells as methods of RAG.
RAG.createVectorStore = createVectorStore
RAG.addDocsToVectorstore = addDocsToVectorstore
RAG.addPageContent = addPageContent
RAG.getAvailableCompanies = getAvailableCompanies
RAG.getCompanySnippet = getCompanySnippet
RAG.getCompanyDirectors = getCompanyDirectors
RAG.updateCompany = updateCompany
RAG.getCompany = getCompany

# RerankerRAG

In [39]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

class RerankerRAG:
    """Holds a Cohere rerank compressor configured with a model and a result count."""

    def __init__(self, model="rerank-english-v2.0", topN=3):
        """Store the settings and build the compressor.

        Args:
            model: Cohere rerank model name.
            topN: value passed to the compressor as `top_n`.
        """
        self.rerankModel, self.topN = model, topN
        self.compressor = CohereRerank(model=model, top_n=topN)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [40]:
def createReranker(self, baseRetriever):
    """Wrap `baseRetriever` so its results pass through this object's compressor."""
    wiring = {
        "base_compressor": self.compressor,
        "base_retriever": baseRetriever,
    }
    return ContextualCompressionRetriever(**wiring)

In [41]:
# Attach createReranker as a method of RerankerRAG.
RerankerRAG.createReranker = createReranker

# SubQuery

In [42]:
class SubQuery(BaseModel):
    """Search over a database of 10-K statements from various companies."""

    # Required field (the `...` default); SubQueryOutputParser fills it with the
    # text found between one pair of angle brackets in the LLM output.
    subQuery: str = Field(
        ...,
        description="A very specific query against the database of 10-K statements.",
    )

In [43]:
class SubQueryOutputParser(BaseOutputParser[List[SubQuery]]):
    """Turns LLM text into SubQuery objects, one per `<...>` span."""

    def parse(self, text: str) -> List[SubQuery]:
        """Return a SubQuery for every non-greedy `<...>` match in `text`, in order."""
        bracketed = re.findall(r'<(.*?)>', text)
        parsed = []
        for q in bracketed:
            parsed.append(SubQuery(subQuery=q))
        return parsed

In [44]:
class QueryDecomposer:
    """Breaks a user question into sub-queries with an LLM.

    The prompt and the chain-building method are attached to the class in
    later cells.
    """

    def __init__(self):
        """Create the chat model (temperature 0.8) and the angle-bracket parser."""
        modelSettings = {"model": "gpt-3.5-turbo", "temperature": 0.8}
        self.llm = ChatOpenAI(**modelSettings)
        self.parser = SubQueryOutputParser()


In [45]:
# System instructions for query decomposition. The angle-bracket output format
# requested here is what SubQueryOutputParser's regex extracts.
system = '''You are an expert at converting user questions into specific database queries for similarity search.

Given a user question, break it down into distinct sub-queries that address different aspects of the original question.

For each sub-query:
1. Provide a very specific statement describing the information needed.
2. Include the company name mentioned in the original question in each sub-query.
3. Enclose each sub-query in angle brackets like this: <sub-query text>

Ensure that the set of sub-queries comprehensively covers the main aspects of the original question.

Do not include any meta-instructions or references to data sources in your sub-queries.

Do not rephrase or alter any financial terms or company names you are not familiar with.'''

# Chat prompt with one input variable, `question`.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

In [46]:
def createDecomposerChain(self):
    """Build the prompt -> llm -> parser pipeline and store it as `self.queryAnalyzer`."""
    promptThenModel = self.prompt | self.llm
    self.queryAnalyzer = promptThenModel | self.parser

In [47]:
# Attach the shared prompt (as a class attribute) and the chain builder to QueryDecomposer.
QueryDecomposer.prompt = prompt
QueryDecomposer.createDecomposerChain = createDecomposerChain

# nameExtractor

In [48]:
class nameExtractor:
    """LLM chain that pulls director names out of a block of text."""

    def __init__(self):
        """Create the model (temperature 0.4), the list parser and the chain.

        `nameExtractionPrompt` is a module-level name defined in a later cell;
        it must exist by the time an instance is created.
        """
        self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4)
        self.parser = CommaSeparatedListOutputParser()
        self.nameExtractionChain = nameExtractionPrompt | self.llm | self.parser

In [49]:
# Prompt text with one input variable, `text`; the requested comma-separated
# output is parsed by CommaSeparatedListOutputParser in nameExtractor.
nameExtractionPrompt = '''
Extract and list the names of all individuals with the title 'Director' from the following text, excluding any additional information such as dates or signatures.
Present the names as a simple, comma-separated list.

{text}
'''
# The name is rebound from the raw string to the PromptTemplate built from it.
nameExtractionPrompt = PromptTemplate.from_template(nameExtractionPrompt)

In [50]:
def getDirectorNames(self, stringWithNames):
    """Run the extraction chain on `stringWithNames` and return its parsed output."""
    chainInput = {"text": stringWithNames}
    directorNames = self.nameExtractionChain.invoke(chainInput)
    return directorNames

In [51]:
# Attach getDirectorNames as a method of nameExtractor.
nameExtractor.getDirectorNames = getDirectorNames

# LinkedinHandles

In [52]:
class LinkedinHandles:
    """Looks up LinkedIn profile links through a SerpAPI search wrapper.

    The lookup methods are attached to the class in later cells.
    """

    def __init__(self):
        """Create the search wrapper used by the lookup methods."""
        searchClient = SerpAPIWrapper()
        self.search = searchClient

In [53]:
def getHandles(self, directorNames:List[str], companyName: str):
    """Look up a LinkedIn handle for every director name.

    Args:
        directorNames: names to search for.
        companyName: company used to narrow each search.

    Returns:
        A list of (name, handle) tuples for the names whose lookup returned a
        truthy value, in input order.
    """
    handles = []
    for name in directorNames:
        # Forward the company: a name-only search can return an unrelated
        # person who merely shares the name.
        handle = self.getLinkedinHandle(name, companyName)
        if handle:
            handles.append((name, handle))
    return handles

In [54]:
def getLinkedinHandle(self, name: str, company: Optional[str] = None) -> Optional[str]:
    """Search for the LinkedIn profile link of `name`.

    Args:
        name: person to search for (quoted in the query).
        company: optional company name added to the query as a second quoted
            term.

    Returns:
        The first non-empty result link; the text
        "No LinkedIn profile found for <name>" when the search yields no
        usable link; None when the search itself fails.
    """
    query = f'site:linkedin.com/in/ "{name}"'

    if company:
        # Leading space keeps the company a separate quoted term instead of
        # gluing it onto the closing quote of the name.
        query += f' "{company}"'

    try:
        results = self.search.results(query)
        for result in results.get("organic_results", []):
            link = result.get("link", "")
            # Skip results without a link rather than returning an empty handle.
            if link:
                return link

        return(f"No LinkedIn profile found for {name}")

    except Exception as e:
        # Report the failure instead of swallowing it silently.
        print(f"LinkedIn search failed for {name}: {str(e)}")
        return None

In [55]:
# Attach the lookup functions as methods of LinkedinHandles.
LinkedinHandles.getHandles = getHandles
LinkedinHandles.getLinkedinHandle = getLinkedinHandle

# companyDirectorsLinkedinHandle

In [56]:
class companyDirectorsLinkedinHandleInput(BaseModel):
    """Input for the companyDirectorsLinkedinHandle tool."""

    # Field name matches the `companyName` parameter of the tool's _run method.
    companyName: str = Field(description="Name of the company")

In [57]:
class companyDirectorsLinkedinHandle(BaseTool):
    """Function takes company name as input and returns the LinkedIn handle of the company's directors."""

    name: str = "companyDirectorsLinkedinHandle"
    # Description without the company list; modifyDescription appends to it.
    baseDescription: str = (
        "A tool to get the names of the directors of a company and store their LinkedIn handles. "
        "Takes the companyName as input. Outputs a list with director names only."
    )
    description: str = baseDescription
    args_schema: Type[BaseModel] = companyDirectorsLinkedinHandleInput
    rag: Any = Field(description="RAG system")
    nameExtractor: Any = Field(description="Name extractor")
    linkedin: Any = Field(description="LinkedIn handles")

    def __init__(self, ragSystem: Any):
        """Bind the tool to `ragSystem` and create its name extractor and LinkedIn searcher."""
        super().__init__()
        self.rag = ragSystem
        self.nameExtractor = nameExtractor()  # Assuming nameExtractor is a callable that initializes an instance
        self.linkedin = LinkedinHandles()  # Assuming LinkedinHandles is a callable that initializes an instance

    def modifyDescription(self):
        """Rebuild the description so it lists the companies currently in the store."""
        # getAvailableCompanies takes no argument and returns the stored names;
        # getCompany requires a company name and cannot be called bare.
        self.description = self.baseDescription + f" The companies available are {self.rag.getAvailableCompanies()}"

    def _run(self, companyName: str):
        """Return one "name: handle" line per director of `companyName`.

        Directors already stored for the company are returned directly.
        Otherwise names are extracted from the company's stored snippet,
        handles are searched for, the result is saved on the company and
        returned. A company without a stored snippet yields an explanatory
        message instead.
        """
        company = self.rag.companyStore.get_company(companyName)
        if company and company.directors:
            output = "\n".join([f"{director.name}: {director.linkedin_handle}" for director in company.directors])
            return output

        stringWithNames = self.rag.getCompanySnippet(companyName)
        if not stringWithNames:
            # Without source text the extractor would be prompted with nothing
            # to read, so stop here and tell the agent why.
            return f"No filing text is available for company '{companyName}'."

        directorNames = self.nameExtractor.getDirectorNames(stringWithNames)
        handles = self.linkedin.getHandles(directorNames, companyName)

        new_directors = [
            Director(name=name, linkedin_handle=handle, education=[])
            for name, handle in handles
        ]
        self.rag.updateCompany(companyName, new_directors)
        output = "\n".join([f"{name}: {handle}" for name, handle in handles])
        return output

# proxyCurl

In [58]:
class proxyCurl():
    """Client for the ProxyCurl LinkedIn API; request methods are attached in later cells."""

    def __init__(self):
        """Read the API key from the PROXYCURL_API_KEY environment variable (None if unset)."""
        self.api_key = os.getenv("PROXYCURL_API_KEY")

In [59]:
# Module-level values that are assigned onto the proxyCurl class as `name` and
# `description` in the attachment cell below.
name = "proxycurl"
description = "A tool for accessing ProxyCurl API to fetch LinkedIn data."

In [60]:
def _call(self, input_text: str):
    """Fetch a LinkedIn profile through the ProxyCurl API.

    Args:
        input_text: URL of the LinkedIn profile to fetch.

    Returns:
        The decoded JSON body on HTTP 200; otherwise a dict with the keys
        "error" (status code) and "message" (response text).
    """
    url = "https://nubela.co/proxycurl/api/v2/linkedin"
    headers = {
        'Authorization': f'Bearer {self.api_key}',
    }
    params = {
    # Query the profile the caller asked for rather than a fixed sample profile.
    'linkedin_profile_url': input_text,
    'use_cache': 'if-present',
    'fallback_to_cache': 'on-error'
    }

    # A timeout keeps an unresponsive endpoint from blocking the agent forever.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    if response.status_code == 200:
        return response.json()
    else:
        return {"error": response.status_code, "message": response.text}

In [61]:
def run(self, url: str):
    """Summarize a LinkedIn profile as employer and school names.

    Args:
        url: LinkedIn profile URL, forwarded to `_call`.

    Returns:
        (company_names, institute_names): each list holds up to 7 distinct
        names, in first-seen order, taken from entries that lasted more than
        one year. A missing start year counts as 2022 and a missing end year
        as 2024.

    Raises:
        RuntimeError: when `_call` returned an error payload.
    """
    data = self._call(url)
    if "error" in data:
        # `_call` reports HTTP failures as {"error": ..., "message": ...};
        # surface that instead of failing later on a missing key.
        raise RuntimeError(f"ProxyCurl request failed ({data['error']}): {data.get('message')}")

    def collectNames(entries, nameKey):
        # A dict serves as the ordered "seen" container. The builtin name `set`
        # is deliberately not used: a module-level function called `set` is
        # defined earlier in this notebook and shadows it.
        ordered = {}
        for entry in entries:
            starts_at = entry.get("starts_at")
            ends_at = entry.get("ends_at")
            start_year = starts_at["year"] if starts_at else 2022
            end_year = ends_at["year"] if ends_at else 2024
            if end_year - start_year > 1:
                ordered.setdefault(entry[nameKey], None)
        return list(ordered)[:7]

    company_names = collectNames(data.get("experiences") or [], "company")
    institute_names = collectNames(data.get("education") or [], "school")

    return company_names, institute_names

In [62]:
# Attach the metadata values and the request/summary functions to proxyCurl.
proxyCurl.name = name
proxyCurl.description = description
proxyCurl._call = _call
proxyCurl.run = run

# directorBackground

In [63]:
class directorBackgroundInput(BaseModel):
    """Input for the directorBackground tool."""

    # Named after what the tool actually consumes: its description and its
    # _run method both take the director's name, not a LinkedIn handle. With
    # the schema key matching the _run parameter, keyword (dict) invocation
    # also works.
    directorName: str = Field(description="Name of The Director")

In [64]:
class directorBackground(BaseTool):
    """Function takes Director Name as input and returns the education and career of the director."""

    name: str = "directorsProfessionalExperience"

    description: str = (
        "A tool to get the education and career of a director. "
        "Takes the director's Name as input. "
        "Outputs a list of their education, companies, and posts held."
    )
    args_schema: Type[BaseModel] = directorBackgroundInput
    rag: Any = Field(description="RAG system")
    proxyCurl: Any = Field(description="proxyCurl tool")

    def __init__(self, ragSystem: Any):
        """Bind the tool to `ragSystem` and create its ProxyCurl client."""
        super().__init__()
        self.proxyCurl = proxyCurl()  # Assuming proxyCurl is a callable that initializes an instance
        self.rag = ragSystem

    def _findDirector(self, directorName: str):
        """Return (company_name, company, director) for the first name match, else None."""
        wanted = directorName.strip().lower()
        for company_name in self.rag.companyStore.store:
            company = self.rag.getCompany(company_name)
            if not (company and company.directors):
                continue
            for director in company.directors:
                if director.name.strip().lower() == wanted:
                    return company_name, company, director
        return None

    def _run(self, directorName: str):
        """Describe where a stored director worked and studied.

        The director is matched by name (case-insensitive, surrounding
        whitespace ignored) across all stored companies. Cached data is used
        when both education and work history are present; otherwise the
        profile is fetched through ProxyCurl and the result is saved on the
        owning company. Always returns a message string, including on failure.
        """
        found = self._findDirector(directorName)
        if found is None:
            return f"Director {directorName} not found."
        company_name, company, director_found = found

        # `past_work` may be absent on director records, so read it defensively.
        cached_work = getattr(director_found, "past_work", None)
        if director_found.education and cached_work:
            companies_str = ", ".join(cached_work)
            institutes_str = ", ".join(director_found.education)
            return f"Companies worked at: {companies_str}.\nEducation: {institutes_str}."

        handle = director_found.linkedin_handle
        if not handle or not handle.startswith("http"):
            # The handle search stores a "No LinkedIn profile found ..." text
            # when nothing was found; that is not a URL worth sending to the API.
            return f"Failed to retrieve data from LinkedIn for {directorName}: no LinkedIn profile URL is stored."

        # If data is not available, attempt to retrieve it from LinkedIn
        try:
            experience, education = self.proxyCurl.run(handle)

            if education:
                director_found.education = education
            # Cache work history only when the director record declares the
            # field: the store serializes every attribute and rebuilds records
            # with Director(**data), which rejects undeclared keys.
            if experience and "past_work" in getattr(director_found, "__dataclass_fields__", {}):
                director_found.past_work = experience

            self.rag.updateCompany(company_name, directors=company.directors)

            companies_str = ", ".join(experience) if experience else "No companies"
            institutes_str = ", ".join(education) if education else "No institutes"
            return f"Companies worked at: {companies_str}.\nEducation: {institutes_str}."

        except Exception as e:
            return f"Failed to retrieve data from LinkedIn for {directorName}: {str(e)}"

# REACTwithReflexion

In [65]:
class REACTwithReflexion():
    """ReAct-style agent whose prompt adds a Reflection step after each observation.

    Tool management and execution methods are attached in later cells.
    """

    def __init__(self, ragSystem):
        """Create the agent and its executor with an initially empty tool list.

        `reflexionPrompt` is a module-level string defined in a later cell; it
        must exist by the time an instance is created.
        """
        self.rag = ragSystem
        self.tools = []
        self.parser = SubQueryOutputParser()
        self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        self.prompt = PromptTemplate.from_template(reflexionPrompt)
        self.agent = create_react_agent(self.llm, self.tools, self.prompt)
        executorSettings = {
            "agent": self.agent,
            "tools": self.tools,
            "handle_parsing_errors": True,
            "verbose": True,
        }
        self.reactAgentExecutor = AgentExecutor(**executorSettings)

In [66]:
# Agent prompt text. It declares the placeholders {tools}, {tool_names}, {input}
# and {agent_scratchpad}, and adds a "Reflection" line to the usual
# Thought/Action/Action Input/Observation cycle.
reflexionPrompt = '''
You are a React agent tasked with performing an analysis on the background independence of company directors.
Your goal is to gather and analyze data accurately using the tools provided.


You must strictly use the information obtained from these tools and refrain from manipulating or fabricating any details.

You have access to the following tools:
{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
Reflection: Reflect on the observation. Analyze whether the information supports the background independence of the directors. Consider if the result aligns with expectations or if there are any alternative approaches to explore further.
... (this Thought/Action/Action Input/Observation/Reflection cycle can repeat until the analysis is completed)
Thought: I now have completed my task and am ready to provide a well-supported final answer.
Final Answer: Provide your final analysis, ensuring it is backed by the data gathered and reflects a comprehensive understanding of the background independence of the directors.

Begin!

Question: {input}
Thought:{agent_scratchpad}
'''

In [67]:
def addTool(self, tool):
    """Register `tool` with the agent and rebuild the executor so it is picked up."""
    self.tools.extend([tool])
    self.updateChain()

In [68]:
def updateChain(self):
    """Rebuild the agent and its executor from the current tool list.

    The executor is created with the same options the constructor uses, so
    malformed LLM output is still handled by `handle_parsing_errors` after a
    rebuild, and `self.agent` always refers to the agent actually in use.
    """
    self.agent = create_react_agent(self.llm, self.tools, self.prompt)
    self.reactAgentExecutor = AgentExecutor(
        agent=self.agent,
        tools=self.tools,
        handle_parsing_errors=True,
        verbose=True,
    )

In [69]:
def runn(self, input):
    """Run the agent executor on `input` and return whatever it returns."""
    return self.reactAgentExecutor.invoke({"input": input})

In [70]:
# Attach the functions defined in the preceding cells as methods of REACTwithReflexion.
REACTwithReflexion.addTool = addTool
REACTwithReflexion.updateChain = updateChain
REACTwithReflexion.runn = runn

## Formatting method

In [71]:
def formatDocs(self, docs: List[Document], isTop2=False):
    """Join the text of the leading documents with blank lines.

    Args:
        docs: documents in ranked order.
        isTop2: when True only the first 2 documents are used, otherwise the
            first 5.

    Returns:
        The selected documents' `page_content` joined by a blank line; an
        empty string for an empty list.
    """
    # Both modes return a string; only the number of documents kept differs.
    limit = 2 if isTop2 else 5
    return "\n\n".join(doc.page_content for doc in docs[:limit])

# Attach formatDocs as a method of RAG.
RAG.formatDocs = formatDocs

## RAG chain with Query Decomposition

In [72]:
def makeRagChain(self, reranker: Optional["RerankerRAG"] = None,decomposer = None):
    """Build the question-answering chain.

    Args:
        reranker: optional reranker wrapper; when omitted the base retriever
            is used directly.
        decomposer: optional query decomposer; when omitted the original
            question is used as the only search query.

    Returns:
        A runnable that takes the question string and returns the answer
        text, or None (after printing a hint) when no retriever exists yet.

    For each question the context consists of the agent's tool output
    followed by the top two retrieved documents per sub-query.
    """
    # Check first, and via getattr, so a RAG without a vector store produces
    # the hint below instead of an AttributeError.
    if not getattr(self, "retriever", None):
        print("Retriever not initialized. Call createVectorStore first.")
        return

    ragPrompt = '''
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.

    Question: {question}

    Context: {context}

    Answer:
    '''
    prompt = PromptTemplate.from_template(ragPrompt)

    # Honor the None defaults of the signature instead of crashing on them.
    if reranker is not None:
        retriever = reranker.createReranker(self.retriever)
    else:
        retriever = self.retriever

    if decomposer is not None:
        # Built once here rather than on every question.
        decomposer.createDecomposerChain()

    def retrieveContext(query):
        if decomposer is not None:
            subqueries = decomposer.queryAnalyzer.invoke({"question": query})
            subqueryTexts = [subquery.subQuery for subquery in subqueries]
        else:
            subqueryTexts = [query]
        toolrun = self.reactWithReflexion.runn(query)
        context = f"Tool output: {toolrun}"
        print(context)
        for subqueryText in subqueryTexts:
            docs = retriever.invoke(subqueryText)
            topTwo = docs[:2]
            formattedDocs = self.formatDocs(topTwo)
            context += f"\n{formattedDocs}\n"
        return context

    ragChain = (
        {
            "context": RunnableLambda(retrieveContext),
            "question": RunnablePassthrough()
        }
        | prompt
        | self.llm
        | StrOutputParser()
    )

    return ragChain


# Attach makeRagChain as a method of RAG.
RAG.makeRagChain = makeRagChain

# Exec

In [73]:
# Build the RAG system, then create its FAISS index (seeded with one empty text) and retriever.
ten_k_rag = RAG(LLM, embeddings)
ten_k_rag.createVectorStore()

In [74]:
# Download, chunk and index the filings listed in TEN_K_URL and register the companies.
ten_k_rag.addPageContent(TEN_K_URL)

In [75]:
# Sanity check: report how many vectors the FAISS index holds after ingestion.
print(f"Number of documents in the vector store: {ten_k_rag.vectorStore.index.ntotal}")

Number of documents in the vector store: 2028


In [76]:
# Reranker configured from the notebook constants (model name and top_n).
reranker = RerankerRAG( RERANKER_MODEL, TOP_N)

In [77]:
# Tool that lists a company's directors with their LinkedIn handles.
linkedinTool = companyDirectorsLinkedinHandle(ten_k_rag)

In [78]:
# Tool that reports a stored director's education and work history.
backgroundTool = directorBackground(ten_k_rag)

In [79]:
# Register the tool with the agent; addTool also rebuilds the executor.
ten_k_rag.reactWithReflexion.addTool(linkedinTool)

In [80]:
# Register the tool with the agent; addTool also rebuilds the executor.
ten_k_rag.reactWithReflexion.addTool(backgroundTool)

In [81]:
# Assemble the final chain: reranked retrieval, query decomposition and the agent's tool output.
rag_chain_with_REACTwithReflexion= ten_k_rag.makeRagChain(reranker, QueryDecomposer())

In [82]:
# rag_chain_with_REACTwithReflexion.invoke("Make an analisys on the independence of backgrounds of directors at tesla", )

In [83]:
# Run the chain on one question; the LangFuse handler is attached for tracing.
user_input = "Make an analisys on the independence of backgrounds of directors at General Motors"
rag_chain_with_REACTwithReflexion.invoke(input=user_input, config={"callbacks": [langfuse_handler]})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should start by using the companyDirectorsLinkedinHandle tool to gather the names of the directors at General Motors. Then, I can use the directorsProfessionalExperience tool to look into their education and career backgrounds to analyze their independence.
Action: companyDirectorsLinkedinHandle
Action Input: General Motors[0m[36;1m[1;3mPATRICIA F. RUSSO: https://www.linkedin.com/in/chasemoskowitz
ANEEL BHUSRI: https://www.linkedin.com/in/aneelbhusri
WESLEY G. BUSH: https://www.linkedin.com/in/nicoconut
JOANNE C. CREVOISERAT: https://www.linkedin.com/in/joanne-crevoiserat
LINDA R. GOODEN: No LinkedIn profile found for LINDA R. GOODEN
JOSEPH JIMENEZ: https://www.linkedin.com/in/joseph-jimenez-09a693123
JONATHAN MCNEILL: https://www.linkedin.com/in/jonmcneill1
JUDITH A. MISCIK: No LinkedIn profile found for JUDITH A. MISCIK
THOMAS M. SCHOEWE: No LinkedIn profile found for THOMAS M. SCHOEWE
MARK A. TATUM: https://www.linked

'The analysis on the background independence of directors at General Motors is somewhat limited due to the inability to retrieve detailed information from LinkedIn for some directors. However, based on the LinkedIn handles obtained, it seems that there is a mix of directors with and without LinkedIn profiles, which could potentially impact the transparency and visibility of their backgrounds. Further investigation into the education and career backgrounds of the directors with available information could provide more insights into their independence.'