# Architecture

![Screenshot](./flow_diagram.png)

In [21]:
import os
import warnings
from getpass import getpass

from langchain_chroma import Chroma #Vectorstore to store the embedded vectors
from langchain_community.document_loaders.csv_loader import CSVLoader #To load the csv file (data containing companys faq)
from langchain_huggingface import ChatHuggingFace,HuggingFaceEndpoint,HuggingFaceEmbeddings # Load the llm and embedding model from huggingface
warnings.filterwarnings('ignore')

## Step-1 Load the data (company_QA.csv)

#### Load the CSV file that contains the FAQ questions about the company

In [2]:
file_path = "./company_QA.csv"  # Path to the FAQ CSV file
loader = CSVLoader(file_path=file_path)  # CSVLoader reads the CSV file into Document objects
# lazy_load() yields the documents one at a time; collect them all into a list
docs = list(loader.lazy_load())


In [3]:
len(docs) # Total number of Document objects loaded from the CSV file

50

In [4]:
docs[0] # First Document object: page_content holds the Question/Answer text, metadata holds the source path and row index

Document(metadata={'source': './company_QA.csv', 'row': 0}, page_content="Question: Where is the company's headquarters located?\nAnswer: Our headquarters is located in San Francisco, California. Nestled in the vibrant downtown area, it provides easy access to public transport and major city landmarks.")

### Note: No chunking is needed, since each Document object is a single row of the CSV file

## Step-2 Load the LLM and Embedding model from HuggingFace

In [10]:
# Load the Mistral 7B Instruct model via HuggingFaceEndpoint
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"  # Hugging Face repo of the model

# Never hardcode the access token in the notebook. Read it from the environment
# and fall back to an interactive prompt (the typed value is not echoed).
# NOTE(review): the token that was previously committed in this cell must be
# treated as leaked and revoked in the Hugging Face account settings.
api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass("Hugging Face API token: ")

llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    huggingfacehub_api_token=api_key,
    temperature=0.3,
    max_new_tokens=200,  # Max number of tokens to generate in the final output
)
model = ChatHuggingFace(llm=llm)  # Chat wrapper around the endpoint LLM

# Test the model if working properly
model.invoke("who is messi ?")

AIMessage(content='Lionel Messi is a world-renowned professional footballer from Argentina. He is considered one of the greatest players of all time and has spent the majority of his professional career playing for FC Barcelona in La Liga, where he won numerous accolades, including multiple FIFA World Player of the Year awards and the European Golden Shoe for top goalscorer in Europe. Messi also plays for the Argentina national team. His skills, speed, and precision with the ball have made him an iconic figure in global sports.', additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=107, prompt_tokens=8, total_tokens=115), 'model': '', 'finish_reason': 'stop'}, id='run-51564ce5-b7ea-4b32-91e5-e57b7bdf6004-0')

In [None]:
# Embedding model used to turn the FAQ documents and the user queries into vectors
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

## Step-3 Creating a Vectorstore and a retriever

#### a. Chroma vector store to store the embedding vectors
#### b. retriever to fetch the relevant documents based on user query from vectorstore

In [None]:
# Create the vector store and add the documents to it.
# Each document gets a deterministic ID built from its CSV source path and row
# index, so that re-running this cell is meant to overwrite the same entries
# instead of appending duplicate copies of every FAQ row to the collection.
doc_ids = [f"{doc.metadata['source']}:{doc.metadata['row']}" for doc in docs]
db = Chroma.from_documents(
    documents=docs,             # Document objects (one per CSV row)
    embedding=embedding_model,  # Hugging Face embedding model
    ids=doc_ids,
)

In [None]:
# Build a retriever on top of the vector store.
#   search_type "mmr"  -> maximal marginal relevance search
#   'k'                -> return the top 2 documents
#   'lambda_mult'      -> controls how diverse the returned documents are
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={'k': 2, 'lambda_mult': 0.4},
)

In [27]:
# Sanity-check the retriever with a sample question and print what it returns
query = "can we take pictures inside the office?"
result = retriever.invoke(query)
for index, res in enumerate(result):
    # One header line followed by the document text, same output as two prints
    print(f"---Document-{index+1}", res.page_content, sep="\n")

---Document-1
Question: Is photography allowed inside the headquarters?
Answer: Photography is generally prohibited except in designated areas or with special permission for official events or promotional purposes.
---Document-2
Question: Can clients rent office space temporarily?
Answer: Temporary office space is available for clients during collaborative projects. Terms and availability can be discussed with the account management team.
