### API key

In [None]:
from google.colab import userdata
# Read the Colab secret named 'GOOGLE_API_KEY' into a module-level constant.
# NOTE(review): the cells visible below fetch the same secret again into
# `google_api_key` / `api_key` and never reference GOOGLE_API_KEY -- confirm
# whether this cell is still needed.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

### Install Libraries

In [None]:
# Install necessary libraries
! pip install langchain-google-genai
! pip install faiss-cpu
! pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.15-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.0-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB

### Retrieval-Augmented Generation chatbot with FAISS and LangChain using a CSV file

This code demonstrates how to create a question-answering chatbot using employee data stored in a CSV file.

The system uses LangChain, FAISS (vector database), and Google Generative AI for embedding and generating answers based on user queries.

In [None]:
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
import google.generativeai as genai
from langchain.prompts import PromptTemplate
from google.colab import userdata

# Read the Gemini API key from the Colab secret 'GOOGLE_API_KEY' and register it
# with the google.generativeai client. The helper functions below also pass
# `google_api_key` explicitly to the LangChain embedding and chat wrappers.
google_api_key = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=google_api_key)

def load_csv_to_text(csv_file_path):
    """Read a CSV file and flatten it into one newline-delimited string.

    Each data row becomes one line: the row's values are converted to text and
    joined with single spaces. The header row is not included.

    Rows are walked with ``itertuples`` rather than ``iterrows`` so every value
    keeps its own column's dtype; ``iterrows`` coerces a row to one common dtype,
    which renders an integer ``1`` as ``"1.0"`` when the row also holds a float.

    Args:
        csv_file_path: Path (or any source accepted by ``pd.read_csv``) of the CSV.

    Returns:
        str: One line per data row, each terminated by ``"\\n"``; an empty string
        when the file has no data rows.
    """
    dataframe = pd.read_csv(csv_file_path)

    # Plain tuples, one per row, without the index column.
    row_tuples = dataframe.itertuples(index=False, name=None)

    # Build the text with a single join instead of repeated string concatenation.
    return "".join(" ".join(map(str, values)) + "\n" for values in row_tuples)

def partition_text_into_segments(text_content, chunk_size=10000, chunk_overlap=1000):
    """Split a long string into overlapping segments for embedding.

    Args:
        text_content: The text to split.
        chunk_size: Value passed to the splitter as ``chunk_size``. Defaults to
            10000, the value this notebook originally hard-coded.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 1000, the value this notebook originally hard-coded.

    Returns:
        The list of segments produced by
        ``RecursiveCharacterTextSplitter.split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text_content)

def build_vector_index_from_segments(text_segments):
    """Embed the text segments, store them in a FAISS index and save it to disk.

    Args:
        text_segments: List of strings to embed and index.

    Returns:
        The FAISS vector store built from ``text_segments``. As a side effect the
        store is also written to the local folder ``"vector_index"``.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=google_api_key,
    )

    faiss_store = FAISS.from_texts(text_segments, embedding=embedder)

    # Persist the store so it can be reloaded from disk without re-embedding.
    faiss_store.save_local("vector_index")
    return faiss_store

def create_query_answering_chain(vector_index):
    """Build a RetrievalQA chain that answers questions from ``vector_index``.

    Args:
        vector_index: A vector store exposing ``as_retriever()``.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with the "stuff"
        chain type, a Gemini chat model and the custom prompt defined here.
    """
    # The prompt tells the model to answer only from the supplied context.
    answer_template = """
        Provide a detailed answer to the question based on the context provided. If the answer is not in the context, reply with, "answer is not available in the context". Do not provide incorrect answers.\n\n
        Context:\n {context}?\n
        Question: \n{question}\n
        Answer:
        """
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=answer_template,
    )

    gemini_chat = ChatGoogleGenerativeAI(
        model="gemini-pro",
        temperature=0.3,
        google_api_key=google_api_key,
    )

    return RetrievalQA.from_chain_type(
        llm=gemini_chat,
        chain_type="stuff",
        retriever=vector_index.as_retriever(),
        chain_type_kwargs={"prompt": qa_prompt},
    )

def process_user_query(query):
    """Answer one question against the saved FAISS index and print the result.

    The index stored in the local ``"vector_index"`` folder is loaded from disk
    and handed to the RetrievalQA chain. The chain's own retriever looks up the
    relevant documents, so no separate similarity search is run here and the
    corpus is not re-embedded or re-saved for every question.

    Args:
        query: The user's question as a string.

    Returns:
        None. The answer is printed as ``Response: <answer>``.
    """
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=google_api_key)

    # NOTE: allow_dangerous_deserialization=True opts in to loading serialized
    # (pickle-based) data; keep this only for an index this notebook wrote itself.
    faiss_index = FAISS.load_local("vector_index", embedding_model, allow_dangerous_deserialization=True)

    qa_chain = create_query_answering_chain(faiss_index)

    # "query" is the only input passed to the chain.
    response = qa_chain.invoke({"query": query}, return_only_outputs=True)

    print(f"Response: {response['result']}")

# Load the employee data and build the FAISS index once, before any question is asked.
csv_file_path = "/content/employee_data.csv"  # Path to your employee data CSV
csv_text = load_csv_to_text(csv_file_path)
text_segments = partition_text_into_segments(csv_text)
build_vector_index_from_segments(text_segments)

# Interactive loop: keep answering questions until the user types 'exit'.
while True:
    # Strip surrounding whitespace so inputs such as "exit " or "   " are handled.
    user_input = input("Ask a Question (or type 'exit' to quit): ").strip()

    if user_input.lower() == "exit":
        print("Exiting the program.")
        break

    # Nothing to answer for a blank line; prompt again without calling the model.
    if not user_input:
        continue

    print(f"User query: {user_input}")
    process_user_query(user_input)


Ask a Question(or type 'exit' to quit): who is Assistant?
query: who is Assistant?
response:Eve
Ask a Question(or type 'exit' to quit): Eve salary?
query: Eve salary?
response:60000
Ask a Question(or type 'exit' to quit): alice department?
query: alice department?
response:answer is not available in the context
Ask a Question(or type 'exit' to quit): Alice position?
query: Alice position?
response:HR Manager
Ask a Question(or type 'exit' to quit): Exit
Exiting the program.


### Install Libraries

In [None]:
# Install necessary libraries
!pip install --quiet langchain langchain_community langchain-google-genai chromadb

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.1/611.1 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.6/278.6 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m70.2 MB/s[0m eta [36m0:00:00

### Retrieval-Augmented Generation chatbot with Chroma and LangChain from a web page

This code demonstrates how to build a Retrieval-Augmented Generation (RAG) system using Google Generative AI, LangChain, and Chroma.

It enables users to input questions, retrieve relevant data from a vector store, and generate responses using a large language model.

In [None]:
# Import necessary libraries
import os
from IPython import get_ipython
from IPython.display import display
from langchain import PromptTemplate, hub
from langchain.docstore.document import Document
from langchain_community.document_loaders import WebBaseLoader
from langchain.schema import StrOutputParser
from langchain.schema.prompt_template import format_document
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI



In [None]:
# Fetching the API key from user data.
# `userdata` is not part of this section's import cell, so import it here; without
# this the line below only works if an earlier cell already imported it.
from google.colab import userdata

api_key = userdata.get('GOOGLE_API_KEY')

# Function to load and clean data from a URL
def fetch_and_clean_data(
    source_url,
    start_marker="A note from Google and Alphabet CEO Sundar Pichai:",
    end_marker=" Gemini's more complex reasoning abilities.",
):
    """Load a web page and keep only the text between two marker strings.

    Args:
        source_url: URL handed to ``WebBaseLoader``.
        start_marker: Text after whose first occurrence the kept content begins.
            If it is empty or not found, the content is kept from the beginning
            (previously a missing marker raised ``IndexError``).
        end_marker: Text before whose first occurrence the kept content ends.
            If it is empty or not found, the content is kept to the end.

    Returns:
        A one-element list holding a ``Document`` with the trimmed text and
        metadata ``{"source": "local"}``.

    Raises:
        ValueError: If the loader returns no documents for ``source_url``.
    """
    loaded_documents = WebBaseLoader(source_url).load()
    if not loaded_documents:
        raise ValueError(f"No content could be loaded from {source_url!r}")

    # Only the first loaded document is used.
    content = loaded_documents[0].page_content

    if start_marker:
        _, marker_found, remainder = content.partition(start_marker)
        if marker_found:
            content = remainder

    if end_marker:
        # partition() returns the whole string as the head when the marker is absent.
        content = content.partition(end_marker)[0]

    return [Document(page_content=content, metadata={"source": "local"})]

# Function to initialize embeddings for a specified model
def setup_embeddings(model_identifier):
    """Create a Google Generative AI embeddings client for the given model.

    Args:
        model_identifier: Model name passed as ``model``.

    Returns:
        A ``GoogleGenerativeAIEmbeddings`` instance configured with the
        module-level ``api_key``.
    """
    embedding_options = {"model": model_identifier, "google_api_key": api_key}
    return GoogleGenerativeAIEmbeddings(**embedding_options)

# Function to generate a vector store from documents and embeddings
def build_vector_index(docs, embeddings, storage_path):
    """Create a Chroma vector store from documents.

    Args:
        docs: Documents passed as ``documents``.
        embeddings: Embedding object passed as ``embedding``.
        storage_path: Directory passed as ``persist_directory``.

    Returns:
        The store returned by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=storage_path,
    )

# Function to create a retriever from an existing vector store
def prepare_retriever(index_disk, search_parameters):
    """Turn a vector store into a retriever.

    Args:
        index_disk: Vector store exposing ``as_retriever``.
        search_parameters: Dict forwarded unchanged as ``search_kwargs``.

    Returns:
        Whatever ``index_disk.as_retriever`` returns.
    """
    retriever = index_disk.as_retriever(search_kwargs=search_parameters)
    return retriever

# Function to initialize a language model (LLM)
def configure_llm(model_identifier, temp_value, probability_value):
    """Create the Gemini chat model used to generate answers.

    Args:
        model_identifier: Model name passed as ``model``.
        temp_value: Value passed as ``temperature``.
        probability_value: Value passed as ``top_p``.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance configured with the module-level
        ``api_key``.
    """
    llm_options = {
        "model": model_identifier,
        "temperature": temp_value,
        "top_p": probability_value,
        "google_api_key": api_key,
    }
    return ChatGoogleGenerativeAI(**llm_options)

# Function to generate a custom prompt template
def setup_prompt_template(template_string):
    """Wrap a raw template string in a ``PromptTemplate``.

    Args:
        template_string: Template text passed to ``PromptTemplate.from_template``.

    Returns:
        The object returned by ``PromptTemplate.from_template``.
    """
    compiled_prompt = PromptTemplate.from_template(template_string)
    return compiled_prompt

# Function to build the RAG chain
def create_rag_query_chain(retriever, query_template, language_model):
    """Compose retriever, prompt, model and output parser into one chain.

    Args:
        retriever: Retriever piped into the document-joining step to produce
            the ``context`` value.
        query_template: Prompt that receives the ``context`` and ``question`` keys.
        language_model: Model that receives the rendered prompt.

    Returns:
        The composed pipeline, ending in ``StrOutputParser()``.
    """
    def concatenate_documents(docs):
        # Merge the page contents into one context block, blank line between each.
        page_texts = [doc.page_content for doc in docs]
        return "\n\n".join(page_texts)

    chain_inputs = {
        "context": retriever | concatenate_documents,
        "question": RunnablePassthrough(),
    }

    rag_pipeline = chain_inputs | query_template
    rag_pipeline = rag_pipeline | language_model
    return rag_pipeline | StrOutputParser()

# Function to process a query through the chain
def execute_query(query_chain, query_input):
    """Run one query through a chain.

    Args:
        query_chain: Object exposing ``invoke``.
        query_input: Value passed to ``invoke`` unchanged.

    Returns:
        Whatever ``query_chain.invoke(query_input)`` returns.
    """
    answer = query_chain.invoke(query_input)
    return answer


# Example usage of the functions above: build every piece the query loop in the
# next cell needs (search_retriever, language_model, formatted_prompt).
# Load the Gemini announcement blog post and trim it to the article body.
documents = fetch_and_clean_data("https://blog.google/technology/ai/google-gemini-ai/")

# Set up embeddings and create the vector store, persisted under ./chroma_storage
gemini_model_embeddings = setup_embeddings("models/embedding-001")
vector_db = build_vector_index(documents, gemini_model_embeddings, "./chroma_storage")

# Open a second Chroma handle on the same persist directory and build the
# retriever from that handle (not from `vector_db`). {"k": 1} is forwarded as
# search_kwargs -- presumably limiting retrieval to the single best match; confirm.
index_disk = Chroma(persist_directory="./chroma_storage", embedding_function=gemini_model_embeddings)
search_retriever = prepare_retriever(index_disk, search_parameters={"k": 1})

# Configure the LLM (Language Model): temperature 0.7, top_p 0.85
language_model = configure_llm("gemini-pro", temp_value=0.7, probability_value=0.85)

# Define the prompt template; {question} and {context} are filled in by the RAG chain
prompt_template = """You are an assistant for answering questions based on the provided context.
If you don't know the answer, simply state that you don't know.
Your answer should be brief, with no more than five sentences.\n
Question: {question} \nContext: {context} \nAnswer:"""
formatted_prompt = setup_prompt_template(prompt_template)


In [None]:
# Build the RAG chain once: it does not depend on the question, so there is no
# need to reconstruct it on every iteration.
rag_chain = create_rag_query_chain(search_retriever, formatted_prompt, language_model)

# Loop for continuously asking questions until the user exits
while True:
    # Strip surrounding whitespace so inputs such as "exit " or "   " are handled.
    user_query = input("Enter your query (or type 'exit' to quit): ").strip()

    # If the user types 'exit', break the loop and exit
    if user_query.lower() == "exit":
        print("Exiting the program.")
        break

    # Nothing to answer for a blank line; prompt again without calling the model.
    if not user_query:
        continue

    # Execute the query with the user input and get the result
    query_result = execute_query(rag_chain, user_query)

    # Print the response to the query
    print(f"Response: {query_result}")


Enter your query (or type 'exit' to quit): who is Sundar Pichai?
Response: Sundar Pichai is the CEO of Google DeepMind, a company focused on developing AI technology.
Enter your query (or type 'exit' to quit): what is gemini
Response: Gemini is a multimodal AI model developed by Google DeepMind. It is designed to understand and operate across different types of information, including text, code, audio, image, and video. Gemini is optimized for different sizes, including Ultra, Pro, and Nano, to meet the needs of developers and enterprise customers. It has state-of-the-art performance on a wide variety of tasks, including natural image, audio, and video understanding, mathematical reasoning, and text and coding. Gemini is the first model to outperform human experts on the MMLU benchmark, which tests both world knowledge and problem-solving abilities.
Enter your query (or type 'exit' to quit): who is CEO and Co-Founder of Google DeepMind?
Response: Demis Hassabis is the CEO and Co-Founde