# Multi-modal RAG with LangChain, Groq, Gemini models

## SetUp

Install the dependencies you need to run the notebook.

In [15]:
import streamlit as st
import os
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
import time

In [16]:
load_dotenv()

groq_api_key = os.getenv('GROQ_API_KEY')
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")

prompt = ChatPromptTemplate.from_template("""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
<context>
Questions:{input}
""")


In [17]:
uploaded_folder = './uploaded_docs'
if not os.path.exists(uploaded_folder):
    os.makedirs(uploaded_folder)

In [18]:
def vector_embedding():
    if "vectors" not in globals():
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        loader = PyPDFDirectoryLoader(uploaded_folder)
        docs = loader.load()

        if len(docs) < 4:
            raise ValueError("Not enough documents loaded. Please ensure there are at least 4 documents.")

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        final_documents = text_splitter.split_documents(docs[:4])
        vectors = FAISS.from_documents(final_documents, embeddings)
        globals()['vectors'] = vectors
    return globals()['vectors']

In [19]:
def main():
    prompt1 = input("Enter Your Question From Documents: ")

    if input("Initialize Document Embeddings? (yes/no): ").lower() == 'yes':
        vector_embedding()
        if 'vectors' in globals():
            print("Document embeddings have been initialized successfully.")
        else:
            print("Failed to initialize document embeddings. Please check the uploaded documents and try again.")

    if 'vectors' in globals():
        if prompt1:
            try:
                document_chain = create_stuff_documents_chain(llm, prompt)
                retriever = globals()['vectors'].as_retriever()
                retrieval_chain = create_retrieval_chain(retriever, document_chain)
                start = time.process_time()
                response = retrieval_chain.invoke({'input': prompt1})
                print("Response time:", time.process_time() - start)
            except Exception as e:
                print(f"An error occurred: {str(e)}")
                return
            print(response['answer'])

            # Display document similarity search results
            print("Document Similarity Search Results:")
            for i, doc in enumerate(response["context"]):
                print(doc.page_content)
                print("--------------------------------")
    else:
        print("Please initialize the document embeddings first by typing 'yes' when prompted.")

if __name__ == "__main__":
    main()

Document embeddings have been initialized successfully.
Response time: 0.0625
The name on the certificate is Yash Kumar Saini.
Document Similarity Search Results:
Jakarta EE Servlets
Course completed by Yash Kumar Saini 
Nov 11, 2024 at 06:14PM UTC  2 hours 36 minutes•
Top skills covered
 Jakarta EE Jakarta Servlet
Certificate ID: 0967c7afc280adf5029a6b5a1cf9441b8f45c6ef389ffa2ce4d7d51d3f50862a
Head of Global Content, Learning
Jakarta EE Servlets
Course completed by Yash Kumar Saini 
Nov 11, 2024 at 06:14PM UTC  2 hours 36 minutes•
Top skills covered
 Jakarta EE Jakarta Servlet
Certificate ID: 0967c7afc280adf5029a6b5a1cf9441b8f45c6ef389ffa2ce4d7d51d3f50862a
Head of Global Content, Learning
--------------------------------
Java: Data Structures
Course completed by Yash Kumar Saini 
Aug 31, 2024 at 04:33AM UTC  49 minutes•
Top skills covered
 Java Data Structures
Certificate ID: 2d88c7c63e99d0cab5fd3debcdbf58eab07db4c1b91162afdb95c1130aae6fc9
Head of Global Content, Learning
Java: Data S