<a href="https://colab.research.google.com/github/mhassack/challenges/blob/master/2024_12_29_Gemini_rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
!pip install -q langchain_google_genai langchain_google_community langchain

In [None]:
import os
import logging

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_community import BigQueryVectorStore
from langchain.prompts import PromptTemplate, StringPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

from langchain_core.runnables import RunnableLambda

In [None]:
# Configure root logging at DEBUG level, then create the logger this notebook
# uses for its own progress messages.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

In [None]:
# Detect the runtime instead of hard-coding it, so this cell runs unedited both
# inside and outside Colab. The google.colab package is expected to be
# importable only in a Colab kernel.
try:
    from google.colab import auth, userdata
    RUNNING_IN_COLAB = True
except ImportError:
    RUNNING_IN_COLAB = False

if RUNNING_IN_COLAB:
    logger.debug("Running in Colab")
    # Read the API key from the Colab user data store.
    GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
    auth.authenticate_user()
    from IPython.display import HTML, display

    def set_css(*_event_args):
        """Emit a CSS rule that makes <pre> output wrap long lines.

        Registered below as a ``pre_run_cell`` callback. IPython passes such
        callbacks an execution-info argument; it is accepted and ignored.
        """
        display(HTML('''
      <style>
        pre {
            white-space: pre-wrap;
        }
      </style>
      '''))
    get_ipython().events.register('pre_run_cell', set_css)
else:
    logger.debug("Loading dotenv")
    from dotenv import load_dotenv
    load_dotenv()  # take environment variables from .env.
    # Fails with KeyError if the variable is missing.
    GEMINI_API_KEY = os.environ['GEMINI_API_KEY']

# GCP project passed to the BigQuery vector store
GOOGLE_CLOUD_PROJECT = "ai-experiments-445909"

# Gemini chat model to use (alternative: "gemini-1.5-pro")
GEMINI_MODEL = "gemini-2.0-flash-exp"


In [None]:
# Build the embeddings client that is handed to the vector store
logger.debug("Creating the embeddings object")
embedding = GoogleGenerativeAIEmbeddings(
    google_api_key=GEMINI_API_KEY,
    model="models/text-embedding-004",
)

In [None]:
# BigQuery dataset, table and location holding the stored embeddings
BIGQUERY_DATASET_ID = "rag_experiments"
BIGQUERY_TABLE_ID = "test_embeddings_overlap"
BIGQUERY_LOCATION = "europe-west2"

# Value passed to the retriever as search_kwargs["k"]
# (the number of documents fetched per query).
RETRIEVER_TOP_K = 10

# Create the BQ vector store
logger.debug("Create the BigQuery Vector Store")
store = BigQueryVectorStore(
    project_id=GOOGLE_CLOUD_PROJECT,
    dataset_name=BIGQUERY_DATASET_ID,
    table_name=BIGQUERY_TABLE_ID,
    embedding=embedding,
    location=BIGQUERY_LOCATION
)

# Expose the store as a retriever so it can be used as a chain step
logger.debug("Getting the BQ store as a retriever")
retriever = store.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

INFO:langchain_google_community.bq_storage_vectorstores._base:BigQuery table ai-experiments-445909.rag_experiments.test_embeddings_overlap initialized/validated as persistent storage. Access via BigQuery console:
 https://console.cloud.google.com/bigquery?project=ai-experiments-445909&ws=!1m5!1m4!4m3!1sai-experiments-445909!2srag_experiments!3stest_embeddings_overlap


In [None]:
# Chat model client; settings are collected first, then passed in one go
logger.debug("Create the LLM object")
llm_settings = dict(
    model=GEMINI_MODEL,
    google_api_key=GEMINI_API_KEY,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)
llm = ChatGoogleGenerativeAI(**llm_settings)


In [None]:
# Question put to the model.
# Alternative: "In the context of Asterisk, tell me what IAX is"
question = (
    "In the context of Asterisk, "
    "what does the mailboxdetail setting do?"
)

In [None]:
# Baseline chain: the question goes straight to the model, no retrieved context
non_rag_chain = llm | StrOutputParser()

# Run the baseline chain and show its answer
response = non_rag_chain.invoke(question)
print(f"Non RAG response: {response}")

In [None]:
def inspect(state):
    """Print ``state`` to stdout and return it unchanged.

    Acts as a pass-through step, so the value flowing through a chain can be
    seen without altering it.
    """
    print(state)
    return state

def format_docs(docs):
    """Join the ``page_content`` of each item in ``docs``, separated by a blank line."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Prompt text for short answers; {question} and {context} are filled in later
logger.debug("Defining the LLM prompt")
template_question = (
    "\n"
    "You are an assistant for question-answering tasks. Use the following pieces of\n"
    "retrieved context to answer the question. If you don't know the answer, just say\n"
    "that you don't know. Use three sentences maximum and keep the answer concise.\n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Answer:\n"
)

template_document = """
Please write me a short document that answers the question below.
The document should be approximetly 2 pages of A4 long. If necessary, use the
following pieces of retrieved context to answer the question. If you don't know
the answer, just say that you don't know.
Question: {question}
Context: {context}
Answer:
"""


# Build the prompt object from the short-answer template
ragPrompt = PromptTemplate.from_template(template_question)

# First chain step: look up context for the question and pass the question on
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Assemble the RAG chain; the inspect step prints the filled-in prompt
rag_chain = rag_inputs | ragPrompt | RunnableLambda(inspect) | llm | StrOutputParser()

# Ask the question through the RAG chain and show the answer
response = rag_chain.invoke(question)
print(f"\n\nRAG response: {response}")

logger.debug("DONE")



text="\nYou are an assistant for question-answering tasks. Use the following pieces of \nretrieved context to answer the question. If you don't know the answer, just say \nthat you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: In the context of Asterisk, what does the mailboxdetail setting do? \nContext: name\nThis is the name of the mailbox owner. The company directory uses the text in this\nfield to allow callers to spell usernames.\nemail\nThis is the email address of the mailbox owner. Asterisk can send voicemail noti-\nfications (including the voicemail message itself) to the specified email box.\n\n[default]\n; regular mailbox with email notification\n101 => 4242,Example Mailbox,somebody@asteriskdocs.org\n; more advanced mailbox with email and pager notification and a couple of\n; special options\n102 => 9855,Another User,another@asteriskdocs.org,pager@asteriskdocs.org,\nattach=no|tz=central\n\nof a simple statement of whether new and old message