# Step 1: Set up the environment

In [1]:
import os
import google.generativeai as genai

# A non-empty COLAB_RELEASE_TAG environment variable is treated as the marker
# for running inside Google Colab; anything else counts as a local run.
COLAB = bool(os.getenv("COLAB_RELEASE_TAG"))
print("Running on COLAB environment." if COLAB else "WARNING: Running on LOCAL environment.")


Running on COLAB environment.


In [2]:
# Clone the workshop data repository into the current working directory.
!git clone https://github.com/openknowledge/workshop-genai-data.git
# Directory prefix that load_file_content() prepends to every file name
# (note the trailing slash, the file name is appended without a separator).
# NOTE(review): the absolute path assumes the clone above landed in /content,
# i.e. the notebook runs on Colab — adjust when running elsewhere.
PROCESSED_DATA_PATH = "/content/workshop-genai-data/processed/gutenberg/"

Cloning into 'workshop-genai-data'...
remote: Enumerating objects: 28, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 28 (delta 6), reused 20 (delta 3), pack-reused 0 (from 0)[K
Receiving objects: 100% (28/28), 436.74 KiB | 669.00 KiB/s, done.
Resolving deltas: 100% (6/6), done.


In [3]:
# import colab specific lib to read user data (aka colab managed secrets)
from google.colab import userdata

In [4]:
# Configure the Google GenAI client with an API key so the models can be called.
# The key is read from the Colab secret named 'GEMINI_API_KEY' (it must be set as
# Colab userdata beforehand) and kept in the local variable GOOGLE_API_KEY.
GOOGLE_API_KEY=userdata.get('GEMINI_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [5]:
# Install additional libraries
%%capture
!pip install -qU langchain-text-splitters
!pip install chromadb

In [6]:
# Import additional libraries
from langchain_text_splitters import RecursiveCharacterTextSplitter
from chromadb import EphemeralClient
import uuid

In [7]:
# Set default values for model, model parameters and prompt.
# These are the fallback arguments of call_genai_model_for_completion().
DEFAULT_MODEL = "gemini-1.5-flash"
DEFAULT_CONFIG_TEMPERATURE = 0.9
DEFAULT_CONFIG_TOP_K = 1
DEFAULT_CONFIG_MAX_OUTPUT_TOKENS = 200
# Fixed typo in the prompt text ("Your are" -> "You are").
DEFAULT_SYSTEM_PROMPT = "You are a friendly assistant"
DEFAULT_USER_PROMPT = " "

## Define helper functions

In [8]:
# This will be the chromadb collection we use as a knowledge base.
# It is created through the in-memory EphemeralClient, so nothing more than that
# client is needed here; the collection is named "default" and is reused if it
# already exists (get_or_create_collection).
chromadb_collection = EphemeralClient().get_or_create_collection(name="default")

# Helper to inspect the knowledge base
def peek_knowledgebase():
  """Print the preview that the collection's `peek()` returns (ids, embeddings, ...)."""
  preview = chromadb_collection.peek()
  print(preview)

In [9]:
def call_genai_model_for_completion(
        model_name: str = DEFAULT_MODEL,
        config_temperature:float = DEFAULT_CONFIG_TEMPERATURE,
        config_top_k: int = DEFAULT_CONFIG_TOP_K,
        config_max_output_tokens: int = DEFAULT_CONFIG_MAX_OUTPUT_TOKENS,
        system_prompt : str = DEFAULT_SYSTEM_PROMPT,
        user_prompt : str = DEFAULT_USER_PROMPT,
        verbose: bool = False
        ):
    """Send one prompt to a Gemini model and return the raw response.

    Args:
        model_name: Name of the generation model to call.
        config_temperature: Sampling temperature for the generation config.
        config_top_k: Top-k value for the generation config.
        config_max_output_tokens: Upper bound for the number of generated tokens.
        system_prompt: Instruction handed to the model as its system instruction.
            An empty prompt means "no system instruction".
        user_prompt: The actual request sent as content.
        verbose: When True, print a summary of all inputs before calling the model.

    Returns:
        The response object returned by `generate_content`; its `.text`
        attribute is what print_completion_result() shows by default.
    """

    if verbose:
        # print out summary of input values / parameters
        print('Generating answer for following config:')
        print(f'  - SYSTEM PROMPT used:\n {system_prompt}')
        print(f'  - USER PROMPT used:\n {user_prompt}')
        print(f'  - MODEL used:\n {model_name} (temperature = {config_temperature}, top_k = {config_top_k}, max_output_tokens = {config_max_output_tokens})')

    # create generation config
    model_config = genai.GenerationConfig(
        max_output_tokens=config_max_output_tokens,
        temperature=config_temperature,
        top_k=config_top_k
    )

    # create genai model with generation config; the system prompt is passed as
    # a real system instruction instead of being mixed into the user content
    genai_model = genai.GenerativeModel(
        model_name=model_name,
        generation_config=model_config,
        system_instruction=system_prompt or None,
    )

    # only the user prompt is sent as content
    response = genai_model.generate_content(user_prompt)
    return response

In [10]:
def print_completion_result(completion_result, full:bool = False):
    """Print a model answer below a fixed heading.

    Args:
        completion_result: Object returned by the model call; its `.text`
            attribute is read unless `full` is set.
        full: When True, print the whole result object instead of only `.text`.
    """
    # heading first, then the selected payload
    print(f'\nANSWER of genAI model: \n')
    payload = completion_result if full else completion_result.text
    print(payload)

# Step 2: Configure the genAI models

In [11]:
# Model used by generate_response() to produce the final answer.
GENERATION_MODEL = "gemini-1.5-flash"
# Model used by do_embed() / do_batch_embed() to create embeddings.
EMBEDDING_MODEL = "models/text-embedding-004"

# Step 3: Configure retriever

In [12]:
# Number of chunks do_rag() fetches from the knowledge base per question.
DEFAULT_K = 3
# Chunk size and overlap handed to the text splitter in do_chunk();
# both are measured with len(), i.e. in characters.
DEFAULT_CHUNK_SIZE = 2000
DEFAULT_CHUNK_OVERLAP = 100

# Step 4: Define RAG Building Blocks

In [13]:
# Get content of books. The content will already be cleansed.
def load_file_content(file_name: str) -> str:
  """Read one prepared book file and return its complete text.

  Args:
    file_name: File name relative to PROCESSED_DATA_PATH (the prefix is
      prepended as-is, so it has to end with a path separator).

  Returns:
    The whole file content as a single string.

  Raises:
    OSError: If the file cannot be opened (e.g. it does not exist).
  """
  # Explicit encoding keeps the result independent of the platform's locale.
  # NOTE(review): assumes the processed files are UTF-8 encoded — confirm.
  with open(f"{PROCESSED_DATA_PATH}{file_name}", "r", encoding="utf-8") as f:
    return f.read()

In [14]:
# Building Block "Chunking": Cut the content into smaller, overlapping pieces
def do_chunk(text: str) -> list[str]:
  """Split `text` into chunks using the configured chunk size and overlap.

  Args:
    text: The full text to split.

  Returns:
    The list of chunks produced by the recursive character splitter.
  """
  splitter_options = {
      "chunk_size": DEFAULT_CHUNK_SIZE,
      "chunk_overlap": DEFAULT_CHUNK_OVERLAP,
      "length_function": len,  # sizes are measured in characters
  }
  splitter = RecursiveCharacterTextSplitter(**splitter_options)
  chunks = splitter.split_text(text=text)
  return chunks

In [15]:
# Building Block "Embedding": Turn a given chunk into its multi dimensional embedding.
def do_embed(chunk: str) -> list[float]:
  """Embed a single text with EMBEDDING_MODEL and return the "embedding" entry of the result."""
  embedding_result = genai.embed_content(model=EMBEDDING_MODEL, content=chunk)
  return embedding_result.get("embedding")

def do_batch_embed(chunks: list[str]) -> list[list[float]]:
  """Embed several texts in one call and return the "embedding" entry of the result."""
  batch_result = genai.embed_content(model=EMBEDDING_MODEL, content=chunks)
  return batch_result.get("embedding")

In [16]:
# Building Block "Knowledgebase": Store embeddings and the corresponding content in a vectorstore
def persist_embeddings(chunks: list[str], embeddings: list[list[float]])-> None:
  """Add chunks together with their embeddings to the knowledge base.

  Every chunk is stored under a freshly generated UUID, so calling this twice
  with the same chunks stores them twice.

  Args:
    chunks: The text chunks to store as documents.
    embeddings: One embedding vector per chunk, in the same order as `chunks`.
  """
  # Nothing to store; skip the call because the collection's add() does not
  # accept empty id lists.
  if not chunks:
    return
  ids = [str(uuid.uuid4()) for _ in chunks]
  chromadb_collection.add(ids=ids, documents=chunks, embeddings=embeddings)

In [17]:
# Building Block "Augmentation": Create an updated prompt by merging the original user input with the provided context
def augment(user_input: str, context: list[str]) -> str:
  """Build the prompt that combines the retrieved context with the question.

  Args:
    user_input: The user's question.
    context: The retrieved chunks; they are joined with newlines.

  Returns:
    The prompt text consisting of the instruction, the context, the question
    and a trailing "Answer:" marker.
  """
  prepared_context = "\n".join(context)
  # The context is inserted verbatim; no extra punctuation is appended to it.
  augmented_prompt = f"""
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n{prepared_context}\n
    Question: \n{user_input}\n

    Answer:
  """
  return augmented_prompt

In [18]:
# Building Block "Top-k Fetching": Get the k semantically closest chunks to the user input from the knowledgebase
def do_top_k_fetching(user_input_embedding: list[float], k: int) -> list[str]:
  """Query the knowledge base for the `k` chunks closest to one embedding.

  Args:
    user_input_embedding: Embedding of the user input.
    k: Number of results to request.

  Returns:
    The documents (chunks) found for the single query embedding.
  """
  # The query API takes a list of embeddings and answers with one result list
  # per embedding. We always query for exactly one embedding, so we wrap it in
  # a list and unwrap the first (and only) list of documents afterwards.
  query_result = chromadb_collection.query(
      query_embeddings=[user_input_embedding],
      n_results=k,
  )
  documents_per_query = query_result["documents"]
  return documents_per_query[0]

In [19]:
# Building Block "Generation": Let the generation model answer the prompt
def generate_response(prompt: str) -> None:
  """Call GENERATION_MODEL with `prompt` as user prompt and print the answer text."""
  answer = call_genai_model_for_completion(user_prompt=prompt, model_name=GENERATION_MODEL)
  print_completion_result(answer)

# Step 5: Create the ingestion pipeline

In [20]:
def do_ingestion(file_names: list[str]) -> None:
  """Load, chunk, embed and store each of the given files, one after another.

  Args:
    file_names: Names of the prepared book files to ingest.
  """
  for current_file in file_names:
    # Load the prepared book and cut it into chunks
    book_chunks = do_chunk(load_file_content(current_file))

    # Embed all chunks of the book in one batch
    chunk_embeddings = do_batch_embed(book_chunks)

    # Store chunks and embeddings in the knowledgebase
    persist_embeddings(book_chunks, chunk_embeddings)


# Step 6: Perform ingestion

In [21]:
# Define file names to be ingested (resolved relative to PROCESSED_DATA_PATH)
file_names = ['study_in_scarlett.txt']

# Perform ingestion. Depending on the chunk_size this might take some minutes.
# Note: every run stores the chunks under fresh UUIDs, so executing this cell
# again adds the same chunks to the knowledgebase a second time.
do_ingestion(file_names)

In [22]:
# Use helper function to peek into knowledgebase and verify that the
# ingestion above actually stored ids, embeddings and documents
peek_knowledgebase()

{'ids': ['da1cc01f-1c64-4f82-9456-2ed6a365604b', '3045aa67-8a28-469f-a15d-82a9a4c31b6c', '7cc5c1e9-2f2b-4460-8e97-a4f9205352ac', '2af8a782-f3eb-4647-986c-4fa9f59a6271', '63cdad02-6f88-48dd-a689-ca8c54469d74', '8c332258-642f-463d-a539-812ab4bbbfda', '3c66f79d-7c93-4f14-b52a-bc3fb6d15262', '07b05615-b98c-4e76-acfc-900c821df04b', '00276b89-8ada-40c2-9d40-5beecf37f2a4', 'b29f7f19-3186-42d1-9cbb-c1bb5cc8f8ef'], 'embeddings': array([[ 0.04527783,  0.04658872, -0.02287614, ..., -0.00307323,
         0.0339922 , -0.05425236],
       [ 0.05111597,  0.00918984, -0.03866111, ...,  0.01949182,
         0.01476763, -0.05980605],
       [ 0.05621178, -0.00796154, -0.01433838, ..., -0.02149183,
         0.0053122 , -0.05220319],
       ...,
       [ 0.02364247,  0.01167416, -0.02995582, ..., -0.04703662,
        -0.01098349, -0.0318405 ],
       [-0.00460235,  0.00886577, -0.03714716, ..., -0.04433972,
         0.01823584, -0.04718024],
       [ 0.03279264, -0.0086745 , -0.0115139 , ..., -0.01131358,

# Step 7: Create RAG pipeline

In [23]:
def do_rag(user_input: str, verbose: bool = False) -> None:
  """Answer `user_input` via retrieval, augmentation and generation.

  Args:
    user_input: The question to answer.
    verbose: When True, also print the retrieved chunks and the final prompt.
  """
  # The question has to live in the same vector space as the stored chunks
  query_vector = do_embed(chunk=user_input)

  # Retrieval: fetch the DEFAULT_K closest chunks from the knowledgebase
  retrieved_chunks = do_top_k_fetching(user_input_embedding=query_vector, k=DEFAULT_K)
  if verbose:
    print(f'Retrieved context:\n {retrieved_chunks}')

  # Augmentation: merge question and retrieved chunks into one prompt
  final_prompt = augment(user_input=user_input, context=retrieved_chunks)
  if verbose:
    print(f'Augmented prompt:\n {final_prompt}')

  # Generation: let the model answer and print the result
  generate_response(prompt=final_prompt)


# Step 8: Perform RAG

In [24]:
# Define user input. This should be a question regarding the ingested book
user_input = "Lucy noticed a number on the ceiling when taking breakfast. which number was written into the ceiling?" # The answer should contain the number "28"

# Run the complete RAG pipeline (retrieve, augment, generate) for the question;
# the generated answer is printed below
do_rag(user_input=user_input)


ANSWER of genAI model: 

The number written on the ceiling was **28**. 

