<a href="https://colab.research.google.com/github/saif1902/Multimodal_BizPal/blob/main/Multimodal_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multimodal RAG

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip -qqq install ffmpeg-python

In [3]:
!pip -qqq install yt_dlp

In [4]:
from yt_dlp import YoutubeDL
import os
import re

In [5]:
from posixpath import splitext
class Bizpal_YT_Audiodownload:
  def __init__(self, output_folder):
    self.output_folder = os.path.abspath(output_folder)
    self.audio_files_dict = {}

  def get_safe_filename(self, filename):
    safe_filename = re.sub(r'[^\w\-.]','_', filename)
    safe_filename = re.sub(r'_+','_', safe_filename)
    safe_filename = safe_filename[:50].strip('_')
    return safe_filename

  def download_audio(self, video_url):
    try:
      ydl_opts = {
          'format':'bestaudio/best',
          'postprocessors':[{
              'key': 'FFmpegExtractAudio',
              'preferredcodec':'mp3',
              'preferredquality':'192',
           }],
          'outtnpl': os.path.join(self.output_folder, '%(title)s.%(ext)s'),
          'ignoreerrors': True
          }
      with YoutubeDL(ydl_opts) as ydl:
          info = ydl.extract_info(video_url, download=True)
          filename = ydl.prepare_filename(info)
          base, ext = os.path.splitext(filename)
          new_file = base + '.mp3'

          print(f"Audio file downloaded: {new_file}")
          self.audio_files_dict[video_url] = new_file
          return new_file

    except Exception as e:
          print (f"Error downloading audio from {video_url}: {str(e)}")
          return None

  def download_multiplefiles(self,video_urls):
    for url in video_urls:
      print(f"Processing video: {url}")
      audio_file = self.download_audio(url)
      if audio_file is None:
        print(f"Failed to download audio from video: {url}")

    return self.audio_files_dict






In [6]:
downloader = Bizpal_YT_Audiodownload(output_folder="./data")
# Video URLS for BizPal
video_urls = ["https://www.youtube.com/watch?v=NtvjoVKKW8k",
"https://www.youtube.com/watch?v=Bv1MSHM-kzM&t=0s"]

#Download audio from videos
audio_files = downloader.download_multiplefiles(video_urls)
print("Downloaded audio files:")
for audio_file in audio_files:
  print(audio_file)

Processing video: https://www.youtube.com/watch?v=NtvjoVKKW8k
[youtube] Extracting URL: https://www.youtube.com/watch?v=NtvjoVKKW8k
[youtube] NtvjoVKKW8k: Downloading webpage
[youtube] NtvjoVKKW8k: Downloading ios player API JSON
[youtube] NtvjoVKKW8k: Downloading mweb player API JSON
[youtube] NtvjoVKKW8k: Downloading m3u8 information
[info] NtvjoVKKW8k: Downloading 1 format(s): 251
[download] Destination: BizPal website explained (with tutorial) [NtvjoVKKW8k].webm
[download] 100% of    3.87MiB in 00:00:00 at 5.15MiB/s   
[ExtractAudio] Destination: BizPal website explained (with tutorial) [NtvjoVKKW8k].mp3
Deleting original file BizPal website explained (with tutorial) [NtvjoVKKW8k].webm (pass -k to keep)
Audio file downloaded: BizPal website explained (with tutorial) [NtvjoVKKW8k].mp3
Processing video: https://www.youtube.com/watch?v=Bv1MSHM-kzM&t=0s
[youtube] Extracting URL: https://www.youtube.com/watch?v=Bv1MSHM-kzM&t=0s
[youtube] Bv1MSHM-kzM: Downloading webpage
[youtube] Bv1MSH

In [7]:
!pip -qqq install openai-whisper

In [8]:
import whisper
import torch

In [9]:
# Setting up device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Loading the model
whisper_model = whisper.load_model("medium", device=device)

  checkpoint = torch.load(fp, map_location=device)


In [10]:
class AudioTranscriber:
  def __init__(self,input_folder):
    self.input_folder = os.path.abspath(os.path.join(os.getcwd(),input_folder))
    self.whisper_model = None
    self.transcriptions_dict = {}

  def transcribe_audio(self,audio_file):
    try:
      if not os.path.exists(audio_file):
          print(f"Audio file not found: {audio_file}")
          return None

      file_size = os.path.getsize(audio_file)
      if file_size == 0:
          print(f"Empty audio file: {audio_file}")
          return None

      transcription = self.whisper_model.transcribe(audio_file)
      return transcription["text"]

    except Exception as e:
          print(f"Error transcribing audio file: {audio_file}")
          print(f"Error message: {str(e)}")
          return None

  def transcribe_all_audios(self,audio_files_dict):
    for url,audio_path in audio_files_dict.items():
      if not audio_path.endswith(".mp3"):
        print(f"Skipping non-MP3 file: {audio_path}")
        continue

      transcription = self.transcribe_audio(audio_path)

      if transcription is not None:
        self.transcriptions_dict[url] = {
            'url': url,
            'audio_path': audio_path,
            'transcription': transcription
        }

      else:
        print(f"Transcription failed for file: {audio_path}")

    return self.transcriptions_dict

In [None]:
# Initialize transcriber
transcriber = AudioTranscriber(input_folder="./data")
# Set the whisper model
transcriber.whisper_model = whisper_model
# Transcribe audio
transcriptions_dict = transcriber.transcribe_all_audios(audio_files)

for url,transcription in transcriptions_dict.items():
  print(f"URL: {url}")
  print(f"Audio file: {transcription['audio_path']}")
  print(f"Transcription: {transcription['transcription'][:100]}...")
  print("---")



In [None]:
transcriptions_dict

In [None]:
# Get first key-value pair
first_item = next(iter(transcriptions_dict.items()))

# Extract transcription
first_transcription = first_item[1]['transcription']

# Print the transcription
print(first_transcription)


In [None]:
import json

audio_data = [
    {
        "url": value["url"],
        "audiopath": value["audio_path"],
        "transcription": value["transcription"]
    }
    for value in transcriptions_dict.values()
]
print(json.dumps(audio_data, indent=2))

PDF Document Parsing

In [None]:
!pip -qqq install pdfminer.six
!pip -qqq install pillow-heif==0.3.2
!pip -qqq install matplotlib
!pip -qqq install tesseract-ocr
!pip -qqq install unstructured-inference
!pip -qqq install unstructured-pytesseract

In [None]:
%%bash
apt install tesseract-ocr
apt install libtesseract-dev

In [None]:
!pip install pi-heif==0.7.0

In [None]:
!pip install unstructured

In [None]:
from unstructured.partition.pdf import partition_pdf

In [None]:
bizpal_pdf = "/content/BizPal.pdf"

In [None]:
%%bash
sudo apt-get update
sudo apt-get install poppler-utils

In [None]:
pip install --upgrade nltk


In [None]:
import nltk
import os

# Set a new path for nltk_data, such as a 'nltk_data' folder in the current directory
new_nltk_data_path = os.path.join(os.getcwd(), "nltk_data")
nltk.data.path.append(new_nltk_data_path)

# Download 'punkt' to the new location
nltk.download('punkt', download_dir=new_nltk_data_path)


In [None]:
bizpal_pdfdata = partition_pdf(
    filename = bizpal_pdf,
    strategy="hi_res",
    extract_images_in_pdf=True,
    extract_image_block_to_payload = False,
    extract_image_block_output_dir = "./data/images"
)

In [None]:
bizpal_pdfdata

In [None]:
from unstructured.documents.elements import NarrativeText

In [None]:
def extract_text_with_metadata(bizpal_pdf, source_document):
  text_data = []
  paragraph_counters = {}

  for element in bizpal_pdf:
    if isinstance(element, NarrativeText):
      page_number = element.metadata.page_number

    if page_number not in paragraph_counters:
      paragraph_counters[page_number] = 1
    else:
      paragraph_counters[page_number] += 1

    paragraph_number = paragraph_counters[page_number]

    text_content = element.text

    text_data.append({
        'source_document': source_document,
        'page_number': page_number,
        'paragraph_number': paragraph_number,
        'text': text_content
    })

  return text_data

In [None]:
bizpal_pdf_text_data = extract_text_with_metadata(bizpal_pdfdata, bizpal_pdf)

In [None]:
bizpal_pdf_text_data

Extract Image

In [None]:
from unstructured.documents.elements import Image

In [None]:
def extract_image_metadata(bizpal_pdf, source_document):
  image_data = []

  for element in bizpal_pdf:
    if isinstance(element, Image):
      page_number = element.metadata.page_number
      image_path = element.metadata.image_path if hasattr(element.metadata, 'image_path') else None

      image_data.append({
          'source_document': source_document,
          'page_number': page_number,
          'image_path': image_path
      })

  return image_data

In [None]:
extract_image_metadata = extract_image_metadata(bizpal_pdfdata, bizpal_pdf)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import math

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import math

def display_images_from_metadata(extract_image_metadata, images_per_row=1):
    # Filter out entries with missing image paths
    valid_images = [img for img in extract_image_metadata if img['image_path']]
    if not valid_images:
        print("No valid images found.")
        return

    # Calculate rows and setup figure
    num_images = len(valid_images)
    num_rows = math.ceil(num_images / images_per_row)

    fig, axes = plt.subplots(num_rows, images_per_row, figsize=(20, 5 * num_rows))
    axes = axes.flatten() if num_rows > 1 else [axes]

    for ax, img_data in zip(axes, valid_images):
        try:
            # Open and display image
            img = Image.open(img_data['image_path'])
            ax.imshow(img)
            ax.axis('off')
            ax.set_title(f"Page {img_data['page_number']}", fontsize=10)
        except Exception as e:
            # Handle any errors opening the image
            print(f"Error opening image: {img_data['image_path']}")
            ax.axis('off')  # Hide the axis if image load fails

    # Remove any extra axes
    for ax in axes[num_images:]:
        fig.delaxes(ax)

    plt.tight_layout()
    plt.show()


In [None]:
display_images_from_metadata(extract_image_metadata)

# Extract Table

In [None]:
from unstructured.documents.elements import Table

In [None]:
def extract_table_metadata(bizpal_pdf, source_document):
  table_data = []

  for element in bizpal_pdf:
    if isinstance(element,Table):
      page_number = element.metadata.page_number

      # Extract table content as string
      table_content = str(element)

      table_data.append({
          'source_document': source_document,
          'page_number': page_number,
          'table_content': table_content
      })

  return table_data

In [None]:
extract_table_metadata = extract_table_metadata(bizpal_pdfdata, bizpal_pdf)

Image and Table summarization

In [None]:
!pip -qqq install langchain-core
!pip -qqq install langchain-openai

In [None]:
table_summarizer_prompt = """
As an expert for BizPal program, provide a concise and exact summary of the program. Focus on frequently asked questions and their relevance to BizPal. Highlight potential resources for someone starting out a business. Avoid bullet points; instead, deliver a coherent, factual summary that captures the essence of the BizPal program.
Limit your summary to 3-4 sentences, ensuring it is precise and informative for BizPal users.
"""

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

In [None]:
from google.colab import userdata

In [None]:
OPENAI_API_TOKEN = userdata.get('OPENAI_API_TOKEN')
model_ID = "gpt-4o"
os.environ["OPENAI_API_KEY"] = OPENAI_API_TOKEN

In [None]:
description_model = ChatOpenAI(
    model_name = model_ID,
    temperature = 0
)

In [None]:
def extract_table_metadata_with_summary(bizpal_pdf, source_document, table_summarizer_prompt):
  table_data = []

  prompt = ChatPromptTemplate.from_template(table_summarizer_prompt)

  for element in bizpal_pdf:
    if isinstance(element,Table):
      page_number = element.metadata.page_number

      # Extract table content as string
      table_content = str(element)

      # Summarize
      messages = prompt.format_messages(text=table_content)
      description = description_model(messages).content

      table_data.append({
          'source_document': source_document,
          'page_number': page_number,
          'table_content': table_content,
          'description': description
      })

  return table_data

In [None]:
extract_table_metadata_with_summary= extract_table_metadata_with_summary(bizpal_pdfdata, bizpal_pdf, table_summarizer_prompt)

In [None]:
extract_table_metadata_with_summary

In [None]:
# Check if the list is not empty
if extract_table_metadata_with_summary:
    # Get the first key-value pair
    first_table = extract_table_metadata_with_summary[0]

    # Extract the description
    first_description = first_table['description']

    # Print the description
    print(first_description)
else:
    print("The list 'extract_table_metadata_with_summary' is empty.")


In [None]:
from PIL import Image as PILImage
import base64

In [None]:
image_summarizer_prompt = """
As an expert for BizPal program,provide a concise and exact summary of the program. Focus on frequently asked questions and their relevance to BizPal. Highlight potential resources for someone starting out a business. Avoid bullet points; instead, deliver a coherent, factual summary that captures the essence of the BizPal program.
Image: {image_element}
Limit your summary to 3-4 sentences, ensuring it is precise and informative for BizPal users.
"""

In [None]:
def extract_image_metadata_with_summary(bizpal_pdf, source_document, image_summarizer_prompt):
  image_data = []
  # chatprompt
  prompt = ChatPromptTemplate.from_template(image_summarizer_prompt)

  description_model = ChatOpenAI(
    model_name = model_ID,
    temperature = 0
  )

  for element in bizpal_pdf:
    if "Image" in str(type(element)):
      page_number = element.metadata.page_number if hasattr(element.metadata, 'page_number') else None
      image_path = element.metadata.image_path if hasattr(element.metadata, 'image_path') else None

      if image_path and os.path.exists(image_path):
        messages = prompt.format_messages(image_element=element)
        description = description_model(messages).content

        with open(image_path, "rb") as image_file:
          image_base64 = base64.b64encode(image_file.read()).decode('utf-8')

        image_data.append({
            'source_document': bizpal_pdf,
            'page_number': page_number,
            'image_path': image_path,
            'image_base64': image_base64,
            'description': description
        })

  return image_data

In [None]:
extract_image_metadata_with_summary = extract_image_metadata_with_summary(bizpal_pdfdata, bizpal_pdf, image_summarizer_prompt)

In [None]:
extract_image_metadata_with_summary

Connect to weaviate

In [None]:
!pip -qqq install weaviate-client

In [None]:
import weaviate

In [None]:
#Setup th environment variables
URL = userdata.get('WCS_URL')
APIKEY = userdata.get('WCS_APIKEY')

# Connect to WCS instance
client = weaviate.connect_to_wcs(
    cluster_url = URL,
    auth_credentials = weaviate.auth.AuthApiKey(APIKEY),
    headers = {
        "X-OpenAI-Api-Key": OPENAI_API_TOKEN
    }
)

In [None]:
client

SETUP WEAVIATE database

In [None]:
import weaviate.classes.config as wc

properties = [
    wc.Property(name="source_document", data_type=wc.DataType.TEXT, skip_vectorization=True),
    wc.Property(name="page_number", data_type=wc.DataType.INT, skip_vectorization=True),
    wc.Property(name="paragraph_number", data_type=wc.DataType.INT, skip_vectorization=True),
    wc.Property(name="image_path", data_type=wc.DataType.TEXT, skip_vectorization=True),
    wc.Property(name="text", data_type=wc.DataType.TEXT),
    wc.Property(name="image_base64", data_type=wc.DataType.TEXT, skip_vectorization=True),
    wc.Property(name="description", data_type=wc.DataType.TEXT),
    wc.Property(name="transcription", data_type=wc.DataType.TEXT),
    wc.Property(name="audio_path", data_type=wc.DataType.TEXT, skip_vectorization=True),
    wc.Property(name="url", data_type=wc.DataType.TEXT, skip_vectorization=True),
    wc.Property(name="content_type", data_type=wc.DataType.TEXT, skip_vectorization=True),
    wc.Property(name="table_content", data_type=wc.DataType.TEXT),
]

In [None]:
client.collections.create(
    name="BizPal",
    properties=properties,
    vectorizer_config= None
)

Data Ingestion

In [None]:
from weaviate.util import generate_uuid5
from tqdm import tqdm
from openai import OpenAI
openai_client = OpenAI()

In [None]:
#Function to get embeddings
def get_embeddings(text):
  embeddings = openai_client.embeddings.create(
      input = [text],
      model = "text-embedding-3-large"
  )
  return embeddings.data[0].embedding


# Ingestion function
def ingest_audio_data(collection, audio_data):
  with collection.batch.dynamic() as batch:
    for audio in tqdm(audio_data, desc="Ingesting audio data"):
      vector = get_embeddings(audio["transcription"])
      audio_obj = {
          "url": audio["url"],
          "audio_path": audio["audio_path"],
          "transcription": audio["transcription"],
          "content_type": "audio"
      }
      batch.add_object(
          properties=audio_obj,
          uuid=generate_uuid5(audio_obj),
          vector=vector
      )

def ingest_text_data(collection, text_data):
    with collection.batch.dynamic() as batch:
        for text in tqdm(text_data, desc="Ingesting text data"):
            vector = get_embeddings(text["text"])
            text_obj = {
                "source_document": text["source_document"],
                "page_number": text["page_number"],
                "paragraph_number": text["paragraph_number"],
                "content_type": "text"
            }
            batch.add_object(
                properties=text_obj,
                uuid=generate_uuid5(f"{text['source_document']}_{text['page_number']}_{text['paragraph_number']}"),
                vector=vector
            )

def ingest_image_data(collection, image_data):
  with collection.batch.dynamic() as batch:
    for image in tqdm(image_data, desc="Ingesting image data"):
      vector = get_embeddings(image["description"])
      image_obj = {
          "source_document": image["source_document"],
          "page_number": image["page_number"],
          "image_path": image["image_path"],
          "image_base64": image["image_base64"],
          "description": image["description"],
          "content_type": "image"
      }
      batch.add_object(
          properties=image_obj,
          uuid=generate_uuid5(f"{image['source_document']}_{image['page_number']}_{image['image_path']}"),
          vector=vector
      )

def ingest_table_data(collection, table_data):
  with collection.batch.dynamic() as batch:
    for table in tqdm(table_data, desc="Ingesting table data"):
      vector = get_embeddings(table["description"])
      table_obj = {
          "source_document": table["source_document"],
          "page_number": table["page_number"],
          "table_content": table["table_content"],
          "description": table["description"],
          "content_type": "table"
      }
      batch.add_object(
          properties=table_obj,
          uuid=generate_uuid5(f"{table['source_document']}_{table['page_number']}"),
          vector=vector
      )

def ingest_all_data(collection_name, audio_data, text_data, image_data, table_data):
  collection = client.collections.get(collection_name)
  ingest_audio_data(collection, audio_data)
  ingest_text_data(collection, text_data)
  ingest_image_data(collection, image_data)
  ingest_table_data(collection, table_data)

  if len(collection.batch.failed_object) > 0:
    print(f"Failed to ingest {len(collection.batch.failed_object)} objects")
  else:
    print("All objects ingested successfully")

Start data ingestion

In [None]:
ingest_all_data(collection_name="BizPal", audio_data = audio_data, text_data=extract_text_with_metadata, image_data = extract_image_metadata_with_summary, table_data = extract_table_metadata_with_summary)

In [None]:
try:
  first_item = next(collection.iterator())
  print(first_item.properties)
  print(first_item.vector)
except StopIteration:
  print("Collection is empty")

In [None]:
import weaviate.classes.query as wc

In [None]:
def search_multimodal(query:str, limit: int=3):
  query_vector = get_embeddings(query)

  documents = client.collections.get("BizPal")
  response = documents.query.near_vector(
      near_vector = query_vector,
      limit = limit,
      return_metadata=wq.MetadataQuery(distance=True),
      return_properties=[
          "content_type",
          "source_document",
          "page_number",
          "paragraph_number",
          "url",
          "text",
          "image_path",
          "image_base64",
          "description",
          "transcription",
          "audio_path",
          "table_content"
      ]
  )

  return response.objects

In [None]:
def search_and_print(query:str, limit: int=3):
  response = search_multimodal(query, limit)

  print(f"Query: {query}")
  for item in response:
    print(f"Content type: {item.properties['content_type']}")
    if item.properties['content_type'] == "text":
      print(f"Source document: {item.properties['source_document']}")
      print(f"Page number: {item.properties['page_number']}")
    elif item.properties['content_type'] == "image":
      print(f"Source document: {item.properties['source_document']}")
      print(f"Page number: {item.properties['page_number']}")

    elif item.properties['content_type'] == "audio":
      print(f"Source document: {item.properties['source_document']}")
      print(f"Transcription: {item.properties['transcription']}")

    elif item.properties['content_type'] == "table":
      print(f"Source document: {item.properties['source_document']}")
      print(f"Page number: {item.properties['page_number']}")

    print(f"Distance to query: {item.metadata.distance:.3f}")
    print("---")

In [None]:
query = "What are the main features of BizPal?"
search_and_print(query)

Multimodal RAG for BizPal

In [None]:
def generate_response(query:str, context:str):
  prompt = f"""
  You are an AI assistant specializing in BizPal. Use the following pieces of information to answer the user's question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
  Context: {context}
  User Question: {query}

  Please provide a detailed and informative answer based on the context:
  """
  response = openai_client.chat.completions.create(
      model = "gpt-4-1106-preview",
      messages = [
          {
              "role": "system",
              "content": "You are an expert in BizPal program."
          },
          {
              "role": "user",
              "content": prompt}
      ],
      temperature = 0,
      max_tokens = 2000,
      top_p = 1,
      frequency_penalty = 0,
      presence_penalty = 0
  )
  return response.choices[0].message.content


In [None]:
from typing_extensions import final
def bizpal_analysis(query:str):
  response = search_multimodal(query)
  context = ""
  for item in response:
    if item.properties['content_type'] == "text":
      context += f"Text from {item.properties['source_document']}(Page{item.properties['page_number']}, Paragraph{item.properties['paragraph_number']}\n\n"
    elif item.properties['content_type'] == "image":
      context += f"Image description from {item.properties['source_document']} (Page{item.properties['page_number']},Path: {item.properties['image_path']} \n\n"
    elif item.properties['content_type'] == "audio":
      context += f"Audio Transcription from {item.properties['url']}(Page{item.properties['transcription']}\n\n"
    elif item.properties['content_type'] == "table":
      context += f"Table description from (item.properties['source_document']} (Page {item.properties['page_number']}): {item.properties['description']}

  response = generate_response(query, context)

  sources = []
  for item in response:
    source = {
        "type": item.properties['content_type'],
        "distance": item.metadata.distance
    }
    if item.properties['content_type'] == "text":
      sources.update({
          "source_document": item.properties['source_document'],
          "page_number": item.properties['page_number'],
          "paragraph_number": item.properties['paragraph_number']
      })
    elif item.properties['content_type'] == "image":
      sources.update({
          "source_document": item.properties['source_document'],
          "page_number": item.properties['page_number'],
          "image_path": item.properties['image_path']
      })
    elif item.properties['content_type'] == "audio":
      sources.update({
          "url": item.properties['url'],
          "transcription": item.properties['transcription']
      })
    elif item.properties['content_type'] == "table":
      sources.update({
          "source_document": item.properties['source_document'],
          "page_number": item.properties['page_number']
      })

  sources.append(source)
  sources.sort(key=lambda x: x['distance'])

  final_response = {
      "query": query,
      "response": response,
      "sources": sources
  }

  return final_response

In [None]:
!pip -qqq install textwrap3

In [None]:
import textwrap
def wrap_text(text):
  wrapped_text = textwrap.fill(text, width=100)
  return wrapped_text

In [None]:
def analyze_response_model(userquestion):
  result = bizpal_analysis(userquestion)

  print("Query: ", result["query"])
  print("\nResponse: ", wrap_text(result["response"]))
  print("\nSources: ")
  for source in result["sources"]:
    print(f"- Type: {source['type']}, Distance: {source['distance']:.3f}")
    if source['type'] == "text":
      print(f"Source document: {source['source_document']}, Page number: {source['page_number']}, Paragraph number: {source['paragraph_number']}")
    elif source['type'] == "image":
      print(f"Source document: {source['source_document']}, Page number: {source['page_number']}, Paragraph number: {source['paragraph_number
    elif source['type'] == "audio":
      print(f"  URL: {source['url']}, Transcription: {source['transcription']}")
    elif source['type'] == "table":
      print(f"  Source document: {source['source_document']}, Page number: {source['page_number']}")

  response = result["response"]
  sources = result["sources"]

In [None]:
user_question = "What are the main features of BizPal?"
analyze_response_model(user_question)

In [None]:
user_question = "What are the steps for registering company?"
analyze_response_model(user_question)