# Multimodal RAG

This notebook shows how to build a RAG system that leverages the image and text capabilities of the SambaNova multimodal models.

In [1]:
import os
import sys
import glob

# Resolve the kit directory (parent of the current working directory) and the
# repository root (parent of the kit directory).
current_dir = os.getcwd()
kit_dir = os.path.abspath(os.path.join(current_dir, '..'))
repo_dir = os.path.abspath(os.path.join(kit_dir, '..'))

# Put both directories on the import path so modules living there can be imported.
for extra_path in (kit_dir, repo_dir):
    sys.path.append(extra_path)

from dotenv import load_dotenv

# Load the repo-level .env file; override=True is passed so values from the file
# take precedence over variables already present in the environment.
dotenv_path = os.path.join(repo_dir, '.env')
load_dotenv(dotenv_path, override=True)

import requests
import json
import base64
from pprint import pprint

## Multimodal call

In [2]:
from utils.model_wrappers.multimodal_models import SambastudioMultimodal

# Multimodal (image + text) model client used by the image calls in this notebook.
# Generation settings are collected first so they are easy to spot and tweak.
lvlm_generation_params = {
    'temperature': 0.01,
    'max_tokens_to_generate': 1024,
}
lvlm = SambastudioMultimodal(**lvlm_generation_params)

### QA Llava Call

In [3]:
# Sample image shipped with the kit, and a question about its content.
sample_docs_dir = os.path.join(kit_dir, 'data', 'sample_docs')
image_path = os.path.join(sample_docs_dir, 'sample.png')
prompt = 'how many birds could you find at 4pm:'

# Send the question together with the image to the multimodal model.
lvlm.invoke(prompt, image_path)

'At 4 pm, you could find approximately 12 birds in the image.'

### Summary Llava call

In [4]:
# Chat-style prompt (USER / ASSISTANT turns with an <image> placeholder) asking for a
# detailed description of the image. `image_path` is reused from the previous cell.
prompt = 'A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the humans question. USER: <image>\nDescribe the image in detail. Be specific about graphs, such as bar plots, scatter plots, or others. ASSISTANT:'
lvlm.invoke(prompt, image_path)

'The image displays a graph showing the number of birds on a tree at different times of the day. The graph is a line graph with a line that goes up and down, indicating the number of birds at various times. The x-axis represents the time of the day, while the y-axis shows the number of birds on the tree. The graph is labeled with the time of the day, such as 12:00, 1:00, 2:00, and so on. The data is presented in a clear and organized manner, making it easy to understand the trend of birds on the tree throughout the day.'

## Doc Extraction

### Unstructured PDF extraction

In [5]:
from unstructured.partition.pdf import partition_pdf

# PDF to process
file_path = os.path.join(kit_dir, 'data', 'sample_docs', 'invoicesample.pdf')
# Directory where extracted image blocks are saved: the PDF path without its extension
output_path = os.path.splitext(file_path)[0]

partition_options = dict(
    # Layout detection: use the layout model (YOLOX) to get bounding boxes
    # (for tables) and find titles. Titles are any sub-section of the document.
    strategy='hi_res',
    hi_res_model_name='yolox',
    infer_table_structure=True,
    # Image extraction
    extract_images_in_pdf=True,
    extract_image_block_output_dir=output_path,
    # Chunking
    chunking_strategy='by_title',
    max_characters=1000,
    new_after_n_chars=800,
    combine_text_under_n_chars=500,
)

# Get elements
raw_pdf_elements = partition_pdf(filename=file_path, **partition_options)

Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### View Elements

In [6]:
# Print a short report for every partitioned element: index, class, metadata and text.
for index, pdf_element in enumerate(raw_pdf_elements):
    report_lines = [
        f'\033[95m ELEMENT {index}\033[00m',
        f'TYPE: {type(pdf_element)}',
        f'META: {pdf_element.metadata.to_dict()}',
        f'TEXT: {pdf_element.text}',
        '\n\n##########\n',
    ]
    print('\n'.join(report_lines))

[95m ELEMENT 0[00m
TYPE: <class 'unstructured.documents.elements.CompositeElement'>
META: {'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-09-09T08:43:38', 'page_number': 1, 'orig_elements': 'eJy9VtuO2zYQ/RVB7UMLmDLvl30rmqLYh2xSrNOXxcKgyJGXWVkSJHqdTdB/LyU5QS5OgBiwAT1ojmZs8pw5Q959yKGGLTRxHXx+leVCa+qAEuRLZRFXlCItMUWUKe+oFqryIl9k+Rai9TbaVPMhd23b+9DYCMMU1/a53cX1A4TNQ0wIZcykmgO8Dz4+JJRIwRLataGJY93dnTG0EIuMcF7w+0X2MaZEFmKMicCqoEeAuSIh+fA8RNiOO3kd3kF921kH+X/pg4cILoa2WbvaDsO669sypeGCSiZkSqhCDfG5g6n29ct8WnCz2dnNtKu7HJpNfj+hQ1xvWx+qABNnFFOOsEnPCusrzq6YHqu7VLludtsS+nG34yIivBv5yP9Yrf66WV2/uslWr8bUj/+7CrGelvu1LIYx70lZIWAAiAvvkHGaIQHWeEm1M4SfTRaiRKES6wIXZGJ9jqUQBRtjTfgo09fxnH+aKJrJ72gStiOtnZ3Wmi/fDNAPy7dtv4Fu+aJ1u5G0YWmHx3W3K+vg1u2+WdqAhmj7CD16DBGlxXTL7a6OIclo6/Vj0+5r8Ol3e4h9gCfolyOLy8FuuxrWvnXDMjRPbXAwQ8sqbHY9IIJI8bbbnLVZPm+Q63H3xxqkUq70BmukjeKIG61QSUud3pjWnlqR2vx8DYLJ1BGMsrkDPgFSFnoCGJeFOQZMJac6l2JxYee+gKZ5zv7eNXZvm8+VubF9b2N4gtWYeUQhji33UjJkTMUQp8nHmnuKDCNEYkwqX+JzKq

In [8]:
# Tally how many elements of each class were produced, keyed by the string form
# of the element's type.
category_counts = {}
for element in raw_pdf_elements:
    category = str(type(element))
    category_counts[category] = category_counts.get(category, 0) + 1

# Distinct element types seen.
# A TableChunk shows up when a table exceeds the max characters set during partitioning.
unique_categories = set(category_counts)
category_counts

{"<class 'unstructured.documents.elements.CompositeElement'>": 2,
 "<class 'unstructured.documents.elements.Table'>": 1}

In [9]:
from langchain.schema import Document


# Wrap every table / text element in a Document tagged with metadata['type'].
# Matching is done on the class name string; elements of any other class are skipped.
categorized_elements = []
for element in raw_pdf_elements:
    element_type = str(type(element))
    if 'unstructured.documents.elements.Table' in element_type:
        # Tables keep their HTML rendering as the document content
        doc_type, content = 'table', element.metadata.text_as_html
    elif 'unstructured.documents.elements.CompositeElement' in element_type:
        doc_type, content = 'text', str(element)
    else:
        continue
    meta = element.metadata.to_dict()
    meta['type'] = doc_type
    categorized_elements.append(Document(page_content=content, metadata=meta))

# Split the categorized documents by kind
table_docs = [doc for doc in categorized_elements if doc.metadata['type'] == 'table']
text_docs = [doc for doc in categorized_elements if doc.metadata['type'] == 'text']

# Number of tables, then number of text chunks
print(len(table_docs))
print(len(text_docs))

1
2


### Text and table summaries

In [10]:
from utils.model_wrappers.langchain_llms import SambaNovaCloud, SambaStudio
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import load_prompt

In [11]:
# Prompt templates used to summarize text chunks and tables
prompts_dir = os.path.join(kit_dir, 'prompts')
text_prompt = load_prompt(os.path.join(prompts_dir, 'llama70b-text_summary.yaml'))
table_prompt = load_prompt(os.path.join(prompts_dir, 'llama70b-table_summary.yaml'))

# LLM that writes the summaries
model = SambaNovaCloud(model='llama3-8b', max_tokens=500)

# Alternative: use a SambaStudio endpoint instead of SambaNovaCloud
# model = SambaStudio(
#     streaming=False,
#     model_kwargs={
#         'do_sample': False,
#         'temperature': 0.01,
#         'max_tokens_to_generate': 256,
#         'process_prompt': False,
#         'select_expert': 'Meta-Llama-3-70B-Instruct-4096',
#     },
# )


def _build_summarize_chain(summary_prompt):
    """Compose: raw input -> {'element': input} -> prompt -> model -> plain string."""
    return {'element': lambda x: x} | summary_prompt | model | StrOutputParser()


text_summarize_chain = _build_summarize_chain(text_prompt)
table_summarize_chain = _build_summarize_chain(table_prompt)

### Text Summaries

In [15]:
# Apply to text: summarize every non-empty text chunk, one request at a time
texts = [doc.page_content for doc in text_docs if doc.page_content != '']
# Always bind `text_summaries` (empty list when there is nothing to summarize) so the
# cells that read it do not fail with a NameError on a fresh kernel.
text_summaries = text_summarize_chain.batch(texts, {'max_concurrency': 1}) if texts else []

In [16]:
# Display the summaries generated for the text chunks
text_summaries

['This is an invoice addressed to Denny Gunawan, with the following details:\n\n- Address: 221 Queen St, Melbourne VIC 3000\n- Alternative contact: 123 Somewhere St, Melbourne VIC 3000, Phone: (03) 1234 5678\n- Total amount due: $39.60\n- Invoice number: #20130304',
 'A receipt summary: \n\nA subtotal of $36.00 was calculated, then a 10% GST (Goods and Services Tax) was added, resulting in a total of $39.60.']

### Table summaries

In [17]:
# Apply to tables: summarize the content of every table document, one request at a time
tables = [doc.page_content for doc in table_docs]
# Always bind `table_summaries` (empty list when there are no tables) so the cells
# that read it do not fail with a NameError on a fresh kernel.
table_summaries = table_summarize_chain.batch(tables, {'max_concurrency': 1}) if tables else []

In [18]:
# Display the summaries generated for the tables
table_summaries

['**Summary of the Table:**\n\nThe table contains a list of 5 fruits with their prices and quantities. The total price for each fruit is also included.\n\n- **Average Price:** $5.51\n- **Total Quantity:** 9\n- **Total Revenue:** $51.27']

### Image summary

In [19]:
prompt = 'Describe the image in detail. Be specific about graphs include name of axis, labels, legends and important numerical information'

# Collect the images found in the extraction output directory (.jpg first, then .png).
# glob returns matches in arbitrary order, so each group is sorted to keep the order
# of summaries and documents reproducible across runs and machines.
image_paths = []
image_paths.extend(sorted(glob.glob(os.path.join(output_path, '*.jpg'))))
image_paths.extend(sorted(glob.glob(os.path.join(output_path, '*.png'))))

image_summaries = []
image_docs = []

# Describe each image with the multimodal model; keep the description both as a plain
# summary and as a Document whose metadata records where the image file lives.
for image_path in image_paths:
    result = lvlm.invoke(prompt, image_path)
    image_summaries.append(result)
    image_docs.append(
        Document(
            page_content=result,
            metadata={
                'type': 'image',
                'file_directory': os.path.dirname(image_path),
                'filename': os.path.basename(image_path),
            },
        )
    )

In [20]:
# Display the descriptions generated for the extracted images
image_summaries

["The image features a logo for Sunny Farm, a farm located in Victoria, Australia. The logo is a sun with a yellow background, and it is placed on a white background. The sun is positioned in the middle of the logo, with the rays extending outwards. The logo is likely used to represent the farm's fresh produce and commitment to quality."]

In [21]:
# Display the image Documents (description plus file location metadata)
image_docs

[Document(metadata={'type': 'image', 'file_directory': '/Users/jorgep/Documents/ask_public_own/ai-starter-kit-temp/multimodal_knowledge_retriever/data/sample_docs/invoicesample', 'filename': 'figure-1-1.jpg'}, page_content="The image features a logo for Sunny Farm, a farm located in Victoria, Australia. The logo is a sun with a yellow background, and it is placed on a white background. The sun is positioned in the middle of the logo, with the rays extending outwards. The logo is likely used to represent the farm's fresh produce and commitment to quality.")]

### Add to vectorstore

In [22]:
import uuid

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryByteStore
from utils.model_wrappers.api_gateway import APIGateway
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

# Embedding model used to index the summaries
embedding_model = APIGateway.load_embedding_model(
    type='sambastudio', batch_size=1, coe=True, select_expert='e5-mistral-7b-instruct-8192'
)

# The vectorstore to use to index the child chunks
vectorstore = Chroma(collection_name='summaries', embedding_function=embedding_model)

# Metadata key that links each indexed summary to its parent document
id_key = 'doc_id'

# The storage layer for the parent documents
store = InMemoryByteStore()

# The retriever (empty to start)
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=store,
    id_key=id_key,
    search_kwargs={'k': 2},
)

  vectorstore = Chroma(
2024-09-19 11:01:53,588 [INFO] - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [23]:
# Add texts
# Summaries were generated only for text docs with non-empty content, so the same
# filter is applied to the parents here. Pairing summaries with the unfiltered
# `text_docs` would link a summary to the wrong parent as soon as one text doc is empty.
summarized_text_docs = [doc for doc in text_docs if doc.page_content != '']
if texts:
    doc_ids = [str(uuid.uuid4()) for _ in summarized_text_docs]
    summary_texts = [Document(page_content=s, metadata={id_key: doc_ids[i]}) for i, s in enumerate(text_summaries)]
    retriever.vectorstore.add_documents(summary_texts)
    retriever.docstore.mset(list(zip(doc_ids, summarized_text_docs)))

# Add tables: the summary is indexed, the raw table document is stored as the parent
if tables:
    table_ids = [str(uuid.uuid4()) for _ in table_docs]
    summary_tables = [Document(page_content=s, metadata={id_key: table_ids[i]}) for i, s in enumerate(table_summaries)]
    retriever.vectorstore.add_documents(summary_tables)
    retriever.docstore.mset(list(zip(table_ids, table_docs)))

# Add images
if image_summaries:
    img_ids = [str(uuid.uuid4()) for _ in image_summaries]
    summary_img = [Document(page_content=s, metadata={id_key: img_ids[i]}) for i, s in enumerate(image_summaries)]
    retriever.vectorstore.add_documents(summary_img)
    retriever.docstore.mset(list(zip(img_ids, image_docs)))  # Store the image summary as the raw document

In [24]:
# Query the retriever directly to inspect which stored documents come back
retriever.invoke('what is the final price in the invoice?')

[Document(metadata={'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-09-09T08:43:38', 'page_number': 1, 'orig_elements': 'eJzNVluL3DYY/SvCNNCWkdDd0r7lqYW228BOHsqyDLp5VuDLxJbbbNP898qXSYZmmmwKs10wGB3r2NI55/vk23dFqEMT2rSLvrgChaCKccooVLS0kGPloRFKwyAFl8wR5iktNqBoQjLeJJM57wrXdb2PrUlhmMe1eejGtLsPcX+fMkIZ05mzwn9En+4zSqRgGT10sU0T7/ZWlijPI1oQxO82YB1TXJbLWJUMqTPAyshIMTwMKTTTTl7Ft6G+ORgXivf5gQ8puBS7dudqMwy7Q9/ZPA0jqRku84Qq1iE9HMLMffVLMS+43Y9mP+/qtgjtvrib0SHtms7HKoZZM4oph1jna4vVFWdXTE3sQ2bu2rGxoZ92Oy0ihbeTHsX2x5fXP4Hffn09zTt+dBtTPa/1n554UcmqMgFKzzzkymiota0gNljZ0lTMcnkxTwRFJEtOCEZ6kvw41hLR2QKpET8LzIz/5ommmNMn9uR78HPXhwbEwzA2wHd114MhJmCyrBvgunbIaw1p7IHx8RAHF9s9CHVMCLys45vRNGDovKnDALw5RDsOoAr9ZOPYIHA9tu6EuAGN2bcGDMGDwWW7+zi8GQNwYz+MwwaE3iRQ5w+OH9829q3ZzIzKuFjHIQ55Sgf8GEFIy7rRaaCuTZ9fE38P22mLZ4IlrKCVNAQ6y3KxM66gDbaEVFfcYG8ZJ+piwdKkRHiq3ek+BWcFKGF6AQjR6jyykj4brqfMzs1oU5dMDf4C2+n+qLIOKjAjsIfEWw2z4AEqhjkUFfFKY+G89RdTn2CipkImudSWwv6ISIrkB7XVeYR+sbj/D/0fpTt1knMXBFSBiqy7ktBU3kHtMNU

In [25]:
# Same check with a question about the image content
retriever.invoke('what is the logo of the company')

[Document(metadata={'type': 'image', 'file_directory': '/Users/jorgep/Documents/ask_public_own/ai-starter-kit-temp/multimodal_knowledge_retriever/data/sample_docs/invoicesample', 'filename': 'figure-1-1.jpg'}, page_content="The image features a logo for Sunny Farm, a farm located in Victoria, Australia. The logo is a sun with a yellow background, and it is placed on a white background. The sun is positioned in the middle of the logo, with the rays extending outwards. The logo is likely used to represent the farm's fresh produce and commitment to quality."),
 Document(metadata={'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-09-09T08:43:38', 'page_number': 1, 'orig_elements': 'eJzNVluL3DYY/SvCNNCWkdDd0r7lqYW228BOHsqyDLp5VuDLxJbbbNP898qXSYZmmmwKs10wGB3r2NI55/vk23dFqEMT2rSLvrgChaCKccooVLS0kGPloRFKwyAFl8wR5iktNqBoQjLeJJM57wrXdb2PrUlhmMe1eejGtLsPcX+fMkIZ05mzwn9En+4zSqRgGT10sU0T7/ZWlijPI1oQxO82YB1TXJbLWJUMqTPAyshIMTwMKTTTTl7Ft6G+ORgXivf5gQ8puBS7dudqMwy7Q9/ZPA0jqRk

## Retrieval with raw text, raw tables and image summaries

In [26]:
from langchain.chains import RetrievalQA

# Custom QA prompt for the retrieval chain
qa_prompt_path = os.path.join(kit_dir, 'prompts', 'llama70b-knowledge_retriever_custom_qa_prompt.yaml')
prompt = load_prompt(qa_prompt_path)

# Retrieval QA chain: reads the query from 'question', writes the reply to 'answer',
# and also returns the documents that were retrieved.
chain = RetrievalQA.from_llm(
    llm=model,
    retriever=retriever,
    input_key='question',
    output_key='answer',
    return_source_documents=True,
)

# Replace the prompt of the chain's inner LLM chain with the custom QA prompt
chain.combine_documents_chain.llm_chain.prompt = prompt

In [27]:
# Ask the QA chain; the result holds the question, the answer and the source documents
chain.invoke({'question': 'what is the final price in the invoice?'})

{'question': 'what is the final price in the invoice?',
 'answer': 'Based on the provided context, I can see that the final price in the invoice is $39.60. This is clearly stated at the bottom of the invoice.',
 'source_documents': [Document(metadata={'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-09-09T08:43:38', 'page_number': 1, 'orig_elements': 'eJzNVluL3DYY/SvCNNCWkdDd0r7lqYW228BOHsqyDLp5VuDLxJbbbNP898qXSYZmmmwKs10wGB3r2NI55/vk23dFqEMT2rSLvrgChaCKccooVLS0kGPloRFKwyAFl8wR5iktNqBoQjLeJJM57wrXdb2PrUlhmMe1eejGtLsPcX+fMkIZ05mzwn9En+4zSqRgGT10sU0T7/ZWlijPI1oQxO82YB1TXJbLWJUMqTPAyshIMTwMKTTTTl7Ft6G+ORgXivf5gQ8puBS7dudqMwy7Q9/ZPA0jqRku84Qq1iE9HMLMffVLMS+43Y9mP+/qtgjtvrib0SHtms7HKoZZM4oph1jna4vVFWdXTE3sQ2bu2rGxoZ92Oy0ihbeTHsX2x5fXP4Hffn09zTt+dBtTPa/1n554UcmqMgFKzzzkymiota0gNljZ0lTMcnkxTwRFJEtOCEZ6kvw41hLR2QKpET8LzIz/5ommmNMn9uR78HPXhwbEwzA2wHd114MhJmCyrBvgunbIaw1p7IHx8RAHF9s9CHVMCLys45vRNGDovKnDALw5RDsOoAr9ZOPYIHA9tu6EuAGN2bcGDMGDwWW7+zi8GQNwYz+MwwaE3iRQ5w+OH98

In [28]:
# Ask the QA chain about the logo, passing the query under the chain's 'question' input key
chain.invoke({'question': 'what is the logo of the company'})

{'question': 'what is the logo of the company',
 'answer': "Based on the provided context, I can see that there is a mention of a logo for Sunny Farm, a farm located in Victoria, Australia. The description of the logo is a sun with a yellow background, placed on a white background. However, I don't see any further information about the logo in the context.\n\nThe second context appears to be a receipt or an invoice, with a subtotal, GST, and total amount. There is no mention of a company logo in this context.\n\nTherefore, I can only provide information about the logo of Sunny Farm based on the first context. The logo of Sunny Farm is a sun with a yellow background, placed on a white background.",
 'source_documents': [Document(metadata={'type': 'image', 'file_directory': '/Users/jorgep/Documents/ask_public_own/ai-starter-kit-temp/multimodal_knowledge_retriever/data/sample_docs/invoicesample', 'filename': 'figure-1-1.jpg'}, page_content="The image features a logo for Sunny Farm, a farm

## Retrieval with raw text, raw tables and raw images

In [39]:
# Question reused by the remaining cells of this section
query = 'what is the logo of the company?'

In [40]:
# Documents retrieved for the query
retriever.invoke(query)

[Document(metadata={'type': 'image', 'file_directory': '/Users/jorgep/Documents/ask_public_own/ai-starter-kit-temp/multimodal_knowledge_retriever/data/sample_docs/invoicesample', 'filename': 'figure-1-1.jpg'}, page_content="The image features a logo for Sunny Farm, a farm located in Victoria, Australia. The logo is a sun with a yellow background, and it is placed on a white background. The sun is positioned in the middle of the logo, with the rays extending outwards. The logo is likely used to represent the farm's fresh produce and commitment to quality."),
 Document(metadata={'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-09-09T08:43:38', 'page_number': 1, 'orig_elements': 'eJzNVluL3DYY/SvCNNCWkdDd0r7lqYW228BOHsqyDLp5VuDLxJbbbNP898qXSYZmmmwKs10wGB3r2NI55/vk23dFqEMT2rSLvrgChaCKccooVLS0kGPloRFKwyAFl8wR5iktNqBoQjLeJJM57wrXdb2PrUlhmMe1eejGtLsPcX+fMkIZ05mzwn9En+4zSqRgGT10sU0T7/ZWlijPI1oQxO82YB1TXJbLWJUMqTPAyshIMTwMKTTTTl7Ft6G+ORgXivf5gQ8puBS7dudqMwy7Q9/ZPA0jqRk

In [41]:
# Answer produced by the QA chain from the retrieved documents
chain.invoke({'question': query})

{'question': 'what is the logo of the company?',
 'answer': 'Based on the provided context, I can see that there are two separate pieces of information. The first context describes a logo for Sunny Farm, a farm located in Victoria, Australia. The logo features a sun with a yellow background on a white background.\n\nThe second context appears to be a receipt or an invoice with a subtotal, GST, and total amount. There is no mention of a company logo in this context.\n\nTherefore, I can answer your question based on the first context. The logo of the company is a sun with a yellow background on a white background.',
 'source_documents': [Document(metadata={'type': 'image', 'file_directory': '/Users/jorgep/Documents/ask_public_own/ai-starter-kit-temp/multimodal_knowledge_retriever/data/sample_docs/invoicesample', 'filename': 'figure-1-1.jpg'}, page_content="The image features a logo for Sunny Farm, a farm located in Victoria, Australia. The logo is a sun with a yellow background, and it i

### Filter image results

In [42]:
def get_retrieved_images(retriever, query):
    """Return only the retrieved documents that are tagged as images.

    Args:
        retriever: object exposing ``invoke(query)`` that returns documents with a
            ``metadata`` dict.
        query: the user question passed to the retriever.

    Returns:
        The retrieved documents whose ``metadata['type']`` equals ``'image'``, in
        retrieval order. Documents without a ``'type'`` entry are skipped instead
        of raising ``KeyError``.
    """
    results = retriever.invoke(query)
    return [result for result in results if result.metadata.get('type') == 'image']

In [43]:
# Keep only the image documents retrieved for the query and display them
retrieved_images = get_retrieved_images(retriever, query)
retrieved_images

[Document(metadata={'type': 'image', 'file_directory': '/Users/jorgep/Documents/ask_public_own/ai-starter-kit-temp/multimodal_knowledge_retriever/data/sample_docs/invoicesample', 'filename': 'figure-1-1.jpg'}, page_content="The image features a logo for Sunny Farm, a farm located in Victoria, Australia. The logo is a sun with a yellow background, and it is placed on a white background. The sun is positioned in the middle of the logo, with the rays extending outwards. The logo is likely used to represent the farm's fresh produce and commitment to quality.")]

### Generate response over retrieved raw images 

In [44]:
def get_image_answers(retrieved_image_docs, query):
    """Answer ``query`` once per retrieved image using the multimodal model.

    The 'multimodal-qa' prompt template is loaded from the kit's prompts directory and
    filled with the question. Each image is located from the ``file_directory`` and
    ``filename`` entries of its document metadata and sent to the model with that prompt.

    Returns:
        A list with one model answer per document, in the order the documents are given.
    """
    template_path = os.path.join(kit_dir, 'prompts', 'multimodal-qa.yaml')
    question_prompt = load_prompt(template_path).format(question=query)
    return [
        lvlm.invoke(
            question_prompt,
            os.path.join(doc.metadata['file_directory'], doc.metadata['filename']),
        )
        for doc in retrieved_image_docs
    ]

In [45]:
# Answer the query from each retrieved raw image and display the answers
image_answers = get_image_answers(retrieved_images, query)
image_answers

['The logo of the company is a sunny farm.']

In [46]:
def get_retrieved_docs(retriever, query):
    """Return the retrieved documents that are not tagged as images.

    Args:
        retriever: object exposing ``invoke(query)`` that returns documents with a
            ``metadata`` dict.
        query: the user question passed to the retriever.

    Returns:
        The retrieved documents whose ``metadata['type']`` is not ``'image'``, in
        retrieval order. Documents without a ``'type'`` entry are kept instead of
        raising ``KeyError``.
    """
    results = retriever.invoke(query)
    return [result for result in results if result.metadata.get('type') != 'image']

In [47]:
# Keep the non-image (text / table) documents retrieved for the query and display them
context_docs = get_retrieved_docs(retriever, query)
context_docs

[Document(metadata={'filetype': 'application/pdf', 'languages': ['eng'], 'last_modified': '2024-09-09T08:43:38', 'page_number': 1, 'orig_elements': 'eJzNVluL3DYY/SvCNNCWkdDd0r7lqYW228BOHsqyDLp5VuDLxJbbbNP898qXSYZmmmwKs10wGB3r2NI55/vk23dFqEMT2rSLvrgChaCKccooVLS0kGPloRFKwyAFl8wR5iktNqBoQjLeJJM57wrXdb2PrUlhmMe1eejGtLsPcX+fMkIZ05mzwn9En+4zSqRgGT10sU0T7/ZWlijPI1oQxO82YB1TXJbLWJUMqTPAyshIMTwMKTTTTl7Ft6G+ORgXivf5gQ8puBS7dudqMwy7Q9/ZPA0jqRku84Qq1iE9HMLMffVLMS+43Y9mP+/qtgjtvrib0SHtms7HKoZZM4oph1jna4vVFWdXTE3sQ2bu2rGxoZ92Oy0ihbeTHsX2x5fXP4Hffn09zTt+dBtTPa/1n554UcmqMgFKzzzkymiota0gNljZ0lTMcnkxTwRFJEtOCEZ6kvw41hLR2QKpET8LzIz/5ommmNMn9uR78HPXhwbEwzA2wHd114MhJmCyrBvgunbIaw1p7IHx8RAHF9s9CHVMCLys45vRNGDovKnDALw5RDsOoAr9ZOPYIHA9tu6EuAGN2bcGDMGDwWW7+zi8GQNwYz+MwwaE3iRQ5w+OH9829q3ZzIzKuFjHIQ55Sgf8GEFIy7rRaaCuTZ9fE38P22mLZ4IlrKCVNAQ6y3KxM66gDbaEVFfcYG8ZJ+piwdKkRHiq3ek+BWcFKGF6AQjR6jyykj4brqfMzs1oU5dMDf4C2+n+qLIOKjAjsIfEWw2z4AEqhjkUFfFKY+G89RdTn2CipkImudSWwv6ISIrkB7XVeYR+sbj/D/0fpTt1knMXBFSBiqy7ktBU3kHtMNU

In [48]:
# Custom QA prompt, filled with the image answers followed by the raw text / table contents
prompt = load_prompt(os.path.join(kit_dir, 'prompts', 'llama70b-knowledge_retriever_custom_qa_prompt.yaml'))
text_contexts = [doc.page_content for doc in context_docs]

# Join all pieces with a blank line between them. Joining a single combined list gives
# the same text as before when both lists have content, and avoids a leading or trailing
# separator when there are no image answers or no text contexts.
full_context = '\n\n'.join([*image_answers, *text_contexts])
formated_prompt = prompt.format(context=full_context, question=query)

# Final answer from the LLM
model.invoke(formated_prompt)

'Based on the provided context, I can tell you that the logo of the company is a sunny farm.'

This example workflow is consolidated in the provided [multimodal rag src module](../src/multimodal.py). For a usage example, refer to the [multimodal rag notebook](./3_multimodal_rag_usage.ipynb).