# Chat with Graphic PDFs

### Setup and Imports

In [1]:
import torch
from byaldi import RAGMultiModalModel
from pdf2image import convert_from_path
from transformers import LlavaForConditionalGeneration, AutoProcessor

  from .autonotebook import tqdm as notebook_tqdm


### Upload the PDF

In [2]:
# Location of the source PDF (a relative path, so it is resolved against
# the notebook's current working directory).
pdf_path = "../content/kids.pdf"

# Rasterize the PDF with pdf2image; `images` is the list of PIL images that
# later cells index into.
images = convert_from_path(pdf_path=pdf_path)

In [3]:
# Bare expression as the last statement of the cell: the notebook displays
# the list produced by convert_from_path so it can be inspected.
images

[<PIL.PpmImagePlugin.PpmImageFile image mode=RGB size=1667x4167>]

### Load the ColPali Model

In [None]:
# Name of the pretrained ColPali checkpoint handed to byaldi.
colpali_checkpoint = "vidore/colpali"

# `RAG` is the retriever object used by the indexing and search cells below.
RAG = RAGMultiModalModel.from_pretrained(colpali_checkpoint)

### Index the Document

In [None]:
# Index the very same file that was converted into `images` (via `pdf_path`)
# instead of a second hard-coded path. Search results are later mapped back
# onto `images` through their page number, so both must come from one PDF.
RAG.index(
    input_path=pdf_path,
    index_name="image_index",
    # NOTE(review): presumably False keeps the page data out of the stored
    # index; the pages are already held in `images` - confirm against byaldi.
    store_collection_with_index=False,
    # Re-running the cell replaces any existing index with this name.
    overwrite=True,
)

### Query the Document

In [None]:
# Natural-language question sent to the retriever.
text_query = "What is needed for Healthy diet?"

# Request a single hit (k=1) from the index.
results = RAG.search(
    text_query,
    k=1,
)

# Trailing bare name so the notebook shows the search hits.
results

### Retrieved Result

In [None]:
# Take the first search hit and show the matching page image. The -1 maps
# the hit's `page_num` onto the 0-based `images` list (presumably page_num
# is 1-based - confirm against byaldi's result format).
top_hit = results[0]
images[top_hit["page_num"] - 1]

### Load the LLaVA Model

In [None]:
# One checkpoint name feeds both loaders so the model and its processor
# always come from the same release.
checkpoint = "Intel/llava-gemma-2b"

# The processor prepares the text and image inputs for the model.
processor = AutoProcessor.from_pretrained(checkpoint)

# The LLaVA model that produces the answer.
model = LlavaForConditionalGeneration.from_pretrained(checkpoint)

### Preprocess the Inputs

In [None]:
# Select the page image belonging to the first search hit; the -1 converts
# the hit's `page_num` into an index for the 0-based `images` list.
image_index = results[0]["page_num"] - 1
image = images[image_index]

# Single-turn conversation. The "<image>" marker is presumably where the
# processor places the picture within the prompt - confirm on the model card.
messages = [{'role': 'user', 'content': "<image>\nWhat is written about Healthy Diet?"}]

# Render the conversation to a prompt string (tokenize=False) that ends
# with the generation prompt, so the model continues with its reply.
prompt = processor.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Combine the text prompt and the page image into PyTorch tensors ("pt").
inputs = processor(text=prompt, images=image, return_tensors="pt")

### Generate a response

In [None]:
# Bound the reply with `max_new_tokens` rather than `max_length`.
# `max_length` counts the prompt tokens as well, so a long prompt (e.g. one
# containing many image placeholder tokens) would shrink the reply budget or
# make generation fail outright. `max_new_tokens` always leaves room for up
# to 200 generated tokens.
generate_ids = model.generate(**inputs, max_new_tokens=200)

# Decode the first (and only) sequence of the batch back to text, dropping
# special tokens and leaving tokenization spaces untouched.
output = processor.batch_decode(
    generate_ids,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False,
)[0]
print(output)