Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bootstraprag/templates/llamaindex/rag_with_vision/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
HF_TOKEN='hf_'
113 changes: 113 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_vision/image_analyser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from huggingface_hub import login
from dotenv import load_dotenv, find_dotenv
from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
from llama_index.core.schema import ImageDocument
import os
from typing import Optional, List, Union


class ImageAnalyzer:
    """
    Analyze images with a HuggingFace multi-modal (vision) LLM.

    Wraps ``HuggingFaceMultiModal`` (default model: Qwen/Qwen2-VL-2B-Instruct)
    and handles HuggingFace authentication via the ``HF_TOKEN`` environment
    variable, which is loaded from a ``.env`` file if one is present.
    """

    def __init__(self, model_name: str = "Qwen/Qwen2-VL-2B-Instruct", max_new_tokens: int = 512):
        """
        Initialize the ImageAnalyzer.

        Args:
            model_name (str): HuggingFace model id to load.
            max_new_tokens (int): Maximum number of tokens to generate per response.

        Raises:
            ValueError: If HF_TOKEN is not found in environment variables.
        """
        # Load environment variables (e.g. HF_TOKEN) from a .env file if present.
        load_dotenv(find_dotenv())

        # Authenticate before fetching model weights from the hub.
        self._login()

        # Instantiate the multi-modal model. NOTE(review): this downloads
        # weights on first use, so construction can be slow — presumably
        # acceptable for this template; confirm for interactive callers.
        self.model = HuggingFaceMultiModal.from_model_name(
            model_name,
            max_new_tokens=max_new_tokens
        )

    def _login(self) -> None:
        """
        Login to HuggingFace using the token from environment variables.

        Raises:
            ValueError: If HF_TOKEN is not found in environment variables.
        """
        token = os.environ.get('HF_TOKEN')
        if not token:
            raise ValueError("HF_TOKEN not found in environment variables")
        login(token=token)

    def analyze_image(self,
                      image_path: str,
                      prompt: str = "Understand the Image and give the detailed summary.",
                      additional_images: Optional[List[str]] = None) -> str:
        """
        Analyze an image (optionally with extra context images) using a prompt.

        Args:
            image_path (str): Path to the main image file.
            prompt (str): The prompt to use for analysis.
            additional_images (List[str], optional): Paths to additional images
                sent to the model alongside the main image.

        Returns:
            str: The analysis response text from the model.

        Raises:
            FileNotFoundError: If the main image or any additional image
                cannot be found on disk.
        """
        # Validate every path up front so we fail before touching the model.
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image not found at path: {image_path}")

        image_documents = [ImageDocument(image_path=image_path)]

        if additional_images:
            for add_image_path in additional_images:
                if not os.path.exists(add_image_path):
                    raise FileNotFoundError(f"Additional image not found at path: {add_image_path}")
                image_documents.append(ImageDocument(image_path=add_image_path))

        # Generate response (may take a while for large images/models).
        print('Started analyzing...')
        response = self.model.complete(prompt, image_documents=image_documents)

        return response.text

    def batch_analyze(self,
                      image_paths: List[str],
                      prompts: Union[str, List[str]]) -> List[Optional[str]]:
        """
        Analyze multiple images with either a single prompt or per-image prompts.

        Best-effort: a failure on one image is printed and recorded as ``None``
        so the remaining images are still processed.

        Args:
            image_paths (List[str]): Paths to the image files.
            prompts (Union[str, List[str]]): One prompt reused for every image,
                or a list of prompts matching ``image_paths`` one-to-one.

        Returns:
            List[Optional[str]]: One entry per image, in input order; ``None``
                marks an image whose analysis raised an exception.
                (Annotation fixed: the original declared ``List[str]`` but
                appends ``None`` on failure.)

        Raises:
            ValueError: If a list of prompts is given whose length does not
                match the number of images.
        """
        if isinstance(prompts, list) and len(prompts) != len(image_paths):
            raise ValueError("Number of prompts must match number of images when using multiple prompts")

        results = []
        for idx, image_path in enumerate(image_paths):
            # Pick the per-image prompt, or reuse the single shared prompt.
            current_prompt = prompts[idx] if isinstance(prompts, list) else prompts
            try:
                result = self.analyze_image(image_path, current_prompt)
                results.append(result)
            except Exception as e:
                # Deliberate best-effort: report and continue with the rest.
                print(f"Error analyzing image {image_path}: {str(e)}")
                results.append(None)

        return results
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 18 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_vision/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from image_analyser import ImageAnalyzer


def main() -> None:
    """Demonstrate ImageAnalyzer: default prompt, custom prompt, and batch use."""
    # Basic usage with the default summarization prompt.
    analyzer = ImageAnalyzer()
    result = analyzer.analyze_image("img.png")
    print(result)

    # Custom prompt.
    result = analyzer.analyze_image(
        "img.png",
        prompt="Describe the main objects in this image"
    )
    # Fix: the custom-prompt result was computed but never shown.
    print(result)

    # Batch analysis
    # image_paths = ["img1.png", "img2.png", "img3.png"]
    # results = analyzer.batch_analyze(
    #     image_paths,
    #     prompts="Analyze this image"
    # )


if __name__ == "__main__":
    # Guard so importing this module does not trigger model download/inference.
    main()
41 changes: 40 additions & 1 deletion bootstraprag/templates/llamaindex/rag_with_vision/readme.md
Original file line number Diff line number Diff line change
@@ -1 +1,40 @@
## In progress
## Image Analysis with Vision LM

A Python tool for image analysis using the Qwen2-VL-2B-Instruct vision-language model via LlamaIndex.

### Setup Project

1. Create `.env` file with your HuggingFace token:

```
HF_TOKEN=your_huggingface_token_here
```

2. Install dependencies:

```bash
pip install -r requirements.txt
```

3. Run the project:

```bash
python main.py
```

### Requirements

```
llama-index==0.11.21
llama-index-multi-modal-llms-huggingface==0.1.1
python-dotenv==1.0.1
```

### Supported Models
```text
Qwen2 Vision
Florence2
Phi3.5 Vision
PaliGemma
Mllama
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
llama-index==0.11.21
llama-index-multi-modal-llms-huggingface==0.1.1
python-dotenv==1.0.1
Empty file.
Empty file.
Empty file.
Empty file.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='bootstrap-rag',
version='0.0.11',
version='0.0.12',
long_description=long_description,
long_description_content_type="text/markdown",
packages=find_packages(),
Expand Down