diff --git a/bootstraprag/templates/llamaindex/rag_with_vision/.env b/bootstraprag/templates/llamaindex/rag_with_vision/.env new file mode 100644 index 0000000..5663d97 --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_vision/.env @@ -0,0 +1 @@ +HF_TOKEN='hf_' \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_vision/image_analyser.py b/bootstraprag/templates/llamaindex/rag_with_vision/image_analyser.py new file mode 100644 index 0000000..073b0f5 --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_vision/image_analyser.py @@ -0,0 +1,113 @@ +from huggingface_hub import login +from dotenv import load_dotenv, find_dotenv +from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal +from llama_index.core.schema import ImageDocument +import os +from typing import Optional, List, Union + + +class ImageAnalyzer: + """ + A class to analyze images using the Qwen2-VL-2B-Instruct model from HuggingFace. + """ + + def __init__(self, model_name: str = "Qwen/Qwen2-VL-2B-Instruct", max_new_tokens: int = 512): + """ + Initialize the ImageAnalyzer. + + Args: + model_name (str): The name of the HuggingFace model to use + max_new_tokens (int): Maximum number of tokens to generate + """ + # Load environment variables + load_dotenv(find_dotenv()) + + # Login to HuggingFace + self._login() + + # Initialize the model + self.model = HuggingFaceMultiModal.from_model_name( + model_name, + max_new_tokens=max_new_tokens + ) + + def _login(self) -> None: + """ + Login to HuggingFace using the token from environment variables. 
+ + Raises: + ValueError: If HF_TOKEN is not found in environment variables + """ + token = os.environ.get('HF_TOKEN') + if not token: + raise ValueError("HF_TOKEN not found in environment variables") + login(token=token) + + def analyze_image(self, + image_path: str, + prompt: str = "Understand the Image and give the detailed summary.", + additional_images: Optional[List[str]] = None) -> str: + """ + Analyze an image or multiple images with a given prompt. + + Args: + image_path (str): Path to the main image file + prompt (str): The prompt to use for analysis + additional_images (List[str], optional): List of paths to additional images + + Returns: + str: The analysis response from the model + + Raises: + FileNotFoundError: If the image file(s) cannot be found + """ + # Validate main image path + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image not found at path: {image_path}") + + # Create list of image documents + image_documents = [ImageDocument(image_path=image_path)] + + # Add additional images if provided + if additional_images: + for add_image_path in additional_images: + if not os.path.exists(add_image_path): + raise FileNotFoundError(f"Additional image not found at path: {add_image_path}") + image_documents.append(ImageDocument(image_path=add_image_path)) + + # Generate response + print('Started analyzing...') + response = self.model.complete(prompt, image_documents=image_documents) + + return response.text + + def batch_analyze(self, + image_paths: List[str], + prompts: Union[str, List[str]]) -> List[str]: + """ + Analyze multiple images with either a single prompt or multiple prompts. 
+ + Args: + image_paths (List[str]): List of paths to image files + prompts (Union[str, List[str]]): Single prompt or list of prompts matching images + + Returns: + List[str]: List of analysis responses + + Raises: + ValueError: If number of prompts doesn't match number of images when using multiple prompts + """ + if isinstance(prompts, list) and len(prompts) != len(image_paths): + raise ValueError("Number of prompts must match number of images when using multiple prompts") + + results = [] + for idx, image_path in enumerate(image_paths): + current_prompt = prompts[idx] if isinstance(prompts, list) else prompts + try: + result = self.analyze_image(image_path, current_prompt) + results.append(result) + except Exception as e: + print(f"Error analyzing image {image_path}: {str(e)}") + results.append(None) + + return results diff --git a/bootstraprag/templates/llamaindex/rag_with_vision/img.png b/bootstraprag/templates/llamaindex/rag_with_vision/img.png new file mode 100644 index 0000000..ce19090 Binary files /dev/null and b/bootstraprag/templates/llamaindex/rag_with_vision/img.png differ diff --git a/bootstraprag/templates/llamaindex/rag_with_vision/main.py b/bootstraprag/templates/llamaindex/rag_with_vision/main.py new file mode 100644 index 0000000..caece3b --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_vision/main.py @@ -0,0 +1,18 @@ +from image_analyser import ImageAnalyzer +# Basic usage +analyzer = ImageAnalyzer() +result = analyzer.analyze_image("img.png") +print(result) + +# Custom prompt +result = analyzer.analyze_image( + "img.png", + prompt="Describe the main objects in this image" +) + +# Batch analysis +# image_paths = ["img1.png", "img2.png", "img3.png"] +# results = analyzer.batch_analyze( +# image_paths, +# prompts="Analyze this image" +# ) \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_vision/readme.md b/bootstraprag/templates/llamaindex/rag_with_vision/readme.md index 1897d0e..87660aa 100644 --- 
a/bootstraprag/templates/llamaindex/rag_with_vision/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_vision/readme.md @@ -1 +1,41 @@ -## In progress \ No newline at end of file +## Image Analysis with Vision LM + +A Python tool for image analysis using Qwen2-VL-2B-Instruct model via Llama-Index. + +### Setup Project + +1. Create `.env` file with your HuggingFace token: + +``` +HF_TOKEN=your_huggingface_token_here +``` + +2. Install dependencies: + +```bash +pip install -r requirements.txt +``` + +3. Run the project: + +```bash +python main.py +``` + +### Requirements + +``` +huggingface_hub +python-dotenv +llama-index +llama-index-multi-modal-llms-huggingface +``` + +### Supported Models +```text +Qwen2 Vision +Florence2 +Phi3.5 Vision +PaliGemma +Mllama +``` \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_vision/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_vision/requirements.txt new file mode 100644 index 0000000..b3e7433 --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_vision/requirements.txt @@ -0,0 +1,3 @@ +llama-index==0.11.21 +llama-index-multi-modal-llms-huggingface==0.1.1 +python-dotenv==1.0.1 \ No newline at end of file diff --git a/bootstraprag/templates/qdrant/semantic_cache/__init__.py b/bootstraprag/templates/qdrant/semantic_cache/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/qdrant/semantic_cache/readme.md b/bootstraprag/templates/qdrant/semantic_cache/readme.md new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/qdrant/semantic_routing/__init__.py b/bootstraprag/templates/qdrant/semantic_routing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/qdrant/semantic_routing/readme.md b/bootstraprag/templates/qdrant/semantic_routing/readme.md new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py index 37861bd..6183719 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup(
name='bootstrap-rag', - version='0.0.11', + version='0.0.12', long_description=long_description, long_description_content_type="text/markdown", packages=find_packages(),