Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bootstraprag/templates/llamaindex/rag_with_vision/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
HF_TOKEN='hf_'
113 changes: 113 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_vision/image_analyser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from huggingface_hub import login
from dotenv import load_dotenv, find_dotenv
from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
from llama_index.core.schema import ImageDocument
import os
from typing import Optional, List, Union


class ImageAnalyzer:
    """
    Analyze images with a HuggingFace multi-modal (vision) LLM.

    Wraps ``HuggingFaceMultiModal`` (default model: Qwen/Qwen2-VL-2B-Instruct)
    and handles HuggingFace authentication via the ``HF_TOKEN`` environment
    variable, which is loaded from a ``.env`` file if one is present.
    """

    def __init__(self, model_name: str = "Qwen/Qwen2-VL-2B-Instruct", max_new_tokens: int = 512):
        """
        Initialize the ImageAnalyzer.

        Args:
            model_name (str): HuggingFace model id to load.
            max_new_tokens (int): Maximum number of tokens to generate per response.

        Raises:
            ValueError: If HF_TOKEN is not found in environment variables.
        """
        # Load environment variables (e.g. HF_TOKEN) from a .env file if present.
        load_dotenv(find_dotenv())

        # Authenticate before fetching model weights from the hub.
        self._login()

        # Instantiate the multi-modal model. NOTE(review): this downloads
        # weights on first use, so construction can be slow — presumably
        # acceptable for this template; confirm for interactive callers.
        self.model = HuggingFaceMultiModal.from_model_name(
            model_name,
            max_new_tokens=max_new_tokens
        )

    def _login(self) -> None:
        """
        Login to HuggingFace using the token from environment variables.

        Raises:
            ValueError: If HF_TOKEN is not found in environment variables.
        """
        token = os.environ.get('HF_TOKEN')
        if not token:
            raise ValueError("HF_TOKEN not found in environment variables")
        login(token=token)

    def analyze_image(self,
                      image_path: str,
                      prompt: str = "Understand the Image and give the detailed summary.",
                      additional_images: Optional[List[str]] = None) -> str:
        """
        Analyze an image (optionally with extra context images) using a prompt.

        Args:
            image_path (str): Path to the main image file.
            prompt (str): The prompt to use for analysis.
            additional_images (List[str], optional): Paths to additional images
                sent to the model alongside the main image.

        Returns:
            str: The analysis response text from the model.

        Raises:
            FileNotFoundError: If the main image or any additional image
                cannot be found on disk.
        """
        # Validate every path up front so we fail before touching the model.
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image not found at path: {image_path}")

        image_documents = [ImageDocument(image_path=image_path)]

        if additional_images:
            for add_image_path in additional_images:
                if not os.path.exists(add_image_path):
                    raise FileNotFoundError(f"Additional image not found at path: {add_image_path}")
                image_documents.append(ImageDocument(image_path=add_image_path))

        # Generate response (may take a while for large images/models).
        print('Started analyzing...')
        response = self.model.complete(prompt, image_documents=image_documents)

        return response.text

    def batch_analyze(self,
                      image_paths: List[str],
                      prompts: Union[str, List[str]]) -> List[Optional[str]]:
        """
        Analyze multiple images with either a single prompt or per-image prompts.

        Best-effort: a failure on one image is printed and recorded as ``None``
        so the remaining images are still processed.

        Args:
            image_paths (List[str]): Paths to the image files.
            prompts (Union[str, List[str]]): One prompt reused for every image,
                or a list of prompts matching ``image_paths`` one-to-one.

        Returns:
            List[Optional[str]]: One entry per image, in input order; ``None``
                marks an image whose analysis raised an exception.
                (Annotation fixed: the original declared ``List[str]`` but
                appends ``None`` on failure.)

        Raises:
            ValueError: If a list of prompts is given whose length does not
                match the number of images.
        """
        if isinstance(prompts, list) and len(prompts) != len(image_paths):
            raise ValueError("Number of prompts must match number of images when using multiple prompts")

        results = []
        for idx, image_path in enumerate(image_paths):
            # Pick the per-image prompt, or reuse the single shared prompt.
            current_prompt = prompts[idx] if isinstance(prompts, list) else prompts
            try:
                result = self.analyze_image(image_path, current_prompt)
                results.append(result)
            except Exception as e:
                # Deliberate best-effort: report and continue with the rest.
                print(f"Error analyzing image {image_path}: {str(e)}")
                results.append(None)

        return results
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 18 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_vision/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from image_analyser import ImageAnalyzer


def main() -> None:
    """Demonstrate ImageAnalyzer: default prompt, custom prompt, and batch use."""
    # Basic usage with the default summarization prompt.
    analyzer = ImageAnalyzer()
    result = analyzer.analyze_image("img.png")
    print(result)

    # Custom prompt.
    result = analyzer.analyze_image(
        "img.png",
        prompt="Describe the main objects in this image"
    )
    # Fix: the custom-prompt result was computed but never shown.
    print(result)

    # Batch analysis
    # image_paths = ["img1.png", "img2.png", "img3.png"]
    # results = analyzer.batch_analyze(
    #     image_paths,
    #     prompts="Analyze this image"
    # )


if __name__ == "__main__":
    # Guard so importing this module does not trigger model download/inference.
    main()
41 changes: 40 additions & 1 deletion bootstraprag/templates/llamaindex/rag_with_vision/readme.md
Original file line number Diff line number Diff line change
@@ -1 +1,40 @@
## In progress
## Image Analysis with Vision LM

A Python tool for image analysis using the Qwen2-VL-2B-Instruct vision-language model via LlamaIndex.

### Setup Project

1. Create `.env` file with your HuggingFace token:

```
HF_TOKEN=your_huggingface_token_here
```

2. Install dependencies:

```bash
pip install -r requirements.txt
```

3. Run the project:

```bash
python main.py
```

### Requirements

```
llama-index==0.11.21
llama-index-multi-modal-llms-huggingface==0.1.1
python-dotenv==1.0.1
```

### Supported Models
```text
Qwen2 Vision
Florence2
Phi3.5 Vision
PaliGemma
Mllama
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
llama-index==0.11.21
llama-index-multi-modal-llms-huggingface==0.1.1
python-dotenv==1.0.1
Empty file.
Empty file.
Empty file.
Empty file.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='bootstrap-rag',
version='0.0.11',
version='0.0.12',
long_description=long_description,
long_description_content_type="text/markdown",
packages=find_packages(),
Expand Down