In [15]:
import os
import sqlite3
import time

class ImageDB:
    """
    Manages interactions with the SQLite image database.

    The database holds one table, 'results', with two TEXT columns:
    'filename' (primary key) and 'description' (NOT NULL).

    Every method opens its own connection and closes it before returning.
    sqlite3 errors are reported with print() and turned into a falsy return
    value instead of being raised.
    """

    def __init__(self, db_path: str = 'database.db'):
        """
        Args:
            db_path: Path of the SQLite database file. It is created on
                first use if it does not exist yet.
        """
        self.db_path = db_path
        # CREATE TABLE IF NOT EXISTS is idempotent, so run it unconditionally:
        # a file that already exists (for example an empty one) can still be
        # missing the 'results' table, which would make every insert fail.
        self.create_database_schema()

    def create_database_schema(self):
        """
        Create the 'results' table if it is not there yet.

        The confirmation message is printed only when the table was actually
        created by this call. Errors are printed, not raised.
        """
        conn = None
        try:
            conn = sqlite3.connect(self.db_path)
            # `with conn` commits on success and rolls back on error; it does
            # NOT close the connection, hence the explicit close() below.
            with conn:
                table_existed = conn.execute(
                    "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'results'"
                ).fetchone() is not None
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS results (
                        filename TEXT PRIMARY KEY UNIQUE,
                        description TEXT NOT NULL
                    )
                """)
            if not table_existed:
                print(f"Database '{self.db_path}' and table 'results' created.")
        except sqlite3.Error as e:
            print(f"Error creating database schema: {e}")
        finally:
            if conn is not None:
                conn.close()

    def add_image_description(self, content: list[tuple[str, str]]) -> bool:
        """
        Insert or replace descriptions for a batch of images.

        Args:
            content: Sequence of (filename, description) pairs. An existing
                row with the same filename is replaced.

        Returns:
            True when the batch was written, False when `content` is empty
            or a sqlite3 error occurred (the error is printed).
        """
        if not content:
            return False
        conn = None
        try:
            conn = sqlite3.connect(self.db_path)
            with conn:  # single transaction: either every row lands or none
                conn.executemany(
                    " INSERT OR REPLACE INTO results (filename,description) VALUES (?,?)",
                    content,
                )
            print('Descriptions were added/updated')
            return True
        except sqlite3.Error as e:
            print(f"Error adding filename and descriptions: {e}")
            return False
        finally:
            if conn is not None:
                conn.close()

    def retrieve_descriptions_with_query(self, sqlite_query: str, params=()) -> list[tuple[str, str]]:
        """
        Run a SQL statement against the database and return all rows.

        Args:
            sqlite_query: SQL text. It is executed as given, so never build
                it by concatenating untrusted input; use `?` placeholders
                together with `params` instead.
            params: Optional values bound to the placeholders in
                `sqlite_query` (a sequence for `?`, a dict for named ones).

        Returns:
            The fetched rows, or an empty list when the query is empty or a
            sqlite3 error occurred (the error is printed).
        """
        if not sqlite_query:
            return []
        conn = None
        try:
            conn = sqlite3.connect(self.db_path)
            with conn:
                return conn.execute(sqlite_query, params).fetchall()
        except sqlite3.Error as e:
            print(f"Error retrieving descriptions: {e}")
            return []
        finally:
            if conn is not None:
                conn.close()
    


In [16]:
import abc
import httpx
# abstract class
class APICaller(abc.ABC):
    """
    Abstract base for clients that send a prompt (and optionally an image)
    to a text-generation endpoint.

    Subclasses must implement `call_api`; the base class cannot be
    instantiated directly.
    """

    def __init__(self, base_url: str):
        """
        Args:
            base_url: URL of the endpoint the subclass will call.
        """
        self.base_url = base_url

    # abstractmethod (not the deprecated abstractclassmethod): call_api is an
    # instance method that receives `self`, as the concrete subclass shows.
    @abc.abstractmethod
    def call_api(self, prompt: str, image_b64: str = None) -> str:
        """
        Send `prompt` to the endpoint and return the generated text.

        Args:
            prompt: Instruction text for the model.
            image_b64: Optional base64-encoded image; None for a text-only
                request.

        Returns:
            The text produced by the endpoint.
        """
        pass


class OllamaAPICaller(APICaller):
    """
    APICaller implementation that posts to an Ollama generate endpoint.

    Failures are not raised: `call_api` returns a human-readable error string
    instead, so callers that need to tell a description from a failure must
    inspect the returned text.
    """

    def __init__ (self, base_url= 'http://gemma:11434/api/generate', model= 'gemma3:4b', timeout=None):
        """
        Args:
            base_url: URL the request is posted to.
            model: Model name sent in the request payload.
            timeout: Passed to httpx for each request. The default None keeps
                the original behaviour of waiting indefinitely; pass a number
                of seconds to bound the wait.
        """
        super().__init__(base_url)
        self.model = model
        self.timeout = timeout

    def call_api(self, prompt, image_b64=None):
        """
        Send one non-streaming generation request.

        Args:
            prompt: Instruction text for the model.
            image_b64: Optional base64-encoded image. Defaults to None so the
                signature matches the abstract `APICaller.call_api`.

        Returns:
            The "response" field of the JSON reply, a fallback message when
            that field is absent, or an error description if the request
            failed.
        """
        headers = {"Content-Type": "application/json"}
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False
        }
        if image_b64:
            # The Ollama API expects a list of base64 strings for the 'images' field
            payload["images"] = [image_b64]

        try:
            with httpx.Client() as client:
                response = client.post(self.base_url, headers=headers, json=payload, timeout=self.timeout)
                response.raise_for_status()
                response_data = response.json()
                return response_data.get("response", "No response found from Ollama.")
        except httpx.RequestError as exc:
            # Transport-level problems (connection refused, DNS, timeout, ...).
            return f"An error occurred while requesting {exc.request.url!r}: {exc}"
        except httpx.HTTPStatusError as exc:
            # Raised by raise_for_status() for 4xx/5xx replies.
            return f"Error response {exc.response.status_code} while requesting {exc.request.url!r}: {exc.response.text}"
        except Exception as e:
            # Anything else, e.g. a reply body that is not valid JSON.
            return f"An unexpected error occurred: {e}"

In [18]:
import base64
import shutil
from PIL import Image

class DescribingImages():
    """
    Captions every image in a folder and stores the captions in the database.

    For each image the pipeline is: write a downsized copy, base64-encode
    that copy, send it with CAPTION_PROMPT to the API caller, and collect the
    reply. All collected (filename, description) pairs are written to the
    database in one batch at the end.
    """

    # File extensions (lower-case) that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    # Prompt sent with every image. The continuation lines keep their leading
    # whitespace so the text sent to the model is unchanged.
    CAPTION_PROMPT = """You are an AI assistant specialized in image cataloging. Your task is to analyze the provided image and generate information that helps in its future retrieval from a database.

                                                                Please provide:
                                                                1.  **A concise, descriptive caption (1-2 sentences):** Summarize the main elements, actions, and setting.
                                                                2.  **A list of 5-10 relevant keywords:** These should be specific words or short phrases a user would likely use to search for this image. Prioritize nouns, verbs, and strong adjectives directly related to the content.
                                                                
                                                                Format your response clearly:
                                                                Caption: [Your concise caption here]
                                                                Keywords: [keyword1, keyword2, keyword3, ...]"""

    def __init__ (self, api_caller: "APICaller", image_db: "ImageDB"):
        """
        Args:
            api_caller: Object whose call_api(prompt, image_b64) returns the
                description text.
            image_db: Object whose add_image_description(pairs) stores the
                results.
        """
        self.api_caller = api_caller
        self.image_db = image_db

    def image_to_base64(self, image_path):
        """Return the content of `image_path` as a base64 text string."""
        with open(image_path, 'rb') as img_file:
            return base64.b64encode(img_file.read()).decode('utf-8')

    def resize_image(self, original_path, resized_path, max_size=(512, 512)):
        """
        Save a copy of `original_path` at `resized_path`, shrunk in place
        with PIL's thumbnail() so it fits within `max_size`.
        """
        with Image.open(original_path) as img_file:
            img_file.thumbnail(max_size)
            img_file.save(resized_path)

    def run (self, input_folder, resized_folder='./resized_images'):
        """
        Caption all images in `input_folder` and store the results.

        Args:
            input_folder: Directory to scan (not recursive) for files whose
                name ends in one of IMAGE_EXTENSIONS.
            resized_folder: Scratch directory for the downsized copies.
                WARNING: it is deleted and recreated on every run.

        Returns:
            List of (filename, description) pairs, in file-name order. Empty
            when the input folder is missing, unsafe to use, or holds no
            readable images. Files that cannot be resized or read are
            reported and skipped.
        """
        if not os.path.isdir(input_folder):
            print(f"Input folder not found: {input_folder}")
            return []

        # resized_folder is wiped below; refuse to run if that would delete
        # the input images themselves.
        input_real = os.path.realpath(input_folder)
        resized_real = os.path.realpath(resized_folder)
        if input_real == resized_real or input_real.startswith(resized_real + os.sep):
            print(f"Refusing to run: '{resized_folder}' is cleared on every run and contains the input folder.")
            return []

        if os.path.exists(resized_folder):
            # Clear the contents of the directory
            shutil.rmtree(resized_folder)
        # Create a clean directory
        os.makedirs(resized_folder)

        results = []

        # sorted() makes the processing order deterministic across runs.
        for filename in sorted(os.listdir(input_folder)):
            if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
                continue

            original_path = os.path.join(input_folder, filename)
            resized_path = os.path.join(resized_folder, filename)

            try:
                self.resize_image(original_path, resized_path)
                image_b64 = self.image_to_base64(resized_path)
            except (OSError, ValueError) as exc:
                # One unreadable file must not discard the whole batch.
                print(f"Skipping {filename}: {exc}")
                continue

            started = time.perf_counter()
            image_description = self.api_caller.call_api(self.CAPTION_PROMPT, image_b64)
            elapsed = time.perf_counter() - started
            print(f"{filename} is captioned. total time:{elapsed}")
            results.append((filename, image_description))

        if results:
            # add_image_description reports its own errors; the captions are
            # returned to the caller either way.
            self.image_db.add_image_description(results)
        return results


In [20]:
from ipywidgets import widgets
from IPython.display import display, clear_output
import time

if __name__ == '__main__':
    # Wire the pipeline together: API client -> database -> captioning job.
    ollama_base_url = "http://gemma:11434/api/generate"
    ollama_model_name = "gemma3:4b"
    ollama_api_caller = OllamaAPICaller(base_url=ollama_base_url, model=ollama_model_name)
    image_db = ImageDB('resultdatabase.db')
    describing_images = DescribingImages(api_caller=ollama_api_caller, image_db=image_db)

    # Create Jupyter widgets for interaction
    query_input = widgets.Textarea(
        value="./images",
        placeholder="Enter your image folder",
        description="Input Folder:",
        rows=2,
        layout=widgets.Layout(width='auto')
    )
    output_area = widgets.Output()  # everything the handler prints lands here
    run_button = widgets.Button(description="Get Descriptions")

    def on_run_button_clicked(b):
        """Caption the images in the folder typed into the text area."""
        with output_area:
            clear_output(wait=True)  # Clears previous output in this specific area
            # A Textarea accepts newlines; strip them so a stray Enter does
            # not turn a valid folder name into a non-existent path.
            image_folder_path = query_input.value.strip()
            if not image_folder_path:
                print("Please enter an image folder.")
                return

            print(f"Processing images from: {image_folder_path}")
            # Captioning can take a long time; block repeated clicks meanwhile.
            run_button.disabled = True
            try:
                results = describing_images.run(image_folder_path)
            except Exception as exc:
                # UI boundary: show the failure in the output area instead of
                # losing it in the kernel log.
                print(f"Error while processing images: {exc}")
                results = []
            finally:
                run_button.disabled = False

            if results:
                print(f"Successfully processed {len(results)} images.")
            else:
                print("No images processed or an error occurred.")

    run_button.on_click(on_run_button_clicked)

    # Display the widgets
    display(
        widgets.VBox([
            query_input,
            run_button,
            widgets.HTML("<h2> Output:</h2>"),
            output_area
        ])
    )
    

Database 'resultdatabase3.db' and table 'results' created.


VBox(children=(Textarea(value='./images', description='Input Folder:', layout=Layout(width='auto'), placeholde…

In [13]:
# Ad-hoc inspection: open the results database and show every stored row.
# This rebinds the notebook-level name `image_db` to a new ImageDB object
# for the same file path used by the widget cell.
# NOTE(review): 'SELECT *' returns the whole table as the cell output; on a
# large table consider adding a LIMIT before sharing the notebook.
image_db = ImageDB('resultdatabase.db')  
image_db.retrieve_descriptions_with_query('SELECT * from results')

[('pexels-karolina-grabowska-4959907.jpg',
  "Here's a description of the photo in two sentences:\n\nThe photo shows a white receipt being held up by two hands. The background is a plain white, and the focus is clearly on the receipt itself."),
 ('pexels-andre-furtado-43594-1417255.jpg',
  "Here's a description of the photo in two sentences:\n\nA young couple is nestled together on a bench, gazing out at a stunning mountain vista with a serene lake below. The warm embrace suggests a moment of quiet connection amidst a breathtaking natural landscape."),
 ('2.jpg',
  "Here's a description of the photo in two sentences:\n\nThe image showcases a vibrant and artfully arranged avocado toast, topped with two perfectly cooked hard-boiled eggs, presented on a dark, textured plate. The composition includes a small block of ham and a vintage-style lantern, creating a rustic and inviting aesthetic."),
 ('pexels-alexander-mass-748453803-32195341.jpg',
  'Here’s a description of the photo in two sen