In [1]:
import sqlite3
import os 

class ImageDB:
    """
    Manages interactions with the SQLite image database.
    Assumes a table named 'results' with 'filename' and 'description' columns.
    """
    def __init__(self, db_path: str = 'resultdatabase.db'):
        self.db_path = db_path
        # Ensure the database file exists and has the 'results' table
        if not os.path.exists(self.db_path):
            self.create_database_schema()

    def create_database_schema(self):
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS results (
                        filename TEXT PRIMARY KEY UNIQUE,
                        description TEXT NOT NULL
                    )
                """)
                conn.commit()
                print(f"Database '{self.db_path}' and table 'results' created.")

        except sqlite3.Error as e:
            print(f"Error creating database schema: {e}")


    def add_image_description(self, content:str):
        if not content:
            return False
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()
                cursor.executemany(" INSERT OR REPLACE INTO results (filename,description) VALUES (?,?)", content)
                conn.commit()
                print('Descriptions were added/updated')
                return True
                
        except sqlite3.Error as e:
            print(f"Error adding filename and descriptions: {e}")
            return False
            
    def retrieve_descriptions_with_query (self, sqlite_query: str)-> list[tuple[str, str]]:
        if not sqlite_query:
            return []
        try:
            with sqlite3.connect(self.db_path) as conn:
                cursor = conn.cursor()
                cursor.execute(sqlite_query)
                return cursor.fetchall()
        except sqlite3.Error as e:
            print(f"Error retrieving descriptions: {e}")
            return []
    


In [1]:
import abc
import httpx
# abstract class
class APICaller(abc.ABC):
    def __init__(self, base_url:str):
        self.base_url = base_url

        
    @abc.abstractclassmethod
    def call_api(self, prompt: str,image_b64: str , mime_type: str = None) ->str:
        pass


class OllamaAPICaller(APICaller):
    def __init__ (self, base_url= 'http://gemma:11434/api/generate', model= 'gemma3:4b'):
        super().__init__(base_url) 
        self.model = model

    def call_api(self, prompt, image_b64,mime_type: str = "image/png"  ):
        headers = {"Content-Type": "application/json"}
        payload = {
            "model": self.model,
            "prompt": prompt,
            "images" : [image_b64],
            "stream": False
        }            
        try:
            with httpx.Client() as client:
                response = client.post(self.base_url, headers=headers, json=payload, timeout=None) 
                response.raise_for_status()
                response_data = response.json()
                return response_data.get("response", "No response found from Ollama.")
        except httpx.RequestError as exc:
            return f"An error occurred while requesting {exc.request.url!r}: {exc}"
        except httpx.HTTPStatusError as exc:
            return f"Error response {exc.response.status_code} while requesting {exc.request.url!r}: {exc.response.text}"
        except Exception as e:
            return f"An unexpected error occurred: {e}" 

class GeminiAPICaller(APICaller):
    def __init__(self,  model = "gemini-2.5-flash-preview-05-20", api_key: str = None ):
        base_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
        super().__init__(base_url) 
        self.api_key = api_key
        self.model = model
        
    def call_api(self, prompt,image_b64,  mime_type = "image/png" ) -> str: 
        headers = {
            "Content-Type": "application/json",
        }
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text":prompt},
                        {"inline_data": {"mimeType": mime_type, "data": image_b64}}
                    ]
                }
            ]
        }
        try:
            with httpx.Client() as client:
                response = client.post(self.base_url, headers=headers, json=payload, timeout=100.0) 
                response.raise_for_status()
                response_data = response.json()
                if 'candidates' in response_data and response_data['candidates']:
                    first_candidate = response_data['candidates'][0]
                    if 'content' in first_candidate and 'parts' in first_candidate['content']:
                        for part in first_candidate['content']['parts']:
                            if 'text' in part:
                                return part['text']
                return "No text response found from Gemini."
        except httpx.RequestError as exc:
            return f"An error occurred while requesting {exc.request.url!r}: {exc}"
        except httpx.HTTPStatusError as exc:
            return f"Error response {exc.response.status_code} while requesting {exc.request.url!r}: {exc.response.text}"
        except Exception as e:
            return f"An unexpected error occurred: {e}"

In [2]:
import base64
import shutil
from PIL import Image
import mimetypes

class DescribingImages():
    
    def __init__ (self, api_caller :APICaller, image_db:ImageDB):
        self.api_caller = api_caller
        self.image_db = image_db

    def image_to_base64(self,image_path):
        with open(image_path, 'rb') as img_file:
            return base64.b64encode(img_file.read()).decode('utf-8')

    def resize_image(self,original_path, resized_path, max_size=(512, 512)):
        with Image.open(original_path) as img_file:
            img_file.thumbnail(max_size)
            img_file.save(resized_path)
    @staticmethod
    def get_mime_type(filename: str) -> str:
        mime_type, _ = mimetypes.guess_type(filename)
        return mime_type or "application/octet-stream"
        
    def run (self,input_folder):
        resized_folder = './resized_images'      # Folder to save resized images
        #1. clear resized folder eachtime
        # if os.path.exists(resized_folder):
        #     # Clear the contents of the directory
        #     shutil.rmtree(resized_folder)
        # # Create a clean directory
        # os.makedirs(resized_folder)
        # 2. don't clear resized folder instead of it check if the image is already exist don't caption it 
        os.makedirs(resized_folder,exist_ok = True)
        existing_resized_files = set(os.listdir(resized_folder))
        #
        results = []
    
        for filename in os.listdir(input_folder):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                if filename not in existing_resized_files:
                    try:
                        original_path = os.path.join(input_folder, filename)
                        resized_path = os.path.join(resized_folder, filename)
                        
                        self.resize_image(original_path,resized_path)
                        image_b64 = self.image_to_base64(resized_path)
                        mime_type = self.get_mime_type(filename)
                        s = time.time()
                        image_description = self.api_caller.call_api ("""You are an AI assistant specialized in image cataloging. Your task is to analyze the provided image and generate information that helps in its future retrieval from a database.
        
                                                                        Please provide:
                                                                        1.  **A concise, descriptive caption (1-2 sentences):** Summarize the main elements, actions, and setting.
                                                                        2.  **A list of 5-10 relevant keywords:** These should be specific words or short phrases a user would likely use to search for this image. Prioritize nouns, verbs, and strong adjectives directly related to the content.
                                                                        
                                                                        Format your response clearly:
                                                                        Caption: [Your concise caption here]
                                                                        Keywords: [keyword1, keyword2, keyword3, ...]""",image_b64, mime_type)
                        
                        e = time.time()
                        print(f"{filename} is captioned. total time:{e-s}")
                        results.append((filename,image_description))
                        
                    except Exception as exc: 
                        print(f"Error processing {filename}: {exc}")
                        #remove the file from resized folder
                        if os.path.exists(resized_path): os.remove(resized_path)
                        continue # Move to the next file
                else:
                    print(f"{filename} has already captioned. total time:0")
                
        try:
            status = self.image_db.add_image_description(results)
            print(f"Successfully added {len(results)} images to the database.")
        except Exception as db_exc:
            print(f"Error adding batch to database: {db_exc}")

        return results


NameError: name 'ImageDB' is not defined

In [3]:
from ipywidgets import widgets
from IPython.display import display, clear_output
import time
import pandas as pd

if __name__ == '__main__':


    
    ollama_base_url = "http://gemma:11434/api/generate"
    ollama_model_name = "gemma3:4b"
    ollama_api_caller = OllamaAPICaller(base_url=ollama_base_url, model=ollama_model_name)
    
    gemini_api_key = os.getenv("GEMINI_API_KEY")
    assert gemini_api_key is not None
    gemini_model_name = "gemini-2.5-flash-preview-05-20" 
    gemini_api_caller = GeminiAPICaller(model=gemini_model_name,api_key=gemini_api_key)
    
    image_db = ImageDB('resultdatabase.db')  
    ollama_describing_images = DescribingImages(api_caller=ollama_api_caller, image_db=image_db)
    gemini_describing_images = DescribingImages(api_caller=gemini_api_caller, image_db=image_db)


    
    # Create Jupyter widgets for interaction
    query_input = widgets.Textarea(
        value="./images",
        placeholder="Enter your image folder",
        description="Input Folder:", # Changed label for clarity
        rows=2,
        layout=widgets.Layout(width='50%', height='100px')
    )
    ollama_output_area = widgets.Output() 
    ollama_run_button = widgets.Button(description="Get Descriptions from Ollama")
    ollama_run_button.layout.width = 'auto' # auto width (default behavior)
    
    gemini_output_area = widgets.Output() 
    gemini_run_button = widgets.Button(description="Get Descriptions from Gemini") 
    gemini_run_button.layout.width = 'auto' # auto width (default behavior)

    def ollama_on_run_button_clicked(b):
        with ollama_output_area:
            clear_output(wait=True) # Clears previous output in this specific area
            image_folder_path = query_input.value

            print(f"Processing images from: {image_folder_path}")
            results = ollama_describing_images.run(image_folder_path)
            
            if results:
                print(f"Successfully processed {len(results)} images.")
                df = pd.DataFrame(image_db.retrieve_descriptions_with_query('SELECT * from results'),columns=["filename", "description"])
                pd.set_option("max_colwidth", None)
                df.head()


            else:
                print("No images processed or an error occurred.")

    def gemini_on_run_button_clicked(b):
        with gemini_output_area:
            clear_output(wait=True) # Clears previous output in this specific area
            image_folder_path = query_input.value

            print(f"Processing images from: {image_folder_path}")
            results = gemini_describing_images.run(image_folder_path)
            
            if results:
                print(f"Successfully processed {len(results)} images.")
                df = pd.DataFrame(image_db.retrieve_descriptions_with_query('SELECT * from results'),columns=["filename", "description"])
                pd.set_option("max_colwidth", None)
                df.head()


            else:
                print("No images processed or an error occurred.")


    ollama_run_button.on_click(ollama_on_run_button_clicked)
    gemini_run_button.on_click(gemini_on_run_button_clicked)

    # Display the widgets
    display(
        widgets.VBox([
            query_input,
            widgets.HBox([ollama_run_button, gemini_run_button]),
            widgets.HTML("<h2>Ollama result Output:</h2>"),
            ollama_output_area,
            widgets.HTML("<h2>Gemini result Output:</h2>"),
            gemini_output_area
        ])
    )
    

NameError: name 'os' is not defined

In [8]:
import pandas as pd
image_db = ImageDB('resultdatabase.db')  
df = pd.DataFrame(image_db.retrieve_descriptions_with_query("SELECT * from results"),columns=["filename", "description"])
pd.set_option("max_colwidth", None)
df.head()

Unnamed: 0,filename,description
0,1.png,"Caption: A perfectly roasted whole chicken, glistening with savory juices and topped with fresh green herbs, rests in a white rectangular baking dish on a wooden surface.\n\nKeywords: roasted chicken, whole chicken, baking dish, cooked, herbs, poultry, savory, meal, dinner, kitchen"
1,15.png,"Okay, here’s an analysis of the image:\n\nCaption: A lone figure walks along a sandy beach, suggesting a peaceful and solitary moment by the ocean.\n\nKeywords: sandy beach, lone figure, walking, ocean, coastal, solitude, peaceful, shore, seascape, coastal walk."


In [7]:
from IPython.core.display import HTML

HTML("""
<style>
    table.dataframe td, table.dataframe th {
        text-align: left !important;
    }
</style>
""")