# Image Feature Loader & DB-Saver
Dieses Notebook lädt Bilder, extrahiert Features und speichert sie in einer SQLite-Datenbank.

In [29]:

import os
from PIL import Image
import numpy as np
import sqlite3, pickle
from tqdm import tqdm

from features.embedding_vec import calc_embedding
from features.hash import calc_hash
from features.color_vec import calc_histogram
from loader import image_generator
from datetime import datetime

# Debug log for the processing loop: opened in append mode so earlier runs are
# kept, and as UTF-8 because the log lines contain non-ASCII status markers.
logfile = open(file="debug_log.txt", mode="a", encoding="utf-8")

In [31]:
# Database connection and in-memory buffer used when storing the feature vectors
PHOTO_FOLDER = r"C:\Users\moham\OneDrive\Dokumente\Big_Data\Test_bilder_verschachtelt"
DB = r"C:\Users\moham\OneDrive\Dokumente\Big_Data\fork_repo_local\data\database.db"

# One connection/cursor pair is shared by every insert in this cell.
conn = sqlite3.connect(DB)
cursor = conn.cursor()

# Rows waiting to be written; save_to_db_batch flushes them once enough piled up.
batch_buffer = list()

# Raise batch_size when more images are processed (currently 50, used as a test with ~150 images).
def save_to_db_batch(filename, image_path, color_hist, embedding, img_hash, resolution, file_size, batch_size=50):
    """Queue one image's features and write the queue to the database when full.

    The row is appended to the module-level ``batch_buffer``. As soon as the
    buffer holds at least ``batch_size`` rows, all of them are written to the
    ``image_features_test`` table with a single ``executemany`` call, the
    transaction is committed, a status line is printed and the buffer is
    emptied.

    Args:
        filename: Value stored in the ``filename`` column.
        image_path: Value stored in the ``path`` column.
        color_hist: Iterable of numbers; stored as comma-separated text with
            every value rounded to 6 decimal places.
        embedding: Object stored as a pickled blob in the ``embedding`` column.
        img_hash: Value stored in the ``image_hash`` column.
        resolution: Value stored in the ``resolution`` column.
        file_size: Value stored in the ``file_size`` column.
        batch_size: Number of buffered rows that triggers a write.
    """
    # The list is only mutated in place, so no ``global`` declaration is needed.
    embedding_blob = pickle.dumps(embedding)
    histogram_text = ",".join(str(round(value, 6)) for value in color_hist)

    row = (filename, image_path, histogram_text, embedding_blob, img_hash, resolution, file_size)
    batch_buffer.append(row)

    buffer_is_full = len(batch_buffer) >= batch_size
    if buffer_is_full:
        # INSERT OR REPLACE: a row that conflicts with an existing one replaces it.
        cursor.executemany("""
            INSERT OR REPLACE INTO image_features_test
            (filename, path, color_hist, embedding, image_hash, resolution, file_size)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, batch_buffer)
        conn.commit()
        print(f"✓ Batch gespeichert: {len(batch_buffer)} Einträge")
        batch_buffer.clear()


# Count the images first so tqdm can show a percentage and an ETA.
total_images = sum(1 for _ in image_generator(PHOTO_FOLDER))

# try/finally guarantees the log file is closed even when the run is
# interrupted or the final flush raises.
try:
    for filename, image_path in tqdm(image_generator(PHOTO_FOLDER), total=total_images):
        try:
            # The context manager releases the image's file handle after the
            # features are extracted; without it one handle leaks per image.
            with Image.open(image_path) as img:
                color_hist = calc_histogram(img)

                embedding = calc_embedding(img)

                img_hash = calc_hash(img)
                # Explicit check instead of `assert`, which is stripped under
                # `python -O`. The logged message stays the same.
                if not isinstance(img_hash, str):
                    raise TypeError("Hash ist KEIN String!")

                resolution = f"{img.width}x{img.height}"

            file_size = os.path.getsize(image_path)

            # NOTE: this only buffers the row; the actual commit happens when
            # the buffer is full or in the final flush below.
            save_to_db_batch(filename, image_path, color_hist, embedding, img_hash, resolution, file_size)
            logfile.write(f"{filename} ✓ Gespeichert\n")
        except Exception as e:
            # Best effort: a single broken image must not abort the whole run.
            logfile.write(f"{filename} ❌ Fehler: {e}\n")

    # Flush the remaining rows so the last, partially filled batch is not lost.
    if batch_buffer:
        cursor.executemany("""
        INSERT OR REPLACE INTO image_features_test
        (filename, path, color_hist, embedding, image_hash, resolution, file_size)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """, batch_buffer)
        conn.commit()
        print(f"✓ Letzter Batch gespeichert: {len(batch_buffer)} Einträge")

        # Timestamped log entry for the last batch, to record when saving finished.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logfile.write(f"[{timestamp}] ✓ Letzter Batch gespeichert: {len(batch_buffer)} Einträge\n")
        batch_buffer.clear()
finally:
    logfile.close()


 33%|███▎      | 52/158 [00:16<00:34,  3.08it/s]

✓ Batch gespeichert: 50 Einträge


 65%|██████▌   | 103/158 [00:29<00:06,  8.14it/s]

✓ Batch gespeichert: 50 Einträge


 96%|█████████▌| 152/158 [00:44<00:02,  2.75it/s]

✓ Batch gespeichert: 50 Einträge


100%|██████████| 158/158 [00:46<00:00,  3.37it/s]

✓ Letzter Batch gespeichert: 6 Einträge





C:\Users\moham\OneDrive\Dokumente\Big_Data\Test_bilder\matthias-mullie-VAxCHgJvZ0g-unsplash.jpg → vec[:5]: [1.8949633  0.7734968  1.456661   0.5185568  0.14204417]
C:\Users\moham\OneDrive\Dokumente\Big_Data\Test_bilder\max-bender-VmX3vmBecFE-unsplash.jpg → vec[:5]: [1.7741386  2.8704925  0.6573834  0.46344948 0.31365994]
C:\Users\moham\OneDrive\Dokumente\Big_Data\Test_bilder\max-van-den-oetelaar-uymG7UVPXpI-unsplash.jpg → vec[:5]: [0.04338714 0.8773901  0.35661784 1.8889567  1.1908364 ]
C:\Users\moham\OneDrive\Dokumente\Big_Data\Test_bilder\meric-dagli-OMr-b_O568k-unsplash.jpg → vec[:5]: [0.5777046  2.136457   0.7243774  0.40846688 0.09458669]
C:\Users\moham\OneDrive\Dokumente\Big_Data\Test_bilder\mike-benna-5Cv3surFZM8-unsplash.jpg → vec[:5]: [0.11148688 0.38410518 1.9140203  0.4428106  0.07263392]
