In [9]:
import os
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
from PIL import Image, ExifTags
import uuid

In [10]:
def find_image_files(root_dir, extensions=('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif')):
    image_files = []
    for subdir, dir, files in os.walk(root_dir):
        for file in files:
            if file.lower().endswith(extensions):
                image_files.append(os.path.join(subdir, file))
    return image_files

# Verwenden der Funktion, um Bildpfade aus dem 'data'-Verzeichnis zu sammeln
image_paths = find_image_files(os.getcwd() + '/resources/small_data_batch')

# Anzeigen der Anzahl der gefundenen Bilder und der ersten 10 Bildpfade zur Überprüfung
print(f"Anzahl gefundener Bilder: {len(image_paths)}")
if len(image_paths) > 10:
    print("Einige der gefundenen Bildpfade:", image_paths[:10])
else:
    print("Gefundene Bildpfade:", image_paths)


Anzahl gefundener Bilder: 2056
Einige der gefundenen Bildpfade: ['/Users/test/Documents/Uni/4.Semester/Big_Data/image_recommender/resources/small_data_batch/image_data/DIV2k/DIV2K_train_HR/DIV2K_train_HR/0298.png', '/Users/test/Documents/Uni/4.Semester/Big_Data/image_recommender/resources/small_data_batch/image_data/DIV2k/DIV2K_train_HR/DIV2K_train_HR/0267.png', '/Users/test/Documents/Uni/4.Semester/Big_Data/image_recommender/resources/small_data_batch/image_data/DIV2k/DIV2K_train_HR/DIV2K_train_HR/0501.png', '/Users/test/Documents/Uni/4.Semester/Big_Data/image_recommender/resources/small_data_batch/image_data/DIV2k/DIV2K_train_HR/DIV2K_train_HR/0515.png', '/Users/test/Documents/Uni/4.Semester/Big_Data/image_recommender/resources/small_data_batch/image_data/DIV2k/DIV2K_train_HR/DIV2K_train_HR/0273.png', '/Users/test/Documents/Uni/4.Semester/Big_Data/image_recommender/resources/small_data_batch/image_data/DIV2k/DIV2K_train_HR/DIV2K_train_HR/0529.png', '/Users/test/Documents/Uni/4.Semest

In [11]:
def extract_image_details(image_path):
    with Image.open(image_path) as img:
        img = img.convert('RGB')  # Konvertierung zu RGB
        
        # Farbwerte und Helligkeit
        pixels = list(img.getdata())
        avg_color = tuple(sum(col) // len(pixels) for col in zip(*pixels))
        avg_brightness = sum(sum(pixel) for pixel in pixels) // (3 * len(pixels))
        
        # Auflösung und DPI
        resolution = img.size  # (width, height)
        dpi = img.info.get('dpi', (0, 0)) 
        
        # Metadaten extrahieren
        try:
            exif_data = img._getexif()
            metadata = {ExifTags.TAGS[k]: v for k, v in exif_data.items() if k in ExifTags.TAGS} if exif_data else {}
        except AttributeError:
            metadata = {}

        # Dateidetails
        file_size = os.path.getsize(image_path)
        file_type = os.path.splitext(image_path)[1]
        
        # UUID
        unique_id = str(uuid.uuid4())
    
    return {
        'ID': unique_id,
        'File_Path': image_path,
        'Average_Color': avg_color,
        'Brightness': avg_brightness,
        'Resolution': resolution,
        'DPI': dpi,
        'File_Size': file_size,
        'File_Type': file_type,
        'Metadata': metadata
    }

In [12]:
def image_batch_generator(image_files, batch_size=5):
    total_batches = (len(image_files) + batch_size - 1) // batch_size
    progress_bar = tqdm(total=total_batches, desc="Processing images")
    
    for index in range(0, len(image_files), batch_size):
        batch = image_files[index:index + batch_size]
        details_list = [extract_image_details(image_path) for image_path in batch]
        df = pd.DataFrame(details_list)
        yield df
        progress_bar.update(1)
    
    progress_bar.close() 

In [13]:

for image_data in image_batch_generator(image_paths):
    print(image_data) 
# Hier SQLite Logik

Processing images:   0%|          | 0/412 [00:00<?, ?it/s]

KeyboardInterrupt: 