## Verificador

PoC para un verificador de tatuajes

---

### Load Dependencies

In [8]:
# This will help us to measure the time it took for the whole
# notebook to execute
import time
start_time = time.time()

import cv2
import os
from pathlib import Path
import re
import shutil
import numpy as np

In [9]:
# Configure file logging for this run: one log file per calendar day,
# stored under ./logs and truncated every time this cell configures logging.
from datetime import datetime
import logging

log_folder = 'logs'
# exist_ok=True replaces the separate "exists?" check, so there is no window
# between checking for the folder and creating it.
os.makedirs(log_folder, exist_ok=True)

# The date (YYYYMMDD) is part of the file name, e.g. logs/run_20240131.log
log_file = f"{log_folder}/run_{datetime.now():%Y%m%d}.log"

logging.basicConfig(
    filename=log_file,  # Use the dynamic log file name
    level=logging.DEBUG,  # Record everything from DEBUG upwards
    format='%(asctime)s - %(levelname)s - %(message)s',  # Log format
    filemode='w'  # 'w' to overwrite the log file each time, 'a' to append
)

logging.info("Process started ...")

### Load dataset

In [None]:
# Dataset layout: bounding-box files and images live in sibling folders
# below the dataset root.
base_path = Path("../datasets/BIVTatt-Dataset")
bound_box_path = base_path / "bounding_boxes"
images_path = base_path / "images"

# A base image is any file whose name starts with "<digits>_<digits>.JPG".
pattern = r'^\d+_\d+\.JPG'

total_bound_boxes = [entry.name for entry in bound_box_path.iterdir() if entry.is_file()]
all_images = [entry.name for entry in images_path.iterdir() if entry.is_file()]
base_images = [name for name in all_images if re.match(pattern, name)]

# Summary on the notebook output ...
print("Base images in data folder: ")
print("     Total of bounding boxes: ", len(total_bound_boxes))
print("     Total of images: ", len(all_images))
print("     Total of base images: ", len(base_images))

# ... and the same summary in the log file.
logging.info("Base images in data folder: ")
logging.info(f"     Total of bounding boxes:    {len(total_bound_boxes)}")
logging.info(f"     Total of images:            {len(all_images)}")
logging.info(f"     Total of base images:       {len(base_images)}")
logging.info("")
logging.info("Base images and their variants")

# For every base image, count the images whose file name starts with the
# base name minus its 4-character extension. The base image itself matches
# its own prefix, so it is included in its count.
base_image_variant_counts = {
    base_image: sum(candidate.startswith(base_image[:-4]) for candidate in all_images)
    for base_image in base_images
}

for base_image, count in base_image_variant_counts.items():
    logging.info(f"    Base image '{base_image}' has {count} variants.")

logging.info("")

Base images in data folder: 
     Total of bounding boxes:  4411
     Total of images:  4411
     Total of base images:  161


### Preprocess

#### Setup workdir

In [11]:
# Scratch folder for the preprocessed images. It is recreated from scratch
# on every run, so results from a previous run never linger.
working_path = f"{base_path}/workdir"

if os.path.exists(working_path):
    shutil.rmtree(working_path)
os.makedirs(working_path)

#### Create needed methods

In [12]:
# Target size handed to cv2.resize by resize_image.
size = (224, 224)

def load_image(image_path):
    """Read the image stored at ``image_path`` with ``cv2.imread``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Whatever ``cv2.imread`` returns for that path. OpenCV reports an
        unreadable file by returning ``None`` instead of raising, so the
        result should be validated before use.
    """
    loaded = cv2.imread(image_path)
    return loaded


def load_bbox(bbox_file):
    """Load the bounding box that belongs to an image.

    The bounding box lives in a text file that has the same path as
    ``bbox_file`` but with the extension replaced by ``.txt``. Only the first
    line is read; it must contain four whitespace-separated integers.

    Args:
        bbox_file: Path built from the image name (for example
            ``.../bounding_boxes/1_1.JPG``). Any extension is accepted.

    Returns:
        A 4-tuple of ints in the order they appear in the file.

    Raises:
        OSError: If the ``.txt`` file cannot be opened.
        ValueError: If the first line does not hold exactly four integers.
    """
    # Swap only the final extension. A plain str.replace would also rewrite
    # ".JPG"/".jpg" occurring in directory names and would leave other image
    # extensions (.jpeg, .png) untouched.
    stem, _extension = os.path.splitext(bbox_file)
    file_box = f"{stem}.txt"

    with open(file_box, 'r') as file:
        bbox = tuple(int(coord) for coord in file.readline().split())

    if len(bbox) != 4:
        raise ValueError(
            f"Expected 4 bounding box values in {file_box}, found {len(bbox)}"
        )
    return bbox


def crop_image(image, bbox):
    """Crop ``image`` to the rectangle described by ``bbox``.

    Args:
        image: Array indexed as ``image[row, column]``.
        bbox: ``(x1, y1, x2, y2)`` where ``x`` indexes columns and ``y``
            indexes rows; ``(x1, y1)`` is the inclusive start and
            ``(x2, y2)`` the exclusive end of the crop.

    Returns:
        The view ``image[y1:y2, x1:x2]`` after clamping negative
        coordinates to 0.

    Raises:
        ValueError: If the box has no area, or if it does not overlap the
            image at all.
    """
    x1, y1, x2, y2 = bbox

    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"Invalid bounding box: {bbox}")

    # NumPy treats negative slice bounds as offsets from the end of the axis,
    # which would silently select the wrong region (or nothing). Clamp them
    # to the image origin instead. Bounds past the far edge need no handling:
    # slicing already stops at the end of the axis.
    x1, y1, x2, y2 = (max(coord, 0) for coord in (x1, y1, x2, y2))

    cropped = image[y1:y2, x1:x2]
    if cropped.size == 0:
        raise ValueError(f"Bounding box {bbox} does not overlap the image")
    return cropped


def resize_image(image):
    """Resize ``image`` to the module-level ``size`` with ``cv2.resize``.

    Args:
        image: Image array accepted by ``cv2.resize``.

    Returns:
        The resized image, using OpenCV's default interpolation.
    """
    # ``size`` is only read here, so no ``global`` declaration is required.
    target_size = size
    return cv2.resize(image, target_size)


def normalize_image(image):
    """Divide every pixel by 255 and return the result as ``float32``.

    Args:
        image: Numeric array; 8-bit input (0-255) ends up in ``[0, 1]``.

    Returns:
        A new ``float32`` array with the same shape as ``image``. The input
        array is not modified.
    """
    scaled = image / 255.0
    return scaled.astype(np.float32)


def convert_to_gray(image):
    """Convert a BGR image to single-channel grayscale stored as ``uint8``.

    Args:
        image: BGR image accepted by ``cv2.cvtColor``.

    Returns:
        The grayscale image cast to ``uint8``.

    Note:
        The final cast truncates fractional values, so a float image scaled
        to ``[0, 1]`` collapses to 0/1. Pass 8-bit data to keep the full
        intensity range.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray.astype(np.uint8)


def preprocess_image(image_name):
    """Run the full preprocessing pipeline for one dataset image.

    Steps: load -> validate -> crop to the bounding box -> resize ->
    grayscale -> normalise.

    Args:
        image_name: File name of the image, relative to ``images_path``. The
            bounding box is looked up under the same name in
            ``bound_box_path``.

    Returns:
        The cropped, resized, single-channel image as ``float32`` values in
        ``[0, 1]``.

    Raises:
        ValueError: If the image is missing or empty, or the bounding box is
            invalid.
        OSError: If the bounding-box file cannot be opened.
    """
    image = load_image(f"{images_path}/{image_name}")
    # Fail fast on an unreadable image before touching the bounding-box file.
    validate_image(image)
    bbox = load_bbox(f"{bound_box_path}/{image_name}")
    image = crop_image(image, bbox)
    image = resize_image(image)
    # Grayscale conversion has to run on the 8-bit image: convert_to_gray
    # casts its result to uint8, which would truncate an already normalised
    # [0, 1] image to 0/1 and produce essentially black output.
    image = convert_to_gray(image)
    image = normalize_image(image)
    return image


def validate_image(image):
    """Ensure ``image`` is a usable, non-empty array with at least 2 dimensions.

    Args:
        image: The loaded image, or ``None`` when loading failed.

    Raises:
        ValueError: If ``image`` is ``None``, has no elements, or has fewer
            than two dimensions.
    """
    problem = None
    if image is None:
        problem = "Image does not exist"
    elif image.size == 0:
        problem = "Image is empty"
    elif len(image.shape) < 2:
        problem = "Image has incorrect dimensions"

    if problem is not None:
        raise ValueError(problem)

# Set to True once the "check the logs" notice has been printed for a run.
has_error_ocurred = False
def preprocess_dataset():
    """Preprocess every image found below ``images_path``.

    The folder is walked recursively in sorted order, so the position of an
    image in the returned list is reproducible between runs. Files are
    selected by extension (``.jpg``, ``.jpeg``, ``.png``, case-insensitive).
    Images that fail are logged and skipped; the first failure of a run also
    prints a one-time notice pointing the reader at the log file.

    Returns:
        List with one preprocessed image per successfully handled file.
    """
    global has_error_ocurred
    # Start every run with a clean flag so re-running the notebook cell
    # reports failures again instead of staying silent.
    has_error_ocurred = False

    image_suffixes = ('.jpg', '.jpeg', '.png')
    preprocessed_images = []
    for root, dirs, files in os.walk(images_path):
        dirs.sort()  # in-place sort makes os.walk descend deterministically
        for file in sorted(files):
            # Compare lower-cased names so ".JPG", ".Jpg", ".PNG", ... all
            # qualify, and require the dot so "fooJPG" does not.
            if not file.lower().endswith(image_suffixes):
                continue

            # preprocess_image resolves names relative to images_path, so
            # files in sub-folders must keep their relative folder prefix.
            image_name = os.path.relpath(os.path.join(root, file), images_path)
            try:
                preprocessed_images.append(preprocess_image(image_name))
            except Exception as e:
                # Best effort by design: one bad image must not stop the run.
                logging.error(f"Image {image_name} could not be processed.")
                logging.error(f"{e}")
                if not has_error_ocurred:
                    print("Some errors ocurred, please check the logs.")
                    print("")
                    has_error_ocurred = True

    return preprocessed_images

In [13]:
# Preprocess the whole dataset and store every result in the working folder.
preprocessed_images = preprocess_dataset()

failed_writes = 0
for idx, img in enumerate(preprocessed_images):
    output_path = os.path.join(working_path, f"preprocessed_image_{idx}.JPG")
    # cv2.imwrite signals failure through its return value instead of
    # raising, so check it; otherwise missing output files go unnoticed.
    if not cv2.imwrite(output_path, img * 255):
        failed_writes += 1
        logging.error(f"Image {output_path} could not be written.")

if failed_writes:
    print(f"{failed_writes} images could not be written, please check the logs.")

print(f"Total of preprocessed images: {len(preprocessed_images)}")
logging.info(f"Total of preprocessed images: {len(preprocessed_images)}")

Some errors ocurred, please check the logs.

Total of preprocessed images: 4038


---

## Total Time

This section reports the total execution time of the notebook.

In [14]:
# Compute the total execution time since the first cell set start_time
end_time = time.time()
execution_time = end_time - start_time

# Split into whole minutes and leftover seconds. divmod on a float yields a
# float quotient, so cast it to int to print "0 minutes" rather than
# "0.0 minutes".
minutes, seconds = divmod(execution_time, 60)
minutes = int(minutes)

print(f"Total execution time: {minutes} minutes and {seconds:.2f} seconds")
logging.info("")
logging.info(f"Total execution time: {minutes} minutes and {seconds:.2f} seconds")
logging.info("")

Total execution time: 0.0 minutes and 8.22 seconds
