# Imports

In [9]:
from natsort import natsorted
from PIL import Image
import numpy as np
import urllib.request
import tarfile
import urllib
import pickle
import cv2
import os

# Functions

In [10]:
def download_and_extract_oxford_pet_dataset(destination_folder):
    """
    Download and extract the Oxford-IIIT Pet Dataset (images and annotations).

    Each archive is downloaded into ``destination_folder``, extracted there and
    then the downloaded ``.tar.gz`` file is deleted. Progress is printed to
    stdout.

    :param destination_folder: Path to the directory where the files will be saved and extracted.
    """

    urls = {
        "images": "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz",
        "annotations": "https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz",
    }

    os.makedirs(destination_folder, exist_ok=True)  # create the destination folder if it is missing

    for key, url in urls.items():
        print(f"Baixando {key}...")
        # The archive keeps the file name it has on the server.
        file_name = os.path.join(destination_folder, url.split("/")[-1])

        urllib.request.urlretrieve(url, file_name)
        print(f"Download concluído: {file_name}")

        print(f'Extraindo {key}...')
        with tarfile.open(file_name, "r:gz") as tar:
            # The archive comes from the network, so prefer the "data"
            # extraction filter, which rejects members that would be written
            # outside the destination. `tarfile.data_filter` only exists on
            # interpreters that support the `filter` argument.
            if hasattr(tarfile, "data_filter"):
                tar.extractall(path=destination_folder, filter="data")
            else:
                tar.extractall(path=destination_folder)
        print(f"Extração concluída: {file_name}")

        # The compressed archive is no longer needed once it has been extracted.
        os.remove(file_name)
        print(f"Arquivo compactado {file_name} removido.")


def load_and_resize_images(folder_path, target_size=(256,256), mode=None):
    """
    Load every image in a folder, resize it to a fixed size and stack the results.

    Files are visited in natural sort order. A file that cannot be opened or
    processed is reported and skipped. Any resulting array whose shape differs
    from the expected one (for example an image that does not end up with
    exactly three channels in ``mode=0``) is reported and dropped, so positions
    in the returned array do not necessarily match positions in the sorted file
    list.

    :param folder_path: Path to the folder containing the images.
    :param target_size: Fixed size (width, height) to resize images.
    :param mode: 0 if images, 1 if trimaps
    :return: NumPy array with all images resized, shaped (N, height, width, 3)
             for mode 0 and (N, height, width) for mode 1.
    :raises ValueError: If ``mode`` is neither 0 nor 1, or if no image with the
                        expected shape could be loaded.
    """

    # Fail early with a clear message instead of printing once per file and
    # then crashing while stacking an empty list.
    if mode not in (0, 1):
        raise ValueError(
            f"Mode not supported: {mode!r} (expected 0 for images or 1 for trimaps)."
        )

    width, height = target_size
    target_size = (width, height)  # accept lists as well as tuples

    # PIL sizes are (width, height) while NumPy arrays built from PIL images
    # are indexed (height, width[, channels]).
    if mode == 0:
        correct_shape = (height, width, 3)
    else:
        correct_shape = (height, width)

    image_arrays = []
    for filename in natsorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        try:
            with Image.open(file_path) as img:
                if mode == 0:
                    img_array = np.array(img.resize(target_size))

                    # Replicate a single-channel image into three channels.
                    if img_array.ndim == 2:
                        img_array = np.stack([img_array] * 3, axis=-1)
                else:
                    # Nearest-neighbour resampling keeps trimap labels
                    # discrete (no interpolated in-between values).
                    img_array = np.array(
                        img.convert("L").resize(target_size, resample=Image.NEAREST)
                    )
        except Exception as e:
            # Deliberately best-effort: report the unreadable file and go on.
            print(f"Erro ao processar {filename}: {e}")
            continue

        image_arrays.append(img_array)

    # Warn when the loaded arrays do not all share one shape.
    if len({arr.shape for arr in image_arrays}) > 1:
        print("Warning: Not all images have the same shape.")

    # Keep only the arrays that have the expected shape.
    filtered_image_arrays = []
    for i, img_array in enumerate(image_arrays):
        if img_array.shape == correct_shape:
            filtered_image_arrays.append(img_array)
        else:
            print(f"Image {i} removed.")

    removed_count = len(image_arrays) - len(filtered_image_arrays)
    if removed_count:
        print(f"Removed {removed_count} images with incorrect shape.")

    if not filtered_image_arrays:
        raise ValueError(
            f"No images with shape {correct_shape} could be loaded from {folder_path!r}."
        )

    return np.stack(filtered_image_arrays, axis=0)

def combine_classes(data_seg):
    """
    Turn trimap segmentations into binary masks.

    Every pixel equal to 2 becomes 0 and every other pixel becomes 1.
    NOTE(review): this presumably relies on the Oxford-IIIT Pet trimap labels
    (2 = background, other values = pet / pet border) - confirm against the
    dataset README.

    :param data_seg: Array (or sequence of equally shaped arrays) of trimap segmentations.
    :return: NumPy integer array with the same shape as the input, holding 0 where
             the trimap was 2 and 1 elsewhere.
    """
    # One vectorized pass over the whole batch; this also handles an empty batch.
    return np.where(np.asarray(data_seg) == 2, 0, 1)

def gray_transforming(images):
    """
    Convert a batch of colour images to grayscale.

    The images are treated as RGB, which is the channel order of arrays built
    from PIL images (the loader used in this file). The previous BGR
    conversion code swapped the red and blue weights for such arrays.

    :param images: Iterable of colour images (NumPy arrays in RGB channel order).
    :return: NumPy array holding one single-channel grayscale image per input image.
    """

    gray_images = np.array([cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) for img in images])

    return gray_images

def delete_non_abyssinian_images(folder_path):
    """
    Remove every entry of a folder whose name lacks the 'Abyssinian' prefix.

    Each removal is reported on stdout. A removal that fails is reported too
    and does not stop the remaining entries from being processed.

    :param folder_path: Path to the folder containing the images.
    """
    for entry in os.listdir(folder_path):
        if entry.startswith("Abyssinian"):
            continue  # Abyssinian files are kept

        target = os.path.join(folder_path, entry)
        try:
            os.remove(target)
            print(f"Deleted {target}")
        except Exception as err:
            print(f"Error deleting {target}: {err}")

def save_array_to_pkl(array, filename):
    """
    Serialize a NumPy array into a pickle (.pkl) file.

    The target file is opened in binary write mode, so an existing file with
    the same name is overwritten. A confirmation line is printed afterwards.

    :param array: Numpy array to be saved.
    :param filename: Name of the file to save the array.
    """
    with open(filename, mode="wb") as pkl_handle:
        pickle.dump(array, pkl_handle)
        print(f"Array saved to {filename}")

def delete_mat_files(folder_path):
    """
    Remove every entry of a folder whose name ends with '.mat'.

    Each removal is reported on stdout. A removal that fails is reported too
    and does not stop the remaining entries from being processed.

    :param folder_path: Path to the folder containing the files.
    """
    for entry in os.listdir(folder_path):
        if not entry.endswith(".mat"):
            continue  # only .mat files are targeted

        target = os.path.join(folder_path, entry)
        try:
            os.remove(target)
            print(f"Deleted {target}")
        except Exception as err:
            print(f"Error deleting {target}: {err}")

# Downloading and extracting images

In [11]:
# Download both dataset archives and extract them under ./oxford_pet_dataset.
destination_folder = "./oxford_pet_dataset"
download_and_extract_oxford_pet_dataset(destination_folder)

# Deleting non-Abyssinian images (and any .mat files) from both the image
# folder and the trimap folder, so only the Abyssinian subset remains.
folder_path = ["./oxford_pet_dataset/images/", "./oxford_pet_dataset/annotations/trimaps/"]

for folder in folder_path:
    delete_non_abyssinian_images(folder)
    delete_mat_files(folder)

Baixando images...
Download concluído: ./oxford_pet_dataset/images.tar.gz
Extraindo images...
Extração concluída: ./oxford_pet_dataset/images.tar.gz
Arquivo compactado ./oxford_pet_dataset/images.tar.gz removido.
Baixando annotations...
Download concluído: ./oxford_pet_dataset/annotations.tar.gz
Extraindo annotations...
Extração concluída: ./oxford_pet_dataset/annotations.tar.gz
Arquivo compactado ./oxford_pet_dataset/annotations.tar.gz removido.
Deleted ./oxford_pet_dataset/images/Egyptian_Mau_98.jpg
Deleted ./oxford_pet_dataset/images/Egyptian_Mau_10.jpg
Deleted ./oxford_pet_dataset/images/beagle_15.jpg
Deleted ./oxford_pet_dataset/images/basset_hound_56.jpg
Deleted ./oxford_pet_dataset/images/chihuahua_199.jpg
Deleted ./oxford_pet_dataset/images/Ragdoll_72.jpg
Deleted ./oxford_pet_dataset/images/Siamese_181.jpg
Deleted ./oxford_pet_dataset/images/american_bulldog_20.jpg
Deleted ./oxford_pet_dataset/images/pug_4.jpg
Deleted ./oxford_pet_dataset/images/Siamese_113.jpg
Deleted ./oxford

# Loading and preprocessing images

In [12]:
# Load the photos (mode=0) resized to the loader's default target size.
print("Loading images...")
folder_path = "./oxford_pet_dataset/images/"
data = load_and_resize_images(folder_path, mode=0)
print("Images loaded.")

print("")

# Load the trimaps (mode=1) and reduce them to binary masks with combine_classes.
print("Loading segmentations...")
folder_path = "./oxford_pet_dataset/annotations/trimaps/"
data_tri_segmented = load_and_resize_images(folder_path, mode=1)
data_segmented = combine_classes(data_tri_segmented)
print("Segmentations loaded.")

Loading images...
Image 4 removed.
Removed 1 images with incorrect shape.
Images loaded.

Loading segmentations...
Segmentations loaded.


Deleting the segmentation mask that corresponds to the removed image (index 4):

In [13]:
# Index 4 is the image reported as "Image 4 removed." while loading the photos;
# the mask at the same position is dropped so images and masks stay paired.
# NOTE(review): assumes both folders hold matching files in the same sorted
# order and that no file was skipped while loading - confirm, and update the
# index if the dataset or the loader output changes.
data_segmented = np.delete(data_segmented, 4, axis=0)

In [14]:
# Sanity check: both arrays should have the same number of samples and the
# same spatial size.
print(data.shape)
print(data_segmented.shape)

(199, 256, 256, 3)
(199, 256, 256)


# Saving in a pickle file

In [15]:
# Persist the images and their binary masks as separate pickle files in the
# current working directory.
save_array_to_pkl(data, "data.pkl")
save_array_to_pkl(data_segmented, "data_segmented.pkl")

Array saved to data.pkl
Array saved to data_segmented.pkl
