In [1]:
import zipfile
import cv2
import numpy as np
import os
from io import BytesIO
from tqdm import tqdm
from PIL import Image

In [13]:
# Input archive and output folders. These are relative paths, so they are
# resolved against the current working directory of the kernel.
file_path = '../data/raw/face Dataset.zip'
processed_path = '../data/processed/'
train_path, test_path = (
    os.path.join(processed_path, split_name) for split_name in ('train', 'test')
)

In [14]:
# Create the output folders up front; exist_ok=True makes re-running this
# cell a no-op when they are already present.
for split_dir in (train_path, test_path):
    os.makedirs(split_dir, exist_ok=True)

In [15]:
# Output size of every processed image. preprocess_and_save_images passes this
# tuple to cv2.resize as the destination size.
target_size = (224, 224)

In [16]:
def normalize_image(image):
    """Divide pixel values by 255.0.

    For 8-bit intensities (0..255) this maps the values onto 0.0..1.0.

    Args:
        image: Anything that supports true division by a float, e.g. a
            NumPy array of pixel values or a single number.

    Returns:
        The element-wise quotient ``image / 255.0``.
    """
    max_intensity = 255.0
    return image / max_intensity

In [17]:
# read and preprocess images from zip
def preprocess_and_save_images(file_path, target_dir, size=None):
    """Resize every JPEG in a zip archive and write it under ``target_dir``.

    Each archive member whose name ends in ``.jpg`` or ``.jpeg`` (any case) is
    decoded with PIL as RGB, resized with ``cv2.resize`` and written with
    ``cv2.imwrite`` to ``target_dir/<member name>``, so the folder layout of
    the archive is reproduced. A failure on one member is reported with
    ``print`` and the loop carries on with the next member.

    The images are stored as ordinary 8-bit files; no 0..1 scaling is applied
    here because an 8-bit JPEG cannot hold it.

    Args:
        file_path: Path of the zip archive to read.
        target_dir: Directory that receives the processed images. Missing
            sub-directories are created.
        size: Destination size handed to ``cv2.resize``. ``None`` (the
            default) uses the module-level ``target_size``.

    Returns:
        None.
    """
    if size is None:
        size = target_size

    root = os.path.abspath(target_dir)

    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        for file_info in tqdm(zip_ref.infolist(), desc='Processing images'):
            # Compare the lower-cased name so '.JPG' / '.jpeg' are not skipped.
            if not file_info.filename.lower().endswith(('.jpg', '.jpeg')):
                continue

            try:
                # Refuse member names such as '../x.jpg' that would resolve to
                # a location outside the target directory.
                target_path = os.path.abspath(os.path.join(root, file_info.filename))
                if os.path.commonpath([root, target_path]) != root:
                    raise ValueError("archive member escapes the target directory")

                with zip_ref.open(file_info.filename) as file:
                    image = Image.open(BytesIO(file.read())).convert('RGB')

                image_resized = cv2.resize(np.array(image), size)

                # PIL delivers RGB while cv2.imwrite expects BGR; without this
                # conversion the saved file has red and blue swapped.
                image_bgr = cv2.cvtColor(image_resized, cv2.COLOR_RGB2BGR)

                os.makedirs(os.path.dirname(target_path), exist_ok=True)

                # Save the processed image. The array is already uint8, so it
                # is written as-is: scaling to 0..1 and back through a
                # truncating uint8 cast adds nothing and risks off-by-one
                # pixel values.
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(target_path, image_bgr):
                    raise OSError(f"cv2.imwrite could not write {target_path}")

            except Exception as e:
                # Best effort: report the member and keep going.
                print(f"Error processing image {file_info.filename}: {e}")

In [18]:
# Sanity check: the relative data paths are resolved against this directory.
current_dir = os.getcwd()
print("Current Working Directory:", current_dir)


Current Working Directory: /home/lordmax/machine-learning-projects/cross-domain-face-recognition/notebooks


In [19]:
# Sanity check: confirm the input archive is reachable before processing.
archive_found = os.path.exists(file_path)
print("Does the file exist?", archive_found)

Does the file exist? True


In [20]:
# NOTE(review): both passes read the same archive with no filtering, so the
# train and test folders end up with the same images. TODO confirm whether a
# real train/test split is intended here.
for destination in (train_path, test_path):
    preprocess_and_save_images(file_path, destination)

print("Data preprocessing complete.")

Processing images: 100%|██████████| 3066/3066 [00:17<00:00, 175.91it/s]
Processing images: 100%|██████████| 3066/3066 [00:10<00:00, 282.10it/s]

Data preprocessing complete.



