Testing

Delete Previous Dataset

In [None]:
import shutil

def delete_folder(folder_path):
    """Recursively delete ``folder_path`` and print the outcome.

    Best-effort helper: any exception raised during the deletion is caught
    and reported on stdout instead of being propagated to the caller.

    Args:
        folder_path: Path of the directory tree handed to ``shutil.rmtree``.

    Returns:
        None. The result is only communicated through the printed message.
    """
    try:
        shutil.rmtree(folder_path)
        success_message = f"Folder '{folder_path}' and its contents have been deleted successfully."
        print(success_message)
    except Exception as err:
        failure_message = f"Error occurred while deleting '{folder_path}': {err}"
        print(failure_message)

# Example usage: wipe the dataset folder left over from a previous run.
# WARNING: this permanently removes the folder and everything inside it;
# failures are only printed by delete_folder, not raised.
delete_folder("F:/thesis/data")

Main code: resize images and split each class into train/test/validation sets

In [1]:
import os
import cv2
import numpy as np
import shutil
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import time

# Folder that is listed to discover the class sub-folders.
data_dir = "F:/datasets/dataset"
# Root folder under which the train/test/validation tree is written.
output_dir = "F:/thesis/data"
# Target size passed to cv2.resize for every image.
target_size = (224, 224)

# Create the output directory. exist_ok=True replaces the separate
# "exists?" check, which was racy, and keeps the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

# Function to rename and save images
def save_image(img, class_name, dataset_type, serial):
    """Write ``img`` as a JPEG below ``output_dir/<dataset_type>/<class_name>/``.

    The file is named ``<dataset_type>_class_<class_name>_<serial>.jpg``.

    Args:
        img: Image array to hand to ``cv2.imwrite``.
        class_name: Class sub-folder name; also embedded in the filename.
        dataset_type: Split sub-folder name (the callers in this notebook
            pass "train", "test" or "validation").
        serial: Running number appended to the filename.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    new_filename = f"{dataset_type}_class_{class_name}_{serial}.jpg"
    target_dir = os.path.join(output_dir, dataset_type, class_name)
    # Ensure the destination exists so the helper also works when called
    # on its own; this is a no-op when the folder is already there.
    os.makedirs(target_dir, exist_ok=True)
    output_path = os.path.join(target_dir, new_filename)
    # cv2.imwrite signals failure through a False return value rather than
    # an exception; ignoring it would silently drop images from the dataset.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"Failed to write image to '{output_path}'")

# Get sorted list of subdirectories (classes)
class_names = sorted(os.listdir(data_dir))

# train_test_split raises ValueError when a split would leave one side empty;
# with the 0.2 / 0.25 ratios used below, a class needs at least 3 images for
# train, test and validation to each receive one.
min_images_per_class = 3

# Iterate through subfolders, resize the images and write the three splits
start_time = time.time()
for class_name in class_names:
    class_dir = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_dir):
        # Stray files next to the class folders are not classes.
        continue

    # Create train, test, and validation directories
    for dataset_type in ["train", "test", "validation"]:
        os.makedirs(os.path.join(output_dir, dataset_type, class_name), exist_ok=True)

    images = []
    skipped = []
    # Load and resize images. The listing is sorted because os.listdir returns
    # entries in arbitrary order, and the seeded split below is only
    # reproducible when the input order is.
    for img_name in tqdm(sorted(os.listdir(class_dir)), desc=f"Processing class {class_name}"):
        img_path = os.path.join(class_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when the entry cannot
            # be decoded (non-image file, corrupt file, nested folder, ...).
            skipped.append(img_name)
            continue
        images.append(cv2.resize(img, target_size))

    if skipped:
        print(f"Class {class_name}: skipped {len(skipped)} unreadable file(s): {skipped}")

    if len(images) < min_images_per_class:
        # Skip the class instead of letting train_test_split abort the whole run.
        print(
            f"Class {class_name}: only {len(images)} usable image(s), "
            f"need at least {min_images_per_class}; class skipped."
        )
        continue

    # Split data into train, test, and validation sets
    X_train, X_test = train_test_split(images, test_size=0.2, random_state=42)
    X_train, X_val = train_test_split(X_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

    # Save each split; serial numbers restart at 1 within every split
    for split_name, split_images in (("train", X_train), ("test", X_test), ("validation", X_val)):
        for i, img in enumerate(split_images, start=1):
            save_image(img, class_name, split_name, i)

# Calculate total time taken
total_time = time.time() - start_time
print(f"Total time taken: {total_time:.2f} seconds")

Processing class 0: 100%|██████████| 231/231 [00:49<00:00,  4.66it/s]
Processing class 1: 100%|██████████| 224/224 [00:58<00:00,  3.82it/s]
Processing class 10: 100%|██████████| 253/253 [00:37<00:00,  6.74it/s]
Processing class 11: 100%|██████████| 199/199 [00:23<00:00,  8.44it/s]
Processing class 12: 100%|██████████| 218/218 [00:26<00:00,  8.26it/s]
Processing class 13: 100%|██████████| 233/233 [00:26<00:00,  8.83it/s]
Processing class 14: 100%|██████████| 244/244 [00:25<00:00,  9.48it/s]
Processing class 15: 100%|██████████| 261/261 [00:26<00:00,  9.76it/s]
Processing class 16: 100%|██████████| 214/214 [00:24<00:00,  8.87it/s]
Processing class 17: 100%|██████████| 222/222 [00:25<00:00,  8.80it/s]
Processing class 18: 100%|██████████| 226/226 [00:25<00:00,  9.01it/s]
Processing class 19: 100%|██████████| 237/237 [00:25<00:00,  9.19it/s]
Processing class 2: 100%|██████████| 203/203 [00:24<00:00,  8.38it/s]
Processing class 20: 100%|██████████| 236/236 [00:23<00:00, 10.04it/s]
Processin

Total time taken: 1004.03 seconds
