In [1]:
import os
import random
import shutil
from tqdm import tqdm
from PIL import Image
import numpy as np

# Source dataset location plus the output folders for the two splits.
dataset_dir = "C:\\Users\\Win11\\OneDrive\\Desktop\\EEX_DATA"
train_dir = "C:\\Users\\Win11\\OneDrive\\Desktop\\MOD_TRAIN"
test_dir = "C:\\Users\\Win11\\OneDrive\\Desktop\\MOD_TEST"

# Split ratio: 0.8 means 80% training and 20% testing.
split_ratio = 0.8

# Make sure both output folders exist (no error if they are already there).
for split_root in (train_dir, test_dir):
    os.makedirs(split_root, exist_ok=True)

# Load one image, resize it, and scale its pixel values by 1/255.
def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image file, resize it, and return it as a scaled array.

    Args:
        image_path: Path of the image file to open with PIL.
        target_size: ``(width, height)`` tuple passed to ``Image.resize``.

    Returns:
        A NumPy array holding the resized pixel data divided by 255.0.
        The image mode is not converted, so the array's channel layout
        follows whatever mode the file was stored in.
    """
    # The context manager closes the underlying file handle even if
    # resizing or array conversion raises.
    with Image.open(image_path) as image:
        resized = image.resize(target_size)
        pixels = np.array(resized)
    # Scale to [0, 1] -- assumes 8-bit pixel data; TODO confirm for this dataset.
    return pixels / 255.0

# Split every class folder of the dataset into training and testing copies.
# A fixed seed plus sorted listings make the split reproducible, so repeated
# runs copy the same split instead of mixing different random splits into the
# same output folders (which would put some images in both train and test).
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

for class_name in sorted(os.listdir(dataset_dir)):
    class_dir = os.path.join(dataset_dir, class_name)
    # Skip stray files in the dataset root; only directories are classes.
    if not os.path.isdir(class_dir):
        continue

    train_class_dir = os.path.join(train_dir, class_name)
    test_class_dir = os.path.join(test_dir, class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    # os.listdir order is arbitrary, so sort before sampling; nested folders
    # are ignored because shutil.copyfile only handles regular files.
    images = sorted(
        name for name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, name))
    )
    num_train = int(len(images) * split_ratio)
    train_images = split_rng.sample(images, num_train)
    # Set membership keeps the complement computation linear in len(images).
    train_set = set(train_images)
    test_images = [img for img in images if img not in train_set]

    # Copy each split into its own per-class output folder.
    for split_label, split_images, split_class_dir in (
        ('training', train_images, train_class_dir),
        ('testing', test_images, test_class_dir),
    ):
        for img_name in tqdm(split_images, desc=f'Copying {split_label} images for {class_name}'):
            img_path = os.path.join(class_dir, img_name)
            target_path = os.path.join(split_class_dir, img_name)
            shutil.copyfile(img_path, target_path)

# Run preprocess_image over every image of both splits.
for phase, phase_dir in (('train', train_dir), ('test', test_dir)):
    for class_name in os.listdir(phase_dir):
        class_dir = os.path.join(phase_dir, class_name)
        progress = tqdm(os.listdir(class_dir), desc=f'Preprocessing {phase} images for {class_name}')
        for img_name in progress:
            img_path = os.path.join(class_dir, img_name)
            # The result is not stored or written back here; persisting it
            # (or replacing the original file) is left as an optional step.
            preprocessed_img = preprocess_image(img_path)

print("Dataset splitting and preprocessing completed successfully!")

Copying training images for Normal: 100%|██████████| 1200/1200 [00:09<00:00, 121.66it/s]
Copying testing images for Normal: 100%|██████████| 300/300 [00:02<00:00, 135.73it/s]
Copying training images for Tuberculosis: 100%|██████████| 1200/1200 [00:12<00:00, 99.66it/s] 
Copying testing images for Tuberculosis: 100%|██████████| 300/300 [00:02<00:00, 114.61it/s]
Preprocessing train images for Normal: 100%|██████████| 1200/1200 [00:32<00:00, 36.57it/s]
Preprocessing train images for Tuberculosis: 100%|██████████| 1200/1200 [00:28<00:00, 41.79it/s]
Preprocessing test images for Normal: 100%|██████████| 300/300 [00:07<00:00, 38.59it/s]
Preprocessing test images for Tuberculosis: 100%|██████████| 300/300 [00:07<00:00, 40.68it/s]

Dataset splitting and preprocessing completed successfully!



