In [41]:
import tqdm
import os
import glob
from ipynb.fs.full.utils import CLASS_NAMES, makedirs
import cv2
import numpy as np
import random
from sklearn.model_selection import train_test_split
import shutil
import tqdm

<h2>Preprocessing: resize, median filtering, and contrast-limited adaptive histogram equalization (CLAHE) to normalize pixel intensities</h2>

In [42]:
def resize(img_p, size=(640, 480)):
    """Load the image stored at ``img_p`` and resize it to ``size``.

    Args:
        img_p: Path of the image file; passed unchanged to ``cv2.imread``.
        size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``. Defaults to ``(640, 480)``.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``cv2.imread`` could not load an image from ``img_p``.
    """
    img = cv2.imread(img_p)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; without this check the failure only shows up as an
    # opaque OpenCV assertion inside cv2.resize that does not name the file.
    if img is None:
        raise ValueError("Could not read image: {0!r}".format(img_p))
    return cv2.resize(img, size)

In [43]:
def median_filter(img):
    """Return ``img`` smoothed with OpenCV's median blur (aperture size 5)."""
    aperture = 5
    blurred = cv2.medianBlur(img, aperture)
    return blurred

In [44]:
def clahe(img):
    """Apply CLAHE to the first LAB channel of a BGR image.

    The image is converted from BGR to LAB, the first channel is run through
    an OpenCV CLAHE object (clip limit 2.0, 5x5 tile grid), the remaining two
    channels are kept as they are, and the merged result is converted back
    to BGR.

    Args:
        img: Image accepted by ``cv2.cvtColor`` with ``cv2.COLOR_BGR2LAB``.

    Returns:
        The contrast-enhanced image, converted back with ``cv2.COLOR_LAB2BGR``.
    """
    lab_img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    lightness, chan_a, chan_b = cv2.split(lab_img)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(5, 5))
    # Only the first channel is equalized; the other two pass through.
    enhanced_lab = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

In [47]:
def preprocess(img_p, ret=False):
    """Run the preprocessing chain on the image stored at ``img_p``.

    The image is loaded and resized to 1000x1000 via ``resize``, passed
    through ``median_filter`` and then through ``clahe``.

    Args:
        img_p: Path of the image file to preprocess.
        ret: When truthy, return both processing stages instead of only the
            final image.

    Returns:
        The output of ``clahe`` by default. When ``ret`` is truthy, a dict
        with the keys ``'Median Filtered'`` and ``'Contrast Enhanced'``
        holding the median-filtered and the final image respectively.
    """
    denoised = median_filter(resize(img_p, size=(1000, 1000)))
    enhanced = clahe(denoised)
    if not ret:
        return enhanced
    return {'Median Filtered': denoised, 'Contrast Enhanced': enhanced}

In [48]:
# Preprocess every image under input_dir/<class>/ and write the result to
# output_dir/<class>/ under the same file name.
input_dir = "Picked_data/"
output_dir = "ProcessedData"

# Iterate through each class folder
for class_folder in os.listdir(input_dir):
    class_path = os.path.join(input_dir, class_folder)
    # Skip stray files and hidden folders (e.g. .ipynb_checkpoints): they are
    # not classes and would otherwise be mirrored into the output directory.
    if class_folder.startswith('.') or not os.path.isdir(class_path):
        continue

    output_class_path = os.path.join(output_dir, class_folder)
    os.makedirs(output_class_path, exist_ok=True)

    # Only regular, non-hidden files are handed to preprocess(); nested
    # directories and dot-files cannot be decoded as images.
    image_files = [
        name for name in os.listdir(class_path)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(class_path, name))
    ]

    # Iterate through each image file in the class folder
    print('[INFO] Preprocessing For Class :: {0}'.format(class_folder))
    for image_file in tqdm.tqdm(image_files):
        image_path = os.path.join(class_path, image_file)

        processed_image = preprocess(image_path)
        # Save the preprocessed image to the output directory
        output_image_path = os.path.join(output_class_path, image_file)
        # cv2.imwrite signals failure through its return value, so check it
        # rather than silently producing an incomplete dataset.
        if not cv2.imwrite(output_image_path, processed_image):
            raise IOError('Failed to write image: {0}'.format(output_image_path))

[INFO] Preprocessing For Class :: nagarjuna_akkineni


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 70.35it/s]


[INFO] Preprocessing For Class :: madhavan


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 67.74it/s]


[INFO] Preprocessing For Class :: kamal_haasan


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 74.48it/s]


[INFO] Preprocessing For Class :: saif_ali_khan


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 73.25it/s]


[INFO] Preprocessing For Class :: mohanlal


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 75.61it/s]


[INFO] Preprocessing For Class :: prabhas


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 72.42it/s]


[INFO] Preprocessing For Class :: sanjay_dutt


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 73.03it/s]


[INFO] Preprocessing For Class :: akshay_kumar


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 71.94it/s]


[INFO] Preprocessing For Class :: ramya_krishnan


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 70.17it/s]


[INFO] Preprocessing For Class :: salman_khan


100%|██████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 69.95it/s]


<h2>Splitting the processed data into train and test sets</h2>

In [49]:

# Copy the images of every class under data_dir into train/<class>/ and
# test/<class>/ using a seeded 90/10 split performed per class.

# Set the path to your image data directory
data_dir = "ProcessedData/"

# Set the directory names for train and test sets
train_dir = "train"
test_dir = "test"

# Create train and test directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get the list of class directories
class_dirs = os.listdir(data_dir)

# Iterate over each class directory
for class_dir in class_dirs:
    class_path = os.path.join(data_dir, class_dir)

    # Skip hidden entries (e.g. .ipynb_checkpoints) and stray files before
    # listing them; only real class directories take part in the split.
    if class_dir.startswith('.') or not os.path.isdir(class_path):
        continue

    # Get the list of image files in the class directory. os.listdir returns
    # entries in arbitrary order, so the list is sorted to make the seeded
    # split below reproducible across runs and machines.
    image_files = sorted(
        name for name in os.listdir(class_path)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(class_path, name))
    )

    # train_test_split cannot produce a non-empty train and test set from
    # fewer than two samples and would raise; report and move on instead.
    if len(image_files) < 2:
        print('[WARN] Skipping class {0}: need at least 2 images to split, found {1}'.format(
            class_dir, len(image_files)))
        continue

    # Splitting each class on its own keeps the same train/test ratio per class
    train_files, test_files = train_test_split(image_files, test_size=0.1, random_state=42)

    # Create class directories in train and test sets
    train_class_dir = os.path.join(train_dir, class_dir)
    test_class_dir = os.path.join(test_dir, class_dir)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    # Copy (not move) the images to the corresponding train and test class
    # directories; the files in data_dir are left in place.
    for file in train_files:
        src = os.path.join(class_path, file)
        dst = os.path.join(train_class_dir, file)
        shutil.copy(src, dst)

    for file in test_files:
        src = os.path.join(class_path, file)
        dst = os.path.join(test_class_dir, file)
        shutil.copy(src, dst)


In [None]:
# def preprocess_image(image_path, target_size=(1000,1000)):
#     # Load the image
#     image = cv2.imread(image_path)
    
#     # Calculate the aspect ratio
#     height, width, _ = image.shape
#     aspect_ratio = width / height
    
#     # Perform cropping or padding based on aspect ratio
#     if aspect_ratio > target_size[0] / target_size[1]:
#         new_width = int(height * (target_size[0] / target_size[1]))
#         resized_image = cv2.resize(image, (new_width, target_size[1]))
#         padding = max((target_size[0] - new_width) // 2, 0)
#         padded_image = cv2.copyMakeBorder(resized_image, 0, 0, padding, padding, cv2.BORDER_CONSTANT, value=[0, 0, 0])
#     else:
#         new_height = int(width * (target_size[1] / target_size[0]))
#         resized_image = cv2.resize(image, (target_size[0], new_height))
#         padding = max((target_size[1] - new_height) // 2, 0)
#         padded_image = cv2.copyMakeBorder(resized_image, padding, padding, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    
#     return padded_image