## Read Source Folder Path

In [6]:
# Root folder of the image database. The cells that follow read `input_path`
# (their own prompts are commented out), so run this cell first.
input_path = input("Enter the root folder path: ")
# Example value: 'C:/Users/Soumiz/Downloads/Dummy root'


Enter the root folder path: C:/Users/Soumiz/Downloads/Dummy root


### Check Folder Tree Structure

In [7]:
import os

def list_folders_tree(root_folder, level=0):
    """Print the sub-folder hierarchy below ``root_folder`` as an indented tree.

    Only directories are listed; files are ignored.

    Args:
        root_folder: Path of the folder whose tree is printed.
        level: Nesting depth of ``root_folder``. 0 for the top-level call;
            recursive calls pass ``level + 1``.

    Returns:
        None. The tree is written to standard output.
    """
    prefix = "|   " * (level - 1) + "|-- " if level > 0 else ""
    # normpath drops a trailing separator; without it basename() returns an
    # empty string for inputs such as "data/images/".
    folder_name = os.path.basename(os.path.normpath(root_folder))
    print(f"{prefix}{folder_name}")

    # os.listdir() returns entries in arbitrary order; sort for a stable tree.
    for item in sorted(os.listdir(root_folder)):
        item_path = os.path.join(root_folder, item)
        if os.path.isdir(item_path):
            list_folders_tree(item_path, level + 1)

if __name__ == "__main__":
    # The root path was collected in the first cell, so it is not asked for again.
    if not os.path.isdir(input_path):
        print(f"The path {input_path} is not a valid directory.")
    else:
        list_folders_tree(input_path)


Dummy root
|-- level 1_1
|   |-- level 1_1_1
|   |-- level 1_1_2
|-- level 1_2
|   |-- level 1_2_1
|   |-- level 1_2_2


## Check Image Dimensions and Sizes, and Document Results in Separate Sheets of a Single Excel File for Each Folder of Entire Database

In [8]:
import os
import cv2
import pandas as pd

def get_image_info(image_path):
    """Return the dimensions and grayscale range of the image at ``image_path``.

    The file is loaded through ``cv2.imread`` with ``cv2.IMREAD_GRAYSCALE``.

    Returns:
        ``((height, width), (min_value, max_value))``, or ``(None, None)``
        when ``cv2.imread`` returns ``None`` for the path.
    """
    pixels = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if pixels is None:
        # Nothing could be loaded from this path.
        return None, None

    rows, cols = pixels.shape
    intensity_span = (pixels.min(), pixels.max())
    return (rows, cols), intensity_span

# Characters that Excel does not accept inside a worksheet title.
_INVALID_SHEET_CHARS = set('[]:*?/\\')
# Excel rejects worksheet titles longer than this.
_MAX_SHEET_NAME_LEN = 31


def _make_sheet_name(folder_name, used_names):
    """Return a valid worksheet title for ``folder_name`` not yet in ``used_names``."""
    cleaned = "".join("_" if ch in _INVALID_SHEET_CHARS else ch for ch in folder_name)
    # Keep the tail of the name: it carries the deepest (most specific) folders.
    candidate = cleaned[-_MAX_SHEET_NAME_LEN:] or "Sheet"
    counter = 1
    # Worksheet titles are compared case-insensitively, hence the lower() bookkeeping.
    while candidate.lower() in used_names:
        suffix = f"~{counter}"
        candidate = cleaned[-(_MAX_SHEET_NAME_LEN - len(suffix)):] + suffix
        counter += 1
    used_names.add(candidate.lower())
    return candidate


def read_folders_and_save_info(path, root_name, level=0):
    """Record size and grayscale range of every image under ``path`` in an Excel file.

    Each folder that contains at least one readable image gets its own sheet,
    named after the chain of folder names joined with "_" and starting with
    ``root_name``. The workbook is saved as ``<root_name>.xlsx`` in the parent
    directory of ``path``.

    Args:
        path: Root folder to scan recursively.
        root_name: Prefix for the sheet names and base name of the workbook.
        level: Unused; kept so existing calls that pass it keep working.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    # Check if the given path is a directory
    if not os.path.isdir(path):
        print(f"The path {path} is not a valid directory.")
        return

    data_dict = {}

    def read_subfolders(current_path, folder_name):
        folder_data = []
        # Materialise the listing so the directory handle is closed before
        # recursing; sorting makes the sheet order reproducible.
        with os.scandir(current_path) as entries:
            ordered_entries = sorted(entries, key=lambda entry: entry.name)

        for entry in ordered_entries:
            if entry.is_dir():
                # Sub-folder sheets are named "<parent>_<child>".
                read_subfolders(entry.path, folder_name + "_" + entry.name)
            elif entry.is_file():
                size, span = get_image_info(entry.path)
                if size is None or span is None:
                    # Not a readable image; leave it out of the report.
                    continue
                folder_data.append({
                    "File Name": entry.name,
                    "Size1": size[0],
                    "Size2": size[1],
                    "Grayscale Span1": span[0],
                    "Grayscale Span2": span[1],
                    "Path": current_path
                })

        if folder_data:
            data_dict[folder_name] = folder_data

    # Start reading subfolders from the given path
    read_subfolders(path, root_name)

    # normpath removes a trailing separator; otherwise dirname() would return
    # the root folder itself instead of its parent.
    excel_file_path = os.path.dirname(os.path.normpath(path))
    excel_file_name = os.path.join(excel_file_path, root_name + ".xlsx")

    if not data_dict:
        # A workbook without any sheet cannot be saved, so stop here.
        print(f"No readable images found under {path}; no Excel file was written.")
        return

    try:
        with pd.ExcelWriter(excel_file_name) as writer:
            used_sheet_names = set()
            for sheet_name, data in data_dict.items():
                # Enforce the 31-character limit, replace forbidden characters
                # and keep titles unique so no two folders share one sheet.
                valid_sheet_name = _make_sheet_name(sheet_name, used_sheet_names)
                df = pd.DataFrame(data)
                df.to_excel(writer, sheet_name=valid_sheet_name, index=False)
        print(f"Data saved to {excel_file_name}")
    except PermissionError:
        print(f"Permission denied: Unable to write to {excel_file_name}. Ensure the file is not open and you have write permissions.")

if __name__ == "__main__":
    # `input_path` is the root folder entered in the first cell; it is not
    # prompted for again here.
    # normpath strips a trailing separator so basename() yields the folder
    # name rather than an empty string.
    root_name = os.path.basename(os.path.normpath(input_path))
    read_folders_and_save_info(input_path, root_name)


Data saved to C:/Users/Soumiz/Downloads\Dummy root.xlsx


### Create Different folders maintaining hierarchy to save processed images 

In [10]:
import os
from pathlib import Path

def list_and_create_folders_tree(root_folder, new_root_folder, level=0):
    """Recreate the folder hierarchy of ``root_folder`` with "_nrm"-suffixed names.

    A folder ``<name>_nrm`` is created inside ``new_root_folder`` for
    ``root_folder``, and the same is done recursively for every sub-folder.
    Files are not copied.

    Args:
        root_folder: Existing folder whose hierarchy is mirrored.
        new_root_folder: Folder in which the mirrored ``<name>_nrm`` folder
            is created (missing parents are created as well).
        level: Nesting depth, passed along in the recursion; it does not
            affect the result.

    Returns:
        None.
    """
    # Without normpath a trailing separator makes basename() return "" and the
    # mirror folder would be named just "_nrm".
    source = os.path.normpath(root_folder)

    # List the children before creating anything, so a folder created by this
    # call is never picked up as a source sub-folder.
    subfolders = [
        os.path.join(source, item)
        for item in os.listdir(source)
        if os.path.isdir(os.path.join(source, item))
    ]

    # Create the new folder structure with '_nrm' appended to the names
    new_folder_path = os.path.join(new_root_folder, os.path.basename(source) + "_nrm")
    Path(new_folder_path).mkdir(parents=True, exist_ok=True)

    for subfolder in subfolders:
        list_and_create_folders_tree(subfolder, new_folder_path, level + 1)

if __name__ == "__main__":
    # `input_path` is the root folder entered in the first cell.
    if os.path.isdir(input_path):
        # Strip a trailing separator first: with "root/" the "_nrm" path would
        # be "root/_nrm" and its dirname the source folder itself, so the
        # mirrored tree would be created inside the tree being walked.
        source_root = os.path.normpath(input_path)
        new_root_path = source_root + "_nrm"
        list_and_create_folders_tree(source_root, os.path.dirname(new_root_path))
        # Report success only when the folders were actually created.
        print("Folders created to save processed images")
    else:
        print(f"The path {input_path} is not a valid directory.")

Folders created to save processed images


### Create Uniformity 
    1. Intensity  --> Normalisation method
    2. Size --> Padding method

### Normalization for lossless transformation

### Intensity scaling by normalisation


Method |	Description	|Loss of Information|	Use Cases|
---------|-----------------|--------------|-----------|
Normalization|	Rescales pixel values to a fixed range [0, 255].|	Low	| Consistent intensity range while preserving relative differences.
Standardization|	Centers data with mean 0 and standard deviation 1.|	Moderate	|Preparing data for models that require zero mean and unit variance.
Histogram Equalization|	Spreads pixel values to enhance contrast.	|Moderate to High|	Enhancing contrast and visibility of features.
Linear Scaling|	Linearly maps pixel values to a target range.|	Low	|Converting intensity values to a desired range while preserving relative differences.

In [11]:
import os
import cv2
from pathlib import Path

def normalize_image(image):
    """Min-max rescale ``image`` to the full 8-bit range [0, 255].

    The smallest pixel value maps to 0, the largest to 255, and everything in
    between is scaled linearly and rounded to the nearest integer.

    Args:
        image: Array-like with ``min``, ``max`` and ``astype`` methods
            (e.g. the array returned by ``cv2.imread``).

    Returns:
        A new ``uint8`` array of the same shape. If all pixels share one
        value there is no range to stretch and ``image`` itself is returned
        unchanged.
    """
    min_val = image.min()
    max_val = image.max()
    if not max_val > min_val:  # Constant image: avoid division by zero
        return image

    # Compute in float64: subtracting in the input dtype overflows for signed
    # or narrow integer types (e.g. int8 127 - (-128)).
    low = float(min_val)
    value_range = float(max_val) - low
    scaled = (image.astype('float64') - low) / value_range * 255
    # Round rather than truncate so a result like 127.999... becomes 128, not 127.
    return scaled.round().astype('uint8')

def normalize_images_in_folders(input_path, output_path):
    """Normalize every image below ``input_path`` and save it under ``output_path``.

    The folder hierarchy is mirrored: images directly in ``input_path`` go to
    ``output_path``, and each sub-folder ``name`` maps to ``name_nrm`` inside
    its parent's output folder. Images are read as grayscale, passed through
    ``normalize_image`` and written under their original file name. Output
    folders are created on demand.

    Args:
        input_path: Root folder containing the source images.
        output_path: Root folder that receives the normalized images.

    Returns:
        None. Unreadable images and failed writes are reported with ``print``.
    """
    # Check if the given path is a directory
    if not os.path.isdir(input_path):
        print(f"The path {input_path} is not a valid directory.")
        return

    # Both TIFF spellings are accepted.
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')

    def process_subfolders(current_path, current_output_path):
        # List all entries in the current directory
        with os.scandir(current_path) as entries:
            for entry in entries:
                if entry.is_dir():
                    # Sub-folder "name" is mirrored as "name_nrm".
                    new_output_path = os.path.join(current_output_path, entry.name + "_nrm")
                    process_subfolders(entry.path, new_output_path)
                elif entry.is_file() and entry.name.lower().endswith(image_extensions):
                    image = cv2.imread(entry.path, cv2.IMREAD_GRAYSCALE)
                    if image is None:
                        # imread signals failure by returning None; say so
                        # instead of dropping the file silently.
                        print(f"Skipped unreadable image: {entry.path}")
                        continue
                    normalized_image = normalize_image(image)
                    # Ensure the output folder exists
                    Path(current_output_path).mkdir(parents=True, exist_ok=True)
                    normalized_image_path = os.path.join(current_output_path, entry.name)
                    # imwrite reports failure through its boolean result.
                    if not cv2.imwrite(normalized_image_path, normalized_image):
                        print(f"Failed to write normalized image: {normalized_image_path}")

    # Start processing subfolders from the given path
    process_subfolders(input_path, output_path)

if __name__ == "__main__":
    # Input path from the user
    input_path = input("Enter the path to the directory: ")
    # Strip a trailing separator before appending the suffix: "root/" + "_nrm"
    # would otherwise place the output folder inside the input folder.
    output_path = os.path.normpath(input_path) + "_nrm"
    if os.path.isdir(input_path):
        normalize_images_in_folders(os.path.normpath(input_path), output_path)
        # Announce completion only when the folder was actually processed.
        print("ALL IMAGES ARE NORMALISED")
    else:
        print(f"The path {input_path} is not a valid directory.")



Enter the path to the directory: C:/Users/Soumiz/Downloads/Dummy root
ALL IMAGES ARE NORMALISED


## Maximum available Height and Width of the Images in the Dataset

#### It is important to retain the size for lossless scaling of images

## 

In [12]:
import pandas as pd

# Path of the Excel workbook that holds the per-folder image information
# (one sheet per folder, with 'Size1' and 'Size2' columns).
file_path = input("Enter the path to the Excel file: ")

# Initialize variables to keep track of the largest sizes
max_size1 = 0
max_size2 = 0

# Open the workbook in a context manager so its file handle is closed as soon
# as all sheets are read; an open handle can keep the .xlsx file locked.
with pd.ExcelFile(file_path) as xls:
    # Loop through all sheets in the Excel file
    for sheet_name in xls.sheet_names:
        df = pd.read_excel(xls, sheet_name=sheet_name)
        max_size1 = max(max_size1, df['Size1'].max())
        max_size2 = max(max_size2, df['Size2'].max())

print('Size1 (height):',max_size1) # 2713
print ('Size2 (width):',max_size2) # 2916 

#     C:\Users\Soumiz\Downloads\ChestXRay2017.xlsx       #

Enter the path to the directory: C:/Users/Soumiz/Downloads/Dummy root.xlsx
Size1 (height): 1858
Size2 (width): 2090


### Size Scaling by Padding for lossless Transformation



Method	|Visual Loss	|Data Loss|	Use Case
-------|-------------|-------------|---------
Nearest Neighbour|	High|	Minimal	|Simple tasks, low quality requirements
Bilinear Interpolation|	Moderate|	Moderate|	Smooth images, general use
Bicubic Interpolation|	Low|	Low|	High-quality resizing, detail preservation
Lanczos Resampling|	Very Low|	Minimal|	Professional and high-quality applications
Cropping|	High|	High|	Focusing on specific regions
Padding	|None|	None|	Ensures no data loss, uniform size

In [14]:
import os
from PIL import Image

def resize_and_pad_image(image_path, output_path, size1, size2):
    """Centre an image on a black canvas of ``size2`` x ``size1`` pixels and save it.

    The image itself is not rescaled; only black borders are added around it.

    Args:
        image_path: Image file to read.
        output_path: Destination file. Paths ending in ".jpg"/".jpeg" are
            saved as JPEG with quality 95; for any other name ``save`` is
            called without an explicit format.
        size1: Height of the output canvas in pixels.
        size2: Width of the output canvas in pixels.

    Raises:
        ValueError: If the image is wider or taller than the canvas. Pasting
            it anyway would use negative offsets and cut off its borders.
    """
    with Image.open(image_path) as img:
        width, height = img.size
        new_width = size2
        new_height = size1

        # Refuse to pad an image that does not fit: nothing is written, so a
        # caller that overwrites the source file does not lose pixels.
        if width > new_width or height > new_height:
            raise ValueError(
                f"Image {image_path} is {width}x{height} (width x height), larger "
                f"than the target {new_width}x{new_height}; padding would crop it."
            )

        # NOTE(review): the canvas is always "RGB", so a single-channel input
        # is presumably saved with three channels - confirm this is intended.
        new_img = Image.new("RGB", (new_width, new_height), (0, 0, 0))

        # Offsets that centre the original on the canvas
        left = (new_width - width) // 2
        top = (new_height - height) // 2
        new_img.paste(img, (left, top))

        # Save the new image with quality settings
        if output_path.lower().endswith(('.jpg', '.jpeg')):
            new_img.save(output_path, format='JPEG', quality=95)  # Adjust quality as needed
        else:
            new_img.save(output_path)  # For PNG or other formats

def process_directory(root_folder_path, size1, size2):
    """Pad every image below ``root_folder_path`` to ``size1`` x ``size2`` in place.

    The folder is walked recursively. Each file with an image extension is
    handed to ``resize_and_pad_image`` with the same path as input and output,
    so the file is overwritten. One "Processed: ..." line is printed per file.

    Args:
        root_folder_path: Root folder to walk.
        size1: Target height in pixels.
        size2: Target width in pixels.

    Returns:
        None.
    """
    # Same extensions as the normalisation step, so every image it wrote is
    # also padded here ('.bmp' and TIFF files used to be left out).
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')

    for subdir, _, files in os.walk(root_folder_path):
        for file in files:
            if not file.lower().endswith(image_extensions):
                continue
            file_path = os.path.join(subdir, file)
            # Output path equals input path: the padded image replaces the original.
            output_path = file_path
            resize_and_pad_image(file_path, output_path, size1, size2)
            print(f"Processed: {file_path}")

if __name__ == "__main__":
    # Target canvas size, typically the maximum height/width found in the dataset.
    height = int(input("Enter the final height (size1): "))
    width = int(input("Enter the final width (size2): "))
    root_folder = input("Enter the root folder path: ")

    # os.walk() yields nothing for a missing folder, so a mistyped path would
    # otherwise finish silently without processing anything.
    if os.path.isdir(root_folder):
        process_directory(root_folder, height, width)
    else:
        print(f"The path {root_folder} is not a valid directory.")


Enter the final height (size1): 1858
Enter the final width (size2): 2090
Enter the root folder path: C:/Users/Soumiz/Downloads/Dummy root_nrm
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_1_nrm\level 1_1_1_nrm\NORMAL2-IM-1350-0001.jpeg
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_1_nrm\level 1_1_1_nrm\person282_virus_579.jpeg
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_1_nrm\level 1_1_2_nrm\IM-0721-0001 - Copy.jpeg
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_1_nrm\level 1_1_2_nrm\IM-0721-0001.jpeg
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_2_nrm\level 1_2_1_nrm\IM-0115-0001 - Copy.jpeg
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_2_nrm\level 1_2_1_nrm\IM-0115-0001.jpeg
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_2_nrm\level 1_2_2_nrm\IM-0323-0001 - Copy.jpeg
Processed: C:/Users/Soumiz/Downloads/Dummy root_nrm\level 1_2_nrm\level 1_2_2_nrm\IM-0323-0001.jpeg
