In [3]:
!pip install opencv-python numpy pandas matplotlib scipy

Collecting opencv-python
  Downloading opencv_python-4.10.0.82-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.82-cp37-abi3-win_amd64.whl (38.8 MB)
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
    --------------------------------------- 0.8/38.8 MB 16.3 MB/s eta 0:00:03
   - -------------------------------------- 1.4/38.8 MB 18.1 MB/s eta 0:00:03
   -- ------------------------------------- 2.4/38.8 MB 16.8 MB/s eta 0:00:03
   --- ------------------------------------ 3.5/38.8 MB 20.5 MB/s eta 0:00:02
   ----- ---------------------------------- 4.9/38.8 MB 22.4 MB/s eta 0:00:02
   ----- ---------------------------------- 5.1/38.8 MB 20.5 MB/s eta 0:00:02
   ----- ---------------------------------- 5.6/38.8 MB 17.8 MB/s eta 0:00:02
   ------ --------------------------------- 6.2/38.8 MB 17.3 MB/s eta 0:00:02
   ------- -------------------------------- 7.5/38.8 MB 19.1 MB/s eta 0:00:02
   -------- ------------------------------- 8.7/38.8 M

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\chilukalo\\AppData\\Local\\anaconda3\\Lib\\site-packages\\cv2\\cv2.pyd'
Consider using the `--user` option or check the permissions.



In [None]:
import os
import time
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import ndimage as ndi
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_image(image_path, pixel_to_um=2):
    """Segment regions in one image and measure each retained contour.

    Steps, in order:
      1. Read the file as grayscale, Gaussian-blur it, apply a 3x3 high-pass
         kernel and normalize the result to 0-255.
      2. Inverted Otsu threshold, erode/dilate to get sure foreground and
         background, then watershed. Watershed boundary pixels are painted
         green on the image that is returned for plotting.
      3. Adaptive threshold of that annotated image, blank a border
         (40 rows top/bottom, 20 columns left/right), label connected
         components and drop components smaller than 70 pixels.
      4. Re-split with ``split_cells``, fill holes, rasterize all cells into
         one mask, take its external contours and keep those accepted by
         ``is_solid_region``.
      5. Compute size/shape measurements for every kept contour.

    Args:
        image_path: Path of the image file to read.
        pixel_to_um: Factor applied to pixel lengths (squared for areas)
            before they are reported; presumably micrometres per pixel.

    Returns:
        ``(regions_properties, solid_contours, color_image)`` where
        ``regions_properties`` is a list of dicts (one per contour),
        ``solid_contours`` the matching OpenCV contours and ``color_image``
        the BGR high-pass image with watershed boundaries in green.
        If anything fails, the error is printed and ``([], [], None)`` is
        returned instead of raising.
    """
    try:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None rather
            # than raising; fail here with a message that names the cause.
            raise ValueError("image could not be read (missing file or unsupported format)")

        # --- 1. Edge enhancement -------------------------------------------
        blurred = cv2.GaussianBlur(image, (15, 15), 0)
        high_pass_kernel = np.array([[-1, -1, -1],
                                     [-1,  8, -1],
                                     [-1, -1, -1]])
        # NOTE(review): ddepth=-2 is kept from the original code; -1 is the
        # documented "same depth as source" value - confirm they are equivalent.
        high_pass_filtered = cv2.filter2D(blurred, -2, high_pass_kernel)
        high_pass_filtered = cv2.normalize(high_pass_filtered, None, 0, 255, cv2.NORM_MINMAX)

        # --- 2. Watershed (only used to annotate the returned image) --------
        _, binary = cv2.threshold(high_pass_filtered, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        morph_kernel = np.ones((3, 3), np.uint8)
        sure_bg = cv2.dilate(binary, morph_kernel, iterations=5)
        sure_fg = cv2.erode(binary, morph_kernel, iterations=5)
        unknown = cv2.subtract(sure_bg, sure_fg)

        _, markers = cv2.connectedComponents(sure_fg)
        markers = markers + 1           # shift labels so the background is 1, not 0
        markers[unknown == 255] = 0     # 0 marks pixels watershed has to decide

        color_image = cv2.cvtColor(high_pass_filtered, cv2.COLOR_GRAY2BGR)
        cv2.watershed(color_image, markers)
        color_image[markers == -1] = [0, 255, 0]    # watershed boundaries in green (BGR)

        # --- 3. Candidate regions from the annotated image ------------------
        gray_color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray_color_image, (5, 5), 0)
        binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

        # Blank a fixed border so nothing touching the image edge is labelled.
        binary[:40, :] = 0
        binary[-40:, :] = 0
        binary[:, :20] = 0
        binary[:, -20:] = 0

        num_labels, labels = cv2.connectedComponents(binary)
        area_threshold = 70     # minimum component size in pixels

        filtered_labels = np.zeros_like(labels)
        for i in range(1, num_labels):
            cell_mask = labels == i
            if np.sum(cell_mask) >= area_threshold:
                filtered_labels[cell_mask] = i

        # --- 4. Split, fill and rasterize the cells -------------------------
        split_labels = split_cells(filtered_labels, area_threshold)

        for i in range(1, split_labels.max() + 1):
            cell_mask = split_labels == i
            if np.any(cell_mask):
                split_labels[ndi.binary_fill_holes(cell_mask)] = i

        # Draw every cell into one 8-bit mask with a fixed value. The previous
        # version drew random colours and thresholded the grayscale result at
        # > 1, so a near-black random colour could silently drop a cell.
        cells_mask = np.zeros(split_labels.shape, dtype=np.uint8)
        for i in range(1, split_labels.max() + 1):
            cell_mask = split_labels == i
            if np.any(cell_mask):
                cell_contours, _ = cv2.findContours(cell_mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                cv2.drawContours(cells_mask, cell_contours, -1, 255, -1)

        contours, _ = cv2.findContours(cells_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        solid_contours = [cnt for cnt in contours if is_solid_region(cnt)]

        # --- 5. Measurements ------------------------------------------------
        regions_properties = []
        for cnt in solid_contours:
            area = cv2.contourArea(cnt) * (pixel_to_um ** 2)
            perimeter = cv2.arcLength(cnt, True) * pixel_to_um
            equivalent_diameter = np.sqrt(4 * area / np.pi)
            area_weighted_equivalent_diameter = area * equivalent_diameter
            # All "diameter", length/width and ratio values below come from the
            # axis-aligned bounding box, not from the contour itself.
            x, y, w, h = cv2.boundingRect(cnt)
            min_diameter = min(w, h) * pixel_to_um
            max_diameter = max(w, h) * pixel_to_um
            mean_diameter = np.mean([w, h]) * pixel_to_um
            aspect_ratio = w / h
            elongation = 1 - (w / h)    # negative whenever the box is wider than tall
            if len(cnt) >= 5:
                # fitEllipse needs at least five points; only its angle is used.
                orientation = cv2.fitEllipse(cnt)[2]
            else:
                orientation = np.nan
            roundness = (4 * area) / (np.pi * (mean_diameter ** 2))
            centroid = (x + w // 2, y + h // 2)     # bounding-box centre, in pixels
            regions_properties.append({
                'Projected area (µm²)': area,
                'Perimeter (µm)': perimeter,
                'Equivalent diameter (µm)': equivalent_diameter,
                'Area weighted equivalent diameter (µm)': area_weighted_equivalent_diameter,
                'Mean diameter (µm)': mean_diameter,
                'Minimum diameter (µm)': min_diameter,
                'Maximum diameter (µm)': max_diameter,
                'Length (µm)': h * pixel_to_um,
                'Width (µm)': w * pixel_to_um,
                'Aspect ratio': aspect_ratio,
                'Roundness': roundness,
                'Elongation': elongation,
                'Orientation (°)': orientation,
                'Centroid': centroid
            })

        return regions_properties, solid_contours, color_image
    except Exception as e:
        # Best effort: one bad image must not stop a batch run.
        print(f"Error processing {image_path}: {e}")
        return [], [], None

def split_cells(labels, area_threshold):
    """Relabel a label image, splitting large regions at their thick cores.

    Every label value from 1 to ``labels.max()`` is visited. A region with at
    least ``area_threshold`` pixels is replaced by the connected pieces of its
    distance transform above 20 % of that transform's maximum; only pieces
    that again reach ``area_threshold`` pixels receive a new label. A region
    below the threshold is copied over unchanged under the next free label
    (an absent label value still uses up one label number).

    Args:
        labels: Integer label image, 0 meaning background.
        area_threshold: Minimum pixel count for a region or piece.

    Returns:
        A new label array with the same shape and dtype as ``labels``.
    """
    relabeled = np.zeros_like(labels)
    next_id = 1
    for label_id in range(1, labels.max() + 1):
        region = labels == label_id

        if np.count_nonzero(region) < area_threshold:
            # Small (or missing) region: keep as is, consume one label number.
            relabeled[region] = next_id
            next_id += 1
            continue

        distances = cv2.distanceTransform(region.astype(np.uint8), cv2.DIST_L2, 5)
        _, cores = cv2.threshold(distances, 0.2 * distances.max(), 255, 0)
        piece_count, piece_map = cv2.connectedComponents(np.uint8(cores))
        for piece_id in range(1, piece_count):
            piece = piece_map == piece_id
            if np.count_nonzero(piece) >= area_threshold:
                relabeled[piece] = next_id
                next_id += 1
    return relabeled

def is_solid_region(contour):
    """Tell whether a contour is big and compact enough to be kept.

    A contour passes when its enclosed area (in pixels) is greater than 100
    and its perimeter divided by that area is below 0.1.
    """
    enclosed_area = cv2.contourArea(contour)
    boundary_length = cv2.arcLength(contour, True)
    if not enclosed_area > 100:
        return False
    return boundary_length / enclosed_area < 0.1

def calculate_statistics(df):
    """Summarize the numeric columns of ``df`` as mean/std/min/max/median.

    Non-numeric columns (for example a column of coordinate tuples) are
    ignored instead of making the median computation fail.

    Args:
        df: DataFrame of measurements, one row per region.

    Returns:
        A DataFrame with one row per statistic, in the order mean, std, min,
        max, median. It has one column per numeric input column followed by a
        ``'Statistic'`` column naming the row. When ``df`` has no numeric
        columns only the ``'Statistic'`` column is returned.
    """
    statistic_names = ['mean', 'std', 'min', 'max', 'median']

    numeric = df.select_dtypes(include='number')
    if numeric.shape[1] == 0:
        # Nothing to aggregate; return the row labels so callers can still
        # write a (blank) statistics sheet.
        return pd.DataFrame({'Statistic': statistic_names})

    stats = numeric.agg(statistic_names)
    stats['Statistic'] = stats.index
    stats = stats.reset_index(drop=True)
    return stats

def plot_and_save_image(image, contours, centroids, output_path):
    """Save an annotated copy of ``image`` with contours and centre points.

    The figure is built as a standalone ``Figure`` instead of going through
    pyplot: pyplot keeps a process-wide "current figure", and this function is
    called from several worker threads at once during batch runs. A standalone
    figure is also released automatically if drawing fails part-way.

    Args:
        image: BGR image array to show as the background.
        contours: OpenCV-style contour arrays; ``contour[:, 0, 0]`` is used as
            x and ``contour[:, 0, 1]`` as y. Drawn as blue lines.
        centroids: Iterable of ``(x, y)`` points, drawn as red dots.
        output_path: Destination file; the format follows its extension.

    Raises:
        ValueError: If ``image`` is ``None`` (image processing failed).
    """
    # Local import keeps this fix self-contained; matplotlib is already a
    # dependency of this notebook.
    from matplotlib.figure import Figure

    if image is None:
        raise ValueError("no image to plot (image processing failed)")

    fig = Figure(figsize=(10, 10))
    ax = fig.subplots()
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    for contour in contours:
        ax.plot(contour[:, 0, 0], contour[:, 0, 1], 'b-', linewidth=1)

    for centroid in centroids:
        ax.plot(centroid[0], centroid[1], 'r.', markersize=10)

    ax.axis('off')
    fig.savefig(output_path, bbox_inches='tight')

def process_file(root, file, base_dir, export_dir, pixel_to_um=2):
    """Process one image file and save its annotated picture.

    The annotated image is written to
    ``export_dir/<root relative to base_dir>/<file stem>_processed.png``.

    Args:
        root: Directory that contains ``file``.
        file: File name of the image.
        base_dir: Top of the input tree; used to mirror the folder layout.
        export_dir: Top of the output tree.
        pixel_to_um: Scale factor forwarded to ``process_image``.

    Returns:
        ``(subfolder_key, file, properties_df)`` where ``subfolder_key`` is
        ``root`` relative to ``base_dir`` and ``properties_df`` holds one row
        per measured region. ``None`` if the image could not be processed or
        saved (the error is printed, never raised).
    """
    try:
        image_path = os.path.join(root, file)
        properties, contours, processed_image = process_image(image_path, pixel_to_um)
        if processed_image is None:
            # process_image has already printed the reason. Stop here so we do
            # not create an output folder or trigger a second, misleading
            # error from the plotting code.
            return None
        properties_df = pd.DataFrame(properties)

        output_subfolder_key = os.path.relpath(root, base_dir)
        output_image_path = os.path.join(export_dir, output_subfolder_key, f"{os.path.splitext(file)[0]}_processed.png")
        os.makedirs(os.path.dirname(output_image_path), exist_ok=True)
        centroids = [prop['Centroid'] for prop in properties]
        plot_and_save_image(processed_image, contours, centroids, output_image_path)

        return output_subfolder_key, file, properties_df
    except Exception as e:
        # Best effort: report and let the batch continue with other files.
        print(f"Error processing file {file} in {root}: {e}")
        return None

def _export_group_workbooks(frames_by_subfolder, export_dir, suffix):
    """Write one '<folder name> <suffix>.xlsx' workbook per subfolder with data."""
    for subfolder_key, frames in frames_by_subfolder.items():
        # Images without any measured region yield empty frames; skip them so
        # an all-empty subfolder does not crash the statistics step.
        non_empty = [frame for frame in frames if not frame.empty]
        if not non_empty:
            continue

        combined_df = pd.concat(non_empty, ignore_index=True)
        if 'Centroid' in combined_df.columns:
            # Coordinate tuples are only needed for plotting, not for export.
            combined_df = combined_df.drop(columns=['Centroid'])

        output_subfolder = os.path.normpath(os.path.join(export_dir, subfolder_key))
        os.makedirs(output_subfolder, exist_ok=True)
        # Name the workbook after the real folder. For images directly in the
        # base folder the key is '.', which used to give a file '. MD.xlsx'.
        workbook_name = f"{os.path.basename(output_subfolder)} {suffix}.xlsx"
        output_file = os.path.join(output_subfolder, workbook_name)

        stats_df = calculate_statistics(combined_df)

        with pd.ExcelWriter(output_file) as writer:
            combined_df.to_excel(writer, sheet_name='Data', index=False)
            stats_df.to_excel(writer, sheet_name='Statistics', index=False)


def traverse_and_process(base_dir, export_dir, max_files=2, pixel_to_um=2, max_workers=24):
    """Process the MD and TD images under ``base_dir`` and export the results.

    Every folder below ``base_dir`` (except paths containing
    ``'Exported Results'``) is scanned for files ending in ``.PNG``. Up to
    ``max_files`` files whose name contains ``'MD'`` and up to ``max_files``
    whose name contains ``'TD'`` are processed per folder, in sorted name
    order. For each folder and group one Excel workbook is written with a
    ``Data`` sheet (all regions) and a ``Statistics`` sheet.

    Args:
        base_dir: Root folder of the input images.
        export_dir: Root folder for annotated images and workbooks; the
            input folder layout is mirrored below it.
        max_files: Per-folder limit for each of the MD and TD groups.
        pixel_to_um: Scale factor forwarded to the image processing.
        max_workers: Number of worker threads used to process images.

    Returns:
        None. Progress, total run time and the number of successfully
        processed files are printed.
    """
    start_time = time.perf_counter()
    file_count = 0

    # group name -> subfolder key -> list of per-image DataFrames
    frames_by_group = {'MD': {}, 'TD': {}}

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []

        for root, _dirs, files in os.walk(base_dir):
            if 'Exported Results' in root:
                continue

            # The upper-case '.PNG' test is kept case-sensitive on purpose:
            # annotated outputs are saved as '*_processed.png' and may live in
            # the same tree, so they must not be picked up as inputs.
            png_files = sorted(f for f in files if f.endswith('.PNG'))
            md_files = [f for f in png_files if 'MD' in f][:max_files]
            # A name containing both tags counts as MD only, so it is not
            # processed twice.
            td_files = [f for f in png_files if 'TD' in f and 'MD' not in f][:max_files]

            for file in md_files + td_files:
                futures.append(executor.submit(process_file, root, file, base_dir, export_dir, pixel_to_um))

        # Collect in submission order so the exported row order is the same
        # on every run, regardless of which thread finishes first.
        for future in futures:
            result = future.result()
            if not result:
                continue
            subfolder_key, file, properties_df = result
            group = 'MD' if 'MD' in file else 'TD'
            frames_by_group[group].setdefault(subfolder_key, []).append(properties_df)

            file_count += 1
            print(f"Processed file: {file}")

    for group, frames_by_subfolder in frames_by_group.items():
        _export_group_workbooks(frames_by_subfolder, export_dir, group)

    execution_time = time.perf_counter() - start_time
    print(f"Total execution time: {execution_time:.2f} seconds")
    print(f"Total files processed: {file_count}")

# Input and export roots are the same folder, so annotated images and
# workbooks are written next to the source images.
base_dir = r'C:\Users\chilukalo\OneDrive - Carlisle Global\Desktop\SEM\Mike'
export_dir = base_dir

traverse_and_process(base_dir=base_dir, export_dir=export_dir, max_files=24)


In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import ndimage as ndi

def process_image(image_path, pixel_to_um=2):
    """Segment regions in one image and measure each retained contour.

    Steps, in order:
      1. Read the file as grayscale, Gaussian-blur it, apply a 3x3 high-pass
         kernel and normalize the result to 0-255.
      2. Inverted Otsu threshold, erode/dilate to get sure foreground and
         background, then watershed. Watershed boundary pixels are painted
         green on the image that is returned for plotting.
      3. Adaptive threshold of that annotated image, blank a border
         (40 rows top/bottom, 20 columns left/right), label connected
         components and drop components smaller than 70 pixels.
      4. Re-split with ``split_cells``, fill holes, rasterize all cells into
         one mask, take its external contours and keep those accepted by
         ``is_solid_region``.
      5. Compute size/shape measurements for every kept contour.

    Args:
        image_path: Path of the image file to read.
        pixel_to_um: Factor applied to pixel lengths (squared for areas)
            before they are reported; presumably micrometres per pixel.

    Returns:
        ``(regions_properties, solid_contours, color_image)`` where
        ``regions_properties`` is a list of dicts (one per contour),
        ``solid_contours`` the matching OpenCV contours and ``color_image``
        the BGR high-pass image with watershed boundaries in green.
        If anything fails, the error is printed and ``([], [], None)`` is
        returned instead of raising.
    """
    try:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None rather
            # than raising; fail here with a message that names the cause.
            raise ValueError("image could not be read (missing file or unsupported format)")

        # --- 1. Edge enhancement -------------------------------------------
        blurred = cv2.GaussianBlur(image, (15, 15), 0)
        high_pass_kernel = np.array([[-1, -1, -1],
                                     [-1,  8, -1],
                                     [-1, -1, -1]])
        # NOTE(review): ddepth=-2 is kept from the original code; -1 is the
        # documented "same depth as source" value - confirm they are equivalent.
        high_pass_filtered = cv2.filter2D(blurred, -2, high_pass_kernel)
        high_pass_filtered = cv2.normalize(high_pass_filtered, None, 0, 255, cv2.NORM_MINMAX)

        # --- 2. Watershed (only used to annotate the returned image) --------
        _, binary = cv2.threshold(high_pass_filtered, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        morph_kernel = np.ones((3, 3), np.uint8)
        sure_bg = cv2.dilate(binary, morph_kernel, iterations=5)
        sure_fg = cv2.erode(binary, morph_kernel, iterations=5)
        unknown = cv2.subtract(sure_bg, sure_fg)

        _, markers = cv2.connectedComponents(sure_fg)
        markers = markers + 1           # shift labels so the background is 1, not 0
        markers[unknown == 255] = 0     # 0 marks pixels watershed has to decide

        color_image = cv2.cvtColor(high_pass_filtered, cv2.COLOR_GRAY2BGR)
        cv2.watershed(color_image, markers)
        color_image[markers == -1] = [0, 255, 0]    # watershed boundaries in green (BGR)

        # --- 3. Candidate regions from the annotated image ------------------
        gray_color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray_color_image, (5, 5), 0)
        binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

        # Blank a fixed border so nothing touching the image edge is labelled.
        binary[:40, :] = 0
        binary[-40:, :] = 0
        binary[:, :20] = 0
        binary[:, -20:] = 0

        num_labels, labels = cv2.connectedComponents(binary)
        area_threshold = 70     # minimum component size in pixels

        filtered_labels = np.zeros_like(labels)
        for i in range(1, num_labels):
            cell_mask = labels == i
            if np.sum(cell_mask) >= area_threshold:
                filtered_labels[cell_mask] = i

        # --- 4. Split, fill and rasterize the cells -------------------------
        split_labels = split_cells(filtered_labels, area_threshold)

        for i in range(1, split_labels.max() + 1):
            cell_mask = split_labels == i
            if np.any(cell_mask):
                split_labels[ndi.binary_fill_holes(cell_mask)] = i

        # Draw every cell into one 8-bit mask with a fixed value. The previous
        # version drew random colours and thresholded the grayscale result at
        # > 1, so a near-black random colour could silently drop a cell.
        cells_mask = np.zeros(split_labels.shape, dtype=np.uint8)
        for i in range(1, split_labels.max() + 1):
            cell_mask = split_labels == i
            if np.any(cell_mask):
                cell_contours, _ = cv2.findContours(cell_mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                cv2.drawContours(cells_mask, cell_contours, -1, 255, -1)

        contours, _ = cv2.findContours(cells_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        solid_contours = [cnt for cnt in contours if is_solid_region(cnt)]

        # --- 5. Measurements ------------------------------------------------
        regions_properties = []
        for cnt in solid_contours:
            area = cv2.contourArea(cnt) * (pixel_to_um ** 2)
            perimeter = cv2.arcLength(cnt, True) * pixel_to_um
            equivalent_diameter = np.sqrt(4 * area / np.pi)
            area_weighted_equivalent_diameter = area * equivalent_diameter
            # All "diameter", length/width and ratio values below come from the
            # axis-aligned bounding box, not from the contour itself.
            x, y, w, h = cv2.boundingRect(cnt)
            min_diameter = min(w, h) * pixel_to_um
            max_diameter = max(w, h) * pixel_to_um
            mean_diameter = np.mean([w, h]) * pixel_to_um
            aspect_ratio = w / h
            elongation = 1 - (w / h)    # negative whenever the box is wider than tall
            if len(cnt) >= 5:
                # fitEllipse needs at least five points; only its angle is used.
                orientation = cv2.fitEllipse(cnt)[2]
            else:
                orientation = np.nan
            roundness = (4 * area) / (np.pi * (mean_diameter ** 2))
            centroid = (x + w // 2, y + h // 2)     # bounding-box centre, in pixels
            regions_properties.append({
                'Projected area (µm²)': area,
                'Perimeter (µm)': perimeter,
                'Equivalent diameter (µm)': equivalent_diameter,
                'Area weighted equivalent diameter (µm)': area_weighted_equivalent_diameter,
                'Mean diameter (µm)': mean_diameter,
                'Minimum diameter (µm)': min_diameter,
                'Maximum diameter (µm)': max_diameter,
                'Length (µm)': h * pixel_to_um,
                'Width (µm)': w * pixel_to_um,
                'Aspect ratio': aspect_ratio,
                'Roundness': roundness,
                'Elongation': elongation,
                'Orientation (°)': orientation,
                'Centroid': centroid
            })

        return regions_properties, solid_contours, color_image
    except Exception as e:
        # Best effort: one bad image must not stop a batch run.
        print(f"Error processing {image_path}: {e}")
        return [], [], None

def split_cells(labels, area_threshold):
    """Relabel a label image, splitting large regions at their thick cores.

    Every label value from 1 to ``labels.max()`` is visited. A region with at
    least ``area_threshold`` pixels is replaced by the connected pieces of its
    distance transform above 20 % of that transform's maximum; only pieces
    that again reach ``area_threshold`` pixels receive a new label. A region
    below the threshold is copied over unchanged under the next free label
    (an absent label value still uses up one label number).

    Args:
        labels: Integer label image, 0 meaning background.
        area_threshold: Minimum pixel count for a region or piece.

    Returns:
        A new label array with the same shape and dtype as ``labels``.
    """
    relabeled = np.zeros_like(labels)
    next_id = 1
    for label_id in range(1, labels.max() + 1):
        region = labels == label_id

        if np.count_nonzero(region) < area_threshold:
            # Small (or missing) region: keep as is, consume one label number.
            relabeled[region] = next_id
            next_id += 1
            continue

        distances = cv2.distanceTransform(region.astype(np.uint8), cv2.DIST_L2, 5)
        _, cores = cv2.threshold(distances, 0.2 * distances.max(), 255, 0)
        piece_count, piece_map = cv2.connectedComponents(np.uint8(cores))
        for piece_id in range(1, piece_count):
            piece = piece_map == piece_id
            if np.count_nonzero(piece) >= area_threshold:
                relabeled[piece] = next_id
                next_id += 1
    return relabeled

def is_solid_region(contour):
    """Tell whether a contour is big and compact enough to be kept.

    A contour passes when its enclosed area (in pixels) is greater than 100
    and its perimeter divided by that area is below 0.1.
    """
    enclosed_area = cv2.contourArea(contour)
    boundary_length = cv2.arcLength(contour, True)
    if not enclosed_area > 100:
        return False
    return boundary_length / enclosed_area < 0.1

def calculate_statistics(df):
    """Summarize the numeric columns of ``df`` as mean/std/min/max/median.

    Non-numeric columns (for example a column of coordinate tuples) are
    ignored instead of making the median computation fail.

    Args:
        df: DataFrame of measurements, one row per region.

    Returns:
        A DataFrame with one row per statistic, in the order mean, std, min,
        max, median. It has one column per numeric input column followed by a
        ``'Statistic'`` column naming the row. When ``df`` has no numeric
        columns only the ``'Statistic'`` column is returned.
    """
    statistic_names = ['mean', 'std', 'min', 'max', 'median']

    numeric = df.select_dtypes(include='number')
    if numeric.shape[1] == 0:
        # Nothing to aggregate; return the row labels so callers can still
        # write a (blank) statistics sheet.
        return pd.DataFrame({'Statistic': statistic_names})

    stats = numeric.agg(statistic_names)
    stats['Statistic'] = stats.index
    stats = stats.reset_index(drop=True)
    return stats

def plot_and_save_image(image, contours, centroids, output_path):
    """Save an annotated copy of ``image`` with contours and centre points.

    The figure is built as a standalone ``Figure`` instead of going through
    pyplot, so no process-wide "current figure" state is touched and the
    figure is released automatically even if drawing fails part-way.

    Args:
        image: BGR image array to show as the background.
        contours: OpenCV-style contour arrays; ``contour[:, 0, 0]`` is used as
            x and ``contour[:, 0, 1]`` as y. Drawn as blue lines.
        centroids: Iterable of ``(x, y)`` points, drawn as red dots.
        output_path: Destination file; the format follows its extension.

    Raises:
        ValueError: If ``image`` is ``None`` (image processing failed).
    """
    # Local import keeps this fix self-contained; matplotlib is already a
    # dependency of this notebook.
    from matplotlib.figure import Figure

    if image is None:
        raise ValueError("no image to plot (image processing failed)")

    fig = Figure(figsize=(10, 10))
    ax = fig.subplots()
    ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    for contour in contours:
        ax.plot(contour[:, 0, 0], contour[:, 0, 1], 'b-', linewidth=1)

    for centroid in centroids:
        ax.plot(centroid[0], centroid[1], 'r.', markersize=10)

    ax.axis('off')
    fig.savefig(output_path, bbox_inches='tight')

def test_image_processing(image_path):
    base_dir = os.path.dirname(image_path)
    export_dir = os.path.join(base_dir, 'Exported Results')
    os.makedirs(export_dir, exist_ok=True)
    
    pixel_to_um = 2
    properties, contours, processed_image = process_image(image_path, pixel_to_um)
    properties_df = pd.DataFrame(properties)

    output_image_path = os.path.join(export_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_processed.png")
    centroids = [prop['Centroid'] for prop in properties]
    plot_and_save_image(processed_image, contours, centroids, output_image_path)

    print(f"Processed image: {image_path}")
    print(f"Exported processed image to: {output_image_path}")

# Example: process a single image; the annotated copy is saved in an
# 'Exported Results' folder next to it.
image_path = r'C:\Users\chilukalo\OneDrive - Carlisle Global\Desktop\SEM\Mike\11-C MD.1.PNG'
test_image_processing(image_path=image_path)


KeyboardInterrupt: 