# Folder-wise Fast Microscopy Picture Analysis
The following code opens a folder of the SlideScanner Microscope and automatically sets ROIs to the darkest and brightest portions of the picture. Consequently, it removes the background through the ROIs at the darkest positions and calculates the mean fluorescence at the brighter spots. 

In [None]:
## Import necessary libraries
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tifffile as tiff
import os
import pandas as pd
from PIL import Image

import gc
import concurrent.futures as cf
import multiprocessing

matplotlib.use('Agg')  # Use the Agg backend for non-interactive plotting

from AutoImgUtils import * 

# Function Definitions

In [None]:
def process_images(stacked_image_path, config):
    '''
    # Function to process a single set of 4-channel images and return mean fluorescence values, mean background values, and number of positive and negative results

            Parameters:
                base_name (string): The path to the folder containing the 4-channel images
                lower_thresh_chan (int): A list of 4 integers, the lower threshold for each channel
                upper_thresh (int): The upper threshold for the channel of interest
                background_threshold (int): The threshold for the background values, if left empty, it will be 2 standard deviations below the mean
                radius_factor (int): The radius of the circular ROIs around detected points, if none is given, the mean radius will be calculated from the region properties
                channel_of_interest (int): The channel of interest, default is 1
                single_ch_background (bool): If True, the background will be calculated for each channel separately, if False, the background will be calculated for all channels together
                mask_channel (int): The channel to use for masking bright spots (ROIs), default is 1

            Returns:
                mean_fluorescence (dict): A dictionary containing the mean fluorescence values for each channel
                mean_fluorescence_value (float): The mean fluorescence value for each channel
                background_values (dict): A dictionary containing the mean background values for each channel
                mean_background_value (float): The mean background value for the channel of interest
                positive_results (dict): A dictionary containing the number of positive results for each channel
                negative_results (dict): A dictionary containing the number of negative results for each channel
                corrected_total_fluorescence (dict): A dictionary containing the corrected total cell fluorescence for each channel
    '''
    
    # Extract parameters with defaults
    lower_thresh_chan = config.get('lower_thresh_factor', [2, 3, 2, 2])
    upper_thresh = config.get('upper_thresh', 60000)
    background_threshold = config.get('background_threshold', None)
    radius_factor = config.get('radius_factor', None)
    mask_channel = config.get('mask_channel', 1)
    channel_of_interest = config.get('channel_of_interest', 1)
    single_ch_background = config.get('single_ch_background', True)

    channel_of_interest -= 1
    mask_channel -= 1
    
    image = tiff.imread(stacked_image_path)
    image = np.moveaxis(image, 0, -1)
    base_name = os.path.splitext(stacked_image_path)[0]

    n_channels = image.shape[2]

    # Initialize dictonaries to store the mean fluorescence and background values 
    mean_fluorescence = {f'Channel {i+1}': [] for i in range(n_channels)}
    background_values = {f'Channel {i+1}': [] for i in range(n_channels)}
    positive_results = {f'Channel {i+1}': [] for i in range(n_channels)}
    corrected_total_fluorescence = {f'Channel {i+1}': [] for i in range(n_channels)}

    # Subtract background from the image and display the background ROIs on the channel of interest
    if single_ch_background:
        background_values, mean_background_value, background_subtracted_image = bg_substraction_ROI_single_ch(image, background_threshold,channel_of_interest, display_rois=False)
    else:
        background_values, mean_background_value, background_subtracted_image = bg_substraction_ROI(image, background_threshold, display_rois=False)

    # Display the histogram of the background subtracted image    
    fig, axs = plt.subplots(1, n_channels , figsize=(n_channels*4, 10))

    for ax, channel_index in zip(axs, range(n_channels)):
        ax.hist(image[:, :, channel_index].ravel(), bins=256, color='gray', alpha=0.75)
        ax.set_title(f"Histogram for Channel {channel_index+1}")
        ax.set_xlabel("Pixel intensity")
        ax.set_ylabel("Frequency")
        ax.set_yscale('log')
        ax.set_xscale('log')
        ax.axvline(mean_background_value[channel_index], color='r', linestyle='dashed', linewidth=1)
        ax.axvline(lower_thresh_chan[channel_index] * np.std(image[:, :, channel_index]) + np.mean(image[:, :, channel_index]), color='g', linestyle='dashed', linewidth=1)

    # Hide x labels and tick labels for top plots and y ticks for right plots.
    for ax in axs.flat:
        ax.label_outer()
    
    output_path = base_name + "_0_histogram.png"
    plt.savefig(output_path, bbox_inches='tight', pad_inches = 0)
    
    # plt.show()
    
    hist_background_image = np.zeros_like(background_subtracted_image)

    for channel in range(n_channels):
        hist_background_image[:,:,channel] = np.where(background_subtracted_image[:,:,channel] < 1, 1, background_subtracted_image[:,:,channel])


    # Apply thresholds to find maximum values in the background-subtracted depending on channel of interest, avoiding very bright spots
    # Mask out very bright spots
    channel_thresh = 2 * np.std(background_subtracted_image[:, :, mask_channel]) + np.mean(background_subtracted_image[:, :, mask_channel])
    print(f'Channel threshold for mask channel {mask_channel+1}: {channel_thresh}')
    thresh = (background_subtracted_image[:, :, mask_channel] > channel_thresh) & (background_subtracted_image[:, :, mask_channel] < upper_thresh)

    # Label the thresholded regions and return the number of cells
    labels = measure.label(thresh)
    props = measure.regionprops(labels)
    
    if radius_factor is None:
        # First identify regions in the mask channel
        mask_props = props.copy()
        
        # Calculate mask channel centroids for later comparison
        mask_centroids = [(int(prop.centroid[1]), int(prop.centroid[0])) for prop in mask_props]
        
        # Find regions in the channel of interest for radius calculation
        channel_thresh_interest = lower_thresh_chan[channel_of_interest] * np.std(background_subtracted_image[:, :, channel_of_interest]) + np.mean(background_subtracted_image[:, :, channel_of_interest])
        print(f'Channel threshold for channel of interest {channel_of_interest+1}: {channel_thresh_interest}')
        thresh_interest = (background_subtracted_image[:, :, channel_of_interest] > channel_thresh_interest) & (background_subtracted_image[:, :, channel_of_interest] < upper_thresh)
        
        # Label regions in the channel of interest
        labels_interest = measure.label(thresh_interest)
        props_interest = measure.regionprops(labels_interest)
        
        # Only use regions in the channel of interest that overlap with mask channel regions
        matched_radii = []
        
        for mask_prop in mask_props:
            mask_x, mask_y = int(mask_prop.centroid[1]), int(mask_prop.centroid[0])
            
            # Look for any overlapping regions in channel of interest
            for interest_prop in props_interest:
                interest_x, interest_y = int(interest_prop.centroid[1]), int(interest_prop.centroid[0])
                
                # Calculate distance between centroids
                distance = np.sqrt((mask_x - interest_x)**2 + (mask_y - interest_y)**2)
                
                # If centroids are close enough (within reasonable distance), consider them matched
                max_distance = np.sqrt(mask_prop.area) / 2  # Half the "radius" of the mask region
                if distance <= max_distance:
                    # For matched region, use the radius from the channel of interest
                    interest_radius = np.sqrt(interest_prop.area / np.pi)
                    matched_radii.append(interest_radius)
                    break
        
        if len(matched_radii) > 0:
            mean_radius = np.mean(matched_radii)
            std_radius = np.std(matched_radii)
            radius_factor = int(mean_radius)
            
            # Define size thresholds
            min_radius = mean_radius - 3 * std_radius  # 3 standard deviations below mean
            max_radius = mean_radius + 3 * std_radius  # 3 standard deviations above mean
            
            # Filter regions based on size
            filtered_props = [prop for prop in mask_props 
                            if min_radius <= np.sqrt(prop.area/np.pi) <= max_radius]
            
            print(f'Original number of regions: {len(mask_props)}')
            print(f'Filtered number of regions: {len(filtered_props)}')
            print(f'Removed {len(mask_props) - len(filtered_props)} regions as outliers')
            print(f'Mean radius from matched regions in channel of interest {channel_of_interest+1}: {mean_radius:.2f} ± {std_radius:.2f} pixels')
            print(f'Size range: {min_radius:.2f} - {max_radius:.2f} pixels')
            print(f'Found {len(matched_radii)} matching regions between mask and channel of interest')
            
            # Update props to use filtered regions
            props = filtered_props

        else:
            print(f'No regions found in channel of interest {channel_of_interest+1}, using mask channel instead')
            # Fall back to using the mask channel for radius calculation
            areas = [prop.area for prop in mask_props]
            radii = [np.sqrt(area/np.pi) for area in areas]
            mean_radius = np.mean(radii)
            std_radius = np.std(radii)
            radius_factor = int(mean_radius)
            
            # Define size thresholds
            min_radius = mean_radius - 3 * std_radius
            max_radius = mean_radius + 3 * std_radius
            
            filtered_props = [prop for prop in mask_props 
                           if min_radius <= np.sqrt(prop.area/np.pi) <= max_radius]
            
            print(f'Using mask channel. Mean radius: {mean_radius:.2f} ± {std_radius:.2f} pixels')
            print(f'Size range: {min_radius:.2f} - {max_radius:.2f} pixels')
            props = filtered_props
    # Create circular ROIs around detected points
    rois = []

    for prop in props:
        y, x = prop.centroid
        radius = radius_factor 
        rois.append((int(x), int(y), int(radius)))
    
    # # Display circular ROIs on the background-subtracted masked channel
    # roi_image = np.stack([normalize(background_subtracted_image[:, :, mask_channel])]*3, axis=-1)  # Convert to RGB

    # for roi in rois:
    #     x, y, radius = roi
    #     rr, cc = draw.disk((y, x), radius, shape=roi_image.shape)
    #     roi_image[rr, cc] = [0, 1, 0]  # Green for ROIs

    # plt.imshow(roi_image)
    # plt.title(f'ROIs on Background-Subtracted Mask_Channel {mask_channel+1}')
    # plt.show()

    # Calculate mean fluorescence values for each channel and check if the signal is present
    for channel_index in range(n_channels):
        channel = background_subtracted_image[:, :, channel_index]
        channel_thresh = lower_thresh_chan[channel_index] * np.std(channel) + np.mean(channel)

        for roi in rois:
            x, y, radius = roi
            rr, cc = draw.disk((y, x), radius, shape=channel.shape)
            roi_area = channel[rr, cc]
            mean_value = np.mean(roi_area)
            integrated_density = np.sum(roi_area)
            area_of_cell = len(rr)
            corrected_fluorescence = integrated_density - (area_of_cell * mean_background_value[channel_index])

            if mean_value > channel_thresh:
                positive_results[f'Channel {channel_index+1}'].append((x, y, radius))
                mean_fluorescence[f'Channel {channel_index+1}'].append(mean_value)
                corrected_total_fluorescence[f'Channel {channel_index+1}'].append(corrected_fluorescence)
            else:
                positive_results[f'Channel {channel_index+1}'].append(None)
                corrected_total_fluorescence[f'Channel {channel_index+1}'].append(None)
    
    # fig, axs = plt.subplots(1, n_channels , figsize=(n_channels *5,20))

    # axs[0].imshow(normalize(image[:,:,0]), cmap='gray')
    # axs[1].imshow(normalize(image[:,:,1]), cmap='gray')
    # axs[2].imshow(normalize(image[:,:,2]), cmap='gray')
    # axs[3].imshow(normalize(image[:,:,3]), cmap='gray')

    # for ax, channel_index in zip(axs, range(n_channels)):
    #     ax.set_title(f'Channel {channel_index+1} (Raw)')

    # # Hide x labels and tick labels for top plots and y ticks for right plots.
    # for ax in axs.flat:
    #     ax.label_outer()
    #     ax.axis('off')
    
    # plt.show()

    # Display positive ROIs for each channel
    fig, ax = plt.subplots(1, 4, figsize=(n_channels * 5, 20))

    for channel_index in range(n_channels):
        channel_image = np.stack([normalize(background_subtracted_image[:, :, channel_index])]*3, axis=-1)  # Convert to RGB

        for roi in positive_results[f'Channel {channel_index+1}']:
            if roi is not None:
                x, y, radius = roi
                rr, cc = draw.disk((y, x), radius, shape=channel_image.shape)
                channel_image[rr, cc] = [0, 1, 0]  # Green for positive ROIs

        ax[channel_index].imshow(channel_image)
        ax[channel_index].set_title(f'Positive ROIs on Channel {channel_index + 1}')
        ax[channel_index].axis('off')

    output_path = base_name + "_1_ROIs.png"
    plt.savefig(output_path, bbox_inches='tight', pad_inches=0)    
    # plt.show()
    
    # Force garbage collection to free memory
    gc.collect()

    return mean_fluorescence, background_values, mean_background_value, positive_results, corrected_total_fluorescence

In [3]:
def process_file(filepath, config):
    """Process a single file with the given parameters and return results."""
    
    try:
        print(f"Processing {os.path.basename(filepath)}...")
        mean_fluorescence, background_values, mean_background_value, positive_results, corrected_total_fluorescence = process_images(
            filepath, config)
        
        # Generate result dictionary
        result = {'Base Name': filepath}
        for channel, values in mean_fluorescence.items():
            result[f'{channel} Fluorescence mean value'] = np.mean(values) if values else None
            result[f'{channel} Mean Background'] = np.mean(background_values[channel]) if background_values[channel] else None
            result[f'{channel} Positive Results'] = sum(x is not None for x in positive_results[channel])
            result[f'{channel} Negative Results'] = sum(x is None for x in positive_results[channel])
            
            # Handle empty lists gracefully
            ctf_values = [x for x in corrected_total_fluorescence[channel] if x is not None]
            result[f'{channel} Corrected Total Fluorescence'] = np.mean(ctf_values) if ctf_values else None
            
        plt.close('all')  # Close all figures
        return result
    except Exception as e:
        print(f"Error processing {filepath}: {str(e)}")
        return None

# Main Workflow Point

In [4]:
# Alternative method to select the folder (open main folder with all subfolders)
main_folder_path = select_folder()

In [None]:
# Process all sets of images in the folder and collect the results
results = []

# Thresholds for ROI detection and background exclusion
lower_thresh_factor = [2, 3, 2, 2]  # Adjust this value based on your needs
upper_thresh = 60000  # Adjust this value to exclude very bright spots
background_threshold = None  # Adjust this value based on your needs
radius_factor = 10  # Factor to determine the radius of the circular ROIs
mask_channel = 1  # Channel to use for masking bright spots
channel_of_interest = 4  # Channel of interest for Background ROI detection

for root, dirs, files in os.walk(main_folder_path):
    for filename in files:
        if filename.endswith(".tiff"):
            base_name = os.path.join(root, filename)
            try:
                mean_fluorescence, background_values, mean_background_value, positive_results, corrected_total_fluorescence = process_images(base_name, 
                                                                                                            lower_thresh_factor, 
                                                                                                            upper_thresh, 
                                                                                                            background_threshold, 
                                                                                                            radius_factor, 
                                                                                                            mask_channel, 
                                                                                                            channel_of_interest, 
                                                                                                            single_ch_background = True)
                plt.close('all')
            except:
                 print(f"Error processing {base_name}. Skipping...")
                 pass    
            # Collect results into a list of dictionaries for easy conversion to DataFrame
            result = {'Base Name': base_name}
            for channel, values in mean_fluorescence.items():
                result[f'{channel} Fluorescence mean value'] = np.mean(values)
                # result[f'{channel} Background Values'] = background_values[channel]
                result[f'{channel} Mean Background'] = np.mean(background_values[channel])
                result[f'{channel} Positive Results'] = sum(x is not None for x in positive_results[channel]) 
                result[f'{channel} Negative Results'] = sum(x is None for x in positive_results[channel])
                result[f'{channel} Corrected Total Fluorescence'] = np.mean([x for x in corrected_total_fluorescence[channel] if x is not None])
            results.append(result)
    # Convert the results to a DataFrame and save to CSV
    df = pd.DataFrame(results)
    df.to_csv(os.path.join(root, 'mean_fluorescence_results.csv'), index=False)

    print("Processing complete. Results saved to mean_fluorescence_results.csv")


In [None]:
# Define processing configuration
config = {
    'lower_thresh_factor': [2, 3, 2, 2],
    'upper_thresh': 60000,
    'background_threshold': None,
    'radius_factor': None,
    'mask_channel': 1,
    'channel_of_interest': 4,
    'single_ch_background': True
}

# Collect all TIFF files to process
files_to_process = []
for root, dirs, files in os.walk(main_folder_path):
    for filename in files:
        if filename.endswith(".tiff"):
            files_to_process.append(os.path.join(root, filename))

# Process files in parallel or sequentially
print(f"Found {len(files_to_process)} files to process")
results = []

try:
    # First try sequential processing for one file to validate configuration
    if files_to_process:
        print("Testing configuration with first file...")
        test_result = process_file(files_to_process[0], config=config)
        if test_result:
            results.append(test_result)
            print("Configuration test successful, continuing with remaining files...")
        
        # Process remaining files
        if len(files_to_process) >= 1:
            # Use ThreadPoolExecutor instead of ProcessPoolExecutor to avoid serialization issues
            max_workers = max(1, os.cpu_count() // 2) # Limit workers to avoid memory issues
            print(f"Processing remaining files with {max_workers} parallel workers")
            
            # Show a progress counter
            total_files = len(files_to_process)
            completed = 0
            
            with cf.ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_file = {executor.submit(process_file, filepath, config=config): 
                                filepath for filepath in files_to_process}
                
                for future in cf.as_completed(future_to_file):
                    filepath = future_to_file[future]
                    try:
                        result = future.result()
                        if result:
                            results.append(result)
                        
                        # Update progress
                        completed += 1
                        print(f"Completed {completed+1}/{total_files+1} files ({(completed+1)/(total_files+1)*100:.1f}%)")
                    except Exception as e:
                        print(f"Error processing {filepath}: {str(e)}")
except Exception as e:
    print(f"Error in processing pipeline: {str(e)}")

# Convert the results to a DataFrame and save to CSV
if results:
    df = pd.DataFrame(results)
    output_path = os.path.join(main_folder_path, 'mean_fluorescence_results.csv')
    df.to_csv(output_path, index=False)
    print(f"Processing complete. Results saved to {output_path}")
else:
    print("No results were successfully processed.")

Found 5 files to process
Testing configuration with first file...
Processing DARK01_1_2_1_NB_5CO_Cal_DARK_1d-Scene-5-ScanRegion4-OME.ome.tiff...
Background threshold for channel 4: 513.0006522961979
Channel threshold for mask channel 1: 2980.0407531189057
Channel threshold for channel of interest 4: 1867.0118261822718
Original number of regions: 11019
Filtered number of regions: 3918
Removed 7101 regions as outliers
Mean radius from matched regions in channel of interest 4: 14.83 ± 3.54 pixels
Size range: 4.19 - 25.46 pixels
Found 3430 matching regions between mask and channel of interest
Configuration test successful, continuing with remaining files...
Processing remaining files with 10 parallel workers
Processing DARK01_1_2_1_NB_5CO_Cal_DARK_1d-Scene-5-ScanRegion4-OME.ome.tiff...
Processing DARK01_1_4_1_NB_5CO_scFL_DARK_4d-Scene-2-ScanRegion1-OME.ome.tiff...
Processing DARK01_1_4_1_NB_5CO_scFL_DARK_4d-Scene-6-ScanRegion5-OME.ome.tiff...
Processing DARK01_1_4_2_NB_5CO_Cal_DARK_4d-Scen