In [2]:
import io
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
import torch
from ultralytics import YOLO
from PIL import Image, ImageDraw
import numpy as np
import pandas as pd
from IPython.display import clear_output
import socket
import time
from datetime import datetime
import shutil
from random import randint
import random
import re
import colorsys
import math
from scipy.spatial import Voronoi
import matplotlib.colors as mcolors
import matplotlib.path as mpltPath

In [11]:
# Functions

def sanitize_for_filesystem(input_string, len = 6):
    """Reduce a string to ASCII letters and digits, then truncate it.

    Args:
        input_string (str): text to clean.
        len (int, optional): number of leading characters to keep after
            cleaning. Defaults to 6. The name shadows the builtin ``len``
            inside this function only.

    Returns:
        str: the first ``len`` characters of ``input_string`` with every
        character outside ``a-z``, ``A-Z`` and ``0-9`` removed.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9]')
    cleaned = disallowed.sub('', input_string)
    return cleaned[slice(None, len)]

def save_tile_results(res2, save_path, max_tile_cols = 6, divider_width = 4, show=False):
    """Tile prediction results side by side: originals on top, annotated copies below.

    Only the first ``max_tile_cols`` results are drawn, in a single row of
    columns. Each column holds the original image with the annotated image
    (``result.plot(labels=False, conf=False)``) underneath. Columns and the two
    image rows are separated by ``divider_width`` pixels of white background.

    Args:
        res2: iterable of result objects exposing ``orig_img`` and ``plot(...)``.
        save_path: where to save the canvas. ``None`` or ``""`` skips saving,
            matching the ``save_path = None`` convention used elsewhere in this file.
        max_tile_cols (int, optional): maximum number of results drawn. Defaults to 6.
        divider_width (int, optional): gap in pixels between tiles. Defaults to 4.
        show (bool, optional): display the canvas in the notebook. Defaults to False.

    Returns:
        None. Prints "No images to display." when ``res2`` is empty.
    """
    # Only the results that will actually be drawn are rendered.
    shown_results = list(res2)[:max_tile_cols]

    # NOTE(review): orig_img / plot() arrays are handed to PIL unchanged; if they
    # are BGR the tiles will have swapped colours - confirm against the caller.
    image_pairs = [
        (Image.fromarray(result.orig_img),
         Image.fromarray(result.plot(labels=False, conf=False)))
        for result in shown_results
    ]

    if not image_pairs:
        print("No images to display.")
        return

    # All tiles are laid out on the size of the first image.
    img_width, img_height = image_pairs[0][0].size
    column_pitch = img_width + divider_width
    canvas_width = column_pitch * len(image_pairs) - divider_width
    canvas_height = img_height * 2 + divider_width
    canvas = Image.new('RGB', (canvas_width, canvas_height), "white")

    for col, (img_orig, img_labeled) in enumerate(image_pairs):
        left = col * column_pitch
        canvas.paste(img_orig, (left, 0))
        canvas.paste(img_labeled, (left, img_height + divider_width))

    if show:
        from IPython.display import display  # local import so the function also works outside a notebook
        display(canvas)
    if save_path:  # skips both None and ""
        canvas.save(save_path)

def results_toDF(res2, addIDCol = False):
    """Flatten detection results into one DataFrame row per box.

    Args:
        res2: iterable of result objects exposing ``path`` and ``boxes``; each
            box exposes ``xywh``, ``cls`` and ``conf`` (first element is read).
        addIDCol (bool, optional): add an ``ID`` column holding a 0-based
            counter per source file (grouped by ``Fullpath``). Defaults to False.

    Returns:
        pandas.DataFrame with columns Fullpath, Filename, Instance (1-based per
        result), Class, Conf, x, y, w, h, xc, yc, Circ Area (and ID if requested).
        The columns are present even when there are no detections.
    """
    columns = ['Fullpath', 'Filename', 'Instance', 'Class', 'Conf',
               'x', 'y', 'w', 'h', 'xc', 'yc', 'Circ Area']
    records = []
    for result in res2:
        filename = os.path.basename(result.path)
        for inst, box in enumerate(result.boxes, start=1):
            x, y, w, h = box.xywh[0].tolist()
            records.append({
                'Fullpath': result.path,
                'Filename' : filename,
                'Instance' : inst,
                'Class': box.cls[0].item(),
                'Conf': box.conf[0].item(),
                'x': x, 'y': y,
                'w': w, 'h': h,
                # NOTE(review): other functions in this file treat xywh[0:2] as the
                # box centre; if so, xc/yc are the far corner, not a centre - confirm.
                'xc' : x + w/2,
                'yc' : y + h/2,
                'Circ Area' : (0.858 * w * h)
            })

    # Explicit columns keep the schema stable for an empty result set.
    df = pd.DataFrame(records, columns=columns)
    if addIDCol:
        # The frame has no 'path' column; the per-file key is 'Fullpath'.
        df['ID'] = df.groupby('Fullpath').cumcount()
    return df

def results_toCSV(res2, save_path):
    """Write the per-box table built by ``results_toDF`` to a CSV file.

    Args:
        res2: results accepted by ``results_toDF``.
        save_path: destination passed to ``DataFrame.to_csv``; the index
            column is not written.
    """
    results_toDF(res2).to_csv(save_path, index=False)

def polygon_area(coords):
    """Area of a simple polygon via the shoelace formula.

    Args:
        coords: 2-D numpy array whose column 0 holds x and column 1 holds y,
            one row per vertex in boundary order.

    Returns:
        The absolute (orientation-independent) enclosed area.
    """
    xs, ys = coords[:, 0], coords[:, 1]
    # np.roll(v, 1)[k] is v[k-1], i.e. the previous vertex with wrap-around.
    prev_xs = np.roll(xs, 1)
    prev_ys = np.roll(ys, 1)
    cross_sum = np.sum(prev_xs * ys - xs * prev_ys)
    return np.abs(cross_sum * 0.5)

def find_parents(classes, image):
    """Assign every class-1 detection to its nearest class-0 detection.

    Distance is Euclidean between the first two components of each row of
    ``image.boxes.xywh``.

    Args:
        classes: 1-D array-like of class ids, index-aligned with
            ``image.boxes.xywh`` (presumably ``boxes.cls`` - verify against caller).
        image: result object exposing ``boxes.xywh`` as a tensor or array with
            one row per detection.

    Returns:
        list: one entry per class-1 detection, in order of appearance, holding
        the index (into ``classes``) of the closest class-0 detection. Ties go
        to the lowest class-0 index. If there are no class-0 detections every
        entry is ``-1``.
    """
    # Accept torch tensors on any device as well as numpy arrays / lists.
    if hasattr(classes, "cpu"):
        classes = classes.cpu()
    classes = np.asarray(classes)

    xywh = image.boxes.xywh
    if hasattr(xywh, "cpu"):
        xywh = xywh.cpu().numpy()
    centers_xy = np.asarray(xywh, dtype=float)[:, :2]

    class0_indices = np.where(classes == 0)[0]
    class1_indices = np.where(classes == 1)[0]

    if class1_indices.size == 0:
        return []
    if class0_indices.size == 0:
        # No candidate parent exists; -1 marks "no parent" instead of raising IndexError.
        return [-1] * len(class1_indices)

    cells = centers_xy[class0_indices]      # (n0, 2)
    droplets = centers_xy[class1_indices]   # (n1, 2)
    # (n1, n0) matrix of pairwise distances; argmin returns the first minimum.
    distances = np.linalg.norm(droplets[:, None, :] - cells[None, :, :], axis=2)
    nearest = distances.argmin(axis=1)
    return list(class0_indices[nearest])

def find_first_file(m_folder, m_contains):
    """Return the path of the first file under ``m_folder`` whose name contains ``m_contains``.

    The folder is walked recursively with ``os.walk`` (``m_folder`` itself
    first, then its subdirectories). The test is a substring match on the file
    name only, not on the directory part. "First" means first in ``os.walk``
    order.

    Args:
        m_folder (str): root directory to search.
        m_contains (str): substring to look for in each file name.

    Returns:
        str | None: joined path of the first match, or ``None`` if no file matches.
    """
    candidates = (
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(m_folder)
        for filename in filenames
        if m_contains in filename
    )
    return next(candidates, None)

def create_multichannel_array(folder_path):
    """Stack every image under ``folder_path`` into one multi-channel array.

    The folder is walked recursively. Each file ending in .png, .jpg, .jpeg,
    .bmp or .tif (case-insensitive) is opened, converted to 8-bit grayscale
    (PIL mode ``'L'``) and becomes one channel. Channel order follows
    ``os.walk`` order and is reported through the returned names.

    Args:
        folder_path (str): directory to scan.

    Returns:
        tuple: ``(array, names)`` where ``array`` has shape
        (height, width, n_images) and ``names[i]`` is the file name of channel
        ``i``. When no image is found the result is ``(None, [])`` so callers
        can always unpack two values.

    Raises:
        ValueError: if the images do not all have the same shape.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif')
    image_arrays = []
    image_names = []
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.lower().endswith(image_extensions):
                continue
            # Context manager releases the file handle once the pixels are copied.
            with Image.open(os.path.join(root, file)) as t_img:
                t_arr = np.array(t_img.convert('L'))
            if t_arr.ndim == 2:  # Ensure the image is grayscale
                image_arrays.append(t_arr)
                image_names.append(file)

    if not image_arrays:
        return None, image_names

    shapes = {arr.shape for arr in image_arrays}
    if len(shapes) > 1:
        by_file = ", ".join(f"{name}={arr.shape}" for name, arr in zip(image_names, image_arrays))
        raise ValueError(f"Images under {folder_path!r} differ in size and cannot be stacked: {by_file}")

    # Stack along a new trailing axis: one channel per image.
    return np.stack(image_arrays, axis=-1), image_names

def get_color_by_id(point_region_id, total_ids):
    """Pick a bright RGB colour for an id by spreading ids around the hue circle.

    Args:
        point_region_id: id to colour; its hue is ``point_region_id / total_ids``.
        total_ids: number of ids sharing the hue circle (must be non-zero).

    Returns:
        tuple[int, int, int]: RGB components on the 0-255 scale, at fixed
        saturation 0.9 and value 0.9.
    """
    red, green, blue = colorsys.hsv_to_rgb(point_region_id / total_ids, 0.9, 0.9)
    return (int(red * 255), int(green * 255), int(blue * 255))

def get_vor_boundaries(boxes, ImgIfDesired = None):
    """Build one polygon per box from the Voronoi diagram of the box positions.

    The first two components of each box are used as the Voronoi seed point.
    A point whose Voronoi region is bounded gets that region's vertices. Every
    other point gets a small placeholder triangle (base 4, height 2) anchored
    at the point. That covers unbounded or empty regions, points Qhull did not
    assign a region to, and the case where no diagram can be built at all
    (fewer than 4 points, or degenerate input).

    Args:
        boxes: sequence (list, array or tensor) with one row per box; only
            ``row[0]`` and ``row[1]`` are read.
        ImgIfDesired (optional): PIL image to draw the seeds and bounded regions
            on; it is modified in place and displayed. Defaults to None.

    Returns:
        list: ``result[i]`` is the list of (x, y) vertices for ``boxes[i]``.
        The list has the same length and order as ``boxes``.
    """
    default_triangle_height = 2
    default_triangle_base_length = 4

    # Plain floats so the geometry below does not depend on the input container type.
    points = [(float(box[0]), float(box[1])) for box in boxes]

    vor_verts = {}
    if len(points) >= 4:  # Qhull cannot build a 2-D Voronoi diagram from fewer points
        try:
            vor = Voronoi(points)
        except (RuntimeError, ValueError):
            # scipy's QhullError derives from RuntimeError (e.g. all points collinear).
            vor = None
        if vor is not None:
            # Iterating point_region preserves the input order of the points.
            for point_id, region_id in enumerate(vor.point_region):
                if region_id < 0:
                    continue  # point has no region of its own (e.g. a duplicate)
                region = vor.regions[region_id]
                if region and -1 not in region:  # non-empty and bounded
                    vor_verts[point_id] = vor.vertices[region].tolist()

    vor_verts_list = []
    for idx, (px, py) in enumerate(points):
        if idx in vor_verts:
            vor_verts_list.append(vor_verts[idx])
        else:
            # Placeholder triangle for points without a usable region.
            vor_verts_list.append([
                (px - default_triangle_base_length / 2, py),
                (px + default_triangle_base_length / 2, py),
                (px, py + default_triangle_height),
            ])

    if ImgIfDesired is not None:
        from IPython.display import display  # local import so the function also works outside a notebook
        drawV = ImageDraw.Draw(ImgIfDesired)
        r = 2  # radius of the seed markers
        for point_id, (px, py) in enumerate(points):
            outline_color = get_color_by_id(point_id, len(points))
            if point_id in vor_verts:
                polygon_vertices_tuples = [tuple(vertex) for vertex in vor_verts[point_id]]
                drawV.polygon(polygon_vertices_tuples, width=3, outline=outline_color)
            drawV.ellipse([(px - r, py - r), (px + r, py + r)], fill=outline_color)
        display(ImgIfDesired)

    return vor_verts_list

def find_mask_intensities(img_data, image_array, file_name, shift_x = 0, shift_y = 0, include_headers = True, meta_name = "NA", tile_name = "NA"):
    """Measure per-channel pixel intensities inside each detection's regions.

    For every detected box, up to three regions are rasterised and measured:

    * ``box``  - the bounding box, built from the centre +/- half width/height
      (assumes ``boxes.xywh`` is centre-x, centre-y, width, height, as the rest
      of this file does);
    * ``poly`` - the object's own segmentation polygon, only when
      ``img_data.masks`` is present (assumes ``masks.xy`` is index-aligned with
      the boxes - TODO confirm);
    * ``vor``  - the polygon returned by ``get_vor_boundaries`` for that box.

    Output is tab-delimited text. Each record ends with a trailing tab followed
    by a bare carriage return (``'\\r'``), kept for compatibility with files
    already produced by this notebook. Columns: FileName, MetaName, TileName,
    ObjectID, Class, Confidence, cx, cy, then per channel and per region the
    pixel count (first channel only), total, average and standard deviation.
    A region that cannot be built leaves its fields empty so columns stay aligned.

    Args:
        img_data: one prediction result exposing ``boxes`` (with ``cpu()``,
            ``xywh``, indexing, ``cls``, ``conf``) and ``masks`` (``None`` or
            an object with ``xy``).
        image_array (numpy.ndarray): array of shape (height, width, channels) to measure.
        file_name (str): value written to the FileName column.
        shift_x, shift_y (optional): offsets added to the reported centre, e.g.
            the tile origin inside the full image. Default 0.
        include_headers (bool, optional): emit the header record first (also
            when there are no detections). Defaults to True.
        meta_name (str, optional): value for the MetaName column.
        tile_name (str, optional): value for the TileName column.

    Returns:
        str: the header (if requested) followed by one record per detection.
    """
    d = '\t'        # field delimiter
    row_end = '\r'  # record terminator (bare CR, see docstring)

    height, width, channels = image_array.shape[0], image_array.shape[1], image_array.shape[2]
    boxes = img_data.boxes.cpu()
    img_box_centers = boxes.xywh
    img_mask_coords = None if img_data.masks is None else img_data.masks.xy
    n_boxes = len(img_box_centers)
    # Nothing to triangulate on an empty tile.
    img_vor_coords = get_vor_boundaries(img_box_centers) if n_boxes else []

    print("width =",width,"height =",height,"chs =",channels,"boxes =",len(img_box_centers),"vor =",len(img_vor_coords))

    def window_mask(vertices):
        """Rasterise a polygon; return (row_slice, col_slice, bool mask) or None if unusable.

        Only integer pixel coordinates inside the polygon's bounding box are
        tested; pixels outside that box cannot be inside the polygon. This
        selects the same pixels as testing the whole image at a fraction of the cost.
        """
        if vertices is None or len(vertices) == 0:
            return None
        verts = np.array([[float(vx), float(vy)] for vx, vy in vertices])
        if not np.isfinite(verts).all():
            return None
        x0 = max(int(np.floor(verts[:, 0].min())), 0)
        y0 = max(int(np.floor(verts[:, 1].min())), 0)
        x1 = min(int(np.ceil(verts[:, 0].max())) + 1, width)
        y1 = min(int(np.ceil(verts[:, 1].max())) + 1, height)
        if x1 <= x0 or y1 <= y0:
            # Polygon lies completely outside the image: zero pixels selected.
            return slice(0, 0), slice(0, 0), np.zeros((0, 0), dtype=bool)
        xs, ys = np.meshgrid(np.arange(x0, x1), np.arange(y0, y1))
        pixel_points = np.column_stack((xs.ravel(), ys.ravel()))
        inside = mpltPath.Path(verts).contains_points(pixel_points).reshape(xs.shape)
        return slice(y0, y1), slice(x0, x1), inside

    # Fixed region order so header and every record share the same columns.
    mask_keys = ["box"] + (["poly"] if img_mask_coords is not None else []) + ["vor"]

    records = []
    if include_headers:
        header = ['FileName', 'MetaName', 'TileName', 'ObjectID', 'Class', 'Confidence', 'cx', 'cy']
        for c in range(channels):
            cs = str(c)
            for key in mask_keys:
                if c == 0:
                    header.append(key + ' AreaP')
                header += [key + ' Total Intensity wv' + cs,
                           key + ' Avg Intensity wv' + cs,
                           key + ' Std Intensity wv' + cs]
        records.append(d.join(header) + d + row_end)

    for idx in range(n_boxes):
        if (idx % 250 == 0): print("Measuring Intensities",idx)
        cx, cy, box_w, box_h = img_box_centers[idx].tolist()[:4]
        half_w, half_h = box_w / 2, box_h / 2

        polys = {
            "box": [[cx - half_w, cy + half_h], [cx + half_w, cy + half_h],
                    [cx + half_w, cy - half_h], [cx - half_w, cy - half_h]],
            # This object's own polygon, not the whole list of polygons.
            "poly": img_mask_coords[idx] if (img_mask_coords is not None and idx < len(img_mask_coords)) else None,
            "vor": img_vor_coords[idx] if idx < len(img_vor_coords) else None,
        }
        windows = {key: window_mask(polys[key]) for key in mask_keys}

        # TODO: add a parent ID column here
        fields = [file_name, meta_name, tile_name, str(idx),
                  str(boxes[idx].cls.item()), str(boxes[idx].conf.item()),
                  str(cx + shift_x), str(cy + shift_y)]

        for c in range(channels):
            channel = image_array[:, :, c]
            for key in mask_keys:
                window = windows[key]
                if window is None:
                    # Keep the column count constant for this record.
                    fields += ([''] if c == 0 else []) + ['', '', '']
                    continue
                row_slice, col_slice, inside = window
                selected_pixels = channel[row_slice, col_slice][inside]
                if c == 0:
                    fields.append(str(len(selected_pixels)))
                fields += [str(np.sum(selected_pixels)),
                           str(np.average(selected_pixels)),
                           str(np.std(selected_pixels))]

        records.append(d.join(fields) + d + row_end)

    return ''.join(records)

def Predict_OnPartsOfImage(model, original_image_name, full_image_arr_predict, full_image_arr_measure = None, save_path = None, new_w = 256, new_h = 256,
                           overlap_amount = 0, fill_edges = False, include_headers = True, meta_name = "NA", maxdets = 6666, minconf = 0.25):
    """Run the model tile by tile over an image and measure intensities in each tile.

    The image is cut into ``new_w`` x ``new_h`` tiles, stepping by the tile size
    minus ``overlap_amount``. Each tile is passed to ``model.predict`` and the
    first returned result is handed to ``find_mask_intensities`` together with
    the matching tile of the measurement array. Tiles at the right/bottom edge
    are smaller unless ``fill_edges`` is set.

    Args:
        model: object with a ``predict(array, show=..., max_det=..., conf=...)`` method.
        original_image_name (str): written to the FileName column of every record.
        full_image_arr_predict (numpy.ndarray): image the model predicts on (rows first).
        full_image_arr_measure (numpy.ndarray, optional): array whose pixels are
            measured; tiled with the same coordinates. Defaults to the
            prediction tile itself.
        save_path (optional): if given, the text is appended to this file.
        new_w (int, optional): tile width in pixels. Defaults to 256.
        new_h (int, optional): tile height in pixels. Defaults to 256.
        overlap_amount (int, optional): pixels shared by neighbouring tiles;
            must be smaller than both tile dimensions. Defaults to 0.
        fill_edges (bool, optional): zero-pad edge tiles to the full tile size.
            Defaults to False.
        include_headers (bool, optional): write the header record before the
            first records. Defaults to True.
        meta_name (str, optional): written to the MetaName column. Defaults to "NA".
        maxdets (int, optional): ``max_det`` passed to ``predict``. Defaults to 6666.
        minconf (float, optional): ``conf`` threshold passed to ``predict``.
            Defaults to 0.25.

    Returns:
        str: the concatenated text of all tiles.

    Raises:
        ValueError: if ``overlap_amount`` leaves a non-positive step.
    """
    step_x = new_w - overlap_amount
    step_y = new_h - overlap_amount
    if step_x <= 0 or step_y <= 0:
        raise ValueError("overlap_amount must be smaller than both new_w and new_h")

    def get_piece(t_arr, x, y):
        # Slicing clamps at the array border, so edge tiles may be smaller.
        piece = t_arr[y:y + new_h, x:x + new_w]
        if fill_edges:
            # Zero canvas of the full tile size; trailing (channel) axes are preserved.
            filled_piece = np.zeros((new_h, new_w) + t_arr.shape[2:], dtype=t_arr.dtype)
            filled_piece[:piece.shape[0], :piece.shape[1]] = piece
            piece = filled_piece
        return piece

    t_arr = full_image_arr_predict
    first = include_headers  # True until the header has actually been written
    tile_texts = []

    for y in range(0, t_arr.shape[0], step_y):
        for x in range(0, t_arr.shape[1], step_x):
            piece_pred = get_piece(t_arr, x, y)
            piece_meas = get_piece(full_image_arr_measure, x, y) if (full_image_arr_measure is not None) else piece_pred
            tilename = str(x) + "," + str(y); print("Region:",tilename)
            predictions = model.predict(piece_pred, show=False, max_det=maxdets, conf=minconf)
            tile_text = find_mask_intensities(predictions[0], piece_meas, original_image_name, x, y, first, meta_name, tilename)
            tile_texts.append(tile_text)
            if tile_text:
                # Only stop asking for the header once a tile produced output.
                first = False

    strRet = ''.join(tile_texts)
    if (save_path is not None):
        with open(save_path, 'a') as file: file.write(strRet)
    print("Done with File")
    return strRet

def work_on_folder(model, SubFolder, PredContains, IncludeHeaders = True, save_path = None, maxdet = 6000, tile_size = 553):
    """Predict on one image of a folder and measure every image in that folder.

    The first file under ``SubFolder`` whose name contains ``PredContains`` is
    loaded as RGB and used for prediction. All images under ``SubFolder`` are
    stacked into a multi-channel array and measured inside the detected regions.

    Args:
        model: prediction model passed through to ``Predict_OnPartsOfImage``.
        SubFolder (str): folder to process; also written to the MetaName column.
        PredContains (str): substring identifying the image to predict on.
        IncludeHeaders (bool, optional): include the header record in the
            returned text. Defaults to True.
        save_path (optional): if given, the text is appended to this file.
        maxdet (int, optional): maximum detections per tile. Defaults to 6000.
        tile_size (int, optional): width and height of the square tiles.
            Defaults to 553.

    Returns:
        tuple: ``(text, names)`` - the tab-delimited measurement text and the
        file names of the measured channels, in channel order.

    Raises:
        FileNotFoundError: if no file name under ``SubFolder`` contains ``PredContains``.
    """
    file_pred = find_first_file(SubFolder, PredContains)
    if file_pred is None:
        raise FileNotFoundError(f"No file containing {PredContains!r} found under {SubFolder!r}")

    # NOTE(review): the array is in RGB channel order; confirm that matches what
    # the model expects for numpy inputs (irrelevant if the image is grayscale).
    with Image.open(file_pred) as pred_img:
        pred_arr = np.array(pred_img.convert('RGB'))

    # Tolerate both a bare None and a (None, []) pair for "no images found".
    stacked = create_multichannel_array(SubFolder)
    meas_arr, names = stacked if stacked is not None else (None, [])

    st = Predict_OnPartsOfImage(model, file_pred, pred_arr, meas_arr, None, tile_size, tile_size, 0, False, IncludeHeaders, SubFolder, maxdet)
    if (save_path is not None):
        with open(save_path, 'a') as file: file.write(st)
    print("Done with Files")
    return st, names

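The next cell runs the model on every first-level subfolder of `S:\Phys\FIV925 XSection\Datasets\Creed\20240406` and writes two text files into that folder: a tab-delimited data table (`Res00<res_append>.txt`) and a list of the measured channel file names per subfolder (`Res00<res_append>_Names.txt`).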

In [9]:
# variables
model_path = r"S:\Phys\FIV925 XSection\Datasets\Creed\01a\YO 553 0328 MAXI\map75=0296662 yolov9c  idx=1 ep=8 btch=16 rnd=4717152\weights\best.pt"
m_folder = r"S:\Phys\FIV925 XSection\Datasets\Creed\20240406"
m_contains = "T0_DAPI"
res_append = "2"

model = YOLO(model_path)

if not os.path.isdir(m_folder):
    raise FileNotFoundError(f"Data folder not found: {m_folder}")

# os.walk yields (dirpath, dirnames, filenames) tuples top-down. The first tuple
# describes m_folder itself, so its dirnames are the first-level subfolders.
# Sorted so the processing order is the same on every run.
first_level_subfolders = sorted(next(os.walk(m_folder))[1])
First = True          # only the first processed subfolder contributes the header
stio = io.StringIO()  # accumulates the data table for all subfolders
namedict = {}         # subfolder -> file names of the measured channels

for subfolder in first_level_subfolders:
    print(subfolder,"---------------------------------")
    subfolder_path = os.path.join(m_folder, subfolder)
    # Predicts on the first image whose name contains m_contains and measures
    # every image found in the subfolder.
    st, names = work_on_folder(model, subfolder_path, m_contains, First)
    stio.write(st)
    namedict[subfolder] = names
    First = False

# Save out the main data. Write mode (not append): the text starts with a header,
# so re-running the cell must replace the file instead of adding a second table.
save_path = os.path.join(m_folder, "Res00"+res_append+".txt")
strRet = stio.getvalue(); stio.close()
with open(save_path, 'w') as file: file.write(strRet)

# Now save out the name information
save_path = os.path.join(m_folder, "Res00"+res_append+"_Names.txt")
rows = [f"{subfolder}\t{idx}\t{name}" for subfolder, names in namedict.items() for idx, name in enumerate(names)]
with open(save_path, 'w') as txtfile:
    txtfile.write("Subfolder\tIndex\tName\n") 
    txtfile.write("\n".join(rows))

print("Done with Folder")

SET1_CROPPED ROI2 ---------------------------------
Region: 0,0

0: 576x576 1305 Nucs, 30.6ms
Speed: 15.0ms preprocess, 30.6ms inference, 10.0ms postprocess per image at shape (1, 3, 576, 576)
width = 553 height = 553 chs = 13 boxes = 1305 vor = 1305
Measuring Intensities 0


KeyboardInterrupt: 