In [1]:
import os, sys

import os, sys

sys.path.append(os.path.abspath("../.."))

from utilities.create_detection_masks import *
from utilities import Otsu_Grid_utilities as otsu_grid
from utilities import Otsu_Local_utilities as otsu_local

import glob
import cv2
from skimage import data, filters, segmentation

from skimage import data
from skimage.filters.rank import entropy
from skimage.morphology import disk, ball, black_tophat, white_tophat
from skimage.measure import shannon_entropy, label, regionprops
from skimage.morphology import square, disk
from skimage.io import imread

import scipy
from scipy.signal import find_peaks
import numpy as np

%matplotlib ipympl

import matplotlib.pyplot as plt
import numpy as np


from tqdm.notebook import tqdm

# from mpl_interactions import ipyplot as iplt
import concurrent.futures
import time
from datetime import timedelta

from itertools import repeat
import multiprocessing

In [2]:
def dump_mask(dest_filename, mask):
    """Save ``mask`` to ``dest_filename`` as a single-channel 8-bit image.

    The array is cast to ``uint8`` before being handed to ``Image.fromarray``
    with mode ``'L'``.

    NOTE(review): ``Image`` is not imported explicitly in this notebook; it is
    presumably PIL's ``Image`` module leaking in through the star import of
    ``utilities.create_detection_masks`` -- confirm.
    """
    Image.fromarray(mask.astype(np.uint8), mode='L').save(dest_filename)
    
def load_file(filename):
    """Read an image file and return its ``clv_correction``-processed version.

    Parameters
    ----------
    filename : str
        Path of the image; it is read both with ``get_FITS_header`` (for the
        ``SOLAR_R`` and ``NAXIS1`` header entries) and with ``imread``.

    Returns
    -------
    pixMat_flat
        Output of ``clv_correction`` (presumably a centre-to-limb variation
        correction -- confirm in the utilities module).
    center : list of int
        ``[rows // 2, cols // 2]`` of the image as read.
    radius
        The ``SOLAR_R`` header value.
    """
    header = get_FITS_header(filename)
    wl_resized = imread(filename)

    center = [wl_resized.shape[0] // 2, wl_resized.shape[1] // 2]
    radius = header['SOLAR_R']

    pixel_nb = header['NAXIS1']
    pix_bit = pow(2, 12) - 1  # 4095; presumably the 12-bit sensor maximum -- confirm
    poly_order = 2

    # The disk is taken to be centred in a square NAXIS1 x NAXIS1 frame.
    xce = int(pixel_nb / 2)
    yce = int(pixel_nb / 2)

    pixMat_flat = clv_correction(wl_resized,
                                 radius,
                                 xce,
                                 yce,
                                 poly_order,
                                 pix_bit)

    return pixMat_flat, center, radius

In [3]:
def local_hist_bbox_new(image, tophat_mask):
    """Plot the histogram of the pixels selected by ``tophat_mask == 1``.

    Peaks of the 255-bin histogram are found with ``find_peaks``, using a
    prominence of 80% of the tallest bin among bins 50..254. The last peak is
    marked in red and its left/right bases in green. The peaks and their
    properties are printed. Nothing is returned.
    """
    values = image[tophat_mask == 1]
    hist_val, hist_edges = np.histogram(values, bins=255)

    min_prominence = 0.8 * max(hist_val[50:255])
    peaks, properties = find_peaks(hist_val, prominence=min_prominence)
    print(peaks,properties)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 2))
    ax.hist(values.ravel(), bins=255)

    # Only the last detected peak (and its bases) is drawn.
    markers = (
        (peaks, 'r'),
        (properties['left_bases'], 'g'),
        (properties['right_bases'], 'g'),
    )
    for bin_indices, colour in markers:
        ax.vlines(hist_edges[bin_indices[-1]], 0, max(hist_val), color=colour)
    fig.show()

def local_hist_bbox(image, bbox, padding, sun_radius=None):
    """Threshold the crop of ``image`` delimited by ``bbox`` into discrete levels.

    Parameters
    ----------
    image : ndarray
        Full 2-D image.
    bbox : sequence
        Bounding box as ``(min_row, min_col, max_row, max_col)``.
    padding
        Unused; kept for interface compatibility.
    sun_radius : optional
        Radius passed to ``create_circular_mask``. When omitted, the function
        falls back to a module-level ``radius`` variable, which is what the
        previous implementation read implicitly (``NameError`` if it does not
        exist).

    Returns
    -------
    levels : ndarray
        ``np.digitize`` of the combined crop against ``thresholds``.
    thresholds
        Whatever ``search_max_threshold2`` returns for this crop.
    """
    if sun_radius is None:
        # Legacy behaviour: rely on a global ``radius`` defined by the caller.
        sun_radius = radius

    # Copy only the crop so the helper cannot modify the caller's image.
    region = image[bbox[0]:bbox[2], bbox[1]:bbox[3]].copy()

    center = [image.shape[0] // 2, image.shape[1] // 2]
    outside = create_circular_mask(image.shape[1], image.shape[0], center, sun_radius)
    tmp = outside[bbox[0]:bbox[2], bbox[1]:bbox[3]]

    # Element-wise maximum of the circular mask and the crop. np.maximum keeps
    # the 2-D shape even for 1-pixel-wide crops, unlike stack/max/squeeze.
    tmp2 = np.maximum(tmp, region)

    thresholds = search_max_threshold2(region, tmp2)
    levels = np.digitize(tmp2, bins=thresholds)

    return levels, thresholds


def find_bboxes(labels):
    """Return the bounding boxes of the labelled regions larger than 15 pixels.

    ``labels`` is passed to ``regionprops``; each kept bounding box is returned
    as a NumPy array built from ``prop.bbox``.
    """
    return [
        np.array(region.bbox)
        for region in regionprops(labels)
        if region.area > 15
    ]

def get_tophat(tophat_radius, image):
    """Apply ``speed_black_tophat`` to ``image`` with a disk of ``tophat_radius``."""
    return speed_black_tophat(image, disk(tophat_radius))

def speed_black_tophat(image,footprint=None,out=None):
    """Black top-hat of ``image`` via ``cv2.morphologyEx`` (``MORPH_BLACKHAT``).

    ``footprint`` is used as the structuring element. ``out`` is accepted but
    ignored (the signature presumably mirrors skimage's ``black_tophat`` --
    confirm).
    """
    return cv2.morphologyEx(image, cv2.MORPH_BLACKHAT, footprint)

def local_hist_bbox_2(image, sun_radius, mask, bbox, method='MAX'):
    """Threshold the crop of ``image`` in ``bbox`` into levels restricted to ``mask``.

    Parameters
    ----------
    image : ndarray
        Full 2-D image.
    sun_radius
        Radius passed to ``create_circular_mask``.
    mask : ndarray
        Array with the same shape as the crop; the digitized levels are
        multiplied by it.
    bbox : sequence
        Bounding box as ``(min_row, min_col, max_row, max_col)``.
    method : str
        Forwarded to ``search_max_threshold3``.

    Returns
    -------
    levels : ndarray
        ``np.digitize`` of the combined crop against ``thresholds``, times ``mask``.
    thresholds
        Whatever ``search_max_threshold3`` returns.
    """
    # Copy only the crop so the helper cannot modify the caller's image.
    region = image[bbox[0]:bbox[2], bbox[1]:bbox[3]].copy()

    center = [image.shape[0] // 2, image.shape[1] // 2]
    outside = create_circular_mask(image.shape[1], image.shape[0], center, sun_radius)
    tmp = outside[bbox[0]:bbox[2], bbox[1]:bbox[3]]

    # Element-wise maximum of the circular mask and the crop. np.maximum keeps
    # the 2-D shape even for 1-pixel-wide crops, unlike stack/max/squeeze.
    tmp2 = np.maximum(tmp, region)

    thresholds = search_max_threshold3(region, mask, tmp2, method=method)
    levels = np.digitize(tmp2, bins=thresholds) * mask

    return levels, thresholds

def find_bboxes2(labels):
    """Return bounding boxes and per-region masks of regions larger than 15 pixels.

    Returns
    -------
    bboxes : list of ndarray
        ``np.array(prop.bbox)`` for every kept region.
    masks : list
        The matching ``prop.image`` of each kept region, in the same order.
    """
    kept = [region for region in regionprops(labels) if region.area > 15]
    bboxes = [np.array(region.bbox) for region in kept]
    masks = [region.image for region in kept]
    return bboxes , masks

def bbox_f_tophat2(image, sun_radius, tophat_thresholds, tophat_radius, padding, method='MAX'):
    """Build one segmentation mask per top-hat threshold.

    The black top-hat of ``image`` is computed once. For every threshold it is
    binarised, connected regions larger than 15 px are extracted with
    ``find_bboxes2``, each bounding box is grown by ``padding`` pixels (clipped
    to the image), and ``local_mask`` is run on the padded box. The results are
    accumulated into an image-sized mask.

    Side effect: the module-level ``final_mask`` is overwritten after each
    threshold with the ``uint8`` version of that threshold's mask.

    Returns
    -------
    list of (out_mask, spots)
        One tuple per threshold, in the order of ``tophat_thresholds``.
        ``spots`` is the same top-hat array in every tuple.
    """
    global final_mask

    padding = int(padding)
    spots = get_tophat(tophat_radius, image)

    results = []
    for threshold in tophat_thresholds:
        # Binarise the top-hat response: 0 at or below the threshold, 1 above.
        binary = spots.copy()
        binary[binary <= threshold] = 0
        binary[binary > threshold] = 1

        region_bboxes, region_masks = find_bboxes2(label(binary))

        out_mask = np.zeros_like(image)
        n_rows, n_cols = out_mask.shape[0], out_mask.shape[1]

        for bbox, region_mask in zip(region_bboxes, region_masks):
            # Padded box, clipped to the image bounds.
            row_lo = max(bbox[0] - padding, 0)
            row_hi = min(bbox[2] + padding, n_rows)
            col_lo = max(bbox[1] - padding, 0)
            col_hi = min(bbox[3] + padding, n_cols)
            padded_bbox = [row_lo, col_lo, row_hi, col_hi]

            # Place the region's own mask at its offset inside the padded box.
            row_off = bbox[0] - row_lo
            col_off = bbox[1] - col_lo
            padded_mask = np.zeros((row_hi - row_lo, col_hi - col_lo))
            padded_mask[row_off:row_off + region_mask.shape[0],
                        col_off:col_off + region_mask.shape[1]] = region_mask

            levels = local_mask(image, sun_radius, padded_mask, padded_bbox, method)
            out_mask[row_lo:row_hi, col_lo:col_hi] += levels

        final_mask = out_mask.astype(np.uint8)
        results.append((out_mask, spots))

    return results

In [4]:
def apply_local_tophat(image, sun_radius, tophat_threshold=300, tophat_radius=35, padding=35, method='MAX'):
    """Run ``bbox_f_tophat2`` for one or several top-hat thresholds.

    Parameters
    ----------
    image : ndarray
        Image to segment.
    sun_radius
        Forwarded to ``bbox_f_tophat2``.
    tophat_threshold : scalar or iterable
        A single threshold or a collection of thresholds. A scalar (including
        the default) is wrapped into a one-element list, because
        ``bbox_f_tophat2`` iterates over its thresholds argument.
    tophat_radius, padding, method
        Forwarded unchanged.

    Returns
    -------
    list
        The list returned by ``bbox_f_tophat2``: one entry per threshold.
    """
    if np.isscalar(tophat_threshold):
        tophat_threshold = [tophat_threshold]

    return bbox_f_tophat2(image, sun_radius, tophat_threshold, tophat_radius, padding, method)

def apply(img, root_dir, method, tophat_thresh=400,context_width = 5,tophat_radius = 40):
    """Segment one image and write one PNG mask per top-hat threshold.

    Parameters
    ----------
    img : str
        Path of the input image (loaded with ``load_file``).
    root_dir : str
        Masks are written to ``<root_dir>/2023_T<threshold>/<basename>.png``.
    method : str
        Forwarded to ``apply_local_tophat``.
    tophat_thresh : scalar or sequence
        One threshold or a sequence of thresholds; a scalar is wrapped in a list.
    context_width : int
        Forwarded as the ``padding`` argument of ``apply_local_tophat``.
    tophat_radius : int
        Radius of the top-hat structuring element.

    ``ValueError`` and ``AssertionError`` raised during segmentation or writing
    are reported with a printed message and otherwise ignored, so one bad
    image does not abort a batch. Returns ``None``.
    """
    basename = os.path.splitext(os.path.basename(img))[0]
    thresholds = [tophat_thresh] if np.isscalar(tophat_thresh) else tophat_thresh

    pmf, _center, radius = load_file(img)
    pmf = pmf * create_circular_mask(pmf.shape[0], pmf.shape[1], radius=radius)
    try:
        out_masks = apply_local_tophat(pmf, radius, thresholds, tophat_radius, context_width, method=method)

        if len(out_masks) != len(thresholds):
            raise AssertionError('one mask per threshold was expected')

        for t, (cur_out_mask, _) in zip(thresholds, out_masks):
            dest_dir = os.path.join(root_dir, f'2023_T{t}')
            # exist_ok avoids a race when several pool workers create the
            # same directory at the same time.
            os.makedirs(dest_dir, exist_ok=True)

            outfile = os.path.join(dest_dir, basename+'.png')
            dump_mask(outfile, cur_out_mask)
    except (ValueError, AssertionError):
        print(f'error on : {basename}')
    return

# Test Cells to separate penumbra and umbra

In [5]:
def local_mask(image, sun_radius, mask, bbox, method='MAX'):
    """Compute the ``discriminate_penumbra2`` mask for the crop of ``image`` in ``bbox``.

    Parameters
    ----------
    image : ndarray
        Full 2-D image.
    sun_radius
        Radius passed to ``create_circular_mask``.
    mask : ndarray
        Region mask with the same shape as the crop.
    bbox : sequence
        Bounding box as ``(min_row, min_col, max_row, max_col)``.
    method : str
        Forwarded to ``discriminate_penumbra2``.

    Returns
    -------
    The array returned by ``discriminate_penumbra2`` for this crop.
    """
    # Copy only the crop so downstream helpers cannot modify the caller's image.
    region = image[bbox[0]:bbox[2], bbox[1]:bbox[3]].copy()

    center = [image.shape[0] // 2, image.shape[1] // 2]
    outside = create_circular_mask(image.shape[1], image.shape[0], center, sun_radius)
    tmp = outside[bbox[0]:bbox[2], bbox[1]:bbox[3]]

    # Element-wise maximum of the circular mask and the crop. np.maximum keeps
    # the 2-D shape even for 1-pixel-wide crops, unlike stack/max/squeeze,
    # which would hand a 1-D array to the Canny step.
    tmp2 = np.maximum(tmp, region)

    return discriminate_penumbra2(region, mask, tmp2, method=method)

def discriminate_penumbra2(region, mask, tmp2, show=False, method='MAX'):
    """Split the masked region of ``tmp2`` using Canny edges and ``splitter``.

    Edges are first computed with ``auto_canny`` (sigma 0.33). If none are
    found, ``canny_with_pooling`` is tried with sigma 0.1, 0.2 and 0.3 until
    one yields edges.

    Parameters
    ----------
    region : ndarray
        Raw crop; only used in the diagnostic figure.
    mask : ndarray
        Region mask for the crop.
    tmp2 : ndarray
        Crop actually used for edge detection and splitting.
    show : bool
        Forwarded to the edge detectors to display their debug figures.
    method : str
        Unused; kept for interface compatibility.

    Returns
    -------
    The mask returned by ``splitter(tmp2, mask, edges)``.

    Raises
    ------
    AssertionError
        When no edges could be found at all. Diagnostic figures are shown
        first. The exception type is kept because callers catch it.
    """
    edges = auto_canny(tmp2, mask,sigma=0.33,show=show)
    edges_V1 = edges.copy()
    if np.sum(edges) == 0:
        for sig in [0.1 , 0.2, 0.3 ]:
            edges = canny_with_pooling(tmp2,mask,sig, show=show)
            if np.sum(edges) > 0:
                break

    # Explicit check instead of ``assert`` so it also runs under ``python -O``.
    if not np.sum(edges) > 0:
        print('lol')
        auto_canny(tmp2, mask,sigma=0.33,show=True)
        print('canny_pooling')
        for debug_sigma in (0.05, 0.1, 0.2, 0.33, 0.4, 0.5):
            canny_with_pooling(tmp2,mask,debug_sigma, show=True)
        print('lol2')
        fig, ax = plt.subplots(nrows=1, ncols=5, figsize=(9, 3))
        ax[0].imshow(tmp2,cmap='gray')
        ax[1].imshow(region)
        ax[2].imshow(edges_V1)
        ax[3].imshow(edges)
        ax[4].imshow(edges*mask)
        plt.show()

        raise AssertionError('no Canny edges found for this region')

    pen_um_mask = splitter(tmp2,mask,edges)

    return pen_um_mask

def discriminate_penumbra(region, mask, tmp2, show=False, method='MAX'):
    """Return the ``auto_canny`` edges of ``tmp2`` (earlier variant of the splitter).

    ``splitter`` is called but its result is discarded. With ``show=True`` a
    six-panel figure is drawn: the crop, the mask, the edges (overdrawn by the
    masked edges on the same axis) and the edges obtained with sigma 1, 2 and
    0.1. ``region`` and ``method`` are unused.
    """
    import cv2 as cv

    edges = auto_canny(tmp2, mask)
    splitter(tmp2,mask,edges)

    if not show:
        return edges

    fig, ax = plt.subplots(nrows=1, ncols=6, figsize=(9, 3))
    ax[0].imshow(tmp2,cmap='gray')
    ax[1].imshow(mask)
    # Both layers go on axis 2, as in the original layout.
    ax[2].imshow(edges)
    ax[2].imshow(edges*mask)
    for panel, sig in zip(ax[3:], (1, 2, .1)):
        panel.imshow(auto_canny(tmp2,mask,sigma=sig))
    plt.show()

    return edges

from skimage.transform import rescale, resize, downscale_local_mean
def canny_with_pooling(img, mask, sigma, show=False):
    """Canny edges computed on a 2x2 min-pooled copy of ``img``.

    The image is min-pooled and the mask max-pooled over 2x2 blocks,
    ``auto_canny`` is run on the pooled pair, and the edge map is resized back
    to ``img.shape`` and binarised at 0.4.

    Parameters
    ----------
    img : ndarray
        2-D crop to analyse.
    mask : ndarray
        Region mask with the same shape as ``img``.
    sigma : float
        Forwarded to ``auto_canny``.
    show : bool
        Forwarded to ``auto_canny``.

    Returns
    -------
    ndarray of int
        0/1 edge map with the shape of ``img``.
    """
    # Imported here so the function does not depend on a bare ``skimage``
    # name, which no import in this notebook binds.
    from skimage.measure import block_reduce

    minpool = block_reduce(img, (2, 2), np.min)
    msk2 = block_reduce(mask, (2, 2), np.max)

    min_canny = auto_canny(minpool, msk2, sigma=sigma, show=show)

    # Upsample back to the input shape, then threshold the interpolated values.
    thresh = 0.4
    min_canny = resize(min_canny, img.shape, anti_aliasing=False)
    min_canny = np.digitize(min_canny, bins=[thresh])

    return min_canny

def auto_canny(image, mask, sigma=0.2, show=False):
    """Canny edge detection with thresholds derived from the masked median.

    A copy of ``image`` is min-max normalised to 0..255 and cast to ``uint8``.
    With ``v`` the median of the normalised pixels where ``mask`` is truthy,
    the Canny thresholds are ``(1 - sigma) * v`` and ``(1 + sigma) * v``,
    clipped to [0, 255] and truncated to int.

    With ``show=True`` a figure is displayed with the input, the edges, and the
    histogram of the normalised image with the median and both unclipped
    thresholds marked.

    Returns the edge map produced by ``cv2.Canny``.
    """
    scaled = image.copy()
    scaled = cv2.normalize(scaled, scaled, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    median = np.median(scaled[mask.astype(bool)])
    low_raw = (1.0 - sigma) * median
    high_raw = (1.0 + sigma) * median

    lower = int(max(0, low_raw))
    upper = int(min(255, high_raw))
    edged = cv2.Canny(scaled, lower, upper)

    if show:
        fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(9, 3))
        ax[0].imshow(image,cmap='gray')
        ax[1].imshow(edged,cmap='gray')
        ax[2].hist(scaled.ravel(), bins=255)
        hist_val, hist_edges = np.histogram(scaled, bins=255)
        for x_pos, colour in ((median, 'r'), (high_raw, 'b'), (low_raw, 'g')):
            ax[2].vlines(x_pos, 0, max(hist_val), color=colour)
        fig.suptitle(f'SIGMA: {sigma}, median: {median}, lower {lower}, upper: {upper}')
        plt.show()

    return edged

def splitter(image,mask, canny_edges, show=False):
    """Pick, inside ``mask``, the darker sub-region(s) that best fit the Canny edges.

    Twenty thresholds are swept from the maximum towards the minimum intensity
    found inside the mask (the minimum itself is excluded). At each threshold
    the pixels of ``image`` below it are fed to an ``UmbraSplitTree``. The
    tree's best mask is then added to ``mask`` in place and returned.

    With ``show=True`` every intermediate threshold mask and the final overlay
    are plotted.
    """
    inside = mask > 0
    max_intensity = np.max(image[inside])
    min_intensity = np.min(image[inside])

    num_linspace = 20
    th = np.linspace(max_intensity, min_intensity, num=num_linspace, endpoint=False)

    umbra_tree = UmbraSplitTree(th, mask.copy(), canny_edges)

    if show:
        fig, ax = plt.subplots(nrows=1, ncols=4+num_linspace, figsize=(num_linspace, 4))
        for panel, content in zip(ax, (image, mask, canny_edges)):
            panel.imshow(content,cmap='gray')
        for a in ax:
            a.axis('Off')

    for i,t in enumerate(th):
        below = image[:,:] < t

        # 1 where the image is darker than the current threshold.
        dark_mask = np.zeros_like(mask)
        dark_mask[below] = 1
        umbra_tree.update(t, dark_mask)

        if show:
            # Display-only overlay: mask values, with 2 on the dark pixels.
            overlay = mask.copy()
            overlay[below] = 2
            ax[4+i].imshow(overlay)

    best_mask = umbra_tree.get_best_mask()
    best_mask += mask

    if show:
        ax[3].imshow(image,cmap='gray')
        ax[3].imshow(best_mask,alpha=.5)
        plt.show()

    return best_mask
    

In [6]:
from copy import deepcopy

def compute_dist(edges_mask, edges_canny, showErrors=True):
    """Score how well a set of contour pixels matches a set of Canny pixels.

    Both arguments are tuples of coordinate arrays as returned by
    ``np.nonzero``. For every contour pixel the Euclidean distance to the
    nearest Canny pixel is computed. The score is::

        n_contour + sum(nearest) / (fraction_of_exact_hits + 1e-10)

    so lower is better. ``showErrors`` is unused.

    If either point set is empty the inputs are printed; the subsequent
    reduction may then raise ``ValueError``.
    """
    mask_points = np.stack(edges_mask, axis=-1)
    canny_points = np.stack(edges_canny, axis=-1)

    pairwise = scipy.spatial.distance.cdist(mask_points, canny_points, metric='euclidean')

    if 0 in pairwise.shape:
        print(pairwise.shape)
        print(edges_mask)
        print(edges_canny)

    nearest = np.min(pairwise, axis=-1)
    n_mask_points = mask_points.shape[0]

    # Share of contour pixels lying exactly on a Canny pixel.
    exact_fraction = np.sum(nearest == 0) / n_mask_points
    total_distance = np.sum(nearest)

    return n_mask_points + total_distance / (exact_fraction + 1e-10)

def canny_distance(mask, canny, show=False):
    """Distance score between the contour of ``mask`` and the Canny edge map.

    The contours of ``mask`` are drawn one pixel wide on an empty image, and
    the drawn pixels are compared with the non-zero pixels of ``canny`` using
    ``compute_dist``.

    Parameters
    ----------
    mask : ndarray
        Region mask; cast to ``uint8`` and scaled by 255 for contour finding.
    canny : ndarray
        Edge map; its non-zero pixels are the edge points.
    show : bool
        When true, plot the contours and the edges with the score as title.

    Returns
    -------
    float
        The score from ``compute_dist``. ``np.inf`` is returned when
        ``compute_dist`` raises ``ValueError``; in that case a diagnostic
        figure is shown regardless of ``show``.
    """
    # findContours returns (contours, hierarchy) in OpenCV 4 and
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(255*mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Draw the contours on an empty image.
    img_contours = np.zeros(mask.shape)
    img_contours = cv2.drawContours(img_contours, contours, -1, (1), 1)

    ones_mask = np.nonzero(img_contours)
    ones_canny = np.nonzero(canny)

    try:
        dist = compute_dist(ones_mask, ones_canny)
    except ValueError:
        fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(6, 3))
        ax[0].imshow(mask ,cmap='gray')
        ax[0].imshow(img_contours ,alpha=.5)
        ax[1].imshow(canny,cmap='gray')
        fig.show()
        # np.Inf was removed in NumPy 2.0; np.inf works on every version.
        return np.inf

    if show:
        fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(6, 3))
        ax[0].imshow(mask ,cmap='gray')
        ax[0].imshow(img_contours ,alpha=.5)
        ax[1].imshow(canny,cmap='gray')
        fig.suptitle(f'dist = {dist}')

    return dist

class UmbraSplitTreeNode():
    """One connected region tracked while the intensity threshold is lowered.

    A node keeps its current mask, the history of thresholds it has seen, the
    score of each accepted mask and the best (lowest) score with its mask.
    Scores come from ``canny_distance`` and are capped by the parent's best
    score, so a child never scores worse than its parent.
    """

    def __init__(self,th, mask, canny, parent):
        self.parent = parent

        self.threshold_history = [th]
        self.mask = mask.copy()
        self.canny_edges = canny

        first_score = self._capped_score()
        self.canny_scores = [first_score]

        self.best_canny = first_score
        self.best_mask = self.mask.copy()

        # None while the node is a leaf; a list of nodes once it has split.
        self.children = None

    def _capped_score(self):
        """Canny distance of the current mask, capped by the parent's best score."""
        score = canny_distance(self.mask, self.canny_edges)
        if self.parent is None:
            return score
        return np.minimum(score, self.parent.best_canny)

    def get_best(self):
        """Return ``(best_mask, best_score)`` recorded so far on this node."""
        return self.best_mask, self.best_canny

    def update(self, new_th, new_mask, show=False):
        """Intersect the node's mask with ``new_mask`` and react to the result.

        * no region left: nothing changes (terminal state);
        * one region: the mask shrinks, is re-scored, and becomes the best
          mask when its score is not worse than the current best;
        * several regions: one child node is created per region.
        """
        self.threshold_history.append(new_th)

        tmp_mask = self.mask * new_mask
        label_im = label(tmp_mask)
        regions = regionprops(label_im)

        if show:
            fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(6, 3))
            ax[0].imshow(self.mask ,cmap='gray')
            ax[0].imshow(new_mask , alpha=.5)
            ax[1].imshow(tmp_mask,cmap='gray')

        n_regions = len(regions)
        if n_regions == 1:
            # Region shrank: keep it and check whether it fits the edges better.
            self.mask = tmp_mask
            score = self._capped_score()
            self.canny_scores.append(score)
            if score <= self.best_canny:
                self.best_canny = score
                self.best_mask = tmp_mask
                if show:
                    ax[1].set_title(f'best = {score}')
        elif n_regions > 1:
            # Region was split: spawn one child per connected component.
            if show:
                ax[1].set_title(f'region splitted')
            self.children = []
            for region in regions:
                child_mask = np.zeros_like(tmp_mask)
                child_mask[label_im == region.label] = 1
                self.children.append(
                    UmbraSplitTreeNode(new_th, child_mask, self.canny_edges, self)
                )
        # n_regions == 0: the region disappeared; the best mask so far stands.

        if show:
            plt.show()
        
        
class UmbraSplitTree():
    """Tree of ``UmbraSplitTreeNode`` objects grown as the threshold is lowered.

    ``leaves`` holds the nodes that are still being refined. Each call to
    ``update`` forwards the new threshold mask to every leaf and replaces the
    leaves that split by their children.
    """

    def __init__(self, th_init, mask_init, canny):
        self.root = UmbraSplitTreeNode(th_init, mask_init, canny,None)
        self.leaves = [self.root]

    def print(self):
        """Print the tree depth-first, one tab-indented line per node."""
        indent_lvl = 0
        to_print = [self.root]  # stack of UmbraSplitTreeNode still to print
        added = [1]             # per open level: nodes of that level left to print
        while len(to_print)>0:
            cur = to_print.pop(0)
            print(indent_lvl*"\t",'TreeNode',': ', cur.best_canny, f'({cur.canny_scores[0]})')
            added[-1] -= 1

            if cur.children is not None:
                to_print = cur.children + to_print
                added.append(len(cur.children))
                indent_lvl += 1

            while (len(added)>0) and (added[-1] == 0):
                indent_lvl -=1
                added.pop(-1)
        return

    def get_best_mask(self):
        """Return the union of the best mask found on each leaf-to-root path.

        For every leaf the path towards the root is walked and the mask with
        the lowest score is kept (ties go to the ancestor). The walk stops
        before the root, so the root's own best is only used when the root is
        itself a leaf. The selected masks are OR-ed together and returned as a
        ``float64`` array.
        """
        bests_lst = []

        for leaf in self.leaves:
            cur = leaf
            best_mask, best_score = cur.get_best()

            while cur.parent is not None:
                cur_best_m, cur_best_score = cur.get_best()

                if cur_best_score <= best_score:
                    best_score = cur_best_score
                    best_mask = cur_best_m

                cur = cur.parent

            bests_lst.append(best_mask)

        best_masks = np.stack(bests_lst, axis=-1)
        or_mask = np.logical_or.reduce(best_masks,axis=-1)

        return or_mask.astype(np.float64)

    def update(self, new_th, new_mask,show=False):
        """Forward ``new_th``/``new_mask`` to every leaf and refresh the leaf list.

        Leaves that split are replaced by their children, which are appended
        after the surviving leaves. The new list is built separately rather
        than removing items from ``self.leaves`` while iterating over it,
        which used to skip the leaf following each split leaf.
        """
        surviving = []
        new_leaves = []

        for leaf in self.leaves:
            leaf.update(new_th, new_mask)
            if leaf.children is not None:
                new_leaves.extend(leaf.children)
            else:
                surviving.append(leaf)

        self.leaves = surviving + new_leaves

        if show:
            tmp_len = len(self.leaves)
            fig, ax = plt.subplots(nrows=1, ncols=tmp_len, figsize=(6, 3))
            if tmp_len == 1:
                ax.imshow(self.leaves[0].mask)
            else:
                for i, leaf in enumerate(self.leaves):
                    ax[i].imshow(leaf.mask)
    

In [7]:
# Dataset to process. Alternative roots are kept below for convenience:
# root_dir = "../../datasets/segmentation/All/train"
# root_dir = "../../datasets/segmentation/All/test"
# root_dir = "../../datasets/segmentation/test_GT"
root_dir = "../../datasets/segmentation/ManualAnnotation"
images_dir = os.path.join(root_dir, "image")

# Every .FTS file found under the image folder, sorted by path.
fts_pattern = os.path.join(images_dir, '**/*.FTS')
images = sorted(glob.glob(fts_pattern, recursive=True))

print(len(images))

36


In [8]:
# Sanity check: every input image must be 2048 x 2048.
expected_shape = (2048,2048)
msks = sorted(glob.glob(os.path.join(images_dir, '**/*.FTS'),recursive=True))
for m in tqdm(msks):
    cur = imread(m)
    assert cur.shape == expected_shape

  0%|          | 0/36 [00:00<?, ?it/s]

In [9]:
st = 25
# tophat_thresh= range(250, 500 + st, st)
# Thresholds to generate masks for; re-enable entries as needed.
tophat_thresh = [
    525,
#     500,
#     475,
#     450,
#     425,
#     400,
#     375,
#     350,
#     325,
#     300,
#     275,
#     250,
]

context_width = 5  # not used below: ``padding`` is what gets passed to apply()
tophat_radius = 40
padding=5

methods = ['MIN']

# Use at most 16 CPUs but never more than the machine has, keep ~10% of them
# free, and never request fewer than one worker (0 raises ValueError).
num_cpu = min(16, multiprocessing.cpu_count())
max_workers = max(1, int(np.floor(0.9*num_cpu)))

for method in methods:
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Slice ``images`` here to process only a subset, e.g. images[225:230].
        to_process = images[:]

        start = time.time()
        # ``padding`` lands in apply()'s ``context_width`` positional slot.
        jobs = executor.map(apply, to_process, repeat(root_dir), repeat(method),
                            repeat(tophat_thresh), repeat(padding), repeat(tophat_radius))
        # total= lets tqdm show real progress; executor.map has no len().
        for i in tqdm(jobs, total=len(to_process)):
            pass
        end = time.time()
        print(f"Total elapsed time {str(timedelta(seconds=end - start))}")


0it [00:00, ?it/s]

Total elapsed time 0:00:42.832532


In [10]:
# tophat_thresh=400
# context_width = 5
# tophat_radius = 40
# padding=5
# dest_dir = os.path.join(root_dir,f'T{tophat_thresh}_canny')
# # def apply(img, dest_dir, method,tophat_thresh=400,context_width = 5,tophat_radius = 40):

# #for im in tqdm(images[10:11]):
# #for im in tqdm(images[11:12]):
# #for im in tqdm(images[16:17]):
# #for im in tqdm(images[26:27]):
# #or im in tqdm(images[29:30]):
# #for im in tqdm(images[30:31]):
# for im in tqdm(images[:]):
#     apply(im, dest_dir, method="MAX")

# Convert Background/Umbra/Penumbra masks to Background/Foreground

In [13]:
import os
import glob
import skimage.io as io

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

from tqdm.notebook import tqdm
import concurrent.futures
import numpy as np

from itertools import repeat
import multiprocessing

# Thresholds whose masks are converted: 525, 500, ..., 250 (descending, step 25).
tophat_thresh = list(range(525, 250 - 1, -25))


num_cpu = multiprocessing.cpu_count()

# Root folder holding the per-threshold ``2023_T<threshold>`` mask folders.
root_output_dir = "../../datasets/segmentation/ManualAnnotation"
# root_output_dir = "../../datasets/segmentation/All/test"
# root_output_dir = "../../datasets/segmentation/All/train"

def to_fg_bg(m, cur_out_dir_fgbg):
    """Write a binary (0/1) copy of mask file ``m`` into ``cur_out_dir_fgbg``.

    The output is named after the part of ``m``'s basename before the first
    dot, with a ``.png`` extension. Existing outputs are left untouched, so
    the conversion can be resumed.
    """
    stem = os.path.basename(m).split('.')[0]
    target = os.path.join(cur_out_dir_fgbg, stem + '.png')
    if os.path.exists(target):
        return

    im = io.imread(m)
    im[im>0] = 1  # every non-zero label becomes foreground
    io.imsave(target, im)

# Convert every mask of every threshold folder to a foreground/background
# mask stored in a sibling ``<folder>_fgbg`` directory.
for t in tophat_thresh:
    cur_out_dir = os.path.join(root_output_dir, f'2023_T{t}')
    print(cur_out_dir)
    cur_out_dir_fgbg = cur_out_dir + '_fgbg'
    print(cur_out_dir_fgbg)

    # Creates missing parents too and is a no-op when the folder exists.
    os.makedirs(cur_out_dir_fgbg, exist_ok=True)

    msks = sorted(glob.glob(os.path.join(cur_out_dir, '**/*.png'),recursive=True))

    # Keep ~10% of the CPUs free, but always request at least one worker:
    # max_workers=0 (single-core machine) raises ValueError.
    n_workers = max(1, int(np.floor(0.9*num_cpu)))
    with concurrent.futures.ProcessPoolExecutor(max_workers=n_workers) as executor:
        # total= lets tqdm show real progress; executor.map has no len().
        for i in tqdm(executor.map(to_fg_bg, msks, repeat(cur_out_dir_fgbg)), total=len(msks)):
            pass
    

../../datasets/segmentation/ManualAnnotation/2023_T525
../../datasets/segmentation/ManualAnnotation/2023_T525_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T500
../../datasets/segmentation/ManualAnnotation/2023_T500_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T475
../../datasets/segmentation/ManualAnnotation/2023_T475_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T450
../../datasets/segmentation/ManualAnnotation/2023_T450_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T425
../../datasets/segmentation/ManualAnnotation/2023_T425_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T400
../../datasets/segmentation/ManualAnnotation/2023_T400_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T375
../../datasets/segmentation/ManualAnnotation/2023_T375_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T350
../../datasets/segmentation/ManualAnnotation/2023_T350_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T325
../../datasets/segmentation/ManualAnnotation/2023_T325_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T300
../../datasets/segmentation/ManualAnnotation/2023_T300_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T275
../../datasets/segmentation/ManualAnnotation/2023_T275_fgbg


0it [00:00, ?it/s]

../../datasets/segmentation/ManualAnnotation/2023_T250
../../datasets/segmentation/ManualAnnotation/2023_T250_fgbg


0it [00:00, ?it/s]

# Visualize the generated Masks 

In [32]:
import ipywidgets as widgets

from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap, Normalize

# Dataset shown in the viewer. Alternatives:
# dataset_root = "../../datasets/segmentation/All/test_GT"
# dataset_root = "../../datasets/segmentation/All/train"
# dataset_root = "../../datasets/segmentation/All/test"
dataset_root = "../../datasets/segmentation/ManualAnnotation"

st = 25

# tophat_thresh= range(250, 500 + st, st)
tophat_thresh = range(300, 525 + st, st)
seg_types = [f'2023_T{threshold}_fgbg' for threshold in tophat_thresh]

print(seg_types)

image_folder = os.path.join(dataset_root, 'image')
gt_folder = os.path.join(dataset_root, 'GroundTruth')
generated_folders = {seg: os.path.join(dataset_root, seg) for seg in seg_types}

image_lst = sorted(glob.glob(os.path.join(image_folder, '**/*.FTS'), recursive=True))
print(len(image_lst))

# One sorted list of PNG masks per segmentation type, plus the ground truth.
generated_lsts = {
    seg: sorted(glob.glob(os.path.join(folder, '*.png')))
    for seg, folder in generated_folders.items()
}
gt_lst = sorted(glob.glob(os.path.join(gt_folder, '*.png')))

print(generated_lsts.keys())

max_idx = len(image_lst)

# Overlay colormaps: the first entry is fully transparent so that label 0
# does not hide the image underneath.
transparent = (0, 0, 0, 0)
cmap_gen = ListedColormap([transparent, *cm.turbo(range(255))])
cmap_gt = ListedColormap([transparent, *cm.autumn(range(255))])


def refresh(slider):
    """Redraw the viewer for the current slider position and checkbox states.

    Used as the ``observe`` callback of every widget; the ``slider`` argument
    (the change notification) is not used. The current zoom is restored after
    redrawing unless the axis still had its default (0, 1) x-limits.

    The image file is only read when the ``img`` checkbox is ticked.
    """
    ax = axes0[0]
    idx = idx_slider.value

    # Remember the zoom before clearing the axis.
    xlims0 = ax.get_xlim()
    ylims0 = ax.get_ylim()

    ax.clear()
    ax.set_title(os.path.basename(image_lst[idx]))

    if img_cb.value:
        ax.imshow(imread(image_lst[idx]), cmap="gray", interpolation="None")
    if gt_cb.value:
        gt_label = imread(gt_lst[idx])
        ax.imshow(gt_label, cmap=cmap_gt, interpolation="None", alpha=.5)
    for seg_type, cb in zip(seg_types, gen_cbs):
        if cb.value:
            generated = imread(generated_lsts[seg_type][idx])
            ax.imshow(generated, cmap=cmap_gen, interpolation="None", alpha=.7)

    if xlims0 != (0.0, 1.0):
        ax.set_xlim(xlims0)
        ax.set_ylim(ylims0)

    return

# ``ceil`` is not imported anywhere else in the notebook; import it here so
# the cell runs on a fresh kernel.
from math import ceil

max_rows = 1
max_cols = 1

# Build the figure with interactive mode off so it is only displayed through
# the widget container at the end of the cell.
plt.ioff()
plt.style.use('default')
fig_widget0, axes0 = plt.subplots(nrows=max_rows, ncols=max_cols, figsize=(8,5))
try:
    len(axes0)
except TypeError:
    # A single Axes object was returned: wrap it so axes0[0] always works.
    axes0 = [axes0]

plt.ion()
img_cb = widgets.Checkbox(value=True, description='img')
gt_cb = widgets.Checkbox(value=False, description='gt')
gen_cbs = [widgets.Checkbox(value=False, description=f'{t.replace("COMPARE_LTH_","")}') for t in seg_types]
idx_slider = widgets.IntSlider(value=0, min=0, max=max_idx-1, step=1, description="Image Index")


# Input image to predict
test_img = imread(image_lst[0])
#prediction
gen_label = imread(generated_lsts[seg_types[0]][0])

print(gen_cbs)
print(test_img.dtype)
print(gen_label.dtype)

axes0[0].set_title(os.path.basename(image_lst[0]))
axes0[0].imshow(test_img, cmap="gray", interpolation="None")


# Any widget change triggers a full redraw.
img_cb.observe(refresh, names='value')
gt_cb.observe(refresh, names='value')
for cb in gen_cbs:
    cb.observe(refresh, names='value')
idx_slider.observe(refresh, names='value')

# Lay the per-threshold checkboxes out in rows of ``h_len``.
h_len = 4

vbox = widgets.VBox([ widgets.HBox(gen_cbs[i*h_len:(i+1)*h_len]) for i in range(ceil(len(seg_types)/h_len)) ])

widgets.VBox([widgets.HBox([idx_slider,gt_cb]), vbox, fig_widget0.canvas])

['2023_T300_fgbg', '2023_T325_fgbg', '2023_T350_fgbg', '2023_T375_fgbg', '2023_T400_fgbg', '2023_T425_fgbg', '2023_T450_fgbg', '2023_T475_fgbg', '2023_T500_fgbg', '2023_T525_fgbg']
36
dict_keys(['2023_T300_fgbg', '2023_T325_fgbg', '2023_T350_fgbg', '2023_T375_fgbg', '2023_T400_fgbg', '2023_T425_fgbg', '2023_T450_fgbg', '2023_T475_fgbg', '2023_T500_fgbg', '2023_T525_fgbg'])
[Checkbox(value=False, description='2023_T300_fgbg'), Checkbox(value=False, description='2023_T325_fgbg'), Checkbox(value=False, description='2023_T350_fgbg'), Checkbox(value=False, description='2023_T375_fgbg'), Checkbox(value=False, description='2023_T400_fgbg'), Checkbox(value=False, description='2023_T425_fgbg'), Checkbox(value=False, description='2023_T450_fgbg'), Checkbox(value=False, description='2023_T475_fgbg'), Checkbox(value=False, description='2023_T500_fgbg'), Checkbox(value=False, description='2023_T525_fgbg')]
uint16
uint8


VBox(children=(HBox(children=(IntSlider(value=0, description='Image Index', max=35), Checkbox(value=False, des…

# Verify that masks are correctly created

In [39]:
# Thresholds whose fg/bg masks are verified; re-enable entries as needed.
tophat_thresh = [
    525,
#     500,
#     475,
#     450,
#     425,
#     400,
#     375,
#     350,
#     325,
#     300,
]


root_output_dir = "../../datasets/segmentation/All/test"
# root_output_dir = "../../datasets/segmentation/All/train"

# Range of masks checked in each folder; use (0, None) to check them all.
check_start, check_stop = 200, 220

# threshold -> indices (positions in the sorted file list) of masks that
# could not be read or do not have the expected shape.
erroneous_masks = {t:[] for t in tophat_thresh}
for t in tophat_thresh:
    cur_out_dir = os.path.join(root_output_dir, f'2023_T{t}_fgbg')
    print(cur_out_dir)
    msks = sorted(glob.glob(os.path.join(cur_out_dir, '**/*.png'),recursive=True))

    print(len(msks))
    subset = msks[check_start:check_stop]
    # start= keeps the recorded indices valid for the full ``msks`` list.
    for i,m in tqdm(enumerate(subset, start=check_start), total=len(subset)):
        try:
            cur = imread(m)
        except Exception:
            # Unreadable file. KeyboardInterrupt is deliberately not caught.
            erroneous_masks[t].append(i)
            continue
        if cur.shape != (2048,2048):
            erroneous_masks[t].append(i)

/globalscratch/users/n/s/nsayez/deepsun_bioblue/All/test/2023_T500_fgbg
1199


0it [00:00, ?it/s]

Format of the next cell's output: threshold, num_errors -> error indices

Ok if 0

In [26]:
# One line per threshold: threshold, number of bad masks, then their indices.
for threshold, bad_indices in erroneous_masks.items():
    print(threshold, ' ', len(bad_indices),'->',bad_indices)

500   0 -> []
475   0 -> []
450   0 -> []
425   0 -> []
400   0 -> []
375   0 -> []
350   0 -> []
325   0 -> []
300   0 -> []


# Playground

In [54]:
# Basenames (text before the first dot) of the images to drop from the dataset.
to_remove = [os.path.basename(name).split('.')[0] for name in images[3877:3890]]
for stem in to_remove:
    print(stem)

print(to_remove)

UPH20150716131500
UPH20150716141018
UPH20150716141019
UPH20150716141021
UPH20150716141022
UPH20150716141023
UPH20150716141024
UPH20150716141025
UPH20150716141500
UPH20150716141525
UPH20150716143000
UPH20150716144500
UPH20150716144904
['UPH20150716131500', 'UPH20150716141018', 'UPH20150716141019', 'UPH20150716141021', 'UPH20150716141022', 'UPH20150716141023', 'UPH20150716141024', 'UPH20150716141025', 'UPH20150716141500', 'UPH20150716141525', 'UPH20150716143000', 'UPH20150716144500', 'UPH20150716144904']


In [61]:
# WARNING: destructive cell. It deletes, from the image folder and from each
# mask folder below, every file whose basename (text before the first dot)
# is listed in ``to_remove``.
def _delete_listed(paths):
    """Remove each path whose stem is in ``to_remove``, printing 'delete' per file."""
    for path in paths:
        if os.path.basename(path).split('.')[0] in to_remove:
            print('delete')
            os.remove(path)


fits = sorted(glob.glob(os.path.join(dataset_root, 'image','*')))
print(len(fits))

masks_dirs = [  'T300_canny',
                'T350_canny',
                'T400_canny',
                'T450_canny',
             ]

_delete_listed(fits)

for th in masks_dirs:
    print(th)
    cur_dir = os.path.join(dataset_root, th)

    files = sorted(glob.glob(os.path.join(dataset_root, th,'*')))
    _delete_listed(files)

    # Number of files listed before the deletion above.
    print(len(files))


4777
T300_canny
4777
T350_canny
4777
T400_canny
4777
T450_canny
4777
