# Ground truth counting and exporting
This notebook is what we used to validate the cell counts that were computed, in whole or in part, in other notebooks (those usually cover only the test data, not the training data).

In [None]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
from cellpose import plot, models, core, utils, io, dynamics
import skimage.io
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('dark_background')
import matplotlib as mpl
%matplotlib inline
mpl.rcParams['figure.dpi'] = 300
import time, os, sys
from tifffile import imread
# use_GPU = core.use_gpu()
# print('>>> GPU activated? %d'%use_GPU)

import omnipose

In [None]:
# Configuration for the phase / fluor datasets, whose folders are named
# "<subset>_sorted".
base = '/home/kcutler/DataDrive/omnipose_all'
dataset = ['phase','fluor']
subset = ['train','test']
basedir = [
    [os.path.join(base, name, split + '_sorted') for split in subset]
    for name in dataset
]

# Configuration for the worm dataset. These assignments replace the ones
# above, so this is the configuration that is actually in effect.
base = '/home/kcutler/DataDrive/omnipose_all/OSF/'
dataset = ['worm']
subset = ['train','test']
basedir = [
    [os.path.join(base, name, split) for split in subset]
    for name in dataset
]

# Alternative configuration, currently disabled:
# base = '/home/kcutler/DataDrive/'
# dataset = ['luca']
# subset = ['train','test']
# basedir = [[os.path.join(base,d,s) for s in subset] for d in dataset]

def getname(path,suffix='_masks'):
    """Return the file name of `path` without its directory, its last
    extension, or any occurrence of `suffix`.

    Every occurrence of `suffix` in the extension-less name is removed,
    not only a trailing one.
    """
    filename = Path(path).name
    stem, _extension = os.path.splitext(filename)
    return stem.replace(suffix, '')

def get_GT(basedir):
    """Load the images and ground-truth masks found under `basedir`.

    Image files are discovered with `io.get_image_files` (searching one
    level down as well) and the matching label files with
    `io.get_label_files`, both using the '_masks' filter.

    Returns a tuple `(imgs, masks_gt, img_names, mask_names)`: the loaded
    images, the masks after `clean_boundary` and `format_labels(clean=True)`,
    and the two path lists, each sorted by `getname`.
    """
    mask_filter = '_masks'
    image_paths = io.get_image_files(basedir,mask_filter,look_one_level_down=True)
    label_paths = io.get_label_files(image_paths, mask_filter)

    # The directory parts differ (notably for the test folder), so the raw
    # paths do not sort consistently; order both lists by bare file name.
    label_paths = sorted(label_paths,key=getname)
    image_paths = sorted(image_paths,key=getname)

    imgs = list(map(skimage.io.imread, image_paths))

    # masks_gt = [utils.format_labels(utils.clean_boundary(skimage.io.imread(f))) for f in mask_names]
    masks_gt = []
    for label_file in label_paths:
        labels = skimage.io.imread(label_file)
        labels = omnipose.utils.clean_boundary(labels)
        masks_gt.append(omnipose.utils.format_labels(labels,clean=True))

    return imgs, masks_gt, image_paths, label_paths

J, K = len(dataset), len(subset)

# One independent J x K grid of empty lists per quantity; every cell is
# replaced below by the data loaded from the corresponding folder.
imgs, masks_gt, img_names, mask_names = (
    [[[] for _ in range(K)] for _ in range(J)] for _ in range(4)
)

for j in range(J):
    for k in range(K):
        folder = basedir[j][k]
        print(folder)
        loaded = get_GT(folder)
        imgs[j][k], masks_gt[j][k], img_names[j][k], mask_names[j][k] = loaded
# train_imgs, train_masks, train_img_names, train_mask_names = get_GT(train_dir)
# test_imgs, test_masks, test_img_names, train_mask_names = get_GT(train_dir)

### Count images and cells in dataset


In [None]:
# Count images
import fastremap, ncolor
from matplotlib.colors import ListedColormap
from omnipose import utils
from cellpose import plot
# Five-colour sinebow map used to render n-colour label images.
cmap = ListedColormap(list(utils.sinebow(5).values()))

# imagecount[j,k] / cellcount[j,k]: totals for dataset j, subset k.
imagecount = np.zeros([J,K],int)
cellcount = np.zeros([J,K],int)
show = False # set True to plot every image next to its outlines and labels
for j in range(J):
    for k in range(K):
        imagecount[j,k] += len(imgs[j][k])

        for maski,imgi,name in zip(masks_gt[j][k],imgs[j][k],img_names[j][k]):
            # Label 0 is background, not a cell: count only the non-zero
            # labels. unique() lists 0 at most once, so this drops exactly
            # the background entry when it is present.
            cellcount[j,k] += np.count_nonzero(fastremap.unique(maski))
            if maski.shape!=imgi.shape:
                print(name,'has mask mismatch')
            if show:
                fig = plt.figure(figsize=(10,10))
                p = utils.normalize99(imgi)
                img0 = np.stack((p,p,p),axis=2) # grayscale -> 3 channels
                outli = plot.outline_view(img0,maski)

                # image | outline overlay | n-colour labels (alpha channel dropped)
                pic = np.hstack((img0,utils.rescale(outli),cmap(ncolor.label(maski))[:,:,:3]))
                plt.imshow(pic)
                plt.axis('off')
                plt.show()

    # per-dataset totals over all subsets
    print(np.sum(imagecount[j]),np.sum(cellcount[j]))

In [None]:
# Folder that receives the per-dataset summary files. It has its own name on
# purpose: `basedir` holds the nested list of data folders that later cells
# index as basedir[j][k], so it must not be overwritten with a string here.
stats_dir = '/home/kcutler/DataDrive/omnipose_all/'

for j in range(J):
    # Build the report for dataset j line by line.
    text = ['{} dataset\n'.format(dataset[j])]
    for k in range(K):
        text+=[subset[k]+':',
               '\t image count: {}'.format(imagecount[j,k]),
               '\t cell count: {}'.format(cellcount[j,k])]
    text+=['total:',
           '\t image count: {}'.format(np.sum(imagecount[j])),
           '\t cell count: {}'.format(np.sum(cellcount[j])),'\n']

    # NOTE(review): 'datatet' looks like a typo for 'dataset'; the file name is
    # left unchanged so previously exported files keep the same name.
    with open(os.path.join(stats_dir,'{}_datatet_stats.txt'.format(dataset[j])), "w") as text_file:
        for t in text:
            print(t) # echo to the notebook output
            print(t,file=text_file)
    

In [None]:
J # number of datasets, i.e. len(dataset)

## Check to make sure that there is no duplication between test and train datasets

In [None]:
for j in range(J):
    # Train and test files live in different folders, so their full paths can
    # never be equal. Compare the extension-less file names instead.
    # This only detects files that share a name; identical images stored
    # under different names are not caught.
    A = img_names[j][0] #assuming two categories
    B = img_names[j][1]
    test_names = {getname(x) for x in B}
    print([x for x in A if getname(x) in test_names])

### Export images, masks, and thumbnails

In [None]:
from PIL import Image
import tifffile
from omnipose.utils import rescale, format_labels
offset = [0,len(imgs[0])] # not read by the loop below; kept in case other cells use it

# Export switches (0 = off, 1 = on); each enables one independent output below.
clean = 1 # save renamed image/mask copies plus a thumbnail
save_ncolor = 0 # save masks coloured with `cmap` after ncolor.label
save_color = 0 # save masks coloured with a per-image sinebow colormap
display = 0 # show the image and its ncolor labels side by side
mrcnn = 0 # export to mrcnn folder
matlab = 0 # export for morphometrics and supersegger processing

# Walk every (dataset, subset) pair and write the enabled exports into sibling
# folders of the source folder. `basedir` must be the nested list of folder
# paths (basedir[j][k]) at this point.
for j in range(J):
    for k in range(K):
        for i,(im,mask) in enumerate(zip(imgs[j][k],masks_gt[j][k])):
            name = '%03d' % (i,) # zero-padded running index within this subset
            if mask.shape!=im.shape:
                print(name,'has mask mismatch')
            if clean:
                #save a copy with new naming convention
                savedir = basedir[j][k]+'_website'
                io.check_dir(savedir)
                io.imsave(os.path.join(savedir,name+'_img.tif'),im)
                io.imsave(os.path.join(savedir,name+'_masks.tif'),mask)

                # convert to thumbnail image; use a separate variable so that
                # `im` stays the original array for the exports further down
                thumb = (omnipose.utils.normalize99(im)*255).astype(np.uint8)
                thumb = Image.fromarray(thumb)
                # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10
                thumb.thumbnail((128, 128), Image.LANCZOS)
                savedir = basedir[j][k]+'_website_thumbnails'
                io.check_dir(savedir)
                thumb.save(os.path.join(savedir,name+'_thumbnail.tif'))

            #Also save an ncolor version
            if save_ncolor:
                pic = cmap(ncolor.label(mask))
                # pic[:,:,-1] = mask>0 #transparency, not needed
                savedir = basedir[j][k]+'_ncolor'
                io.check_dir(savedir)
                skimage.io.imsave(os.path.join(savedir,name+'_ncolor_masks.tif'),(pic*(2**8-1)).astype(np.uint8))

            # can also save rescaled version of label matrix
            if save_color:
                cmap2 = ListedColormap(list(utils.sinebow(mask.max()).values()))
                pic = cmap2(omnipose.utils.rescale(mask))
                pic[:,:,-1] = mask>0 #transparency
                savedir = basedir[j][k]+'_rescaled_sinebow'
                io.check_dir(savedir)
                skimage.io.imsave(os.path.join(savedir,name+'_masks_rescaled.tif'),(pic*(2**8-1)).astype(np.uint8))

            if display:
                fig = plt.figure(figsize=[4]*2)
                plt.imshow(np.hstack((utils.rescale(im),ncolor.label(mask))))
                plt.axis('off')
                plt.show()

            if j==0: # only meant for phase
                if mrcnn:
                    orig_name = getname(mask_names[j][k][i]) # use original names for this export
                    savedir = os.path.join('/home/kcutler/DataDrive/omnipose_all/RGB_for_mrcnn',subset[k])
                    io.check_dir(savedir)

                    mgt = omnipose.utils.format_labels(mask,clean=True)
                    # imwrite replaces the deprecated tifffile.imsave
                    tifffile.imwrite(os.path.join(savedir,orig_name+'_masks.tif'),mgt.astype('int64'))

                    # 16-bit image repeated over three channels
                    img = np.repeat((rescale(im)*(2**16-1)).astype('uint16')[:,:,np.newaxis],repeats=3,axis=-1) # must use rgb images
                    tifffile.imwrite(os.path.join(savedir,orig_name+'.tif'),img)

                if matlab and subset[k]=='test':
                    orig_name = getname(mask_names[j][k][i]) # use original names for this export
                    savedir = os.path.join('/home/kcutler/DataDrive/omnipose_all/matlab')
                    io.check_dir(savedir)
                    img = (rescale(im)*(2**16-1)).astype('uint16')
                    tifffile.imwrite(os.path.join(savedir,orig_name+'.tif'),img)

In [None]:
# Scratch check: shape of three copies of `im` stacked along a new leading
# axis (`im` is whatever the export loop last assigned).
(np.stack([im]*3)).shape

In [None]:
# Scratch check of getname on a single mask path. mask_names is nested as
# [dataset][subset][file], so index down to one path; passing the list
# mask_names[0] makes Path() raise TypeError.
getname(mask_names[0][0][0])

In [None]:
mask_names[0][0] # mask paths of the first dataset, first subset

In [None]:
# Load a single TIFF for a quick visual inspection.
t = io.imread('/home/kcutler/DataDrive/omnipose_all/morphometrics_eval/5I_crop_ensemble_1_Gparent.tif')

In [None]:
plt.imshow(t) # display the image loaded into `t`

In [None]:
basedir # inspect the current value of basedir

In [None]:
# Environment check: installed pyqtgraph version.
import pyqtgraph as pg
pg.__version__

In [None]:
import mahotas

In [None]:
mahotas.__version__ # environment check: installed mahotas version

In [None]:
import numpy

In [None]:
numpy.__version__ # environment check: installed numpy version