In [None]:
import sys
from pathlib import Path
# Make the ccount workflow scripts importable by appending two directories
# under the user's home directory to sys.path. The paths are built from
# Path.home(), so this expects the repository at ~/ccount.
home = str(Path.home())
sys.path.append(home+'/ccount/ccount/workflow/scripts')
sys.path.append(home+'/ccount/ccount/workflow/scripts/ccount')
from os.path import exists

from ccount.blob.io import load_locs, save_crops, load_crops
from ccount.blob.misc import crops_stat
from ccount.clas.pca_tsne import pca_tsne

from ccount.img.read_czi import read_czi, parse_image_arrays
from ccount.blob.crop_blobs import crop_blobs
from ccount.blob.plot import plot_flat_crop, plot_flat_crops, pop_label_flat_crops, show_rand_crops

import numpy as np
import pandas as pd
import subprocess

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Show the file names of all blobs that need to be labeled (npy files),
# followed by the czi image files in the data directory.
%ls /mnt/d/ccount/dev2021/training_data/E2f4_CFUe_2OCT20/res/filter_merge/*npy*
%ls /mnt/d/ccount/dev2021/training_data/E2f4_CFUe_2OCT20/data/*czi

# User Parameters

In [None]:
# User-editable parameters for one labeling session.
#
# Inputs : `project_dir`, `name`, `czi_name`, `image_index`
# Derived: `loc_file` (blob locations), `czi_file` (image), `out_name` (output)

# Root directory of the data set; loc_file and czi_file are both built from it
# so the directory only has to be edited in one place.
project_dir = '/mnt/d/ccount/dev2021/training_data/E2f4_CFUe_2OCT20/'

# pos/clas.npy.gz, blob_crops/crops.npy.gz, or blob_locs/locs.npy.gz
name = 'E2f4_CFUe_KO_1-Stitching-01.0'  # core name of npy.gz file, which is used to auto generate loc/czi names, and output names
czi_name = 'E2f4_CFUe_KO_1-Stitching-01'  # without .0 .1 .2 .3
image_index = 0  # 0 if loc_file = 'xxx.0.xxx.npy.gz', 1 if 'xxx.1.xxx.npy.gz'

# The three values above are edited by hand and must describe the same scanned
# area (name == czi_name + '.' + image_index); warn when they have drifted
# apart, because crops would then be taken from the wrong image.
if name != '{}.{}'.format(czi_name, image_index):
    print('WARNING: name {!r} does not match czi_name {!r} with image_index {}'.format(
        name, czi_name, image_index))

loc_file = project_dir + 'res/filter_merge/' + name + '.flt.npy.gz'
print('loc_file exists:', exists(loc_file))

# corresponding czi file (must match loc_file)
czi_file = project_dir + 'data/' + czi_name + '.czi'
print('czi_file exists:', exists(czi_file))

## output file name, please don't over-write important files!!!
out_name = name + '.labeled.npy.gz'  # file name to be saved in, will over-write existing files
if exists(out_name):
    # out_name is relative, so this checks the notebook's working directory.
    print('WARNING: {} already exists and will be over-written on save'.format(out_name))
print('out fname:', out_name)

# Read Data

In [None]:
# Load the blob locations and cap how many are labeled in this session.
max_blobs = 500  # upper bound on blobs kept; raise it to label more per session

locs = load_locs(loc_file)  # contains x,y,z in the first 3 columns
n_loaded = len(locs)
locs = locs[:max_blobs, :]
# Report the real counts: the file may hold fewer than max_blobs blobs.
print('kept first {} of {} blobs'.format(len(locs), n_loaded), locs.shape)

In [None]:
# Read the czi file, then extract the single scanned area selected by
# image_index (set in the User Parameters cell) into `image`.
czi = read_czi(czi_file)  # image array of 4 scanned areas
image = parse_image_arrays(czi, i=image_index)  # one of the scanned areas, takes 30s to load
czi=[] # release RAM: rebinding drops this cell's reference to the full czi data
# Optional speed-up: un-comment the first line once to cache `image` on disk,
# then use the second line in later sessions instead of re-reading the czi.
# np.save("image.npy", image)  # save temp file for quick reload
# image = np.load('image.npy')

# View Data
Tip: you can run the next cell as many times as you like to preview blobs and build a baseline for your classification before you start labeling.

In [None]:
# Preview one randomly chosen blob at a wider crop width than is used for
# labeling. This cell is meant to be re-run freely.
i = np.random.choice(len(locs))
print('showing blob {}'.format(i))

# Keep the preview in its own variable. The Labeling and Save cells use the
# global `crops`; if this cell overwrote it with a single-row array, saving
# afterwards would broadcast that one label onto every row of `locs`.
preview_crops = crop_blobs(locs[i:i+1], image, crop_width=400)  # User can change crop_width, 500 for overview
p = plot_flat_crop(preview_crops[0, :],  image_scale=1)

## Labeling


In [None]:
# Crop every blob in `locs` out of `image` at crop_width=200. The result is the
# `crops` array that the labeling, save and statistics cells below operate on.
crops = crop_blobs(locs, image, crop_width=200) # can take 30s

In [None]:
# Interactive labeling step: `crops` is passed in and replaced by the returned
# array, so re-running this cell continues from the current `crops`.
# NOTE(review): skip_labels=[0, 1] presumably skips blobs already labeled 0/1,
# and column 3 of the result is read as the label by the Save cell — confirm
# against pop_label_flat_crops, including that row order matches `locs`.
crops = pop_label_flat_crops(crops, random = True, seed = 1, skip_labels = [0, 1])

## Save

In [None]:
# decide output name and save
locs[:, 3] = crops[:, 3]
s = input ("would you like to save/over-write {} ? (y/n)".format(out_name))
if s == 'y':
    save_crops(locs, out_name)
else:
    print('exit without saving')

# Review Labeled DB (Optional)

In [None]:
# Reload the labeled data from `out_name` for review. This reads the file on
# disk, so it reflects the last save rather than the in-memory `crops`.
labeled = load_crops(out_name)

In [None]:
## labeled as YES
# Display crops from `labeled` with label_filter=1 and num_shown=3; the return
# value is assigned to `_` to suppress the cell's output repr.
_ = show_rand_crops(crops=labeled, label_filter=1, num_shown=3)

In [None]:
## labeled as NO
# Display crops from `labeled` with label_filter=0 and num_shown=2.
_ = show_rand_crops(crops=labeled, label_filter=0, num_shown=2)

In [None]:
# any label (Optional)
# NOTE(review): label_filter='na' presumably disables filtering by label —
# confirm against show_rand_crops.
_ = show_rand_crops(crops=labeled, label_filter='na', num_shown=3)

## Unsupervised classification (overview of blobs) (Optional)

In [None]:
# Report statistics for the in-memory `crops` array (the one produced by the
# Labeling cell), not for `labeled`, which was reloaded from disk.
crops_stat(crops)

In [None]:
# Use column 3 of `crops` (the column the Save cell copies into `locs` as the
# label) as the 'cluster_id' of each blob, then run pca_tsne on the full
# `crops` array with that grouping.
cluster_info = pd.DataFrame(crops[:, 3], columns=['cluster_id'])
# tsne_df is not used further in this notebook; it is kept for inspection.
tsne_df = pca_tsne(pd.DataFrame(crops), cluster_info=cluster_info)

# Save notebook for review

In [None]:
%%javascript
// Ask the notebook front end to save the notebook so that the HTML export in
// the following cells includes the latest outputs.
// NOTE(review): the global `IPython.notebook` object is provided by the
// classic Notebook front end; confirm it exists in the front end you use.
IPython.notebook.save_notebook()

In [None]:
# Convert the notebook to HTML. The file name labeling.ipynb is hard-coded, so
# this only works when the notebook is saved under that name in the current
# working directory.
!jupyter nbconvert --to html labeling.ipynb

In [None]:
nb_name = name + '.labeling.ipynb'
html_name = name + ".labeling.html"
!echo $html_name $nb_name
!cp labeling.ipynb $nb_name
!mv labeling.html $html_name