<a href="https://colab.research.google.com/github/samuramirez/cellmigration/blob/master/segmentation_1_class_cell_indiv_tiff.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Segmentation one class (cell)

### Mount Google Drive (Colab can see Drive files) and authenticate so that we can interact with GCP via SDK

In [1]:
# Best-effort credential setup: each step is optional, so any failure is
# reported and the notebook carries on.

# 1) Cloud Storage client (needs application-default credentials).
try:
  from google.cloud import storage
  client = storage.Client()
except Exception as storage_err:
  print(storage_err, "error: no credentials, ignoring", sep="\n")

# 2) Colab user authentication.
try:
  from google.colab import auth
  # This allows the SDK to see and edit Google Drive files;
  # the SDK is required to interact with GCP.
  auth.authenticate_user()
except Exception as colab_err:
  print(colab_err, "error: no colab credentials, ignoring", sep="\n")


### Installing and loading packages 

In [2]:
#Install packages (skippable for speed)
!pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.htmlimport torch
!pip install fastcore --upgrade
!pip install fastai --upgrade
!pip install imagecodecs --upgrade
!pip install scikit-image --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.pytorch.org/whl/torch_stable.htmlimport
[31mERROR: Could not find a version that satisfies the requirement torch==1.8.0+cu111 (from versions: 1.4.0, 1.5.0, 1.5.1, 1.6.0, 1.7.0, 1.7.1, 1.8.0, 1.8.1, 1.9.0, 1.9.1, 1.10.0, 1.10.1, 1.10.2, 1.11.0, 1.12.0, 1.12.1, 1.13.0, 1.13.1)[0m[31m
[0m[31mERROR: No matching distribution found for torch==1.8.0+cu111[0m[31m
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
#Check for cuda: nvidia-smi reports the visible GPU(s), the driver/CUDA version and current GPU memory usage
!nvidia-smi

Wed Feb  1 18:32:47 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0    23W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
import torch
import torchvision
import fastai
from fastai.vision.all import *
from skimage.io import imread, imsave, imshow
from skimage.exposure import rescale_intensity
import os
import numpy as np
from pathlib import Path
import re
import ntpath
from datetime import datetime
from fastprogress.core import format_time
from IPython.utils.io import capture_output
import logging

  from .collection import imread_collection_wrapper


In [5]:
# Report whether PyTorch can see a CUDA device; the model-loading and segmentation
# cells only move models/tensors to "cuda" when this is True.
print("cuda available:", torch.cuda.is_available());

cuda available: True


# Segmentation Parameters


## Basic Parameters
Settings shared between training and segmentation.

In [6]:
#@markdown What object type you are segmenting
segmentation_target = "Separate - Cell + Nucleus models (two output folders)" #@param ["Nucleus", "Cell", "Separate - Cell + Nucleus models (two output folders)", "Combined - Cell + Nucleus models", "Separate - Single Model (two output folders)", "Combined - Single Model","Custom Models"]


#@markdown local folder where all images and masks to be transferred to and from the GCP will be stored
GCP_transfer_folder = "gcp_transfer" #@param {type:"string"}
GCP_transfer_folder = Path(GCP_transfer_folder);
# exist_ok keeps the cell safe to re-run once the folder already exists
os.makedirs(GCP_transfer_folder, exist_ok=True);

# local destination handed to every `gsutil cp` call
gsutil_dest_folder = GCP_transfer_folder;

#@markdown regex used to decipher filenames (Leave the same for metamorph outputs)
filename_regex = "p[0-9]*_s([0-9]+)_t([0-9]+).*\\.(tif|tiff|TIF|TIFF)" #@param {type:"string"}

#@markdown whether the notebook is being run in a location with persistent file storage (local or GCE VM), as opposed to something like google colab. Will be set to False automatically if google colab is detected.
persistent_files = True #@param {type:"boolean"}
try:
    # Colab's helpers live in the `google.colab` package (the same package the
    # authentication cell imports `auth` from); a successful import means we are on Colab.
    from google.colab import drive
    persistent_files = False;
except Exception:
    # Not on Colab (or the package is unusable): keep the value chosen in the form.
    pass;

#@markdown will put checkpoint outputs into a log folder logs/
process_logging = True #@param {type:"boolean"}
if process_logging:
    print("activating process logging");
    import logging
    log_folder = Path("logs");
    os.makedirs(log_folder, exist_ok=True);
    # one log file per run of this cell, named after the current timestamp
    foldername = "log_" + str(datetime.now()) + ".txt";
    log_filepath = log_folder/foldername;
    # force=True replaces any handlers installed by an earlier run of this cell
    logging.basicConfig(filename=log_filepath, level=logging.DEBUG,force=True);
    # del log_folder;

activating process logging


## Cloud Storage Parameters
Filenames and parameters for using google cloud storage

In [7]:
#@markdown The name of the bucket to use (exclude the gs://)
bucket = Path("optotaxisbucket") #@param {type:"string"}

# #@markdown the name of the high level directory in the bucket to which movie segmentation files wil be uploaded
# GCP_parent_dir = "movie_segmentation" #@param {type:"string"}
# GCP_parent_dir = Path(bucket)/GCP_parent_dir
# #NOTE: whenever gsutil is used with GCP_parent_dir, you must add gs://{directory}

#@markdown the path to the folder in the bucket (exclude the bucket name) where models are stored and will be exported to after training
modelsfolder = 'models/fastai' #@param {type:"string"}
gcp_modelsfolder = bucket/modelsfolder;
local_modelsfolder = GCP_transfer_folder/os.path.basename(modelsfolder);
del modelsfolder; #for testing purposes to make sure no old code is using this

## Image Preparation
shared settings between training and segmentation image/mask processing

In [8]:
#@markdown Number of slices (columns/rows) to divide input images into; for the math, please check https://www.desmos.com/calculator/t3cyflvlef
x_slices = 5 #@param {type:"integer"}
y_slices =  5 #@param {type:"integer"}
#@markdown dx, dy are the extra context around the segmented center in both directions
dx =  42#@param {type:"integer"}
dy =  32#@param {type:"integer"}
#@markdown x and y crop are how much to straight remove from the image to make the sizes able to be subdivided nicely
x_crop = 0 #@param {type:"integer"}
y_crop = 0 #@param {type:"integer"}

# Both lists are ordered [negative y, negative x, positive y, positive x].
context_bounds = [dy,dx]*2 #assuming x and y symmetrical, not always true -- fix?
crop = [y_crop,x_crop]*2

#@markdown whether to rescale images to their minimum and maximum pixel values; not used if external processing
# form type spelled "boolean" like every other checkbox in this notebook
auto_rescale = True; #@param {type:"boolean"}

## Training-Specific Settings
parameters and paths only for training

In [9]:
#@markdown path to the input images within the GCP bucket (exclude the bucket name)
training_images="training_images" #@param {type:"string"}
gcp_training_images=bucket/training_images;
local_training_images = GCP_transfer_folder/training_images;
del training_images; #for testing purposes to make sure no old code is using this

#@markdown path to the input masks within the GCP bucket (exclude the bucket name)
# Must be a bucket-relative path: joining an absolute path onto `bucket` or
# `GCP_transfer_folder` with `/` discards the left-hand side entirely, so an
# absolute local path here would point both variables outside the bucket and
# outside the transfer folder.
# NOTE(review): "training_masks" mirrors the images setting above; confirm it matches the bucket layout.
training_masks="training_masks" #@param {type:"string"}
gcp_training_masks=bucket/training_masks
local_training_masks = GCP_transfer_folder/training_masks;
del training_masks; #for testing purposes to make sure no old code is using this

#@markdown base model that will be used to train from
inmodelname = 'seg_nuc_062719_s_1_2_110619_bleb.pkl' #@param {type:"string"}
#@markdown where the model will be exported to after training
outmodelname = 'iter3_2_nuc_scratch_test.pkl' #@param {type:"string"}

## Segmentation-Specific Settings:
parameters and paths only for segmenting an experiment


In [10]:
experiment = "2023.1.26 OptoITSN Test 41" #@param {type: "string"}

#@markdown path to the input images in the GCP bucket (exclude the bucket name)
segmentation_images=f"movies/{experiment}/{experiment}"  #@param {type:"raw"}
gcp_segmentation_images=bucket/segmentation_images;
# local mirror keeps only the last path component under the transfer folder
local_segmentation_images = GCP_transfer_folder/Path(segmentation_images).name;
del segmentation_images; #for testing purposes to make sure no old code is using this

#@markdown folder name where output masks will be deposited in the GCP bucket gcp directory. If segmentation target is "Cell", "Nucleus", or either of the "Separate" options, this will output to the subfolders "Nucleus" and "Cell" respectively. If target is "Combined", will output to the "Combined" subfolder
segmentation_output_masks=f'movie_segmentation/{experiment}/segmentation_output_masks/' #@param {type:"raw"}
gcp_segmentation_output_masks=bucket/segmentation_output_masks;
local_segmentation_output_masks=GCP_transfer_folder/Path(segmentation_output_masks).name;
del segmentation_output_masks; #for testing purposes to make sure no old code is using this

print("out file:",gcp_segmentation_output_masks)

#@markdown whether to send output folders to (individual) zip files; if persistent_files is false, this will incur a cost of downloading any nonpersistent files from the cloud and zipping locally
zip_output = True; #@param {type:"boolean"}

#@markdown whether splitting and stitching is already done or should be done by the program
external_splitting_stitching = False; #@param {type:"boolean"}

#@markdown name/path in the gcp models folder to the nucleus model for segmentation. This field will not be used if segmentation target is Cell or * - Single Model
nucleus_model = "iter3_4_nuc_continue.pkl" #@param {type:"string"}
nucleus_model = local_modelsfolder/nucleus_model;

#@markdown name/path in the gcp models folder to the cell model for segmentation. This field will not be used if segmentation target is Nucleus or * - Single Model
cell_model = "iter1_9_continue.pkl" #@param {type:"string"}
cell_model = local_modelsfolder/cell_model;

#@markdown name/path in the gcp models folder to the combined nucleus + cell model for segmentation. This field will not be used if segmentation target is Cell, Nucleus, or Separate - Cell + Nucleus models
combined_model = "" #@param {type:"string"}
# NOTE: with an empty name this resolves to the models folder itself; it must be
# filled in before selecting one of the "Single Model" targets.
combined_model = local_modelsfolder/combined_model;

#@markdown dict of model_name:output_folder_name items to be used if segmentation target is Custom Models. Input files will be processed once per model, and the outputs will be saved to the appropriate output file.
#@markdown Advanced usage: if either model_name or output_folder_name is a list/tuple, the segmentation outputs will be combined into one file or split into separate files, respectively
custom_models = {"iter1_1_seg_redo.pkl":"Iter1","iter1_2_seg_redo.pkl":"Iter2","iter1_3_seg_redo.pkl":"Iter3","iter1_4_seg_redo.pkl":"Iter4","iter1_6_context_4_continue.pkl":"Iter6","iter1_8_continue.pkl":"Iter8"} #@param {type:"raw"}
# Re-key the user's dict so every model name becomes a path inside the local models folder.
relative_custom_models = {};
for key,val in custom_models.items():
    if isinstance(key,str):
        relative_custom_models[local_modelsfolder/key] = val;
    else:
        # A group of models must be stored as a tuple: a bare generator expression
        # would hash by identity and be exhausted after one pass, so the paths
        # could not be read back by the model-loading cell.
        relative_custom_models[tuple(local_modelsfolder/k for k in key)] = val;
custom_models = relative_custom_models;
# print(custom_models)

used_models = {};
# `ts` lists the segmentation targets in the same order as the form options above;
# `ms` gives, for each target, a dict of model path(s) -> output folder name(s).
ts = ["Nucleus", "Cell", "Separate - Cell + Nucleus models (two output folders)", "Combined - Cell + Nucleus models", "Separate - Single Model (two output folders)", "Combined - Single Model","Custom Models"]
ms = [{nucleus_model:"Nucleus"},{cell_model:"Cell"},
      {nucleus_model:"Nucleus",cell_model:"Cell"},
      {(nucleus_model,cell_model):"Combined"},
      {combined_model:("Nucleus","Cell")},
      {combined_model:"Combined"},
       custom_models];
target_model_map = dict(zip(ts,ms));


print("using models:",list(target_model_map[segmentation_target].keys()));

out file: optotaxisbucket/movie_segmentation/2023.1.26 OptoITSN Test 41/segmentation_output_masks
using models: [Path('gcp_transfer/fastai/iter3_4_nuc_continue.pkl'), Path('gcp_transfer/fastai/iter1_9_continue.pkl')]


## Helper Functions

In [11]:
def get_image_files_recursively(path):
    """Return every image file found under `path`, including all sub-folders.

    fastai's `get_image_files` already walks the directory tree when
    `recurse=True` (its default), so no manual descent is needed. A hand-rolled
    descent on top of it would list nested files more than once, and a check
    such as `os.path.isdir(name)` on a bare `os.listdir` entry would test the
    name against the current working directory rather than against `path`.

    path: str or Path of the folder to search.
    Returns whatever collection `get_image_files` returns for that folder.
    """
    return get_image_files(Path(path), recurse=True);
            
def on_rm_error( func, path, exc_info):
    """`shutil.rmtree` error handler: make `path` writable and retry the removal.

    func: the function that failed (e.g. os.remove / os.rmdir), as passed by rmtree.
    path: the path that could not be removed.
    exc_info: exception details supplied by rmtree (unused).

    Clean-up is best-effort: if the retry fails as well, the problem is logged
    and swallowed so that the surrounding rmtree can carry on.
    """
    import stat  # local import: the notebook's import cell does not import `stat`
    # path contains the path of the file that couldn't be removed
    # let's just assume that it's read-only, lift the restriction and try again.
    os.chmod( path, stat.S_IWRITE )
    try:
        func( path )
    except OSError as err:
        logging.warning(f"could not remove {path}: {err}")

def cleardir(dir):
    """Empty `dir` in place: delete everything inside it but keep the folder itself."""
    for entry in os.scandir(dir):
        if not os.path.isdir(entry):
            # plain file (or anything that is not a directory)
            os.remove(entry)
            continue
        # sub-folder: remove the whole tree, routing failures through on_rm_error
        shutil.rmtree(entry, onerror=on_rm_error)

# Segment Experiment
Will process all images in the segmentation input folder and continuously output segmented masks into the segmentation output masks folder in GCP.

### Pull GCP folders to local environment

In [12]:
# Show the bucket-side models folder and the local destination folder that the
# gsutil copy cells use.
print(gcp_modelsfolder)
print(gsutil_dest_folder)
if process_logging: logging.debug("copying remote folders...");

optotaxisbucket/models/fastai
gcp_transfer


In [13]:
#models folder: copy the bucket's models folder into the local transfer folder
# (-m runs the copy in parallel, -r recurses into the folder, -n skips files that already exist locally)
!gsutil -m cp -r -n "gs://{gcp_modelsfolder}" "{gsutil_dest_folder}"
# print(s)

Skipping existing item: file://gcp_transfer/fastai/4x4_split_test.pkl
Skipping existing item: file://gcp_transfer/fastai/2x2_split_noaug_test.pkl
Skipping existing item: file://gcp_transfer/fastai/5x5_context_split_test.pkl
Skipping existing item: file://gcp_transfer/fastai/iter1_1_seg_redo.pkl
Skipping existing item: file://gcp_transfer/fastai/iter1_3_seg_redo.pkl
Skipping existing item: file://gcp_transfer/fastai/iter3_1_nuc.pkl
Skipping existing item: file://gcp_transfer/fastai/iter3_2_nuc_continue_test.pkl
Skipping existing item: file://gcp_transfer/fastai/iter3_2_nuc_scratch_test.pkl
Skipping existing item: file://gcp_transfer/fastai/iter3_4_nuc_continue.pkl
Skipping existing item: file://gcp_transfer/fastai/iter1_2_seg_redo.pkl
Skipping existing item: file://gcp_transfer/fastai/iter1_6_context_4_continue.pkl
Skipping existing item: file://gcp_transfer/fastai/seg_nuc_062719_s_1_2_110619_bleb.pkl
Skipping existing item: file://gcp_transfer/fastai/seg_cell_062719_s_1_2_110619_bleb.p

In [14]:
# %%capture
#segmentation images
with capture_output() as capture:
    s=!gsutil -m cp -r -n "gs://{gcp_segmentation_images}" "{gsutil_dest_folder}"
if s[0].startswith('CommandException'):
    raise Exception("Error: unable to copy images:\n"+"\n".join(s));
else:
    print("Images successfully copied")
files_changed = False;
stitching_complete = False

Images successfully copied


### Loading a trained model

In [15]:
# Number of CPU cores this process is allowed to run on; the segmentation cells
# size their worker pools with the same call.
print("available cpu count:",len(os.sched_getaffinity(0)));

available cpu count: 2


In [16]:
if process_logging: logging.debug("preparing models...");

def label_func(x):
    """Dummy function so that unpickling the exported learners works; never used."""
    return None;

def _load_torch_model(model_path):
    """Load the exported fastai learner at `model_path` and return its underlying
    torch model, moved to the GPU when one is available.

    SECURITY: `load_learner` unpickles the file, which can execute arbitrary code;
    only load model files from a trusted source.
    NOTE(review): the model is used for inference as loaded; confirm the exported
    learners are in eval mode, or call `.eval()` here.
    """
    loaded = load_learner(model_path).model;
    if torch.cuda.is_available():
        loaded = loaded.to("cuda");
    return loaded;

def _as_list(value):
    """Wrap a single item in a list; turn a list/tuple of items into a list."""
    return list(value) if isinstance(value,(list,tuple)) else [value];

model_outputs_dict = target_model_map[segmentation_target];
# One (models, output_folders) pair per entry of the selected target's dict.
process_instructions = [];
# Flat list of every local output folder; used for directory creation, zipping and verification.
output_dirs = [];

for model,outName in model_outputs_dict.items():
    # Keys and values may each be a single item or a list/tuple of items (the
    # settings cell builds tuples), so both shapes are normalised to lists.
    # Every path in a group is loaded individually.
    key = [_load_torch_model(m) for m in _as_list(model)];
    outName = [local_segmentation_output_masks/n for n in _as_list(outName)];
    output_dirs += outName;
    process_instructions.append((key,outName));

print("model loaded and instructions created");
        

model loaded and instructions created


In [17]:
# Display the first loaded model (first instruction -> its model list -> first model)
# as a quick check of what was loaded.
process_instructions[0][0][0]

DynamicUnet(
  (layers): ModuleList(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05

### Do pre-segmentation file management
Create output folder, find which files are remaining to segment, chunk files into batches for segmentation and upload

In [18]:
if process_logging: logging.debug("preparing filepaths...");
if not os.path.exists(local_segmentation_output_masks):
    os.makedirs(local_segmentation_output_masks,exist_ok=True);
for d in output_dirs:
    if not os.path.exists(d):
        os.makedirs(d,exist_ok=True);
        
model_batchsize = 32
## prepare files

#as far as I can tell through testing, this only matters for ensuring RAM usage stays low
batchSize = 256;
if not external_splitting_stitching:
    batchSize = max(int(batchSize / (x_slices*y_slices) / model_batchsize)*model_batchsize,1);
print("effective batchsize:",batchSize)


files = get_image_files_recursively(local_segmentation_images);

partial_upload = False;

#make destination folders to match input directory tree
for name in files:
    for d in output_dirs:
        os.makedirs(d/(Path(os.path.relpath(name,local_segmentation_images)).parent),exist_ok=True);

completed_masks = [];
for d in progress_bar(output_dirs):
    cmasks = [os.path.relpath(x,d) for x in get_image_files_recursively(d)];
    if not persistent_files or True:
        c = !gsutil ls -r "gs://{gcp_segmentation_output_masks/os.path.basename(d)}"
        if any([x.startswith("CommandException") for x in c]): c = [];
        # print(c)
        t = [os.path.relpath(x,f"gs://{gcp_segmentation_output_masks/os.path.basename(d)}") for x in c if x != '' and not x.endswith((':','.zip','.nd','.flag'))];
        # print(t);
        partial_upload = partial_upload or any([s for s in t if t not in cmasks]);
        cmasks += t;
    completed_masks.append(cmasks);
# print(completed_masks);
print("input files:",len(files))
total_files = len(files);
print("output files:",[len(set(cmasks)) for cmasks in completed_masks])
# print("output files:",[cmasks for cmasks in completed_masks])

files = [fi for fi in files if any([os.path.relpath(fi,local_segmentation_images) not in cmasks for cmasks in completed_masks])];
print("remaining files:",len(files))
files_remaining = len(files);

# print(batchSize);
fileChunks = list(chunked(files, chunk_sz=batchSize));

filestate_read = True;


effective batchsize: 1


input files: 6347
output files: [0, 0]
remaining files: 6347


## Segment Movie

In [19]:
if external_splitting_stitching:
    if process_logging: logging.debug("starting external_splitting segmentation...");
    stitching_complete = False;
    if not filestate_read:
        raise Exception("Error: file output state not updated since last processing, please run the \"pre-segmentation file management\" cell again")
    filestate_read = False
    cpus = len(os.sched_getaffinity(0))
    if files_remaining != 0:
        files_changed = True;
        p = progress_bar(fileChunks,files_remaining//batchSize+1); #makes a progress bar
        for n,chunk in enumerate(p):
            for models,out in process_instructions:
                outputs = [];
                for m in models:
                    res = m(chunk);
                    print(res);
                    raise Exception()
                    if torch.cuda.is_available():
                        dl = m.dls.test_dl(chunk,device=torch.device('cuda'));
                        dl.to('cuda');
                    else:
                        dl = m.dls.test_dl(chunk,num_workers=cpus);
                    _,_,dec_preds = m.get_preds(with_decoded=True,dl=dl);
                    outputs.append(dec_preds);
                if len(out) == len(outputs): #should also work for model splitting if can figure out how to make outputs the right shape
                    for p,put in zip(out,outputs):
                        for path,prediction in zip(chunk,put):
                            basename = os.path.relpath(path,local_segmentation_images); #preserve input directory structure
                            imsave(p/basename,prediction.numpy().astype('uint8'),check_contrast=False);
                else: #TODO: TEST AND FIX
                    print("ERROR: NOT IMPLEMENTED");
            if process_logging: 
                try:
                    time_remaining = format_time(p.pred_t - p.last_t + p.start_t);
                    logging.debug(f"chunk {n} complete; time remaining: {time_remaining}");
                except Exception as e:
                    logging.error(e);
                    logging.debug(f"chunk {n} complete");
                    
                
            if not persistent_files:
                with capture_output() as capture:
                    !gsutil -m rsync -r "{local_segmentation_output_masks}" "gs://{gcp_segmentation_output_masks}"
    if not stitching_complete:
        print("image segmentation process complete, finishing up...");
        with open(local_segmentation_output_masks/'segmentation_complete.flag','w') as f:
            pass;
        #zip files if required
        if zip_output:
            if not persistent_files or (partial_upload and files_changed):
                #download external files
                !gsutil -m rsync -r "gs://{gcp_segmentation_output_masks}" "{local_segmentation_output_masks}"
            for folder in output_dirs:
                if files_changed or not os.path.exists(folder.with_suffix('.zip')):
                    with capture_output() as capture:
                        !(cd "{folder.parent}" && zip -r "{folder.with_suffix('.zip').name}" "{folder.name}")
                        !gsutil -m cp "{folder.with_suffix('.zip')}" "gs://{gcp_segmentation_output_masks/folder.with_suffix('.zip').name}"
        #this is actually insane. The GCP bucket has to have at least one file in the parent directory
        #or it just like forgets to put the innermost directory. I have no idea why, but this fixes it
        with open("foothold.txt",'w') as f:
            pass;
        # with capture_output() as capture:
        #     !gsutil cp foothold.txt "gs://{gcp_segmentation_output_masks.parent}/foothold.txt"
        with capture_output() as capture:
            !gsutil -m rsync -r "{local_segmentation_output_masks}" "gs://{gcp_segmentation_output_masks}"
        print("folder processing complete");
        stitching_complete = True;
        if process_logging: logging.debug("segmentation fully complete");
        print("segmentation complete!");
    else:
        print("segmentation already complete!");
        stitching_complete = True;

In [20]:
if not external_splitting_stitching:
    if process_logging: logging.debug("starting nonexternal_splitting segmentation");
    if not filestate_read and False:
        raise Exception("Error: file output state not updated since last processing, please run the \"pre-segmentation file management\" cell again")
    filestate_read = False
    from IPython.utils.io import capture_output
    from multiprocessing import Pool
    cpus = len(os.sched_getaffinity(0))
    
    def split_image(name):
        try:
            im = imread(name);
        except:
            raise IOError(f"Error while attempting to read image {name}");
        if auto_rescale:
            im = rescale_intensity(im);
        assert isinstance(im,np.ndarray);

        M = (im.shape[0]-context_bounds[0]-context_bounds[2]-crop[0]-crop[2])/y_slices;
        N = (im.shape[1]-context_bounds[1]-context_bounds[3]-crop[1]-crop[3])/x_slices;

        if int(M) != M or int(N) != N:
            raise Exception(f"ERROR: Mask with size {im.shape[:2]} cannot be sliced into {x_slices} columns and {y_slices} rows\nwith context bounds of {context_bounds}; {M} and {N} not integers");
        else:
            M = int(M)
            N = int(N)
            im = (im/256).astype('uint8');
            im = np.stack((im,im,im),axis=2);
            tiles = [im[y-context_bounds[0]:y+M+context_bounds[2],x-context_bounds[1]:x+N+context_bounds[3]] 
                    for y in range(context_bounds[0]+crop[0],im.shape[0]-crop[0]-crop[2]-context_bounds[0]-context_bounds[2],M) 
                    for x in range(context_bounds[1]+crop[1],im.shape[1]-crop[1]-crop[3]-context_bounds[1]-context_bounds[3],N)];
            return tiles;
    
    def stitch_image(tiles,outName):
        stitchMasks = []
        for i,m in enumerate(tiles):
            m = m.numpy().astype('uint8')
            y = i // x_slices;
            x = i % x_slices;
            imBounds = [crop[0]+context_bounds[0] if y != 0 else 0,m.shape[0]-crop[2]-context_bounds[2] if y != y_slices-1 else m.shape[0],crop[1]+context_bounds[1] if x != 0 else 0 ,m.shape[1]-crop[3]-context_bounds[3] if x != x_slices - 1 else m.shape[1]];
            stitchMasks.append(m[imBounds[0]:imBounds[1],imBounds[2]:imBounds[3]]);
        stitched = np.concatenate([np.concatenate(stitchMasks[i*x_slices:(i+1)*x_slices],axis=1) for i in range(y_slices)]);
        imsave(outName,stitched,check_contrast=False);

    if files_remaining != 0:
        files_changed = True;
        stitching_complete = False;
        prog = progress_bar(fileChunks,files_remaining//batchSize+1); #makes a progress bar
        for n,chunk in enumerate(prog):
            basenames = [os.path.relpath(c,local_segmentation_images) for c in chunk]; #preserve input directory structure
            # print(basenames);
            with Pool(cpus) as p:
                chunk = Tensor(np.array([item for sublist in p.map(split_image,chunk) for item in sublist])).to('cuda' if torch.cuda.is_available() else "cpu").permute(0,3,1,2);
            for models,out in process_instructions:
                outputs = [];
                for m in models:
                    res = m(chunk);
                    outputs.append(res.to("cpu").detach());
                if len(out) == len(outputs): #should also work for model splitting if can figure out how to make outputs the right shape
                    for path,put in zip(out,outputs):
                        grouped_preds = [put[i:i + x_slices*y_slices] for i in range(0, len(put), x_slices*y_slices)]
                        saveNames = [path/b for b in basenames];
                        with Pool(cpus) as p:
                            p.starmap(stitch_image,zip(grouped_preds,saveNames));
                else:
                    print("ERROR: NOT IMPLEMENTED");
            if not persistent_files:
                with capture_output() as capture:
                    !gsutil -m rsync -r "{local_segmentation_output_masks}" "gs://{gcp_segmentation_output_masks}"
            if process_logging: 
                try:
                    time_remaining = format_time(prog.pred_t - prog.last_t + prog.start_t);
                    logging.debug(f"chunk {n} complete; time remaining: {time_remaining}");
                except Exception as e:
                    logging.error(e);
                    logging.debug(f"chunk {n} complete");
    if not stitching_complete:
        print("image segmentation process complete, finishing up...");
        with open(local_segmentation_output_masks/'segmentation_complete.flag','w') as f:
            pass;
        #zip files if required
        if zip_output:
            if not persistent_files or (partial_upload and files_changed):
                print("downloading remote files to zip...");
                #download external files
                with capture_output() as capture:
                    !gsutil -m rsync -r "gs://{gcp_segmentation_output_masks}" "{local_segmentation_output_masks}"
            print("zipping files...")
            for folder in progress_bar(output_dirs):
                if files_changed or not os.path.exists(folder.with_suffix('.zip')):
                    with capture_output() as capture:
                        !(cd "{folder.parent}" && zip -r "{folder.with_suffix('.zip').name}" "{folder.name}")
                        !gsutil -m cp "{folder.with_suffix('.zip')}" "gs://{gcp_segmentation_output_masks/folder.with_suffix('.zip').name}"
        print("copying files to remote...");
        with capture_output() as capture:
            !gsutil -m rsync -r "{local_segmentation_output_masks}" "gs://{gcp_segmentation_output_masks}";
        stitching_complete = True;
        if process_logging: logging.debug("segmentation complete, awaiting integrity verification");
        print("segmentation complete!");
    else:
        print("segmentation already complete!");
        stitching_complete = True;



OutOfMemoryError: ignored

## Verify Segmentation Completion

In [None]:
#verify destination file integrity
#NOTE that this only compares the *number* of input files to the *number* of output files;
#filename mishaps could break this, as could errors when uploading the input files themselves.
original_files = !gsutil ls -r "gs://{gcp_segmentation_images}"
original_files = [os.path.relpath(x,f"gs://{gcp_segmentation_images}") for x in original_files if x and not x.endswith(('.flag','.nd','.zip',':'))];
# print(original_files);

#check output folder integrity
for d in progress_bar(output_dirs):
    c = !gsutil ls -r "gs://{gcp_segmentation_output_masks/os.path.basename(d)}"
    if c[0].startswith('CommandException'):
        c = [];
    c = [x for x in c if x and not x.endswith((':','.flag','.zip','.nd')) and os.path.relpath(x,f"gs://{gcp_segmentation_output_masks/os.path.basename(d)}") in original_files];
    if len(c) < total_files:
        stitching_complete = False;
        if process_logging: logging.error(f"Error in segmentation or uploading: output dir {gcp_segmentation_output_masks/os.path.basename(d)} incomplete (expected {total_files} objects but got {len(c)}, please run file prep and segmentation cells again to ensure output integrity");
        print(f"Error in segmentation or uploading: output dir {gcp_segmentation_output_masks/os.path.basename(d)} incomplete (expected {total_files} objects but got {len(c)}, please run file prep and segmentation cells again to ensure output integrity");
#check zip file integrity, if applicable
if zip_output:
    ziplist = !gsutil ls -r "gs://{gcp_segmentation_output_masks}"
    ziplist = [os.path.basename(x) for x in ziplist if x.endswith('.zip')];
    for d in output_dirs:
        if d.with_suffix('.zip').name not in ziplist:
            stitching_complete = False;
            if process_logging: logging.error(f"Error in segmentation or uploading: output zip file {d.with_suffix('.zip').name} missing from output directory. Please run file prep and segmentation cells again to ensure output integrity");
            print(f"Error in segmentation or uploading: output zip file {d.with_suffix('.zip').name} missing from output directory. Please run file prep and segmentation cells again to ensure output integrity")
if stitching_complete:
    if logging: logging.debug("Segmentation successful! Output file integrity verified.");
    print("Segmentation successful! Output file integrity verified.")
else:
    print("Error: segmentation was not complete. Please run file prep and segmentation cells again to ensure output integrity");

In [None]:
# Empty the local transfer folder (downloaded models, input images and output masks),
# but only when stitching_complete is still True, i.e. the verification cell found no problems.
if stitching_complete:
    cleardir(GCP_transfer_folder)