In [3]:
# Cell 1: Generate Sparse GT Masks
import os
import sys
import numpy as np
import h5py
from scipy.io import savemat, loadmat
from scipy import sparse
import glob

# Put <cwd>/suns at the front of sys.path so that `config` is imported from there.
# NOTE: this relies on the notebook's working directory being the folder that contains 'suns'.
sys.path.insert(0, os.path.join(os.getcwd(), 'suns'))
from config import DATAFOLDER_SETS, ACTIVE_EXP_SET

# Report which dataset the config currently selects and where its data lives.
print(f"Active dataset: {ACTIVE_EXP_SET}")
print(f"Data folder: {DATAFOLDER_SETS[ACTIVE_EXP_SET]}")

# Set the path of the 'GT Masks' folder, which contains the manual labels in 3D arrays.
# The folder is located under the data folder of the active dataset taken from config.
data_folder = DATAFOLDER_SETS[ACTIVE_EXP_SET]
dir_Masks = os.path.join(data_folder, 'GT Masks')

print(f"GT Masks directory: {dir_Masks}")

# %%
# Collect every FinalMasks .mat file in the GT Masks folder. The pattern also matches
# the '*_sparse.mat' files written by a previous run, so the count printed below
# includes them; they are skipped inside the loop.
dir_all = glob.glob(os.path.join(dir_Masks,'*FinalMasks*.mat'))
print(f"Found {len(dir_all)} FinalMasks files")

for path_name in dir_all:
    file_name = os.path.split(path_name)[1]
    if '_sparse' in file_name:
        continue # output of an earlier conversion, not an input
    print(f"Processing: {file_name}")
    try: # If file_name is saved in '-v7.3' format
        # The context manager closes the file even when reading 'FinalMasks' fails,
        # so no HDF5 handle is leaked on a malformed file.
        with h5py.File(path_name,'r') as mat:
            FinalMasks = np.array(mat['FinalMasks']).astype('bool')
    except OSError: # If file_name is not saved in '-v7.3' format
        mat = loadmat(path_name)
        # Reverse the axis order so that both branches yield the
        # (ncells, Ly, Lx) layout that is unpacked below.
        FinalMasks = np.array(mat["FinalMasks"]).transpose([2,1,0]).astype('bool')

    (ncells,Ly,Lx) = FinalMasks.shape
    print(f"  Shape: {FinalMasks.shape} (ncells={ncells}, Ly={Ly}, Lx={Lx})")
    # Flatten every mask to one vector and transpose: rows are pixels, columns are cells.
    GTMasks_2=sparse.coo_matrix(FinalMasks.reshape(ncells,Lx*Ly).T)
    # The output is written next to the input: '<name>.mat' -> '<name>_sparse.mat'.
    sparse_path = path_name[:-4]+'_sparse.mat'
    savemat(sparse_path, {'GTMasks_2':GTMasks_2}, do_compression=True)
    print(f"  Created sparse version: {file_name[:-4]+'_sparse.mat'}")

print("Sparse GT generation completed!")


Active dataset: data
Data folder: /gpfs/data/shohamlab/nicole/code/Shallow-UNet-Neuron-Segmentation_SUNS/demo/data
GT Masks directory: /gpfs/data/shohamlab/nicole/code/Shallow-UNet-Neuron-Segmentation_SUNS/demo/data/GT Masks
Found 8 FinalMasks files
Processing: FinalMasks_YST_part22.mat
  Shape: (79, 120, 88) (ncells=79, Ly=120, Lx=88)
  Created sparse version: FinalMasks_YST_part22_sparse.mat
Processing: FinalMasks_YST_part11.mat
  Shape: (75, 120, 88) (ncells=75, Ly=120, Lx=88)
  Created sparse version: FinalMasks_YST_part11_sparse.mat
Processing: FinalMasks_YST_part12.mat
  Shape: (99, 120, 88) (ncells=99, Ly=120, Lx=88)
  Created sparse version: FinalMasks_YST_part12_sparse.mat
Processing: FinalMasks_YST_part21.mat
  Shape: (89, 120, 88) (ncells=89, Ly=120, Lx=88)
  Created sparse version: FinalMasks_YST_part21_sparse.mat
Sparse GT generation completed!


In [None]:
# Cell 2: Train CNN and Optimize Parameters
import sys
import os
import random
import time
import glob
import numpy as np
import math
import h5py
from scipy.io import savemat, loadmat
import multiprocessing as mp

# Search the "suns" folder first when resolving the project imports that follow.
sys.path.insert(0, 'suns')
# Backend and GPU selection, set ahead of the TensorFlow import further down.
os.environ.update({
    'KERAS_BACKEND': 'tensorflow',
    'CUDA_VISIBLE_DEVICES': '0', # Index of the GPU to use. '-1' uses only CPU.
})

from PreProcessing.preprocessing_functions import preprocess_video, find_dataset
from PreProcessing.generate_masks import generate_masks
from train_CNN_params import train_CNN, parameter_optimization_cross_validation
from config import DATAFOLDER_SETS, ACTIVE_EXP_SET, EXP_ID_SETS, OUTPUT_FOLDER, RATE_HZ, MAG

import tensorflow as tf
# Major TensorFlow version. Split on '.' instead of taking the first character so
# that a multi-digit major version (e.g. "10.1.0") is not truncated to its first digit.
tf_version = int(tf.__version__.split('.')[0])
if tf_version == 1:
    # TF1 API: let the session allocate GPU memory on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config = config)
else: # tf_version == 2:
    # TF2 API: enable on-demand memory growth for every visible GPU.
    gpus = tf.config.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

# Summarize the configuration of the active dataset before anything is computed.
print(f"Starting CNN training for dataset: {ACTIVE_EXP_SET}")
print(f"Experiment IDs: {EXP_ID_SETS[ACTIVE_EXP_SET]}")
print(f"Data folder: {DATAFOLDER_SETS[ACTIVE_EXP_SET]}")
print(f"Output folder: {OUTPUT_FOLDER[ACTIVE_EXP_SET]}")
print(f"Rate: {RATE_HZ[ACTIVE_EXP_SET]} Hz, Magnification: {MAG[ACTIVE_EXP_SET]}")

#-------------- Start user-defined parameters --------------#
# %% set folders
# file names of the ".h5" files storing the raw videos.
list_Exp_ID = EXP_ID_SETS[ACTIVE_EXP_SET]
# folder of the raw videos
dir_video = DATAFOLDER_SETS[ACTIVE_EXP_SET]
# Prefix of the ".mat" files storing the GT masks in sparse 2D matrices.
# NOTE: this is NOT a directory. It ends with the file-name prefix 'FinalMasks_', so a
# full file name is formed by appending to it as a string, not by joining a path component.
dir_GTMasks = os.path.join(dir_video, 'GT Masks', 'FinalMasks_')

# %% set video parameters
rate_hz = RATE_HZ[ACTIVE_EXP_SET] # frame rate of the video
Mag = MAG[ACTIVE_EXP_SET] # spatial magnification compared to ABO videos (0.785 um/pixel). # Mag = 0.785 / pixel_size

# %% set the range of post-processing hyper-parameters to be optimized in
# Candidate minimum neuron areas (unit: pixels in ABO videos). Must be in ascending order.
list_minArea = [*range(30, 85, 5)]
# Average area of a typical neuron (unit: pixels in ABO videos).
list_avgArea = [177]
# Candidate uint8 thresholds of the probability map
# (uint8 value = float probability * 256 - 1).
list_thresh_pmap = [*range(130, 235, 10)]
# Threshold to binarize the neuron masks: for each mask, values higher than
# "thresh_mask" times the maximum value of that mask are set to one.
thresh_mask = 0.5
# Maximum COM distance of two masks to be considered the same neuron
# in the initial merging (unit: pixels in ABO videos).
thresh_COM0 = 2
# Candidate maximum COM distances of two masks to be considered the same neuron
# (unit: pixels in ABO videos). The elements stay numpy integers.
list_thresh_COM = [*np.arange(4, 9)]
# Minimum IoU of two masks to be considered the same neuron.
list_thresh_IOU = [0.5]
# Candidate minimum numbers of consecutive frames for a neuron to count as active.
list_cons = [*range(1, 8)]

# %% set pre-processing parameters
gauss_filt_size = 50*Mag # standard deviation of the spatial Gaussian filter in pixels
num_median_approx = 1000 # number of frames used to calculate median and median-based standard deviation
# File name storing the temporal filter kernel.
# NOTE: relative path, resolved against the current working directory.
filename_TF_template = 'demo/YST_spike_tempolate.h5'
# Read the kernel inside a context manager so the HDF5 handle is released even if
# the 'filter_tempolate' dataset is missing or cannot be converted.
with h5py.File(filename_TF_template,'r') as h5f:
    Poisson_filt = np.array(h5f['filter_tempolate']).squeeze().astype('float32')
# Keep only the samples above exp(-1), then normalize the kernel to unit sum.
Poisson_filt = Poisson_filt[Poisson_filt>np.exp(-1)] # temporal filter kernel
Poisson_filt = Poisson_filt/Poisson_filt.sum()

# %% set training parameters
# SNR threshold used to determine when neurons are active
thred_std = 3
# Number of frames per video used for training
num_train_per = 2400
# Number of epochs used for training
NO_OF_EPOCHS = 200
# Batch size in CNN inference
batch_size_eval = 100
# List of SNR thresholds used to determine when neurons are active (here only thred_std)
list_thred_ratio = [thred_std]

# %% Set processing options
# Pre-processing switches
useSF = False         # spatial filtering
useTF = True          # temporal filtering
useSNR = True         # pixel-by-pixel SNR normalization filtering
med_subtract = False  # subtract the spatial median of every frame before temporal filtering
prealloc = False      # pre-allocate memory space for large variables
# Post-processing / workflow switches
useWT = False         # additional watershed
load_exist = False    # reuse temp files already saved in the folders
use_validation = True # use a validation set outside the training set
useMP = True          # use multiprocessing to speed up
# Batch size for training
BATCH_SIZE = 20
# Cross-validation strategy. Can be "leave_one_out", "train_1_test_rest", or "use_all"
cross_validation = "leave_one_out"
# Parameters of the loss function
Params_loss = dict(DL=1, BCE=20, FL=0, gamma=1, alpha=0.25)
#-------------- End user-defined parameters --------------#

dir_parent = os.path.join(dir_video, OUTPUT_FOLDER[ACTIVE_EXP_SET]) # folder to save all the processed data
dir_network_input = os.path.join(dir_parent, 'network_input') # folder of the SNR videos
dir_mask = os.path.join(dir_parent, 'temporal_masks({})'.format(thred_std)) # folder to save the temporal masks
weights_path = os.path.join(dir_parent, 'Weights') # folder to save the trained CNN
training_output_path = os.path.join(dir_parent, 'training output') # folder to save the loss functions during training
dir_output = os.path.join(dir_parent, 'output_masks') # folder to save the optimized hyper-parameters
dir_temp = os.path.join(dir_parent, 'temp') # temporary folder to save the F1 with various hyper-parameters

# Create directories if they don't exist.
# exist_ok=True replaces the separate exists() check, so two runs starting at the same
# time cannot fail between the check and the creation. A path that exists but is not a
# directory now raises instead of being silently accepted.
# dir_mask is not created here; presumably the mask-generation step creates it - verify.
for dir_path in [dir_network_input, weights_path, training_output_path, dir_output, dir_temp]:
    os.makedirs(dir_path, exist_ok=True)

print(f"Created output directories in: {dir_parent}")

# Continue with the rest of the training script...
# (Note: This is a simplified version for testing - you may want to run the full script separately)
print("CNN training setup completed! Run the full demo_train_CNN_params.py script for complete training.")


In [None]:
# Cell 3: Test Batch Processing
import os
import numpy as np
import time
import h5py
import sys

from scipy.io import savemat, loadmat
import multiprocessing as mp

# Search the "suns" folder first when resolving the project imports that follow.
sys.path.insert(0, 'suns')
# Backend and GPU selection, set ahead of the TensorFlow import further down.
os.environ.update({
    'KERAS_BACKEND': 'tensorflow',
    'CUDA_VISIBLE_DEVICES': '0', # Index of the GPU to use. '-1' uses only CPU.
})

from PostProcessing.evaluate import GetPerformance_Jaccard_2
from run_suns import suns_batch
from config import DATAFOLDER_SETS, ACTIVE_EXP_SET, EXP_ID_SETS, OUTPUT_FOLDER, RATE_HZ, MAG

import tensorflow as tf
# Major TensorFlow version. Split on '.' instead of taking the first character so
# that a multi-digit major version (e.g. "10.1.0") is not truncated to its first digit.
tf_version = int(tf.__version__.split('.')[0])
if tf_version == 1:
    # TF1 API: let the session allocate GPU memory on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config = config)
else: # tf_version == 2:
    # TF2 API: enable on-demand memory growth for every visible GPU.
    gpus = tf.config.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

# Summarize the configuration of the active dataset before anything is computed.
print(f"Starting batch testing for dataset: {ACTIVE_EXP_SET}")
print(f"Experiment IDs: {EXP_ID_SETS[ACTIVE_EXP_SET]}")
print(f"Data folder: {DATAFOLDER_SETS[ACTIVE_EXP_SET]}")
print(f"Output folder: {OUTPUT_FOLDER[ACTIVE_EXP_SET]}")

#-------------- Start user-defined parameters --------------#
# %% set folders
# file names of the ".h5" files storing the raw videos.
list_Exp_ID = EXP_ID_SETS[ACTIVE_EXP_SET]
# folder of the raw videos
dir_video = DATAFOLDER_SETS[ACTIVE_EXP_SET]
# Prefix of the ".mat" files storing the GT masks in sparse 2D matrices.
# NOTE: this is NOT a directory. It ends with the file-name prefix 'FinalMasks_', so a
# full file name is formed by appending to it as a string, not by joining a path component.
dir_GTMasks = os.path.join(dir_video, 'GT Masks', 'FinalMasks_')

# %% set video parameters
rate_hz = RATE_HZ[ACTIVE_EXP_SET] # frame rate of the video
Mag = MAG[ACTIVE_EXP_SET] # spatial magnification compared to ABO videos (0.785 um/pixel). # Mag = 0.785 / pixel_size

# %% set pre-processing parameters
gauss_filt_size = 50*Mag # standard deviation of the spatial Gaussian filter in pixels
num_median_approx = 1000 # number of frames used to calculate median and median-based standard deviation
# File name storing the temporal filter kernel.
# NOTE: relative path, resolved against the current working directory.
filename_TF_template = 'demo/YST_spike_tempolate.h5'
# Read the kernel inside a context manager so the HDF5 handle is released even if
# the 'filter_tempolate' dataset is missing or cannot be converted.
with h5py.File(filename_TF_template,'r') as h5f:
    Poisson_filt = np.array(h5f['filter_tempolate']).squeeze().astype('float32')
# Keep only the samples above exp(-1), then normalize the kernel to unit sum.
Poisson_filt = Poisson_filt[Poisson_filt>np.exp(-1)] # temporal filter kernel
Poisson_filt = Poisson_filt/Poisson_filt.sum()

# %% Set processing options
# Pre-processing switches
useSF = False         # spatial filtering
useTF = True          # temporal filtering
useSNR = True         # pixel-by-pixel SNR normalization filtering
med_subtract = False  # subtract the spatial median of every frame before temporal filtering
prealloc = True       # pre-allocate memory space for large variables
# Batch size in CNN inference
batch_size_eval = 200
# Additional watershed in post-processing
useWT = False
# Display information about running time.
# NOTE: in a notebook this name shadows IPython's display() helper for the rest of the session.
display = True
#-------------- End user-defined parameters --------------#

# All result folders live under <data folder>/<output folder of the active dataset>.
dir_parent = os.path.join(dir_video, OUTPUT_FOLDER[ACTIVE_EXP_SET]) # folder to save all the processed data
dir_output = os.path.join(dir_parent, 'output_masks') # folder to save the segmented masks and the performance scores
# Same folder as dir_output: the optimized hyper-parameters are read from 'output_masks' too.
dir_params = os.path.join(dir_parent, 'output_masks') # folder of the optimized hyper-parameters
weights_path = os.path.join(dir_parent, 'Weights') # folder of the trained CNN

# Echo the resolved locations so a wrong dataset selection is visible immediately.
print(f"Output directories:")
print(f"  Parent: {dir_parent}")
print(f"  Output: {dir_output}")
print(f"  Params: {dir_params}")
print(f"  Weights: {weights_path}")

# Check if required files exist: one raw video and one sparse GT file per experiment.
print(f"\nChecking for required files:")
for exp_id in list_Exp_ID:
    video_file = os.path.join(dir_video, f"{exp_id}.h5")
    # dir_GTMasks is a file-name prefix ending in 'FinalMasks_', not a directory, so the
    # experiment ID is appended by string concatenation. Joining it as a path component
    # would look for '.../FinalMasks_/<exp_id>_sparse.mat', which never exists, and the
    # check would always report the GT file as missing.
    gt_file = dir_GTMasks + f"{exp_id}_sparse.mat"
    print(f"  {exp_id}:")
    print(f"    Video: {video_file} - {'✓' if os.path.exists(video_file) else '✗'}")
    print(f"    GT: {gt_file} - {'✓' if os.path.exists(gt_file) else '✗'}")

# Trained models: list the '.h5' files in the weights folder, if that folder exists.
print(f"\nChecking for trained models in {weights_path}:")
if not os.path.exists(weights_path):
    print(f"  Weights directory does not exist - training required first")
else:
    model_files = [entry for entry in os.listdir(weights_path) if entry.endswith('.h5')]
    print(f"  Found {len(model_files)} model files: {model_files}")

# Optimization results: list the files whose names start with 'Optimization_Info'.
print(f"\nChecking for optimization results in {dir_params}:")
if not os.path.exists(dir_params):
    print(f"  Params directory does not exist - training required first")
else:
    opt_files = [entry for entry in os.listdir(dir_params) if entry.startswith('Optimization_Info')]
    print(f"  Found {len(opt_files)} optimization files: {opt_files}")

# This cell only verifies prerequisites; it does not run the segmentation itself.
print("\nBatch testing setup completed! Run the full demo_test_batch.py script for complete testing.")


In [None]:
# Cell 4: Switch Dataset Configuration
# To switch between datasets, modify the ACTIVE_EXP_SET in config.py
# Then restart the kernel and run all cells again

import sys
sys.path.insert(0, 'suns')
from config import DATAFOLDER_SETS, ACTIVE_EXP_SET, EXP_ID_SETS, OUTPUT_FOLDER, RATE_HZ, MAG

# Settings of the dataset that config.py currently marks as active.
print("Current Configuration:")
print(f"  Active Dataset: {ACTIVE_EXP_SET}")
print(f"  Data Folder: {DATAFOLDER_SETS[ACTIVE_EXP_SET]}")
print(f"  Experiment IDs: {EXP_ID_SETS[ACTIVE_EXP_SET]}")
print(f"  Output Folder: {OUTPUT_FOLDER[ACTIVE_EXP_SET]}")
print(f"  Frame Rate: {RATE_HZ[ACTIVE_EXP_SET]} Hz")
print(f"  Magnification: {MAG[ACTIVE_EXP_SET]}")

# Every dataset key of DATAFOLDER_SETS with its entry in each of the other config tables.
# Each key is looked up in all five tables, so a key missing from one raises KeyError.
print("\nAvailable Datasets:")
for dataset in DATAFOLDER_SETS.keys():
    print(f"  {dataset}:")
    print(f"    Data: {DATAFOLDER_SETS[dataset]}")
    print(f"    Experiments: {EXP_ID_SETS[dataset]}")
    print(f"    Output: {OUTPUT_FOLDER[dataset]}")
    print(f"    Rate: {RATE_HZ[dataset]} Hz")
    print(f"    Mag: {MAG[dataset]}")

# Instructions for switching datasets.
# NOTE: the dataset names in step 2 are hard-coded in the message; the list printed
# above is the authoritative set of available keys.
print("\nTo switch datasets:")
print("1. Edit suns/config.py")
print("2. Change ACTIVE_EXP_SET = 'data' to ACTIVE_EXP_SET = 'line3_dataset' (or vice versa)")
print("3. Restart kernel and run all cells")
