# Generate masks, check masks
___
### 1. Organize images into npy stacks.
### 2. Preprocess images for object detection.
### 3. Generate masks.
### 4. Check masks

In [None]:
from spacr.plot import preprocess_generate_masks
%matplotlib inline

settings = {'metadata_type':'cellvoyager',
            'custom_regex':None,
            'experiment':'ubiquitin',
            'channels':[0,1,2,3],
            'nucleus_channel':0,
            'nucleus_background':500,
            'nucleus_Signal_to_noise':2,
            'nucleus_CP_prob':0,
            'remove_background_nucleus':False,
            'cell_channel':3,
            'cell_background':100,
            'cell_Signal_to_noise':10,
            'cell_CP_prob':-1,
            'remove_background_cell':False,
            'pathogen_model':None,
            'pathogen_channel':1,
            'pathogen_background':200,
            'pathogen_Signal_to_noise':5,
            'pathogen_CP_prob':-1,
            'remove_background_pathogen':True,
            'magnification':40,
            'save':True,
            'preprocess':True,
            'masks':True,
            'batch_size':50,
            'filter':False,
            'merge_pathogens':True,
            'plot':False,
            'adjust_cells':True,
            'test_mode':False,
            'test_images':10,
            'random_test':True}

src = 'path to folder with images'
spacr.core.preprocess_generate_masks(src, settings=settings)

In [None]:
from spacr.plot import plot_images_and_arrays
%matplotlib inline

# NOTE(review): the second element is a literal Ellipsis object, presumably a
# stand-in for further folder paths -- confirm and replace before running.
folder_paths = [
    'path to folder that origionally contained images',
    ...,
]
plot_images_and_arrays(folder_paths)

In [None]:
from spacr.plot import plot_merged
%matplotlib inline

# Options for plot_merged, built with keyword syntax; insertion order is the
# same as in the literal form.
settings = dict(
    include_noninfected=True,
    include_multiinfected=True,
    include_multinucleated=True,
    remove_background=False,
    filter_min_max=[[0, 1000000], [0, 1000000], [0, 1000000]],
    channel_dims=[0, 1, 2, 3],
    backgrounds=[100, 100, 100, 100],
    cell_mask_dim=4,
    nucleus_mask_dim=5,
    pathogen_mask_dim=6,
    overlay_chans=[0, 2, 3],
    outline_thickness=3,
    outline_color='gbr',
    overlay=True,
    normalization_percentiles=[2, 98],
    normalize=False,
    print_object_number=True,
    nr=1,
    figuresize=20,
    cmap='inferno',
    verbose=True,
)

# Placeholder string: replace with the folder holding the merged images and masks.
src = 'path to folder with merged images and masks'
# Hand the folder and the settings dict to plot_merged; its return value is kept in fig.
fig = plot_merged(src, settings)

# Generate measurements, single-cell images, and ML scores
___
### 1. Collect measurement data and save to database, Generate single object images.
### 2. Generate ML scores

In [None]:
# Options for measure_crop, built with keyword syntax; insertion order is the
# same as in the literal form.
settings = dict(
    input_folder='path to folder with merged images and masks',
    channels=[0, 1, 2, 3],
    cell_mask_dim=4,
    cell_min_size=2000,
    nucleus_mask_dim=5,
    nucleus_min_size=1000,
    pathogen_mask_dim=6,
    pathogen_min_size=400,
    cytoplasm_min_size=0,
    save_png=True,
    crop_mode=['cell'],
    use_bounding_box=False,
    png_size=[[224, 224]],
    normalize=False,
    png_dims=[0, 1, 2],
    normalize_by='fov',  # 'fov' or 'png'
    save_measurements=True,
    plot=False,
    plot_filtration=False,
    include_uninfected=False,
    test_mode=False,
)

# The call used the dotted path spacr.measure.measure_crop, but no visible cell
# imports `spacr` itself (only `from spacr.<module> import ...`), so the dotted
# lookup raises NameError. Import the function explicitly, as the other cells do.
from spacr.measure import measure_crop

measure_crop(settings)

In [None]:
from spacr.core import generate_ml_scores

# Options for generate_ml_scores, built with keyword syntax; insertion order is
# the same as in the literal form.
settings = dict(
    model_type='xgboost',
    heatmap_feature='predictions',
    grouping='mean',
    min_max='allq',
    cmap='viridis',
    channel_of_interest=3,
    minimum_cell_count=25,
    n_estimators=100,
    test_size=0.2,
    location_column='col',
    positive_control='c2',
    negative_control='c1',
    exclude=None,
    n_repeats=10,
    top_features=30,
    remove_low_variance_features=True,
    remove_highly_correlated_features=True,
    n_jobs=-1,
    verbose=True,
)

# Placeholder string: replace with a real folder path before running.
src = 'path to folder that origionally contained images'
# Hand the folder and the settings dict to generate_ml_scores; the return value is kept in results.
results = generate_ml_scores(src, settings)

# Auxiliary functions
___
### 1. annotate_app_v2: Annotate images after exporting measurements and saving PNGs. Annotations are saved to measurements.db in the table png_list.
### 2. analyze_recruitment: Read the tables in measurements.db to a pd.DataFrame, annotate conditions and evaluate recruitment.
### 3. analyze_plaques: Generate a csv file with the area and count of objects in masks

In [None]:
from spacr.app_annotate import annotate

# Options for annotate, built with keyword syntax; insertion order is the same
# as in the literal form.
settings = dict(
    src='path to folder that origionally contained images',
    image_type='cell_png',
    channels=['r', 'g', 'b'],
    geom="3200x2000",
    img_size=(200, 200),
    rows=10,
    columns=18,
    annotation_column='recruited_test',
    normalize=False,
    percentiles=(2, 98),
    measurement=None,
    threshold=None,
)

# Call annotate with the settings dict built in this cell.
annotate(settings)

In [None]:
# Calculate recruitment.
from spacr.core import analyze_recruitment
%matplotlib inline

# Options assembled for analyze_recruitment, built with keyword syntax;
# insertion order is the same as in the literal form.
metadata_settings = dict(
    target='protein',
    cell_types=['HeLa'],
    cell_plate_metadata=None,
    pathogen_types=['pathogen_1', 'pathogen_2'],
    pathogen_plate_metadata=[['c1', 'c2', 'c3'], ['c4', 'c5', 'c6']],
    treatments=['cm', 'lovastatin'],
    treatment_plate_metadata=[['r1', 'r2', 'r3'], ['r4', 'r5', 'r6']],
    metadata_types=['col', 'col', 'row'],
    channel_dims=[0, 1, 2, 3],
    cell_chann_dim=3,
    cell_mask_dim=4,
    nucleus_chann_dim=0,
    nucleus_mask_dim=5,
    pathogen_chann_dim=2,
    pathogen_mask_dim=6,
    channel_of_interest=2,
    plot=True,
    plot_nr=1,
    plot_control=True,
    figuresize=20,
    remove_background=True,
    backgrounds=100,
    include_noninfected=False,
    include_multiinfected=2,
    include_multinucleated=False,
    cells_per_well=100,
    pathogen_size_range=[0, 100000],
    nucleus_size_range=[0, 100000],
    cell_size_range=[0, 100000],
    pathogen_intensity_range=[0, 100000],
    nucleus_intensity_range=[0, 100000],
    cell_intensity_range=[0, 100000],
    target_intensity_min=0,
)

# Placeholder string: replace with a real folder path before running.
src = 'path to folder that origionally contained images'

# Pass the dict defined in this cell (metadata_settings). The call previously
# used `settings`, which this cell never defines: on a fresh kernel that is a
# NameError, and otherwise it silently reuses a dict left over from another cell.
dfs = analyze_recruitment(src, metadata_settings)

In [None]:
from spacr.core import analyze_plaques

# Placeholder string: replace with the folder that holds the plaque masks.
folder = 'path to folder with plaque masks'

# Call analyze_plaques on that folder; the return value is not captured.
analyze_plaques(folder)

# Train computer vision Torch models
___
### 1. generate_training_dataset: Generate train and test folders with class subfolders for training DL models.
### 2. train_test_model: Train torch model
### 3. generate_dataset: Generate dataset to apply model to
### 4. apply_model_to_tar: Apply trained model to tar dataset

In [None]:
from spacr.core import generate_training_dataset

src = 'path to folder that origionally contained images'

# Keyword arguments are collected first and then unpacked into the call, which
# passes exactly the same keywords as spelling them out inline.
dataset_options = dict(
    mode='metadata',  # annotation, recruitment or metadata
    annotation_column='test',  # for mode = annotation
    annotated_classes=[1, 2],  # for mode = annotation
    classes=['nc', 'pc'],  # for mode = annotation
    size=None,  # number of images per class; None = nr of images in least abundant class
    test_split=0.1,
    metadata_type_by='col',
    class_metadata=[['c1'], ['c2']],  # for mode = metadata
    channel_of_interest=3,  # for mode = recruitment
)
generate_training_dataset(src, **dataset_options)

In [None]:
from spacr.deep_spacr import train_test_model
%matplotlib inline

# Options for train_test_model, built with keyword syntax; insertion order is
# the same as in the literal form.
settings = dict(
    train=True,
    test=False,
    classes=['nc', 'pc'],
    model_type='maxvit_t',
    optimizer_type='adamw',
    schedule='reduce_lr_on_plateau',  # reduce_lr_on_plateau, step_lr
    loss_type='focal_loss',  # binary_cross_entropy_with_logits, focal_loss
    normalize=True,
    image_size=224,
    batch_size=64,
    epochs=100,
    val_split=0.1,
    train_mode='erm',
    learning_rate=0.0001,
    weight_decay=0.00001,
    dropout_rate=0.1,
    init_weights=True,
    amsgrad=True,
    use_checkpoint=True,
    gradient_accumulation=True,
    gradient_accumulation_steps=4,
    intermedeate_save=True,
    pin_memory=True,
    num_workers=30,
    channels=['r', 'g', 'b'],
    verbose=True,
)

# Placeholder string: replace with the training dataset folder before running.
src = 'path to training dataset folder, basename training'
# Hand the folder and the settings dict to train_test_model; the return value is not captured.
train_test_model(src, settings)

In [None]:
from spacr.core import generate_dataset

# Placeholder string: replace with a real folder path before running.
src = 'path to folder that origionally contained images'

# Same keyword arguments as before, written on a single line.
generate_dataset(src, file_metadata=None, experiment='experiment_1', sample=None)

In [None]:
from spacr.core import apply_model_to_tar
%matplotlib inline

# Placeholder strings: replace with the real .tar dataset and .pth model paths.
tar_path = 'path to dataset, extention is .tar'
model_path = 'path to model, extention is .pth'

# Keyword arguments are collected first and then unpacked into the call, which
# passes exactly the same keywords as spelling them out inline.
apply_options = dict(
    file_type='cell_png',
    image_size=224,
    batch_size=64,
    normalize=True,
    preload='images',
    num_workers=30,
    threshold=0.5,
    verbose=True,
)
result_df = apply_model_to_tar(tar_path, model_path, **apply_options)

# Analyze sequencing reads
___
### 1. analyze_reads: Merge paired-end reads and extract barcodes.
### 2. map_barcodes_folder: Map barcode and gRNA sequences to names.
### 3. plot_lorenz_curves: Generate lorenz curves from unique_grna.csv
### 4. perform_regression: Perform multiple linear regression to estimate the effect size of gRNAs on the dependent variable

In [None]:
from spacr.sequencing import analyze_reads

# Options for analyze_reads, built with keyword syntax; insertion order is the
# same as in the literal form.
settings = dict(
    src='path to folder with FASTQ files',
    upstream='CTTCTGGTAAATGGGGATGTCAAGTT',
    downstream='GTTTAAGAGCTATGCTGGAAACAGCAG',
    barecode_length_1=8,
    barecode_length_2=7,
    chunk_size=1000000,
    test=False,
)

# Call analyze_reads with the settings dict built in this cell.
analyze_reads(settings)

In [None]:
from spacr.sequencing import map_barcodes_folder

# Placeholder string: replace with the folder holding the *_combined.h5 files.
src = 'path to folder containing .h5 files, .*_combined.h5'

# Stored in the settings dict under the 'plate_dict' key.
plate_name_dict = {'E01': 'plate1', 'E02': 'plate2'}

# The dict used to be closed after 'nc_loc' and followed by stray
# `'key', value)` fragments, which is a SyntaxError. Those five entries
# (grna, barcodes, plate_dict, test, verbose) are now ordinary key/value pairs.
settings = {'pc':'positive control gRNA name',
            'pc_loc':'positive control column',
            'nc':'negative control gRNA name',
            'nc_loc':'negative control column',
            'grna':'path to grna sequence | name csv file',
            'barcodes':'path to barecode sequence | name csv file',
            'plate_dict':plate_name_dict,
            'test':False,
            'verbose':True}
            
# Call map_barcodes_folder with the folder path and the settings defined in this cell.
map_barcodes_folder(src, settings)

In [None]:
from spacr.plot import plot_lorenz_curves
%matplotlib inline

# Placeholder strings: replace each with a real *_combined_unique_grna.csv path.
csv_files = [
    'path to csv file (.*_combined_unique_grna.csv)',
    'path to csv file (.*_combined_unique_grna.csv)',
]
plot_lorenz_curves(csv_files)

In [None]:
from spacr.sequencing import perform_regression
%matplotlib inline

# Placeholder string: replace with the *combined_combination_counts.csv path.
csv_path = 'path to reads, .*combined_combination_counts.csv'
# NOTE(review): this path never reaches the settings dict. The dict literal
# used to list 'dependent_variable' twice, so this value was silently replaced
# by the later 'prediction_probability_class_1' entry. The duplicate is removed
# below with the effective value kept; which settings key (if any) should carry
# this path must be confirmed against perform_regression.
dependent_variable = 'path do computer vision results'

settings = {'dependent_variable':'prediction_probability_class_1',
            'gene_weights_csv':csv_path,
            'fraction_threshold':0.1,
            'transform':None,
            'agg_type':'mean',
            'min_cell_count':25,
            'regression_type':'ols',
            'remove_row_column_effect':False,
            'alpha':1,
            'nc':'c1',
            'pc':'c2',
            'other':'c3'}

# Call perform_regression with the settings dict built in this cell; the return value is kept in coef_df.
coef_df = perform_regression(settings)

# Simulate Pooled screens
___
### 1. run_and_save: Simulate pooled screen
### 2. run_multiple_simulations: Simulate multiple screens

In [None]:
from spacr.sim import run_and_save

# Settings for a single simulation, built with keyword syntax; insertion order
# is the same as in the literal form.
settings = dict(
    name='test',
    variable='none',
    src='path',
    plot=True,
    random_seed=True,
    nr_plates=4,
    number_of_genes=100,
    number_of_active_genes=10,
    number_of_control_genes=10,
    avg_genes_per_well=5,
    sd_genes_per_well=2,
    avg_cells_per_well=100,
    sd_cells_per_well=50,
    positive_mean=0.8,
    positive_variance=0.15,
    negative_mean=0.2,
    negative_variance=0.15,
    avg_reads_per_gene=1000,
    sd_reads_per_gene=500,
    sequencing_error=0.1,
    well_ineq_coeff=0.5,
    gene_ineq_coeff=0.1,
    max_workers=25,
)

# First positional argument passed to run_and_save.
i=1
# Call run_and_save with that value, the settings dict, a one-element time_ls list and total_sims=1.
run_and_save(i, settings, time_ls=[1], total_sims=1)

In [None]:
from spacr.sim import run_multiple_simulations

# Settings for multiple simulations.

# Loop over most variables: each *_ls list below is stored under the matching
# settings key.
nr_plates_ls = [8]
number_of_genes_ls = [1384]
number_of_active_genes_ls = [8]
avg_genes_per_well_ls = [10]
avg_cells_per_well_ls = [100]
positive_mean_ls = [0.8]
avg_reads_per_gene_ls = [1000]
sequencing_error_ls = [0.01]
well_ineq_coeff_ls = [0.3]  # lower values mean more inequality (0-1)
gene_ineq_coeff_ls = [0.8]

settings = dict(
    name='plates_2_4_8',
    variable='all',
    src='path',
    plot=True,
    random_seed=False,
    replicates=2,
    nr_plates=nr_plates_ls,
    number_of_genes=number_of_genes_ls,
    number_of_active_genes=number_of_active_genes_ls,
    number_of_control_genes=30,
    avg_genes_per_well=avg_genes_per_well_ls,
    avg_cells_per_well=avg_cells_per_well_ls,
    positive_mean=positive_mean_ls,
    avg_reads_per_gene=avg_reads_per_gene_ls,
    sequencing_error=sequencing_error_ls,
    well_ineq_coeff=well_ineq_coeff_ls,
    gene_ineq_coeff=gene_ineq_coeff_ls,
    max_workers=25,
)

# Call run_multiple_simulations with the settings dict built in this cell.
run_multiple_simulations(settings)

# Train Cellpose Models
___
### 1. sample_and_copy_images: Randomly select images from folders to generate an image dataset.
### 2. generate_cellpose_train_test: Split images from a folder into test and train datasets.
### 3. train_cellpose: Train cellpose models.
### 4. check_cellpose_models: generate masks using all stock cellpose models.
### 5. identify_masks_finetune: Use a custom or stock cellpose model to generate masks.
### 6. compare_cellpose_masks: Compare masks generated with different cellpose models.

In [None]:
from spacr.alpha import sample_and_copy_images

# NOTE(review): the second element is a literal Ellipsis object, presumably a
# stand-in for further folder paths -- confirm and replace before running.
folder_list = [
    'path to golder with single channel images',
    ...,
]
nr_of_images = 100
dst = 'path to destination folder where images will be copied to'

sample_and_copy_images(folder_list, nr_of_images, dst)

In [None]:
from spacr.io import generate_cellpose_train_test
# Placeholder string: replace with the folder of images to split.
src = 'path to folder containing images that will be used for train and test datasets'
# Call generate_cellpose_train_test on that folder with test_split=0.1.
generate_cellpose_train_test(src, test_split=0.1)

In [None]:
from spacr.core import train_cellpose
%matplotlib inline

# Options for train_cellpose, built with keyword syntax; insertion order is the
# same as in the literal form.
settings = dict(
    img_src='path to train folder, basename train',
    model_name='model_name',
    model_type='cyto',
    Signal_to_noise=10,
    background=200,
    remove_background=False,
    learning_rate=0.2,
    weight_decay=1e-05,
    batch_size=8,
    n_epochs=25000,
    from_scratch=False,
    diameter=30,
    resize=True,
    width_height=[1120, 1120],
    verbose=True,
)

# Call train_cellpose with the settings dict built in this cell.
train_cellpose(settings)

In [None]:
from spacr.core import check_cellpose_models
%matplotlib inline

# Options for check_cellpose_models, built with keyword syntax; insertion order
# is the same as in the literal form.
settings = dict(
    src='path to test folder, basename test',
    batch_size=8,
    CP_prob=0,
    diameter=30,
    save=True,
    normalize=True,
    remove_background=False,
    background=200,
    Signal_to_noise=10,
    plot=True,
    verbose=True,
)

# Call check_cellpose_models with the settings dict built in this cell.
check_cellpose_models(settings)

In [None]:
from spacr.core import identify_masks_finetune
%matplotlib inline

# Options for identify_masks_finetune, built with keyword syntax; insertion
# order is the same as in the literal form.
settings = dict(
    src='path to folder with images',
    dst='path to folder masks will be saved in',
    model_name='cyto',
    custom_model='path to model, extention: .CP_model',
    background=200,
    Signal_to_noise=10,
    save=True,
    resize=True,
    target_height=1120,
    target_width=1120,
    diameter=30,
    CP_prob=0,
    invert=False,
    verbose=True,
)

# Call identify_masks_finetune with the settings dict built in this cell.
identify_masks_finetune(settings)

In [None]:
from spacr.core import compare_cellpose_masks
%matplotlib inline

# Placeholder string: replace with the folder of images before running.
src = 'path to folder with images'
# Call compare_cellpose_masks on that folder with verbose and save both enabled.
compare_cellpose_masks(src, verbose=True, save=True)

In [None]:
# Launch GUI applications
___
### 1. gui_make_masks: Modify or draw masks using single channel images.
### 2. 
### 3. 
### 4. 
### 5. 
### 6. 
### 7. 
### 8. 

In [None]:
# Import gui_make_masks and call it with no arguments.
# NOTE(review): the neighbouring cells import from spacr.app_* modules
# (e.g. spacr.app_mask); confirm that spacr.mask_app is still a valid module.
from spacr.mask_app import gui_make_masks
gui_make_masks()

In [None]:
# Import start_mask_app from spacr.app_mask and call it with no arguments.
from spacr.app_mask import start_mask_app
start_mask_app()

In [None]:
# Import start_measure_app from spacr.app_measure and call it with no arguments.
from spacr.app_measure import start_measure_app
start_measure_app()

In [None]:
# Import start_classify_app from spacr.app_classify and call it with no arguments.
from spacr.app_classify import start_classify_app
start_classify_app()

In [None]:
# Import start_seq_app from spacr.app_sequencing and call it with no arguments.
from spacr.app_sequencing import start_seq_app
start_seq_app()

In [None]:
# Import start_umap_app from spacr.app_umap and call it with no arguments.
from spacr.app_umap import start_umap_app
start_umap_app()

In [None]:
# Import gui_app from spacr.gui and call it with no arguments.
from spacr.gui import gui_app
gui_app()