In [None]:
# Description: use this cell to generate train and test folders: datasets/train/nc and pc and datasets/test/nc and pc
from spacr.io import generate_training_dataset

# Configuration for building the train/test image folders from a spacr database.
settings = {
    'src': 'path',  # (path) Path to the source folder (where the original images were stored)
    # NOTE(review): the value below is 'metadata_annotation' while the documented
    # option is 'annotation_metadata' - confirm which spelling spacr expects.
    'dataset_mode': 'metadata_annotation',  # (string) annotation, measurement, metadata, annotation_metadata
    'tables': ['cell'],  # (list of strings) Tables present in the database, excluding png_list
    'test_split': 0.1,  # (float) Fraction of images used for the test set
    'annotation_column': 'test',  # (Optional, string) In annotation mode, the annotation column in the database
    'annotated_classes': [1],  # (Optional, list of integers) In annotation mode, the integer(s) in annotation_column; if len(annotated_classes) is 1, class 2 is generated from a random selection of images
    'metadata_type_by': 'column_name',  # (Optional, string) In metadata mode, the column that the class_metadata elements are in
    # NOTE(review): documented as a list of lists (one inner list per class), but a
    # flat list is passed here - confirm the expected nesting.
    'class_metadata': ['c10', 'c11', 'c12', 'c22', 'c23', 'c24'],  # (Optional, list of lists of strings) In metadata mode, the elements that define each class
    'png_type': 'cell_png',  # (Optional, string) String in the path of each image (used to filter images)
    'nuclei_limit': False,  # (Optional, bool) If cell and nucleus are in tables, filter by number of nuclei per cell
    'pathogen_limit': 0,  # (Optional, integer) If cell and pathogen are in tables, filter by number of pathogens per cell
    'uninfected': True,  # (Optional, bool) If cell and pathogen are in tables, flag for uninfected cells (presumably whether to include them - verify)
    'size': None,  # (Optional, integer or NoneType) Limit on the total number of images (test + train) per class
}

generate_training_dataset(settings)

In [1]:
# Description: train a torch model
from spacr.deep_spacr import train_test_model

# Configuration for train_test_model; as set here it only evaluates an existing
# model on the test set ('train' is False, 'test' is True).
settings = {
    'src': '/nas_mnt/carruthers/yifan/Yifan_Einar screen/yifanUbtrial11h_20240705_184432/plate1/datasets/training',  # (path) Path to the source folder (ends with datasets/training)
    'train': False,  # (bool) - Train
    'test': True,  # (bool) - Test
    'custom_model': '/nas_mnt/carruthers/yifan/Yifan_Einar screen/yifanUbtrial11h_20240705_184432/plate1/datasets/training/model/maxvit_t/rgb/epochs_100/maxvit_t_epoch_100_channels_rgb.pth',  # (path) - Path to a custom model
    'classes': ['nc', 'pc'],  # (list) - List of classes (folder names in datasets/training/train or test)
    'model_type': 'maxvit_t',  # (string) - Name of the torch model architecture
    'optimizer_type': 'adamw',  # (string) - Type of optimizer
    'schedule': 'reduce_lr_on_plateau',  # (string) - Type of scheduler (reduce_lr_on_plateau or step_lr)
    'loss_type': 'focal_loss',  # (string) - Loss function (binary_cross_entropy_with_logits or focal_loss)
    'normalize': True,  # (bool) - Apply ImageNet normalization to images before training
    'image_size': 224,  # (int) - Size of images (height and width)
    'batch_size': 64,  # (int) - Number of images per batch
    'epochs': 100,  # (int) - Number of epochs for training
    'val_split': 0.1,  # (float) - Fraction of images in the validation dataset
    'learning_rate': 0.0001,  # (float) - Learning rate
    'weight_decay': 0.00001,  # (float) - Weight decay coefficient (regularization)
    'dropout_rate': 0.1,  # (float) - Dropout rate (regularization)
    'init_weights': True,  # (bool) - Initialize the model with ImageNet weights
    'amsgrad': True,  # (bool) - Presumably enables the AMSGrad variant of the optimizer - verify against spacr
    'use_checkpoint': True,  # (bool) - Checkpoint gradient calculations to save VRAM at the expense of computation
    'gradient_accumulation': True,  # (bool) - Accumulate gradients to mimic larger batches
    'gradient_accumulation_steps': 4,  # (int) - Number of steps to accumulate gradients over (presumably batches rather than epochs - verify)
    # NOTE(review): the key is spelled 'intermedeate_save'; confirm this is the
    # spelling spacr reads before correcting it.
    'intermedeate_save': True,  # (bool) - Save intermediate states of the model
    'pin_memory': True,  # (bool) - Whether to pin memory for the data loader
    'n_jobs': 30,  # (int) - Number of threads to use
    'train_channels': ['r', 'g', 'b'],  # (list of 'r', 'g', and/or 'b') - PNG channels to use for training
    'augment': False,  # (bool) - Augment the dataset: vertical flip, horizontal flip and rotation of each image to artificially expand the dataset 8-fold
    'verbose': True,
}

train_test_model(settings)

Models already downloaded to: /home/carruthers/anaconda3/envs/spacr/lib/python3.9/site-packages/spacr/resources/models
Training a network on channels: [1, 2, 3]
Channel 1: Red, Channel 2: Green, Channel 3: Blue
Loading test dataset
Results wil be saved in: /nas_mnt/carruthers/yifan/Yifan_Einar screen/yifanUbtrial11h_20240705_184432/plate1/datasets/training/model/maxvit_t/rgb/epochs_100/maxvit_t_time_241205_test_result.csv
Copied 50 misclassified images.


'/nas_mnt/carruthers/yifan/Yifan_Einar screen/yifanUbtrial11h_20240705_184432/plate1/datasets/training/model/maxvit_t/rgb/epochs_100/maxvit_t_time_241205_test_result.csv'

In [None]:
# Description: generate a tar dataset

from spacr.io import generate_dataset

# Configuration for packing images into a tar dataset.
settings = {
    'src': 'path',  # (path) Path to the source folder (where the original images were stored)
    'file_metadata': 'cell_png',  # (Optional, string) String in the path of each image (used to filter images)
    'experiment': 'test',  # (string) - Name of the dataset
    'sample': 10000,  # (Optional, integer or NoneType) Limit on the number of images to include in the dataset
}

generate_dataset(settings)

In [None]:
# Description: apply a model to a tar dataset

from spacr.deep_spacr import apply_model_to_tar

# Configuration for scoring every image in a tar dataset with a trained model.
settings = {
    'dataset': 'path.tar',  # (path) - Path to the tar dataset (ends with .tar)
    'model_path': 'path.pth',  # (path) - Path to the model (ends with .pth)
    'file_type': 'cell_png',  # (Optional, string) String in the path of each image (used to filter images)
    'image_size': 224,  # (int) - Size of images (height and width)
    'batch_size': 64,  # (int) - Number of images per batch
    'normalize': True,  # (bool) - Apply ImageNet normalization to images (should match the setting used for training)
    'score_threshold': 0.5,  # (float) - Score threshold; presumably the cut-off used to assign classes - verify
    'n_jobs': 30,  # (int) - Number of threads to use
    'verbose': True,
}

result_df = apply_model_to_tar(settings)

In [None]:
# Description: Fit a regression model to estimate the effect size of gRNAs on cell scores.
from spacr.ml import perform_regression
import pandas as pd
%matplotlib inline

# Configuration for the gRNA effect-size regression.
settings = {
    'count_data': 'path',  # (path) Path or list of paths to sequencing count data
    'score_data': 'path',  # (path) Path or list of paths to score data
    'score_column': 'column',  # (string) - Column with cell scores
    'metadata_files': ['path.csv', 'path.csv'],  # (list) Paths to gene metadata
    'positive_control': 'gene',  # (string) - Gene to highlight in the volcano plot
    'negative_control': 'gene',  # (string) - Gene to highlight in the volcano plot
    'min_n': 3,  # (integer) - TODO: document (undocumented in the original template)
    'fraction_threshold': None,  # (Optional, float or NoneType) - Minimum threshold for gene fraction; if None, calculated automatically
    'target_unique_count': 5,  # (integer) - Number of expected unique gRNAs per well
    'tolerance': 0.02,  # (float) - Tolerance for the cells-per-well limit
    'log_x': False,  # (bool) - gRNA fraction plot: log-scale X axis
    'log_y': False,  # (bool) - gRNA fraction plot: log-scale Y axis
    'x_lim': None,  # (Optional) - Volcano plot X axis limit
    'control_wells': ['c1', 'c2', 'c3'],  # (list) - Metadata to exclude from the regression model
    'filter_column': 'column',  # (str) - Column containing the control metadata to remove
    'dependent_variable': 'column',  # (string) - Dependent variable for the regression
    'threshold_method': 'var',  # (string) - Effect size threshold type (std or var)
    'threshold_multiplier': 4,  # (integer) - Effect size threshold multiplier
    'transform': 'log',  # (string) - Transform applied to the dependent variable
    'agg_type': 'mean',  # (string) - Aggregation for the dependent variable
    'min_cell_count': None,  # (integer or NoneType) - Minimum number of cells per well
    'regression_type': 'ols',  # (string) - Type of regression (ols, glm, mixed, ridge, lasso)
    'random_row_column_effects': False,  # (bool) - Remove plate, row and column random effects
    'y_lims': [[0, 9], [12, 16]],  # (list of lists) Limits for the broken Y axis
    'plate': None,  # (string or NoneType) - String to replace plate column values with
    'cov_type': None,  # (string or NoneType) - Covariance type for OLS regression
    'volcano': 'gene',  # (string) - Mode for significant results (gene, grna, all)
    'alpha': 0.8,  # (float) - alpha for hinge and lasso regression (NOTE(review): 'hinge' is presumably 'ridge' - confirm)
}

coef_df = perform_regression(settings)