# GENERAL PROGRAM CONFIGURATIONS
# Layout convention used throughout this file: each setting is written on its
# own line and the comment describing it follows directly underneath.

inDir: /Users/jnimoca/Jose_BI/P21E05_HN26_DA/P23_Core19_PoC/Cylinter
# Path to CyLinter input directory containing multi-channel
# image files (TIFF or OME-TIFF), segmentation outlines (OME-TIFF),
# segmentation masks (TIFF), and corresponding single-cell feature tables (CSV)

outDir: /Users/jnimoca/Jose_BI/P21E05_HN26_DA/P23_Core19_PoC/Cylinter/output
# CyLinter output directory. Path is created if it does not exist.

sampleMetadata:
  "19": ["19", "Sonja HeanNeck 19", "NKC", "CANCER-True", 1]
# Sample metadata dictionary: keys = file names; values = list of strings.
# First elements: sample names (str)
# Second elements: descriptive text of experimental condition (str)
# Third elements: abbreviation of experimental condition (str)
# Fourth elements: comma-delimited string of arbitrary binary declarations
# for computing t-statistics between two groups of samples (str dtype)
# Fifth elements: replicate number specifying biological or
# technical replicates (int)

samplesToExclude: []
# (list of strs) Sample names to exclude from analysis specified
# according to the first elements of sampleMetadata configuration.

markersToExclude: ["750_bg", "647_bg", "555_bg", "488_bg", "DAPI_bg"]
# (list of strs) Immunomarkers to exclude from analysis
# Does not include nuclear dyes. They are needed for the
# cycleCorrelation module to remove cell dropout.

###############################################################################
# MODULE-SPECIFIC CONFIGURATIONS

# selectROIs-------------------------------------------------------------------
delintMode: True
# (bool) Whether to drop (True; negative selection) or
# retain (False; positive selection) cells selected by ROIs.

showAbChannels: True
# (bool) Whether to show all immunomarker channels (True) when Napari
# is open (may be memory limiting) or show cycle 1 DNA only (False).
samplesForROISelection: ["19"]
# (list of strs) Sample names for ROI selection specified
# according to the first elements of sampleMetadata configuration.

autoArtifactDetection: True
# (bool) Whether to display tools for automated artifact detection in Napari window

artifactDetectionMethod: "classical"
# (str) Algorithm used for automated artifact detection (current option: "classical").
# Multi-layer perceptron method ("MLP") currently under development.

# intensityFilter--------------------------------------------------------------
numBinsIntensity: 75
# (int) Number of bins for DNA intensity histograms.

# areaFilter-------------------------------------------------------------------
numBinsArea: 75
# (int) Number of bins for DNA area histograms.

# cycleCorrelation-------------------------------------------------------------
numBinsCorrelation: 75
# (int) Number of bins for DNA1/DNAn histograms.

# pruneOutliers----------------------------------------------------------------
hexbins: False
# (bool) Whether to use hexbins (True) or scatter plots (False) to plot
# single-cell signal intensities. Scatter plots allow for higher resolution,
# but may require longer rendering times.

hexbinGridSize: 20
# (int) Hexbin grid size when hexbins=True.
# Higher values increase bin resolution.

# metaQC (optional)------------------------------------------------------------
metaQC: False
# (bool) Whether to perform data reclassification based on
# unsupervised clustering results of combinations of clean and
# noisy (previously-redacted) data.

embeddingAlgorithmQC: "UMAP"
# (str) Embedding algorithm used for clustering (options: "TSNE" or "UMAP").

channelExclusionsClusteringQC: []
# (list of strs) Immunomarkers to exclude from clustering.

samplesToRemoveClusteringQC: []
# (list of strs) Samples to exclude from clustering.

fracForEmbeddingQC: 1.0
# (float) Fraction of cells to be embedded (range: 0.0-1.0)
# Limits the amount of data passed to downstream modules.
dimensionEmbeddingQC: 2
# (int) Dimension of the embedding (fixed to 2 in current version).

topMarkersQC: "clusters"
# (str) Normalization axis ("channels" or "clusters") used to define
# highest expressed markers per cluster.

colormapAnnotationQC: "Sample"
# (str) Metadata annotation to colormap the embedding: Sample or Condition.

metricQC: "euclidean"
# (str) Distance metric for computing embedding.
# Choose from valid metrics used by scipy.spatial.distance.pdist:
# "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine",
# "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski",
# "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao",
# "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule".

# --------------------------------------
# tSNE-specific configurations:
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

perplexityQC: 50.0
# (float) Related to the number of nearest neighbors used in other
# manifold learning algorithms. Larger datasets usually require
# larger perplexity. Different values can result in significantly
# different results.

earlyExaggerationQC: 12.0
# (float) For larger values, the space between natural clusters
# will be larger in the embedded space.

learningRateTSNEQC: 200.0
# (float) tSNE learning rate (typically between 10.0 and 1000.0).

randomStateQC: 5
# (int) Determines the random number generator for reproducible results
# across multiple function calls.

# --------------------------------------
# UMAP-specific configurations:
# https://umap-learn.readthedocs.io/en/latest/api.html

nNeighborsQC: 6
# (int) The size of local neighborhood (in terms of number of
# neighboring sample points) used for manifold approximation.
# Larger values result in more global views of the manifold,
# while smaller values result in more local data being preserved.
# In general values should be in the range 2 to 100.
learningRateUMAPQC: 1.0
# (float) The initial learning rate for the embedding optimization.

minDistQC: 0.1
# (float) The effective minimum distance between embedded points.
# Smaller values will result in a more clustered/clumped
# embedding where nearby points on the manifold are drawn
# closer together, while larger values will result in a more
# even dispersal of points. The value should be set relative
# to the spread value, which determines the scale at which
# embedded points will be spread out.

repulsionStrengthQC: 5.0
# (float) Weighting applied to negative samples in low dimensional
# embedding optimization. Values higher than one will
# result in greater weight being given to negative samples.

# PCA--------------------------------------------------------------------------
channelExclusionsPCA: []
# (list of strs) Immunomarkers to exclude from PCA analysis.

samplesToRemovePCA: []
# (list of strs) Samples to exclude from PCA analysis.

dimensionPCA: 2
# (int) Number of PCs to compute.

pointSize: 90.0
# (float) Scatter point size for sample scores plot.

labelPoints: True
# (bool) Annotate scatter points with condition abbreviations
# from sampleMetadata configuration.

distanceCutoff: 0.15
# (float) Maximum distance between data points in PCA scores plot to
# be annotated with a common label. Useful for increasing visual clarity
# of PCA plots containing many data points. Applicable when
# labelPoints is True.

conditionsToSilhouette: []
# (list of strs) List of abbreviated condition names whose corresponding
# scores plot points will be greyed out, left unannotated, and sent to the back
# of the plot (zorder). Useful for increasing visual clarity of PCA
# plots containing many data points.

# gating (optional)------------------------------------------------------------
gating: True
# (bool) Whether to perform SYLARAS-style gating on single-cell data.
# Cell Syst. 2020 Sep 23;11(3):272-285.e9 PMID: 32898474

channelExclusionsGating: []
# (list of strs) Immunomarkers to exclude from gating.

samplesToRemoveGating: []
# (list of strs) Samples to exclude from gating.

vectorThreshold: 100
# (int) Visualize Boolean vectors with cell counts >= vectorThreshold

classes:
  Tumor:
    definition: [+pan-CK, +KI67]
    subsets: []
# (dict) Boolean immunophenotype signatures.
# +marker = immunopositive, -marker = immunonegative, marker = don't care

# clustering-------------------------------------------------------------------
embeddingAlgorithm: "UMAP"
# (str) Embedding algorithm to use for clustering (options: "TSNE" or "UMAP").

channelExclusionsClustering: []
# (list of strs) Immunomarkers to exclude from clustering.

samplesToRemoveClustering: []
# (list of strs) Samples to exclude from clustering.

normalizeTissueCounts: True
# (bool) Make the number of cells per tissue for clustering more similar
# through sample-weighted random sampling.

fracForEmbedding: 1.0
# (float) Fraction of cells to be embedded (range: 0.0-1.0).
# Limits amount of data passed to downstream modules.

dimensionEmbedding: 2
# (int) Dimension of the embedding (options: 2 or 3).

topMarkers: "clusters"
# (str) Normalization axis ("channels" or "clusters") used to define
# highest expressed markers per cluster.

colormapAnnotationClustering: "Sample"
# (str) Metadata annotation to colormap the embedding: Sample or Condition.

metric: "euclidean"
# (str) Distance metric for computing embedding.
# Choose from valid metrics used by scipy.spatial.distance.pdist:
# "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine",
# "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski",
# "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao",
# "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule".
# --------------------------------------
# tSNE-specific configurations:
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

perplexity: 50.0
# (float) Related to the number of nearest neighbors used in other
# manifold learning algorithms. Larger datasets usually require
# larger perplexity. Different values can result in significantly
# different results.

earlyExaggeration: 12.0
# (float) For larger values, the space between natural clusters
# will be larger in the embedded space.

learningRateTSNE: 200.0
# (float) tSNE learning rate (typically between 10.0 and 1000.0).

randomStateTSNE: 5
# (int) Determines the random number generator for reproducible results
# across multiple function calls.

# --------------------------------------
# UMAP-specific configurations:
# https://umap-learn.readthedocs.io/en/latest/api.html

nNeighbors: 6
# (int) The size of local neighborhood (in terms of number of
# neighboring sample points) used for manifold approximation.
# Larger values result in more global views of the manifold,
# while smaller values result in more local data being preserved.
# In general values should be in the range 2 to 100.

learningRateUMAP: 1.0
# (float) The initial learning rate for the embedding optimization.

minDist: 0.1
# (float) The effective minimum distance between embedded points.
# Smaller values will result in a more clustered/clumped
# embedding where nearby points on the manifold are drawn
# closer together, while larger values will result in a more
# even dispersal of points. The value should be set relative
# to the spread value, which determines the scale at which
# embedded points will be spread out.

repulsionStrength: 5.0
# (float) Weighting applied to negative samples in low dimensional
# embedding optimization. Values higher than one will
# result in greater weight being given to negative samples.
randomStateUMAP: 5
# (int) Determines the random number generator for reproducible results
# across multiple function calls.

# frequencyStats---------------------------------------------------------------
controlGroups: ["CANCER-FALSE"]
# (list of strs) Corresponds to control groups for each binary declaration
# specified as the fourth elements of sampleMetadata values.
# NOTE(review): sampleMetadata in this file declares "CANCER-True" (mixed
# case) while this entry is upper case; confirm the letter case matches what
# the frequencyStats module expects.

denominatorCluster: null
# (None type) Cluster to be used as the denominator when computing cluster
# frequency ratios. Set to null first, change to cluster integer number
# to normalize cluster frequencies to a particular cluster if desired.

FDRCorrection: False
# (bool) Whether to compute p-vals and false discovery rate (FDR)-corrected
# q-vals (True) or compute uncorrected p-vals only (False).

# curateThumbnails-------------------------------------------------------------
numThumbnails: 25
# (int) Number of examples per cluster to be curated.

topMarkersThumbnails: "clusters"
# (str) Normalization axis ("channels" or "clusters") used to define
# highest expressed markers per cluster.

windowSize: 30
# (int) Number of pixels in x and y dimensions per thumbnail.

segOutlines: True
# (bool) Whether to overlay cell segmentation outlines on thumbnail images.