From ac120039b1210943ba4b714fe879c0727b4e7584 Mon Sep 17 00:00:00 2001 From: mizraelson <18702359+mizraelson@users.noreply.github.com> Date: Mon, 29 Jul 2024 07:00:16 +0000 Subject: [PATCH] regression tests automated change --- ...r-umi-drivermap-air-bcr-spikein-1-1-1.yaml | 602 ++++++++++++++++++ ...-umi-drivermap-air-bcr-spikein-16-4-1.yaml | 602 ++++++++++++++++++ ...r-umi-drivermap-air-tcr-spikein-1-1-1.yaml | 575 +++++++++++++++++ ...-umi-drivermap-air-tcr-spikein-16-4-1.yaml | 575 +++++++++++++++++ regression/presets/list.txt | 8 + 5 files changed, 2362 insertions(+) create mode 100644 regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-1-1-1.yaml create mode 100644 regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-16-4-1.yaml create mode 100644 regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-1-1-1.yaml create mode 100644 regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-16-4-1.yaml diff --git a/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-1-1-1.yaml b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-1-1-1.yaml new file mode 100644 index 000000000..253f71334 --- /dev/null +++ b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-1-1-1.yaml @@ -0,0 +1,602 @@ +flags: [] +pipeline: + - align + - refineTagsAndSort + - assemble + - exportClones +align: + species: hsa + libraryName: default + trimmingQualityThreshold: 10 + trimmingWindowSize: 6 + chains: ALL + replaceWildcards: true + overlapPairedReads: true + bamDropNonVDJ: false + writeFailedAlignments: false + tagPattern: "^(R1:*) \\ ^(UMI:N{18})(R2:*)" + tagUnstranded: false + tagMaxBudget: 10.0 + headerExtractors: [] + readIdAsCellTag: false + sampleTable: null + tagsValidations: + - type: MustContainTagType + tagType: Molecule + splitBySample: true + limit: null + parameters: + vParameters: + geneFeatureToAlign: VTranscriptWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: true + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 98 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 8 + mapperMaxClusters: 11 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 103 + mapperRelativeMinScore: 0.81 + mapperMinSeedsDistance: 8 + mapperMaxSeedsDistance: 8 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 2 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + dParameters: + geneFeatureToAlign: DRegionWithP + relativeMinScore: 0.85 + absoluteMinScore: 25.0 + maxHits: 3 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + jParameters: + geneFeatureToAlign: JRegionWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: false + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 95 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 4 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 100 + mapperRelativeMinScore: 0.8 + mapperMinSeedsDistance: 5 + mapperMaxSeedsDistance: 5 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + cParameters: + geneFeatureToAlign: CExon1 + minSumScore: 40 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner + mapperKValue: 5 + floatingLeftBound: false + floatingRightBound: true + mapperAbsoluteMinScore: 1.5 + mapperRelativeMinScore: 0.75 + mapperMatchScore: 1.0 + mapperMismatchPenalty: -0.1 + mapperOffsetShiftPenalty: -0.3 + mapperMinSeedsDistance: 4 + mapperMaxSeedsDistance: 10 + minAlignmentLength: 15 + maxAdjacentIndels: 2 + alignmentStopPenalty: -1000 + absoluteMinScore: 40.0 + relativeMinScore: 0.87 + maxHits: 7 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + vjAlignmentOrder: VThenJ + libraryStructure: Unknown + includeDScore: false + includeCScore: false + minSumScore: 120.0 + relativeMinVFR3CDR3Score: 0.7 + allowPartialAlignments: false + allowNoCDR3PartAlignments: false + allowChimeras: false + readsLayout: DirectOnly + mergerParameters: + qualityMergingAlgorithm: MaxSubtraction + partsLayout: null + minimalOverlap: 13 + minimalMatchQualitySum: 364 + maxQuality: 50 + minimalIdentity: 0.7 + identityType: MinimalQualityWeighted + fixSeed: true + alignmentBoundaryTolerance: 5 + minChimeraDetectionScore: 120 + vjOverlapWindow: 3 + saveOriginalSequence: false + saveOriginalReads: false + smartForceEdgeAlignments: true +refineTagsAndSort: + whitelists: {} + runCorrection: true + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: + type: filter_groups + groupingKeys: + - allTags:Molecule + predicates: + - metrics: + - type: group_metric_sum_weight + reportHist: + log: true + binNumber: 0 + minBinWidth: 0.2 + multiplyWeightByKey: false + operator: + type: group_operator_lowest_threshold + operators: + - type: group_operator_advanced_thresholding + algo: + type: otsu + histTransformers: + - type: inflate + fraction: 0.15 + value: 1.0 + logX: true + minimalSample: 20 + fallbackThreshold: 1.0 + accept: High + - type: group_operator_cumtop + share: 0.85 + round: Down + accept: High + accept: High + expectedSorting: [] + requiredSequences: [] + expectedSorting: + - allTags:Molecule + requiredSequences: [] +assemblePartial: + overlappedOnly: false + dropPartial: false + cellLevel: false + parameters: + kValue: 12 + kOffset: -7 + minimalAssembleOverlap: 12 + minimalNOverlap: 7 + minimalNOverlapShare: 0.65 + minimalAlignmentMergeIdentity: 0.85 + mergerParameters: + qualityMergingAlgorithm: SumSubtraction + partsLayout: CollinearDirect + minimalOverlap: 20 + minimalMatchQualitySum: 0 + maxQuality: 45 + minimalIdentity: 0.95 + identityType: Unweighted + maxLeftParts: 256000 + maxLeftMatches: 6144 +extend: + vAnchor: CDR3Begin + jAnchor: FR4Begin + minimalVScore: 50 + minimalJScore: 50 +assemble: + sortBySequence: false + clnaOutput: false + cellLevel: false + consensusAssemblerParameters: + assembler: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -14 + minAlignmentScore: 40 + maxNormalizedAlignmentPenalty: 0.2 + trimMinimalSumQuality: 0 + trimReferenceRegion: false + maxQuality: 45 + maxIterations: 6 + minAltSeedQualityScore: 11 + minAltSeedNormalizedPenalty: 0.35 + altSeedPenaltyTolerance: 0.5 + minRecordSharePerConsensus: 0.2 + minRecordsPerConsensus: 0 + minRecursiveRecordShare: 0.4 + minQualityScore: 0 + maxConsensuses: 3 + minTagSuffixShare: 0.0 + isolateChains: false + cloneAssemblerParameters: + assemblingFeatures: + - CDR3 + minimalClonalSequenceLength: 12 + qualityAggregationType: BetaScore + cloneClusteringParameters: + searchDepth: 2 + allowedMutationsInNRegions: 1 + searchParameters: twoMismatchesOrIndels + clusteringFilter: + type: advanced + correctionPower: 0.001 + backgroundSubstitutionRate: 5.0E-4 + backgroundIndelRate: 2.0E-4 + cloneFactoryParameters: + vParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null + separateByV: false + separateByJ: false + separateByC: true + maximalPreClusteringRatio: 1.0 + preClusteringScoreFilteringRatio: 2.0 + preClusteringCountFilteringRatio: 2.0 + addReadsCountOnClustering: false + badQualityThreshold: 15 + maxBadPointsPercent: 0.7 + mappingThreshold: 2of5 + minimalQuality: 20 + postFilters: null + inferMinRecordsPerConsensus: true +assembleContigs: + ignoreTags: false + parameters: + branchingMinimalQualityShare: 0.1 + branchingMinimalSumQuality: 60 + decisiveBranchingSumQualityThreshold: 120 + alignedSequenceEdgeDelta: 3 + alignmentEdgeRegionSize: 7 + minimalNonEdgePointsFraction: 0.25 + minimalMeanNormalizedQuality: 5.0 + outputMinimalQualityShare: 0.75 + outputMinimalSumQuality: 0 + subCloningRegions: null + assemblingRegions: null + postFiltering: + type: NoFiltering + trimmingParameters: + averageQualityThreshold: 10.0 + windowSize: 8 + minimalContigLength: 20 + alignedRegionsOnly: false + discardAmbiguousNucleotideCalls: false +exportAlignments: + chains: ALL + noHeader: false + fields: + - field: -readIds + - field: -tags + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + - field: -topChains +exportClones: + splitByTagType: null + filterOutOfFrames: false + filterStops: false + chains: ALL + noHeader: false + fields: + - field: -cloneId + - field: -readCount + - field: -readFraction + - field: -uniqueTagCount + args: + - Molecule + - field: -uniqueTagFraction + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + splitFilesBy: + - chain + groupClonesBy: [] +qc: + checks: + - type: SuccessfullyAlignedReads + upper: 0.85 + middle: 0.7 + label: Successfully aligned reads + - type: OffTargetReads + upper: 0.2 + middle: 0.1 + label: Off target (non TCR/IG) reads + - type: ReadsWithNoVOrJHits + upper: 0.2 + middle: 0.1 + label: Reads with no V or J hits + - type: AlignmentsWithNoAssemblingFeature + upper: 0.15 + middle: 0.05 + label: Alignments without assembling feature + - type: ReadsWithNoBarcode + upper: 0.2 + middle: 0.1 + label: Reads with no barcode + - type: ReadsDroppedInTagRefinement + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags error correction and filtering + - type: TagArtificialDiversityEliminated + tag: UMI + upper: 0.5 + middle: 0.3 + label: UMIs artificial diversity eliminated + - type: ReadsDroppedInTagCorrection + tag: UMI + upper: 0.1 + middle: 0.05 + label: Reads dropped in UMI error correction and whitelist + - type: ReadsDroppedInTagFiltering + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags filtering + - type: TagGroupsWithNoAssemblingFeature + upper: 0.2 + middle: 0.1 + label: Tag groups with no assembling feature + - type: TagGroupsWithMultipleConsensuses + upper: 0.1 + middle: 0.01 + label: Barcode collisions in clonotype assembly + - type: UnassignedAlignments + upper: 0.1 + middle: 0.05 + label: Unassigned alignments in clonotype assembly + - type: ReadsUsedInClonotypes + upper: 0.9 + middle: 0.7 + label: Reads used in clonotypes + - type: AlignmentsDroppedLowQuality + upper: 0.05 + middle: 0.01 + label: Alignments dropped due to low sequence quality + - type: ClonesDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Clones dropped in post-filtering + - type: AlignmentsDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Alignments dropped in clones post-filtering + - type: OverlappedReadsMoreBetter + upper: 0.9 + middle: 0.8 + label: Overlapped paired-end reads + - type: SpikeInControls + alphabet: Nucleotide + geneFeature: CDR3 + error: 0.15 + abundanceMeasure: Molecule + spikeIns: + - label: BCR spike-in-1a + seq: TGTGCGAGATGCGCGATGGAACTGCTGATTGCGACCCGCGAAGAATGG + portion: 1.0 + - label: BCR spike-in-1b + seq: TGTGCGAGATGCGCCATGGATCTGCTCATTGCGACCCGCGAAGAATGG + portion: 1.0 + - label: BCR spike-in-1c + seq: TGTGCGAGATGCGCTATGGACCTGCTAATTGCGACCCGCGAAGAATGG + portion: 1.0 + - label: BCR spike-in-2a + seq: TGTGCGAGAGGCCGCGCCCCGGAAAGCGCCCTGGCCGATTGG + portion: 1.0 + - label: BCR spike-in-2b + seq: TGTGCGAGAGGCCGGGCCCCCGAAAGGGCCCTGGCCGATTGG + portion: 1.0 + - label: BCR spike-in-2c + seq: TGTGCGAGAGGCCGAGCCCCTGAAAGAGCCCTGGCCGATTGG + portion: 1.0 + - label: BCR spike-in-3a + seq: TGTGCGAGAGCTCCTCCTCTTGAAACTATTAATATTTCTTGG + portion: 1.0 + - label: BCR spike-in-3b + seq: TGTGCGAGAGCTCCACCTCTAGAAACGATTAATATTTCTTGG + portion: 1.0 + - label: BCR spike-in-3c + seq: TGTGCGAGAGCTCCGCCTCTCGAAACAATTAATATTTCTTGG + portion: 1.0 + - label: BCR spike-in-4a + seq: TGTGCGAGATATGCGAACGATCGCGAAGCGAGCTATTGG + portion: 1.0 + - label: BCR spike-in-4b + seq: TGTGCGAGATATGCCAACGAACGCGATGCGAGCTATTGG + portion: 1.0 + - label: BCR spike-in-4c + seq: TGTGCGAGATATGCAAACGAGCGCGACGCGAGCTATTGG + portion: 1.0 + - label: BCR spike-in-5a + seq: TGTGCGAGACGCGAAATCAACGACGAAGAACGCTGG + portion: 1.0 + - label: BCR spike-in-5b + seq: TGTGCGAGACGCGATATCAAGGACGATGAACGCTGG + portion: 1.0 + - label: BCR spike-in-5c + seq: TGTGCGAGACGCGAGATCAAAGACGACGAACGCTGG + portion: 1.0 + - label: BCR spike-in-6a + seq: TGTGCGAGAAACGAGTGGTGGCACATCAGCAAGGAGTACTGG + portion: 1.0 + - label: BCR spike-in-6b + seq: TGTGCGAGAAACGACTGGTGGCAGATCAGGAAGGAGTACTGG + portion: 1.0 + - label: BCR spike-in-6c + seq: TGTGCGAGAAACGAATGGTGGCATATCAGAAAGGAGTACTGG + portion: 1.0 + - label: BCR spike-in-7a + seq: TGTGCGAGATTTGCGACCTGCCATATTTGCAAAGAAAACTGG + portion: 1.0 + - label: BCR spike-in-7b + seq: TGTGCGAGATTTGCCACCTGCCAAATTTGCAATGAAAACTGG + portion: 1.0 + - label: BCR spike-in-7c + seq: TGTGCGAGATTTGCAACCTGCCAGATTTGCAACGAAAACTGG + portion: 1.0 + - label: BCR spike-in-8a + seq: TGTGCGAGAAGCGAGCTGTTCATCGAGAGCACCATCTGCAAGTGG + portion: 1.0 + - label: BCR spike-in-8b + seq: TGTGCGAGAAGCGACCTGTTCATGGAGAGCAGCATCTGCAAGTGG + portion: 1.0 + - label: BCR spike-in-8c + seq: TGTGCGAGAAGCGAACTGTTCATTGAGAGCATCATCTGCAAGTGG + portion: 1.0 + - label: BCR spike-in-9a + seq: TGTGCGAGAAGCAGCCAGATCAGGAGGGAGCTGTGG + portion: 1.0 + - label: BCR spike-in-9b + seq: TGTGCGAGAAGCAGGCAGATGAGGAGCGAGCTGTGG + portion: 1.0 + - label: BCR spike-in-9c + seq: TGTGCGAGAAGCAGACAGATTAGGAGTGAGCTGTGG + portion: 1.0 + - label: BCR spike-in-10a + seq: TGTGCGAGAGCCAACGGCGAGCTGTTCGCCCTGCTGAGCTGG + portion: 1.0 + - label: BCR spike-in-10b + seq: TGTGCGAGAGCCAAGGGCGACCTGTTCGCGCTGCTGAGCTGG + portion: 1.0 + - label: BCR spike-in-10c + seq: TGTGCGAGAGCCAAAGGCGATCTGTTCGCACTGCTGAGCTGG + portion: 1.0 + - label: BCR spike-in-11a + seq: TGTGCGAGAGGCAGGGCCTACGCCCTGGACGAGAGGTGG + portion: 1.0 + - label: BCR spike-in-11b + seq: TGTGCGAGAGGCAGCGCCTACGCGCTGGATGAGAGGTGG + portion: 1.0 + - label: BCR spike-in-11c + seq: TGTGCGAGAGGCAGAGCCTACGCACTGGAGGAGAGGTGG + portion: 1.0 + - label: BCR spike-in-12a + seq: TGTGCGAGAAGCTGGATCAGCAGCTGCCATGCCCGCGATTGG + portion: 1.0 + - label: BCR spike-in-12b + seq: TGTGCGAGAAGCTGGATGAGCAGATGCCATCCCCGCGATTGG + portion: 1.0 + - label: BCR spike-in-12c + seq: TGTGCGAGAAGCTGGATGAGCAGATGCCATTCCCGCGATTGG + portion: 1.0 + - label: BCR spike-in-13a + seq: TGTGCGAGAATCATGCCGGCCTTGGCCCCGGCCTTGGCCTGG + portion: 1.0 + - label: BCR spike-in-13b + seq: TGTGCGAGAATCATCCCGGCGTTGGCCCCAGCCTTGGCCTGG + portion: 1.0 + - label: BCR spike-in-13c + seq: TGTGCGAGAATCATACCGGCATTGGCCCCCGCCTTGGCCTGG + portion: 1.0 + - label: BCR spike-in-14a + seq: TGTGCGAGAGATCGCAGCGCGATTCGCGAAGATGATTATTGG + portion: 1.0 + - label: BCR spike-in-14b + seq: TGTGCGAGAGATCGGAGCGCAATTCGCGAGGATGATTATTGG + portion: 1.0 + - label: BCR spike-in-14c + seq: TGTGCGAGAGATCGTAGCGCCATTCGCGATGATGATTATTGG + portion: 1.0 + - label: BCR spike-in-15a + seq: TGTATGCAACTAGGTAATAGTTGGATCACTTTT + portion: 1.0 + - label: BCR spike-in-15b + seq: TGTATGCAACTAGGAAATAGGTGCATCACTTTT + portion: 1.0 + - label: BCR spike-in-15c + seq: TGTATGCAACTAGGGAATAGATGTATCACTTTT + portion: 1.0 + - label: BCR spike-in-16a + seq: TGCGGGTCTTCCCTGTCAGGCTCATGCGTCTTC + portion: 1.0 + - label: BCR spike-in-16b + seq: TGCGGGTCTTCCCTCTCAGGATCATGGGTCTTC + portion: 1.0 + - label: BCR spike-in-16c + seq: TGCGGGTCTTCCCTATCAGGGTCATGTGTCTTC + portion: 1.0 + label: "" diff --git a/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-16-4-1.yaml b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-16-4-1.yaml new file mode 100644 index 000000000..9f516f771 --- /dev/null +++ b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-16-4-1.yaml @@ -0,0 +1,602 @@ +flags: [] +pipeline: + - align + - refineTagsAndSort + - assemble + - exportClones +align: + species: hsa + libraryName: default + trimmingQualityThreshold: 10 + trimmingWindowSize: 6 + chains: ALL + replaceWildcards: true + overlapPairedReads: true + bamDropNonVDJ: false + writeFailedAlignments: false + tagPattern: "^(R1:*) \\ ^(UMI:N{18})(R2:*)" + tagUnstranded: false + tagMaxBudget: 10.0 + headerExtractors: [] + readIdAsCellTag: false + sampleTable: null + tagsValidations: + - type: MustContainTagType + tagType: Molecule + splitBySample: true + limit: null + parameters: + vParameters: + geneFeatureToAlign: VTranscriptWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: true + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 98 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 8 + mapperMaxClusters: 11 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 103 + mapperRelativeMinScore: 0.81 + mapperMinSeedsDistance: 8 + mapperMaxSeedsDistance: 8 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 2 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + dParameters: + geneFeatureToAlign: DRegionWithP + relativeMinScore: 0.85 + absoluteMinScore: 25.0 + maxHits: 3 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + jParameters: + geneFeatureToAlign: JRegionWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: false + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 95 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 4 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 100 + mapperRelativeMinScore: 0.8 + mapperMinSeedsDistance: 5 + mapperMaxSeedsDistance: 5 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + cParameters: + geneFeatureToAlign: CExon1 + minSumScore: 40 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner + mapperKValue: 5 + floatingLeftBound: false + floatingRightBound: true + mapperAbsoluteMinScore: 1.5 + mapperRelativeMinScore: 0.75 + mapperMatchScore: 1.0 + mapperMismatchPenalty: -0.1 + mapperOffsetShiftPenalty: -0.3 + mapperMinSeedsDistance: 4 + mapperMaxSeedsDistance: 10 + minAlignmentLength: 15 + maxAdjacentIndels: 2 + alignmentStopPenalty: -1000 + absoluteMinScore: 40.0 + relativeMinScore: 0.87 + maxHits: 7 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + vjAlignmentOrder: VThenJ + libraryStructure: Unknown + includeDScore: false + includeCScore: false + minSumScore: 120.0 + relativeMinVFR3CDR3Score: 0.7 + allowPartialAlignments: false + allowNoCDR3PartAlignments: false + allowChimeras: false + readsLayout: DirectOnly + mergerParameters: + qualityMergingAlgorithm: MaxSubtraction + partsLayout: null + minimalOverlap: 13 + minimalMatchQualitySum: 364 + maxQuality: 50 + minimalIdentity: 0.7 + identityType: MinimalQualityWeighted + fixSeed: true + alignmentBoundaryTolerance: 5 + minChimeraDetectionScore: 120 + vjOverlapWindow: 3 + saveOriginalSequence: false + saveOriginalReads: false + smartForceEdgeAlignments: true +refineTagsAndSort: + whitelists: {} + runCorrection: true + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: + type: filter_groups + groupingKeys: + - allTags:Molecule + predicates: + - metrics: + - type: group_metric_sum_weight + reportHist: + log: true + binNumber: 0 + minBinWidth: 0.2 + multiplyWeightByKey: false + operator: + type: group_operator_lowest_threshold + operators: + - type: group_operator_advanced_thresholding + algo: + type: otsu + histTransformers: + - type: inflate + fraction: 0.15 + value: 1.0 + logX: true + minimalSample: 20 + fallbackThreshold: 1.0 + accept: High + - type: group_operator_cumtop + share: 0.85 + round: Down + accept: High + accept: High + expectedSorting: [] + requiredSequences: [] + expectedSorting: + - allTags:Molecule + requiredSequences: [] +assemblePartial: + overlappedOnly: false + dropPartial: false + cellLevel: false + parameters: + kValue: 12 + kOffset: -7 + minimalAssembleOverlap: 12 + minimalNOverlap: 7 + minimalNOverlapShare: 0.65 + minimalAlignmentMergeIdentity: 0.85 + mergerParameters: + qualityMergingAlgorithm: SumSubtraction + partsLayout: CollinearDirect + minimalOverlap: 20 + minimalMatchQualitySum: 0 + maxQuality: 45 + minimalIdentity: 0.95 + identityType: Unweighted + maxLeftParts: 256000 + maxLeftMatches: 6144 +extend: + vAnchor: CDR3Begin + jAnchor: FR4Begin + minimalVScore: 50 + minimalJScore: 50 +assemble: + sortBySequence: false + clnaOutput: false + cellLevel: false + consensusAssemblerParameters: + assembler: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -14 + minAlignmentScore: 40 + maxNormalizedAlignmentPenalty: 0.2 + trimMinimalSumQuality: 0 + trimReferenceRegion: false + maxQuality: 45 + maxIterations: 6 + minAltSeedQualityScore: 11 + minAltSeedNormalizedPenalty: 0.35 + altSeedPenaltyTolerance: 0.5 + minRecordSharePerConsensus: 0.2 + minRecordsPerConsensus: 0 + minRecursiveRecordShare: 0.4 + minQualityScore: 0 + maxConsensuses: 3 + minTagSuffixShare: 0.0 + isolateChains: false + cloneAssemblerParameters: + assemblingFeatures: + - CDR3 + minimalClonalSequenceLength: 12 + qualityAggregationType: BetaScore + cloneClusteringParameters: + searchDepth: 2 + allowedMutationsInNRegions: 1 + searchParameters: twoMismatchesOrIndels + clusteringFilter: + type: advanced + correctionPower: 0.001 + backgroundSubstitutionRate: 5.0E-4 + backgroundIndelRate: 2.0E-4 + cloneFactoryParameters: + vParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null + separateByV: false + separateByJ: false + separateByC: true + maximalPreClusteringRatio: 1.0 + preClusteringScoreFilteringRatio: 2.0 + preClusteringCountFilteringRatio: 2.0 + addReadsCountOnClustering: false + badQualityThreshold: 15 + maxBadPointsPercent: 0.7 + mappingThreshold: 2of5 + minimalQuality: 20 + postFilters: null + inferMinRecordsPerConsensus: true +assembleContigs: + ignoreTags: false + parameters: + branchingMinimalQualityShare: 0.1 + branchingMinimalSumQuality: 60 + decisiveBranchingSumQualityThreshold: 120 + alignedSequenceEdgeDelta: 3 + alignmentEdgeRegionSize: 7 + minimalNonEdgePointsFraction: 0.25 + minimalMeanNormalizedQuality: 5.0 + outputMinimalQualityShare: 0.75 + outputMinimalSumQuality: 0 + subCloningRegions: null + assemblingRegions: null + postFiltering: + type: NoFiltering + trimmingParameters: + averageQualityThreshold: 10.0 + windowSize: 8 + minimalContigLength: 20 + alignedRegionsOnly: false + discardAmbiguousNucleotideCalls: false +exportAlignments: + chains: ALL + noHeader: false + fields: + - field: -readIds + - field: -tags + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + - field: -topChains +exportClones: + splitByTagType: null + filterOutOfFrames: false + filterStops: false + chains: ALL + noHeader: false + fields: + - field: -cloneId + - field: -readCount + - field: -readFraction + - field: -uniqueTagCount + args: + - Molecule + - field: -uniqueTagFraction + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + splitFilesBy: + - chain + groupClonesBy: [] +qc: + checks: + - type: SuccessfullyAlignedReads + upper: 0.85 + middle: 0.7 + label: Successfully aligned reads + - type: OffTargetReads + upper: 0.2 + middle: 0.1 + label: Off target (non TCR/IG) reads + - type: ReadsWithNoVOrJHits + upper: 0.2 + middle: 0.1 + label: Reads with no V or J hits + - type: AlignmentsWithNoAssemblingFeature + upper: 0.15 + middle: 0.05 + label: Alignments without assembling feature + - type: ReadsWithNoBarcode + upper: 0.2 + middle: 0.1 + label: Reads with no barcode + - type: ReadsDroppedInTagRefinement + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags error correction and filtering + - type: TagArtificialDiversityEliminated + tag: UMI + upper: 0.5 + middle: 0.3 + label: UMIs artificial diversity eliminated + - type: ReadsDroppedInTagCorrection + tag: UMI + upper: 0.1 + middle: 0.05 + label: Reads dropped in UMI error correction and whitelist + - type: ReadsDroppedInTagFiltering + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags filtering + - type: TagGroupsWithNoAssemblingFeature + upper: 0.2 + middle: 0.1 + label: Tag groups with no assembling feature + - type: TagGroupsWithMultipleConsensuses + upper: 0.1 + middle: 0.01 + label: Barcode collisions in clonotype assembly + - type: UnassignedAlignments + upper: 0.1 + middle: 0.05 + label: Unassigned alignments in clonotype assembly + - type: ReadsUsedInClonotypes + upper: 0.9 + middle: 0.7 + label: Reads used in clonotypes + - type: AlignmentsDroppedLowQuality + upper: 0.05 + middle: 0.01 + label: Alignments dropped due to low sequence quality + - type: ClonesDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Clones dropped in post-filtering + - type: AlignmentsDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Alignments dropped in clones post-filtering + - type: OverlappedReadsMoreBetter + upper: 0.9 + middle: 0.8 + label: Overlapped paired-end reads + - type: SpikeInControls + alphabet: Nucleotide + geneFeature: CDR3 + error: 0.15 + abundanceMeasure: Molecule + spikeIns: + - label: BCR Spike-in-1a + seq: tgtgcgagatgcgcgatggaactgctgattgcgacccgcgaagaatgg + portion: 16.0 + - label: BCR Spike-in-1b + seq: tgtgcgagatgcgccatggatctgctcattgcgacccgcgaagaatgg + portion: 4.0 + - label: BCR Spike-in-1c + seq: tgtgcgagatgcgctatggacctgctaattgcgacccgcgaagaatgg + portion: 1.0 + - label: BCR Spike-in-2a + seq: tgtgcgagaggccgcgccccggaaagcgccctggccgattgg + portion: 16.0 + - label: BCR Spike-in-2b + seq: tgtgcgagaggccgggcccccgaaagggccctggccgattgg + portion: 4.0 + - label: BCR Spike-in-2c + seq: tgtgcgagaggccgagccccctgaaagagccctggccgattgg + portion: 1.0 + - label: BCR Spike-in-3a + seq: tgtgcgagagctcctcctcttgaaactattaatatttcttgg + portion: 16.0 + - label: BCR Spike-in-3b + seq: tgtgcgagagctccacctctagaaacgattaatatttcttgg + portion: 4.0 + - label: BCR Spike-in-3c + seq: tgtgcgagagctccgcctctcgaaacaattaatatttcttgg + portion: 1.0 + - label: BCR Spike-in-4a + seq: tgtgcgagatatgcgaacgatcgcgaagcgagctattgg + portion: 16.0 + - label: BCR Spike-in-4b + seq: tgtgcgagatatgccaacgaacgcgatgcgagctattgg + portion: 4.0 + - label: BCR Spike-in-4c + seq: tgtgcgagatatgcaaacgagcgcgacgcgagctattgg + portion: 1.0 + - label: BCR Spike-in-5a + seq: tgtgcgagacgcgaaatcaacgacgaagaacgctgg + portion: 16.0 + - label: BCR Spike-in-5b + seq: tgtgcgagacgcgatatcaaggacgatgaacgctgg + portion: 4.0 + - label: BCR Spike-in-5c + seq: tgtgcgagacgcgagatcaaagacgacgaacgctgg + portion: 1.0 + - label: BCR Spike-in-6a + seq: tgtgcgagaaacgagtggtggcacatcagcaaggagtactgg + portion: 16.0 + - label: BCR Spike-in-6b + seq: tgtgcgagaaacgactggtggcagatcaggaaggagtactgg + portion: 4.0 + - label: BCR Spike-in-6c + seq: tgtgcgagaaacgactggtggcagatcaggaaggagtactgg + portion: 1.0 + - label: BCR Spike-in-7a + seq: tgtgcgagatttgcgacctgccatatttgcaaagaaaactgg + portion: 16.0 + - label: BCR Spike-in-7b + seq: tgtgcgagatttgccacctgccaaatttgcaatgaaaactgg + portion: 4.0 + - label: BCR Spike-in-7c + seq: tgtgcgagatttgcaacctgccagatttgcaacgaaaactgg + portion: 1.0 + - label: BCR Spike-in-8a + seq: tgtgcgagaagcgagctgttcatcgagagcaccatctgcaagtgg + portion: 16.0 + - label: BCR Spike-in-8b + seq: tgtgcgagaagcgacctgttcatggagagcagcatctgcaagtgg + portion: 4.0 + - label: BCR Spike-in-8c + seq: tgtgcgagaagcgaactgttcattgagagcatcatctgcaagtgg + portion: 1.0 + - label: BCR Spike-in-9a + seq: tgtgcgagaagcagccagatcaggagggagctgtgg + portion: 16.0 + - label: BCR Spike-in-9b + seq: tgtgcgagaagcaggcagatgaggagcgagctgtgg + portion: 4.0 + - label: BCR Spike-in-9c + seq: tgtgcgagaagcagacagattaggagtgagctgtgg + portion: 1.0 + - label: BCR Spike-in-10a + seq: tgtgcgagagccaacggcgagctgttcgccctgctgagctgg + portion: 16.0 + - label: BCR Spike-in-10b + seq: tgtgcgagagccaagggcgacctgttcgcgctgctgagctgg + portion: 4.0 + - label: BCR Spike-in-10c + seq: tgtgcgagagccaaaggcgatctgttcgcactgctgagctgg + portion: 1.0 + - label: BCR Spike-in-11a + seq: tgtgcgagaggcagggcctacgccctggacgagaggtgg + portion: 16.0 + - label: BCR Spike-in-11b + seq: tgtgcgagaggcagcgcctacgcgctggatgagaggtgg + portion: 4.0 + - label: BCR Spike-in-11c + seq: tgtgcgagaggcagagcctacgcactggaggagaggtgg + portion: 1.0 + - label: BCR Spike-in-12a + seq: tgtgcgagaagctggatcagcagctgccatgcccgcgattgg + portion: 16.0 + - label: BCR Spike-in-12b + seq: tgtgcgagaagctggatgagcagatgccatccccgcgattgg + portion: 4.0 + - label: BCR Spike-in-12c + seq: tgtgcgagaagctggatgagcagatgccattcccgcgattgg + portion: 1.0 + - label: BCR Spike-in-13a + seq: tgtgcgagaatcatgccggccttggccccggccttggcctgg + portion: 16.0 + - label: BCR Spike-in-13b + seq: tgtgcgagaatcatcccggcgttggccccagccttggcctgg + portion: 4.0 + - label: BCR Spike-in-13c + seq: tgtgcgagaatcataccggcattggcccccgccttggcctgg + portion: 1.0 + - label: BCR Spike-in-14a + seq: tgtgcgagagatcgcagcgcgattcgcgaagatgattattgg + portion: 16.0 + - label: BCR Spike-in-14b + seq: tgtgcgagagatcggagcgcaattcgcgaggatgattattgg + portion: 4.0 + - label: BCR Spike-in-14c + seq: tgtgcgagagatcgtagcgccattcgcgatgatgattattgg + portion: 1.0 + - label: BCR Spike-in-15a + seq: TGTATGCAACTAGGTAATAGTTGGATCACTTTT + portion: 16.0 + - label: BCR Spike-in-15b + seq: TGTATGCAACTAGGAAATAGGTGCATCACTTTT + portion: 4.0 + - label: BCR Spike-in-15c + seq: TGTATGCAACTAGGGAATAGATGTATCACTTTT + portion: 1.0 + - label: BCR Spike-in-16a + seq: TGCGGGTCTTCCCTGTCAGGCTCATGCGTCTTC + portion: 16.0 + - label: BCR Spike-in-16b + seq: TGCGGGTCTTCCCTCTCAGGATCATGGGTCTTC + portion: 4.0 + - label: BCR Spike-in-16c + seq: TGCGGGTCTTCCCTATCAGGGTCATGTGTCTTC + portion: 1.0 + label: "" diff --git a/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-1-1-1.yaml b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-1-1-1.yaml new file mode 100644 index 000000000..c5964c0ac --- /dev/null +++ b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-1-1-1.yaml @@ -0,0 +1,575 @@ +flags: [] +pipeline: + - align + - refineTagsAndSort + - assemble + - exportClones +align: + species: hsa + libraryName: default + trimmingQualityThreshold: 10 + trimmingWindowSize: 6 + chains: ALL + replaceWildcards: true + overlapPairedReads: true + bamDropNonVDJ: false + writeFailedAlignments: false + tagPattern: "^(R1:*) \\ ^(UMI:N{18})(R2:*)" + tagUnstranded: false + tagMaxBudget: 10.0 + headerExtractors: [] + readIdAsCellTag: false + sampleTable: null + tagsValidations: + - type: MustContainTagType + tagType: Molecule + splitBySample: true + limit: null + parameters: + vParameters: + geneFeatureToAlign: VTranscriptWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: true + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 98 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 8 + mapperMaxClusters: 11 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 103 + mapperRelativeMinScore: 0.81 + mapperMinSeedsDistance: 8 + mapperMaxSeedsDistance: 8 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 2 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + dParameters: + geneFeatureToAlign: DRegionWithP + relativeMinScore: 0.85 + absoluteMinScore: 25.0 + maxHits: 3 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + jParameters: + geneFeatureToAlign: JRegionWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: false + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 95 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 4 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 100 + mapperRelativeMinScore: 0.8 + mapperMinSeedsDistance: 5 + mapperMaxSeedsDistance: 5 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + cParameters: + geneFeatureToAlign: CExon1 + minSumScore: 40 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner + mapperKValue: 5 + floatingLeftBound: false + floatingRightBound: true + mapperAbsoluteMinScore: 1.5 + mapperRelativeMinScore: 0.75 + mapperMatchScore: 1.0 + mapperMismatchPenalty: -0.1 + mapperOffsetShiftPenalty: -0.3 + mapperMinSeedsDistance: 4 + mapperMaxSeedsDistance: 10 + minAlignmentLength: 15 + maxAdjacentIndels: 2 + alignmentStopPenalty: -1000 + absoluteMinScore: 40.0 + relativeMinScore: 0.87 + maxHits: 7 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + vjAlignmentOrder: VThenJ + libraryStructure: Unknown + includeDScore: false + includeCScore: false + minSumScore: 120.0 + relativeMinVFR3CDR3Score: 0.7 + allowPartialAlignments: false + allowNoCDR3PartAlignments: false + allowChimeras: false + readsLayout: DirectOnly + mergerParameters: + qualityMergingAlgorithm: MaxSubtraction + partsLayout: null + minimalOverlap: 13 + minimalMatchQualitySum: 364 + maxQuality: 50 + minimalIdentity: 0.7 + identityType: MinimalQualityWeighted + fixSeed: true + alignmentBoundaryTolerance: 5 + minChimeraDetectionScore: 120 + vjOverlapWindow: 3 + saveOriginalSequence: false + saveOriginalReads: false + smartForceEdgeAlignments: true +refineTagsAndSort: + whitelists: {} + runCorrection: true + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: + type: filter_groups + groupingKeys: + - allTags:Molecule + predicates: + - metrics: + - type: group_metric_sum_weight + reportHist: + log: true + binNumber: 0 + minBinWidth: 0.2 + multiplyWeightByKey: false + operator: + type: group_operator_lowest_threshold + operators: + - type: group_operator_advanced_thresholding + algo: + type: otsu + histTransformers: + - type: inflate + fraction: 0.15 + value: 1.0 + logX: true + minimalSample: 20 + fallbackThreshold: 1.0 + accept: High + - type: group_operator_cumtop + share: 0.85 + round: Down + accept: High + accept: High + expectedSorting: [] + requiredSequences: [] + expectedSorting: + - allTags:Molecule + requiredSequences: [] +assemblePartial: + overlappedOnly: false + dropPartial: false + cellLevel: false + parameters: + kValue: 12 + kOffset: -7 + minimalAssembleOverlap: 12 + minimalNOverlap: 7 + minimalNOverlapShare: 0.65 + minimalAlignmentMergeIdentity: 0.85 + mergerParameters: + qualityMergingAlgorithm: SumSubtraction + partsLayout: CollinearDirect + minimalOverlap: 20 + minimalMatchQualitySum: 0 + maxQuality: 45 + minimalIdentity: 0.95 + identityType: Unweighted + maxLeftParts: 256000 + maxLeftMatches: 6144 +extend: + vAnchor: CDR3Begin + jAnchor: FR4Begin + minimalVScore: 50 + minimalJScore: 50 +assemble: + sortBySequence: false + clnaOutput: false + cellLevel: false + consensusAssemblerParameters: + assembler: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -14 + minAlignmentScore: 40 + maxNormalizedAlignmentPenalty: 0.2 + trimMinimalSumQuality: 0 + trimReferenceRegion: false + maxQuality: 45 + maxIterations: 6 + minAltSeedQualityScore: 11 + minAltSeedNormalizedPenalty: 0.35 + altSeedPenaltyTolerance: 0.5 + minRecordSharePerConsensus: 0.2 + minRecordsPerConsensus: 0 + minRecursiveRecordShare: 0.4 + minQualityScore: 0 + maxConsensuses: 3 + minTagSuffixShare: 0.0 + isolateChains: false + cloneAssemblerParameters: + assemblingFeatures: + - CDR3 + minimalClonalSequenceLength: 12 + qualityAggregationType: BetaScore + cloneClusteringParameters: + searchDepth: 2 + allowedMutationsInNRegions: 1 + searchParameters: twoMismatchesOrIndels + clusteringFilter: + type: advanced + correctionPower: 0.001 + backgroundSubstitutionRate: 5.0E-4 + backgroundIndelRate: 2.0E-4 + cloneFactoryParameters: + vParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null + separateByV: false + separateByJ: false + separateByC: true + maximalPreClusteringRatio: 1.0 + preClusteringScoreFilteringRatio: 2.0 + preClusteringCountFilteringRatio: 2.0 + addReadsCountOnClustering: false + badQualityThreshold: 15 + maxBadPointsPercent: 0.7 + mappingThreshold: 2of5 + minimalQuality: 20 + postFilters: null + inferMinRecordsPerConsensus: true +assembleContigs: + ignoreTags: false + parameters: + branchingMinimalQualityShare: 0.1 + branchingMinimalSumQuality: 60 + decisiveBranchingSumQualityThreshold: 120 + alignedSequenceEdgeDelta: 3 + alignmentEdgeRegionSize: 7 + minimalNonEdgePointsFraction: 0.25 + minimalMeanNormalizedQuality: 5.0 + outputMinimalQualityShare: 0.75 + outputMinimalSumQuality: 0 + subCloningRegions: null + assemblingRegions: null + postFiltering: + type: NoFiltering + trimmingParameters: + averageQualityThreshold: 10.0 + windowSize: 8 + minimalContigLength: 20 + alignedRegionsOnly: false + discardAmbiguousNucleotideCalls: false +exportAlignments: + chains: ALL + noHeader: false + fields: + - field: -readIds + - field: -tags + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + - field: -topChains +exportClones: + splitByTagType: null + filterOutOfFrames: false + filterStops: false + chains: ALL + noHeader: false + fields: + - field: -cloneId + - field: -readCount + - field: -readFraction + - field: -uniqueTagCount + args: + - Molecule + - field: -uniqueTagFraction + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + splitFilesBy: + - chain + groupClonesBy: [] +qc: + checks: + - type: SuccessfullyAlignedReads + upper: 0.85 + middle: 0.7 + label: Successfully aligned reads + - type: OffTargetReads + upper: 0.2 + middle: 0.1 + label: Off target (non TCR/IG) reads + - type: ReadsWithNoVOrJHits + upper: 0.2 + middle: 0.1 + label: Reads with no V or J hits + - type: AlignmentsWithNoAssemblingFeature + upper: 0.15 + middle: 0.05 + label: Alignments without assembling feature + - type: ReadsWithNoBarcode + upper: 0.2 + middle: 0.1 + label: Reads with no barcode + - type: ReadsDroppedInTagRefinement + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags error correction and filtering + - type: TagArtificialDiversityEliminated + tag: UMI + upper: 0.5 + middle: 0.3 + label: UMIs artificial diversity eliminated + - type: ReadsDroppedInTagCorrection + tag: UMI + upper: 0.1 + middle: 0.05 + label: Reads dropped in UMI error correction and whitelist + - type: ReadsDroppedInTagFiltering + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags filtering + - type: TagGroupsWithNoAssemblingFeature + upper: 0.2 + middle: 0.1 + label: Tag groups with no assembling feature + - type: TagGroupsWithMultipleConsensuses + upper: 0.1 + middle: 0.01 + label: Barcode collisions in clonotype assembly + - type: UnassignedAlignments + upper: 0.1 + middle: 0.05 + label: Unassigned alignments in clonotype assembly + - type: ReadsUsedInClonotypes + upper: 0.9 + middle: 0.7 + label: Reads used in clonotypes + - type: AlignmentsDroppedLowQuality + upper: 0.05 + middle: 0.01 + label: Alignments dropped due to low sequence quality + - type: ClonesDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Clones dropped in post-filtering + - type: AlignmentsDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Alignments dropped in clones post-filtering + - type: OverlappedReadsMoreBetter + upper: 0.9 + middle: 0.8 + label: Overlapped paired-end reads + - type: SpikeInControls + alphabet: Nucleotide + geneFeature: CDR3 + error: 0.15 + abundanceMeasure: Molecule + spikeIns: + - label: TCR Spike-in-1a + seq: TGCAGCGCAGAGCACACCGCCAACAATGAGCAGTTCTTC + portion: 1.0 + - label: TCR Spike-in-1b + seq: TGCAGCGCAGAGCAGACCGCGAACAACGAGCAGTTCTTC + portion: 1.0 + - label: TCR Spike-in-1c + seq: TGCAGCGCAGAGCAAACCGCTAACAAGGAGCAGTTCTTC + portion: 1.0 + - label: TCR Spike-in-2a + seq: TGTGCCTGGGAACGCGAAAGCGCTGACACTGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-2b + seq: TGTGCCTGGGAACGGGAAAGAGCTGAGACTGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-2c + seq: TGTGCCTGGGAACGAGAAAGGGCTGAAACTGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-3a + seq: TGTGCCAGCGAGAGGCAGATCATGCTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-3b + seq: TGTGCCAGCGAGAGCCAGATGATACTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-3c + seq: TGTGCCAGCGAGAGACAGATTATCCTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-4a + seq: TGCAGTGCTAGTGCCAAGATCAAGAGTGAAAAACTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-4b + seq: TGCAGTGCTAGTGCGAAGATGAAGAGTGAGAAACTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-4c + seq: TGCAGTGCTAGTGCAAAGATAAAGAGTGACAAACTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-5a + seq: TGTGCCAGCAACACCATCAGCTACGAGCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-5b + seq: TGTGCCAGCAACACGATCAGATACGATCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-5c + seq: TGTGCCAGCAACACAATCAGGTACGACCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-6a + seq: TGCAGTGCTAACCTGTATTGCGGGAACACTGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-6b + seq: TGCAGTGCTAACCTCTATTGCGGAAACACGGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-6c + seq: TGCAGTGCTAACCTATATTGCGGTAACACAGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-7a + seq: TGCGCCAGCAGCGACGCCACCCCGTACGAGCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-7b + seq: TGCGCCAGCAGCGAGGCCACACCGTACGATCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-7c + seq: TGCGCCAGCAGCGAAGCCACGCCGTACGACCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-8a + seq: TGTGCCAGCTTCGCCTTCGAAGCCGGGGAGCTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-8b + seq: TGTGCCAGCTTCGCGTTCGATGCCGGGGACCTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-8c + seq: TGTGCCAGCTTCGCATTCGACGCCGGGGATCTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-9a + seq: TGTGCCTGGAGTTTGCCGTTGGCCGGTAATTCACCCCTCCACTTT + portion: 1.0 + - label: TCR Spike-in-9b + seq: TGTGCCTGGAGTTTCCCGTTGGCGGGTAATTCTCCCCTCCACTTT + portion: 1.0 + - label: TCR Spike-in-9c + seq: TGTGCCTGGAGTTTACCGTTGGCTGGTAATTCGCCCCTCCACTTT + portion: 1.0 + - label: TCR Spike-in-10a + seq: TGTGCCAGCTGCATGGCCACCGGAAACACCATATATTTT + portion: 1.0 + - label: TCR Spike-in-10b + seq: TGTGCCAGCTGCATCGCCACGGGAAATACCATATATTTT + portion: 1.0 + - label: TCR Spike-in-10c + seq: TGTGCCAGCTGCATAGCCACTGGAAAGACCATATATTTT + portion: 1.0 + - label: TCR Spike-in-11a + seq: TGCGGCACCGAGGAGAGGCCGAACACAGGCTTTCAGAAACTTGTATTT + portion: 1.0 + - label: TCR Spike-in-11b + seq: TGCGGCACCGAGGACAGGCCAAACACAGGGTTTCAGAAACTTGTATTT + portion: 1.0 + - label: TCR Spike-in-11c + seq: TGCGGCACCGAGGAAAGGCCCAACACAGGTTTTCAGAAACTTGTATTT + portion: 1.0 + - label: TCR Spike-in-12a + seq: TGTGCCACCGAGGAGAGCCACTATTATAAGAAACTCTTT + portion: 1.0 + - label: TCR Spike-in-12b + seq: TGTGCCACCGAGGACAGCCAGTATTATAATAAACTCTTT + portion: 1.0 + - label: TCR Spike-in-12c + seq: TGTGCCACCGAGGAAAGCCAATATTATAACAAACTCTTT + portion: 1.0 + - label: TCR Spike-in-13a + seq: TGTGCTCTTCACGCCACCACCGTAACCGATAAACTCATCTTT + portion: 1.0 + - label: TCR Spike-in-13b + seq: TGTGCTCTTCACGCGACCACAGTAACGGATAAACTCATCTTT + portion: 1.0 + - label: TCR Spike-in-13c + seq: TGTGCTCTTCACGCAACCACGGTAACAGATAAACTCATCTTT + portion: 1.0 + label: "" diff --git a/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-16-4-1.yaml b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-16-4-1.yaml new file mode 100644 index 000000000..f46ee58c5 --- /dev/null +++ b/regression/presets/analyze/cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-16-4-1.yaml @@ -0,0 +1,575 @@ +flags: [] +pipeline: + - align + - refineTagsAndSort + - assemble + - exportClones +align: + species: hsa + libraryName: default + trimmingQualityThreshold: 10 + trimmingWindowSize: 6 + chains: ALL + replaceWildcards: true + overlapPairedReads: true + bamDropNonVDJ: false + writeFailedAlignments: false + tagPattern: "^(R1:*) \\ ^(UMI:N{18})(R2:*)" + tagUnstranded: false + tagMaxBudget: 10.0 + headerExtractors: [] + readIdAsCellTag: false + sampleTable: null + tagsValidations: + - type: MustContainTagType + tagType: Molecule + splitBySample: true + limit: null + parameters: + vParameters: + geneFeatureToAlign: VTranscriptWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: true + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 98 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 8 + mapperMaxClusters: 11 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 103 + mapperRelativeMinScore: 0.81 + mapperMinSeedsDistance: 8 + mapperMaxSeedsDistance: 8 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 2 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + dParameters: + geneFeatureToAlign: DRegionWithP + relativeMinScore: 0.85 + absoluteMinScore: 25.0 + maxHits: 3 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + jParameters: + geneFeatureToAlign: JRegionWithP + minSumScore: 150 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner2 + mapperNValue: 8 + mapperKValue: 1 + floatingLeftBound: true + floatingRightBound: false + mapperAbsoluteMinClusterScore: 102 + mapperExtraClusterScore: -38 + mapperMatchScore: 95 + mapperMismatchScore: -14 + mapperOffsetShiftScore: -82 + mapperSlotCount: 6 + mapperMaxClusters: 4 + mapperMaxClusterIndels: 4 + mapperKMersPerPosition: 4 + mapperAbsoluteMinScore: 100 + mapperRelativeMinScore: 0.8 + mapperMinSeedsDistance: 5 + mapperMaxSeedsDistance: 5 + alignmentStopPenalty: 0 + absoluteMinScore: 150 + relativeMinScore: 0.8 + maxHits: 3 + scoring: + type: affine + alphabet: nucleotide + subsMatrix: "simple(match = 10, mismatch = -19)" + gapOpenPenalty: -62 + gapExtensionPenalty: -11 + cParameters: + geneFeatureToAlign: CExon1 + minSumScore: 40 + relativeMinScore: 0.97 + maxHits: 5 + parameters: + type: kaligner + mapperKValue: 5 + floatingLeftBound: false + floatingRightBound: true + mapperAbsoluteMinScore: 1.5 + mapperRelativeMinScore: 0.75 + mapperMatchScore: 1.0 + mapperMismatchPenalty: -0.1 + mapperOffsetShiftPenalty: -0.3 + mapperMinSeedsDistance: 4 + mapperMaxSeedsDistance: 10 + minAlignmentLength: 15 + maxAdjacentIndels: 2 + alignmentStopPenalty: -1000 + absoluteMinScore: 40.0 + relativeMinScore: 0.87 + maxHits: 7 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -9)" + gapPenalty: -12 + vjAlignmentOrder: VThenJ + libraryStructure: Unknown + includeDScore: false + includeCScore: false + minSumScore: 120.0 + relativeMinVFR3CDR3Score: 0.7 + allowPartialAlignments: false + allowNoCDR3PartAlignments: false + allowChimeras: false + readsLayout: DirectOnly + mergerParameters: + qualityMergingAlgorithm: MaxSubtraction + partsLayout: null + minimalOverlap: 13 + minimalMatchQualitySum: 364 + maxQuality: 50 + minimalIdentity: 0.7 + identityType: MinimalQualityWeighted + fixSeed: true + alignmentBoundaryTolerance: 5 + minChimeraDetectionScore: 120 + vjOverlapWindow: 3 + saveOriginalSequence: false + saveOriginalReads: false + smartForceEdgeAlignments: true +refineTagsAndSort: + whitelists: {} + runCorrection: true + parameters: + correctionPower: 0.001 + backgroundSubstitutionRate: 0.001 + backgroundIndelRate: 1.0E-5 + minQuality: 12 + maxSubstitutions: 2 + maxIndels: 2 + maxTotalErrors: 3 + postFilter: + type: filter_groups + groupingKeys: + - allTags:Molecule + predicates: + - metrics: + - type: group_metric_sum_weight + reportHist: + log: true + binNumber: 0 + minBinWidth: 0.2 + multiplyWeightByKey: false + operator: + type: group_operator_lowest_threshold + operators: + - type: group_operator_advanced_thresholding + algo: + type: otsu + histTransformers: + - type: inflate + fraction: 0.15 + value: 1.0 + logX: true + minimalSample: 20 + fallbackThreshold: 1.0 + accept: High + - type: group_operator_cumtop + share: 0.85 + round: Down + accept: High + accept: High + expectedSorting: [] + requiredSequences: [] + expectedSorting: + - allTags:Molecule + requiredSequences: [] +assemblePartial: + overlappedOnly: false + dropPartial: false + cellLevel: false + parameters: + kValue: 12 + kOffset: -7 + minimalAssembleOverlap: 12 + minimalNOverlap: 7 + minimalNOverlapShare: 0.65 + minimalAlignmentMergeIdentity: 0.85 + mergerParameters: + qualityMergingAlgorithm: SumSubtraction + partsLayout: CollinearDirect + minimalOverlap: 20 + minimalMatchQualitySum: 0 + maxQuality: 45 + minimalIdentity: 0.95 + identityType: Unweighted + maxLeftParts: 256000 + maxLeftMatches: 6144 +extend: + vAnchor: CDR3Begin + jAnchor: FR4Begin + minimalVScore: 50 + minimalJScore: 50 +assemble: + sortBySequence: false + clnaOutput: false + cellLevel: false + consensusAssemblerParameters: + assembler: + aAssemblerParameters: + bandWidth: 4 + scoring: + type: linear + alphabet: nucleotide + subsMatrix: "simple(match = 5, mismatch = -4)" + gapPenalty: -14 + minAlignmentScore: 40 + maxNormalizedAlignmentPenalty: 0.2 + trimMinimalSumQuality: 0 + trimReferenceRegion: false + maxQuality: 45 + maxIterations: 6 + minAltSeedQualityScore: 11 + minAltSeedNormalizedPenalty: 0.35 + altSeedPenaltyTolerance: 0.5 + minRecordSharePerConsensus: 0.2 + minRecordsPerConsensus: 0 + minRecursiveRecordShare: 0.4 + minQualityScore: 0 + maxConsensuses: 3 + minTagSuffixShare: 0.0 + isolateChains: false + cloneAssemblerParameters: + assemblingFeatures: + - CDR3 + minimalClonalSequenceLength: 12 + qualityAggregationType: BetaScore + cloneClusteringParameters: + searchDepth: 2 + allowedMutationsInNRegions: 1 + searchParameters: twoMismatchesOrIndels + clusteringFilter: + type: advanced + correctionPower: 0.001 + backgroundSubstitutionRate: 5.0E-4 + backgroundIndelRate: 2.0E-4 + cloneFactoryParameters: + vParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + jParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + cParameters: + maxAlignmentWidthLinear: 5 + maxAlignmentWidthAffine: 500 + dParameters: + relativeMinScore: null + absoluteMinScore: null + maxHits: null + scoring: null + separateByV: false + separateByJ: false + separateByC: true + maximalPreClusteringRatio: 1.0 + preClusteringScoreFilteringRatio: 2.0 + preClusteringCountFilteringRatio: 2.0 + addReadsCountOnClustering: false + badQualityThreshold: 15 + maxBadPointsPercent: 0.7 + mappingThreshold: 2of5 + minimalQuality: 20 + postFilters: null + inferMinRecordsPerConsensus: true +assembleContigs: + ignoreTags: false + parameters: + branchingMinimalQualityShare: 0.1 + branchingMinimalSumQuality: 60 + decisiveBranchingSumQualityThreshold: 120 + alignedSequenceEdgeDelta: 3 + alignmentEdgeRegionSize: 7 + minimalNonEdgePointsFraction: 0.25 + minimalMeanNormalizedQuality: 5.0 + outputMinimalQualityShare: 0.75 + outputMinimalSumQuality: 0 + subCloningRegions: null + assemblingRegions: null + postFiltering: + type: NoFiltering + trimmingParameters: + averageQualityThreshold: 10.0 + windowSize: 8 + minimalContigLength: 20 + alignedRegionsOnly: false + discardAmbiguousNucleotideCalls: false +exportAlignments: + chains: ALL + noHeader: false + fields: + - field: -readIds + - field: -tags + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + - field: -topChains +exportClones: + splitByTagType: null + filterOutOfFrames: false + filterStops: false + chains: ALL + noHeader: false + fields: + - field: -cloneId + - field: -readCount + - field: -readFraction + - field: -uniqueTagCount + args: + - Molecule + - field: -uniqueTagFraction + args: + - Molecule + - field: -targetSequences + - field: -targetQualities + - field: -vHitsWithScore + - field: -dHitsWithScore + - field: -jHitsWithScore + - field: -cHitsWithScore + - field: -vAlignments + - field: -dAlignments + - field: -jAlignments + - field: -cAlignments + - field: -allNFeaturesWithMinQuality + - field: -allAAFeatures + - field: -defaultAnchorPoints + splitFilesBy: + - chain + groupClonesBy: [] +qc: + checks: + - type: SuccessfullyAlignedReads + upper: 0.85 + middle: 0.7 + label: Successfully aligned reads + - type: OffTargetReads + upper: 0.2 + middle: 0.1 + label: Off target (non TCR/IG) reads + - type: ReadsWithNoVOrJHits + upper: 0.2 + middle: 0.1 + label: Reads with no V or J hits + - type: AlignmentsWithNoAssemblingFeature + upper: 0.15 + middle: 0.05 + label: Alignments without assembling feature + - type: ReadsWithNoBarcode + upper: 0.2 + middle: 0.1 + label: Reads with no barcode + - type: ReadsDroppedInTagRefinement + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags error correction and filtering + - type: TagArtificialDiversityEliminated + tag: UMI + upper: 0.5 + middle: 0.3 + label: UMIs artificial diversity eliminated + - type: ReadsDroppedInTagCorrection + tag: UMI + upper: 0.1 + middle: 0.05 + label: Reads dropped in UMI error correction and whitelist + - type: ReadsDroppedInTagFiltering + upper: 0.1 + middle: 0.05 + label: Reads dropped in tags filtering + - type: TagGroupsWithNoAssemblingFeature + upper: 0.2 + middle: 0.1 + label: Tag groups with no assembling feature + - type: TagGroupsWithMultipleConsensuses + upper: 0.1 + middle: 0.01 + label: Barcode collisions in clonotype assembly + - type: UnassignedAlignments + upper: 0.1 + middle: 0.05 + label: Unassigned alignments in clonotype assembly + - type: ReadsUsedInClonotypes + upper: 0.9 + middle: 0.7 + label: Reads used in clonotypes + - type: AlignmentsDroppedLowQuality + upper: 0.05 + middle: 0.01 + label: Alignments dropped due to low sequence quality + - type: ClonesDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Clones dropped in post-filtering + - type: AlignmentsDroppedInPostFiltering + upper: 0.05 + middle: 0.01 + label: Alignments dropped in clones post-filtering + - type: OverlappedReadsMoreBetter + upper: 0.9 + middle: 0.8 + label: Overlapped paired-end reads + - type: SpikeInControls + alphabet: Nucleotide + geneFeature: CDR3 + error: 0.15 + abundanceMeasure: Molecule + spikeIns: + - label: TCR Spike-in-1a + seq: TGCAGCGCAGAGCACACCGCCAACAATGAGCAGTTCTTC + portion: 16.0 + - label: TCR Spike-in-1b + seq: TGCAGCGCAGAGCAGACCGCGAACAACGAGCAGTTCTTC + portion: 4.0 + - label: TCR Spike-in-1c + seq: TGCAGCGCAGAGCAAACCGCTAACAAGGAGCAGTTCTTC + portion: 1.0 + - label: TCR Spike-in-2a + seq: TGTGCCTGGGAACGCGAAAGCGCTGACACTGAAGCTTTCTTT + portion: 16.0 + - label: TCR Spike-in-2b + seq: TGTGCCTGGGAACGGGAAAGAGCTGAGACTGAAGCTTTCTTT + portion: 4.0 + - label: TCR Spike-in-2c + seq: TGTGCCTGGGAACGAGAAAGGGCTGAAACTGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-3a + seq: TGTGCCAGCGAGAGGCAGATCATGCTGTTTTTT + portion: 16.0 + - label: TCR Spike-in-3b + seq: TGTGCCAGCGAGAGCCAGATGATACTGTTTTTT + portion: 4.0 + - label: TCR Spike-in-3c + seq: TGTGCCAGCGAGAGACAGATTATCCTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-4a + seq: TGCAGTGCTAGTGCCAAGATCAAGAGTGAAAAACTGTTTTTT + portion: 16.0 + - label: TCR Spike-in-4b + seq: TGCAGTGCTAGTGCGAAGATGAAGAGTGAGAAACTGTTTTTT + portion: 4.0 + - label: TCR Spike-in-4c + seq: TGCAGTGCTAGTGCAAAGATAAAGAGTGACAAACTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-5a + seq: TGTGCCAGCAACACCATCAGCTACGAGCAGTACTTC + portion: 16.0 + - label: TCR Spike-in-5b + seq: TGTGCCAGCAACACGATCAGATACGATCAGTACTTC + portion: 4.0 + - label: TCR Spike-in-5c + seq: TGTGCCAGCAACACAATCAGGTACGACCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-6a + seq: TGCAGTGCTAACCTGTATTGCGGGAACACTGAAGCTTTCTTT + portion: 16.0 + - label: TCR Spike-in-6b + seq: TGCAGTGCTAACCTCTATTGCGGAAACACGGAAGCTTTCTTT + portion: 4.0 + - label: TCR Spike-in-6c + seq: TGCAGTGCTAACCTATATTGCGGTAACACAGAAGCTTTCTTT + portion: 1.0 + - label: TCR Spike-in-7a + seq: TGCGCCAGCAGCGACGCCACCCCGTACGAGCAGTACTTC + portion: 16.0 + - label: TCR Spike-in-7b + seq: TGCGCCAGCAGCGAGGCCACACCGTACGATCAGTACTTC + portion: 4.0 + - label: TCR Spike-in-7c + seq: TGCGCCAGCAGCGAAGCCACGCCGTACGACCAGTACTTC + portion: 1.0 + - label: TCR Spike-in-8a + seq: TGTGCCAGCTTCGCCTTCGAAGCCGGGGAGCTGTTTTTT + portion: 16.0 + - label: TCR Spike-in-8b + seq: TGTGCCAGCTTCGCGTTCGATGCCGGGGACCTGTTTTTT + portion: 4.0 + - label: TCR Spike-in-8c + seq: TGTGCCAGCTTCGCATTCGACGCCGGGGATCTGTTTTTT + portion: 1.0 + - label: TCR Spike-in-9a + seq: TGTGCCTGGAGTTTGCCGTTGGCCGGTAATTCACCCCTCCACTTT + portion: 16.0 + - label: TCR Spike-in-9b + seq: TGTGCCTGGAGTTTCCCGTTGGCGGGTAATTCTCCCCTCCACTTT + portion: 4.0 + - label: TCR Spike-in-9c + seq: TGTGCCTGGAGTTTACCGTTGGCTGGTAATTCGCCCCTCCACTTT + portion: 1.0 + - label: TCR Spike-in-10a + seq: TGTGCCAGCTGCATGGCCACCGGAAACACCATATATTTT + portion: 16.0 + - label: TCR Spike-in-10b + seq: TGTGCCAGCTGCATCGCCACGGGAAATACCATATATTTT + portion: 4.0 + - label: TCR Spike-in-10c + seq: TGTGCCAGCTGCATAGCCACTGGAAAGACCATATATTTT + portion: 1.0 + - label: TCR Spike-in-11a + seq: TGCGGCACCGAGGAGAGGCCGAACACAGGCTTTCAGAAACTTGTATTT + portion: 16.0 + - label: TCR Spike-in-11b + seq: TGCGGCACCGAGGACAGGCCAAACACAGGGTTTCAGAAACTTGTATTT + portion: 4.0 + - label: TCR Spike-in-11c + seq: TGCGGCACCGAGGAAAGGCCCAACACAGGTTTTCAGAAACTTGTATTT + portion: 1.0 + - label: TCR Spike-in-12a + seq: TGTGCCACCGAGGAGAGCCACTATTATAAGAAACTCTTT + portion: 16.0 + - label: TCR Spike-in-12b + seq: TGTGCCACCGAGGACAGCCAGTATTATAATAAACTCTTT + portion: 4.0 + - label: TCR Spike-in-12c + seq: TGTGCCACCGAGGAAAGCCAATATTATAACAAACTCTTT + portion: 1.0 + - label: TCR Spike-in-13a + seq: TGTGCTCTTCACGCCACCACCGTAACCGATAAACTCATCTTT + portion: 16.0 + - label: TCR Spike-in-13b + seq: TGTGCTCTTCACGCGACCACAGTAACGGATAAACTCATCTTT + portion: 4.0 + - label: TCR Spike-in-13c + seq: TGTGCTCTTCACGCAACCACGGTAACAGATAAACTCATCTTT + portion: 1.0 + label: "" diff --git a/regression/presets/list.txt b/regression/presets/list.txt index d99021f74..d255a9c43 100644 --- a/regression/presets/list.txt +++ b/regression/presets/list.txt @@ -117,10 +117,18 @@ invivoscribe-human-dna-trg-lymphotrack (LymphoTrack TRG Assay Panel) -----Cellecta----- cellecta-mouse-rna-xcr-umi-drivermap-air (DriverMap Adaptive Immune Receptor (AIR) TCR-BCR Profiling Mouse RNA) +cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-16-4-1 (DriverMap Adaptive Immune Receptor (AIR) TCR-BCR Profiling Human RNA with BCR Spike-in mix 16:4:1) + cellecta-human-rna-xcr-full-length-umi-drivermap-air (DriverMap Adaptive Immune Receptor (AIR) Full-length TCR-BCR Profiling Human RNA) cellecta-human-dna-xcr-umi-drivermap-air (DriverMap Adaptive Immune Receptor (AIR) TCR-BCR Profiling Human DNA) +cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-16-4-1 (DriverMap Adaptive Immune Receptor (AIR) TCR-BCR Profiling Human RNA with TCR Spike-in mix 16:4:1) + +cellecta-human-rna-xcr-umi-drivermap-air-bcr-spikein-1-1-1 (DriverMap Adaptive Immune Receptor (AIR) TCR-BCR Profiling Human RNA with BCR Spike-in mix 1:1:1) + +cellecta-human-rna-xcr-umi-drivermap-air-tcr-spikein-1-1-1 (DriverMap Adaptive Immune Receptor (AIR) TCR-BCR Profiling Human RNA with TCR Spike-in mix 1:1:1) + cellecta-human-rna-xcr-umi-drivermap-air (DriverMap Adaptive Immune Receptor (AIR) TCR-BCR Profiling) -----Thermo Fisher-----