diff --git a/bio_hansel/qc/checks.py b/bio_hansel/qc/checks.py index 0af0ea7..9d371ae 100644 --- a/bio_hansel/qc/checks.py +++ b/bio_hansel/qc/checks.py @@ -89,7 +89,7 @@ def is_mixed_subtype(st: Subtype, df: DataFrame, *args) -> Tuple[Optional[str], return QC.FAIL, '{}: Mixed subtype detected. Positive and negative tiles detected for ' \ 'the same target site "{}" for subtype "{}".'.format( QC.MIXED_SAMPLE_ERROR_2, - '; '.join(conflicting_tiles['refposition'].tolist()), + '; '.join(conflicting_tiles['refposition'].astype(str).tolist()), st.subtype) diff --git a/tests/data/fail-qc-mixed-subtype-pos-neg-tiles.fasta b/tests/data/fail-qc-mixed-subtype-pos-neg-tiles.fasta new file mode 100644 index 0000000..7e15837 --- /dev/null +++ b/tests/data/fail-qc-mixed-subtype-pos-neg-tiles.fasta @@ -0,0 +1,444 @@ +>202001-1.1 +CTGGAAGAGGCGGAAACGCTGTGCCGCAATATC +>600783-1.1 +AGCGGCGGCGGTATTCTCGACTCGATTGAGGCG +>1049933-1.1 +CCAGCTAACGCGTTGATGCGCATTCGGCCTGCA +>1193219-1.1 +GGTACAGATTAAGCAATTGCTCGACGTCGGCGC +>2778621-1.1 +TTTATCTACCCGGGAAACCTGGCGGCATCATCC +>2904061-1.1 +CTATAGGTGACAAAGCTGCGTTCAAACCAGGCG +>3278067-1.1 +TGTGAGTAAGTTGCGCAATATTCTGCTGGATTC +>3867228-1.1 +TTACCGTTTTGATAAAGCAGTGTTACCGCTACG +>4499501-1.1 +TTGGCCGCCTTCGGCGTCAGTTCAAACCAGGTT +>4579224-1.1 +TGATGATGGTCGCCATTGTTTTGCTGTTTGTGT +>4738855-1.1 +CACATCAAAACCCTGCACGCTAAGCTACGCGCG +>negative202001-1.1 +CTGGAAGAGGCGGAAATGCTGTGCCGCAATATC +>negative600783-1.1 +AGCGGCGGCGGTATTCGCGACTCGATTGAGGCG +>negative1049933-1.1 +CCAGCTAACGCGTTGACGCGCATTCGGCCTGCA +>negative1193219-1.1 +GGTACAGATTAAGCAACTGCTCGACGTCGGCGC +>negative2778621-1.1 +TTTATCTACCCGGGAAGCCTGGCGGCATCATCC +>negative2904061-1.1 +CTATAGGTGACAAAGCCGCGTTCAAACCAGGCG +>negative3278067-1.1 +TGTGAGTAAGTTGCGCGATATTCTGCTGGATTC +>negative3867228-1.1 +TTACCGTTTTGATAAACCAGTGTTACCGCTACG +>negative4499501-1.1 +TTGGCCGCCTTCGGCGCCAGTTCAAACCAGGTT +>negative4579224-1.1 +TGATGATGGTCGCCATCGTTTTGCTGTTTGTGT +>negative4738855-1.1 +CACATCAAAACCCTGCGCGCTAAGCTACGCGCG +>negative2981-2.2.3.1.4 +ACTGCCGCCGGAGCCGCGTGAAAATATTGTTTA +>negative21097-2.2.1.1.1 +GCAAATCGCGCCAGTCGAGTCCTCTTTTACCGT +>negative42232-2.2.2.2.2.2 +CCTGAAAAAGCGTTCCCTGCCTGCCTGGAAGAG +>negative64709-2.1.3 +CGCATGGTGGCAATTGCGCTGGCGGCATTTGAT +>negative92666-1.3 +TGCAGTGGTACAGCGTCCCTCCTTTACTCAAAT +>negative107801-2.2.1.2 +TAGCGTTTTTTACTGCCAGTATCCGCGCCGCGG +>negative157792-2.2.1.1.1 +ACGGTACTTTTACCGCTGCCAATACCGCCGGTT +>negative160366-2.2.1.2 +CAGTATCTCTTCAAAGGACGTCATGCCTTGCTC +>negative167020-2.2.2.1.1.1 +TACGATCGGCAAGCGCGCCCATCACCGGATCGG +>negative176708-2.2.2.2.6 +AATAGCGAGCTAATTATGAGGTTTGATATATGA +>negative198398-2.2.2.1.1.1 +GTACCGTCAACGGTAATATCCGCCAGCGTCGGC +>negative202001-1.1 +CTGGAAGAGGCGGAAATGCTGTGCCGCAATATC +>negative205838-2.1.1.1.1 +CTGTTCGCTGCCGTCGGTTTCTTTTACCGTCAC +>negative227831-2.2.3.2.1 +TCCCGGCGAATTTACCGTGCAGGCCAATAGCAT +>negative239152-2.2.2.2.2.1 +TTCCGTCGATGTCACCCATTCCCGTGCGACGCT +>negative271435-2.2.1.1.3 +TCGGGTGCAAATGGGATGGTAAGGAGTTTATTG +>negative293728-2.2.1.1 +GTTGAAACGACGCGCGGTTTCAGAAAGCAGCGG +>negative467662-2.2.3.1.2 +CTAACTTATGTCATAACGCCGTTGACCGCTGGC +>negative484993-2.2.2.2.1.2 +CGGGAATGGTTAAAGTCTGCGGATATGGCGCTT +>negative489687-2.2.3.2.1 +CCTTGTCGTTAAATCCAGGCGGTATAGTAACGA +>negative508759-2.2.2.2.1.3 +CCTGGTGATTTCCAGCCAGAGCGGTAATCAACT +>negative573259-2.2.2.2.5 +ACCACAACGTCTGGCGGTACGTTTTTTATCACC +>negative600350-2.1.2 +GTCGGAAAATAAAAAACAGGTGAATTACGTACC +>negative600783-1.1 +AGCGGCGGCGGTATTCGCGACTCGATTGAGGCG +>negative607438-2.2.1 +TCCAGCGGCATCATGACCACTTTGCTGTTATTC +>negative649988-2.2.2.1 +GGAGCCGACAGGATGCCGAAACCGGGTGTGTGT +>negative691203-2.2.1.1 +CAGTTTTACATCCTGCAAAATGCGCAGCGTCAA +>negative720751-2.2.1.1.2 +ATGGCCATCTGCATACCTGGGGCTTTAACTATA +>negative775920-2.2.2.2 +GTTCAGGTGCTACCGATGATCGTTTTTGGTGCG +>negative842143-2.1.1.1.1 +TGCCCAGCTCTTTCAGCACTTCGTGGCAGGTTT +>negative848800-2.2.3.1 +TCGTTATCAACAACCAGGTGGGTTTCACCACCT +>negative862828-2.1 +CGCCAAGTCTGGCACCTAACGGCACGATGGTAA +>negative869993-2.2.1.1.3 +CGGACTACTGAATCCCAGGTTCATCGCGAACTG +>negative908618-2.2.2.1.1.1 +TTGACCACGCGCTGCGCGGCTGGCGGGCGTTTC +>negative944885-2.2.1.1.1 +GTTTACGTGTTGACCCCGGCATCCTTTCTCCTG +>negative1036874-2.2.2.1 +GACAATGAGCAGAATTGTAGGGAATTTACAGAC +>negative1037658-2.2.2.2.1.4 +GATGTGTTTGAACAGTTTAATGCCGCCGTGCAA +>negative1037736-2.2.2.1.1 +GACTACCTGTTGAAAACCCGTGTACCGGATATG +>negative1047714-2.2.1.1.1 +GCGAACTGGCGAAACGTCTTGGCGTGGAACAAC +>negative1049933-1.1 +CCAGCTAACGCGTTGACGCGCATTCGGCCTGCA +>negative1052031-2.2.2.2.1.3 +ACCGTCGTTGCCTTGCAGGGGCTGACGCTGGCA +>negative1061770-2.2.2.1 +AAAATAAAGACGTTAAGCCGGGGTTAAGCATTC +>negative1062011-2.1.2 +TGTTGTGTGACGCATGCCGCATTTTCTCCTCGC +>negative1091342-2.2.3.1 +TTAAAACCAATCTGGGCATAAGTCTGGTCGGCA +>negative1155229-2.2.3.1.5 +GCTGACTGACGATCTGACCCACGTACAAATCAA +>negative1174327-2.2.2.2.6 +CCATTAACATACCGCCCAGGAAAGACATATCCT +>negative1193219-1.1 +GGTACAGATTAAGCAACTGCTCGACGTCGGCGC +>negative1276017-2.1 +TGCTTGATTTGGGGGCCAACGTCGACTGTGACA +>negative1288160-2.2.2.2.4 +GCGTGGTATCGTCCTGCGCCAGATTTTGTGGCG +>negative1313808-2.1 +CAATATGCGCGAAAAAGTGGTTGAGCATCCGCA +>negative1329594-2.1.1.2 +AGGGGACGTTATTCGGCGACCAGCCACATATCA +>negative1462807-2.2.3 +ATAATGTTGCCGACGGCGCACACAAACGCCACA +>negative1534092-2.1.1.1 +CCGTCGAGTGGGGGGCAATAGCGATAACCGTCC +>negative1603329-2.2.2.2.1.1 +ATTGGCCAGGGCAAAGTCGCTGACTATATTCCG +>negative1649579-2.2.2.2.4 +GCGCATCGCGCATTACTGATGAAATGCTGATGT +>negative1689446-2.2.2.2.4 +GGGCCGAAGACCTCGCGTTGCACGATGGCATCT +>negative1697407-2.2.2.2.1.5 +ACACTGCCCAGAATAAGCGACACACAGCGCGCC +>negative1760974-2.2.3.1.3 +TCGCGATGCGCAGGTCGGAGAAATCGCGCTAAA +>negative1825773-2.2.2.2.2 +CATCCCGGTTCATAGCGGCTCCGCTACGCTAAA +>negative1831783-2.2.2.1.2 +TTTTCCGGGCTTATCGGCAACGCTATTCTGGCC +>negative1840959-2.2.2.2.7 +CACCATTGAGCCGGTAATAATGCCGACAAAGGC +>negative1856421-2.1.1.1 +GTTTGCGCAGCAGGCAGCCCCTGGCGAGCGGCT +>negative1910623-2.2.3.3 +GCTCCGATTTTTGCCTGATAATTATCCAATTAC +>negative1950957-2.2.2.2.2.2 +TTTCCTTGCCCTAATCGCGCCCTACGGCTGGTC +>negative1976727-2.1 +TACTTTATCTTTGTACTTATTCCACAATACGGA +>negative1983064-2.2.1.1.1.1 +TCCAGCATATCGCCCAGGCGACTGAACGCAGGC +>negative2045923-2.1.3 +CGACATATTCCCGCACCGTGCCATGGCTCATCT +>negative2048364-2.1 +GGTCGCGACAGGCCGAGCCAGAACAGGAAAAGC +>negative2069216-2.2.2 +ATACTTAGGCTGTCAGGAACCCGTGAGGTAGTG +>negative2131791-2.2.3.1.3 +GCTGGGCGAAATGATGCAGTTCACCACTTGCTC +>negative2154958-2.2.2.2.1.4 +GGCGCGCCACGGTTACACCCCGGTGGTCAGCCG +>negative2201150-2.2.2.2.7 +GGTCGCGATCCTCTCCCGTGAACGTCAGGCTCA +>negative2213229-2.1.1.3 +ATTCCAGTTGTCGCCGCGACGACATTTCGCTAA +>negative2217680-2.2.2.2.1 +TCCCTGATTGCGGTGCTGATCCCGTTGCTCTTT +>negative2239956-1.2 +CCAGGAAGCCATCATCGCTGCGGAACGCGCCGG +>negative2283825-2.1.1 +TTGAGTGTAACTTCTCTTGCCCGCAGATGACCT +>negative2293563-2.2.2.1.2 +ACCGGTCTCGTACATCGCAGCAATGATCTCATC +>negative2406889-2.2.2.2.1.2 +TTGCCTATCACGATATGGGATGTCAGGGCGTGC +>negative2422397-2.1.1 +CCCGCTGGTAAGCCGGCAGTTGCTAAGTTAAAT +>negative2438389-2.2 +TAGGCCAGCCGCCAGGGAATACCCGCTGCATTT +>negative2455459-2.2.2.2.1.2 +ATCACGAGCCTTACGCACGCCGGAACAGGTCTC +>negative2482056-2.2.3.1 +GGATCGGCCATTATTGCCTGCTGAACATAATAC +>negative2496132-2.1.1.2 +GCTCCCTGCGCTCCGCCATGCGCACTAATTGTT +>negative2503823-2.2.3.1.2 +TAAATGGATATAACGAGCCCCTCCTAAGGGCTA +>negative2561007-2.2.2.2.3 +TGAGTCGTTTGAATCCGCTGGCGCTTATCTGAA +>negative2592097-2.2.1.1.2 +TGCAAACTATCACGACGCGGCGCAAGAGTTTGT +>negative2605132-2.2.2.2.1.5 +TTCTACGCAACAGCAGACCCTGAAAGTCGTTGA +>negative2606626-2.2.2.2.1.1 +TTCGATAAAATCTTTCCAGTTCCCCAGTTCACG +>negative2658490-2.2.1.1.3 +GCTGGTCAGCCACAACGCTGACGCTATTTCTGA +>negative2663535-2.2.3.2 +AGATTCGCCGCGCAGCGACAACGAAGGCATATC +>negative2673994-2.2.2.2.7 +GATGCCGACATTACCGGTAATGGTCGCCAGTAG +>negative2676021-2.2.3.2 +AATTGCCCCACCACAGGCGTGCCGTCGGGTCTG +>negative2693425-2.2.2.2.3 +GAGTAATTTGGTCAACTATTTACTTGAGTAAAT +>negative2741489-2.2.2.2.6 +TATGAGCAAGCAACAGGTTAGAGTGTCTATGTT +>negative2778422-2.2.2.2.3 +ATGAAGCTTGATTCCAGGGCGTGACCGTTGCAG +>negative2778621-1.1 +TTTATCTACCCGGGAAGCCTGGCGGCATCATCC +>negative2785633-2.1 +TCAGCCCGGAAGCGCTGCCGCAGCTTTATCAGT +>negative2801759-2.2.2.2.2.2 +GACCTCGGTTCCCGCCCGCTGCTCGACGTGCTT +>negative2802706-2.2.1.1 +AACAGCGCTATTCCCCGTTCCACCGGCAGTTTT +>negative2805778-2.2.2.2.3 +TTCAATGAGTTGCAGGCTTTTTTGTTGCATGTG +>negative2863936-2.2.3.2 +AGTCGGCGCTAATTCTGCTGGCGACGGTGTACA +>negative2877590-2.2.1.2 +GTACCGCCAGTCCGGCCTGATTCGCTTTTGCTA +>negative2878560-2.1.1.3 +AGCGGGTATCGGCGGCGCCGTCTCAAACCGTAG +>negative2904061-1.1 +CTATAGGTGACAAAGCCGCGTTCAAACCAGGCG +>negative2928609-2.2.1 +GTGAAGATAAGCCGCTGAAGTATCCCCATATGT +>negative2934077-2.2.2.2.1.3 +AATTAACCACATTGTTGCGAGGGATACTATGAC +>negative2963935-2.1.3 +TCATCCTGCTGCGGCAGTAAAATGTTCCCGGAT +>negative3011381-2.1.1.3 +GTTGAGCCACAGCTTGCGCCAGAGCGGCGTTTG +>negative3012157-2.2.2.2.3 +ATTCCACCAGCTCTGACGTACTCGCAGCACGCT +>negative3060368-2.2.3.2.1 +ACCAGGCGGTATGATACAGTTCGACAATGTCCG +>negative3062689-2.2.2.2.2.2 +CCAGAACCGACCGCACGCAAAAGCTTCTGAACC +>negative3082064-1.2 +GACGTTCGCTATCCACCCCGCGTAAAAAGAGAT +>negative3090898-1.2 +CGCATGGCTTTCCAGTTCAAGGATGTCCGCCTG +>negative3113857-1.2 +TTCATGACGTCATCCCAGTCTTTTTCCGTGAAA +>negative3118525-2.2.3.1.3 +CCTGGCCTCTAACATACTGGCCGCATAAGGCGC +>negative3159204-2.2.1.1.3 +CCGCCTCGCCAACCTGCGGCGGAGTCGCGAGCT +>negative3187428-2.2.3.1.1 +CTTTATCAGCGCGCAGTGTCCCATTCCATCATC +>negative3200083-2.1 +ACCCGGTCTACCGCAAAATGGAAAGCGATATGC +>negative3204925-2.2.3.1.5 +CTCGCTGGCAAGCAGTGCGGGTACTATCGGCGG +>negative3212817-2.2.2.2 +CAAAACTACGAATACGGCGCAGCGGGCGACCGT +>negative3230678-2.2.2.1.1.1 +AGCGGTGCGCCAAACCACCCGGAATGATGAGTG +>negative3233869-2.1.1.1.1 +CAGCGCTGGTATGTGGCTGCACCATCGTCATTA +>negative3254229-2.2.3.1.3 +CGCCACCACGCGGTTAGCGTCACGCTGACATTC +>negative3257074-2.2.1 +CGGCAACCAGACCGACTACGCCGCCAAGCAGAC +>negative3264474-2.2.2.1.1.1 +AATGGCGCCGATCGTCGCCAGATAACCGTTGCC +>negative3267927-2.2.2.2.2.1 +AAAGAGAAATATGATGCCAGGCTGATACATGAC +>negative3278067-1.1 +TGTGAGTAAGTTGCGCGATATTCTGCTGGATTC +>negative3299717-2.2.3.1.4 +ATGCCGGACAGCAGGCGAAACTCGAACCGGATA +>negative3305400-2.1.1.1 +CATCGTGAAGCAGAACAGACGCGCATTCTTGCT +>negative3324813-2.2.2.2 +CAGATGAAGCGGGCGCTGGATAACTTCAAGGCC +>negative3373069-2.2.2.2.1.1 +CTCTCCAGAAGATGAAGCCCGTGATGCGGCGCA +>negative3408895-2.1.1 +ACAGTAAAAACAGGATCTGCTGCGAGGCCAGAC +>negative3436797-2.2.2.1.2 +ATGCGGTATTCCTTGGCGTCGGCACCTATCAGT +>negative3479768-2.2 +CGCCTTGCCGCCGCCGCGCGCCACACAGGTCAG +>negative3523461-2.2.2.2.4 +CGATTTTACCGCCAGCAGCTTCGATAGCAGCAC +>negative3540924-2.2.2.2.2.2 +ACCGCCGCTATTACAAGCGGCGGCTCTCAGTTT +>negative3553986-2.2.3.3 +AGAAGCTATCCGACTGATCCTCAGTAACGATCC +>negative3580227-2.2.2.2.5 +GCACCGCATCCCAGGACGTTTTCAGCGCTATTT +>negative3586333-2.2.2.2.2.1 +CGCTGGGGACTGTTTGGGGAGAATAATCAGGAG +>negative3587211-2.2.1.1.2 +TATTTCTCGCGAGACCGCGCGTGCCGCCCAGTC +>negative3647258-2.2.2.2.2.1 +TACGGGTAACTGTTATCGGTAACATTGTCCAAC +>negative3671709-2.2.3.1.5 +TCAGCGAGCAACAACCGTGCCAAAGCCGATGAG +>negative3686624-2.2.2.2.5 +CCGAAAGAAGACTAACGTCCCAGCGCTTTGCCG +>negative3691258-2.2.2.1.1 +GCACGTGACGTTTAGAGACGGAGTCCAGCTCAG +>negative3696399-2.2.2.2.2 +TCATTTGTATAATAACCTATTGATCAGGCTAAT +>negative3742501-2.1.1.3 +CCTGCTGCCGCTGCACGATGGCAACCACTTCCC +>negative3766668-2.2 +GGCCGTACCGTCGGTCGTGCCCAAAAGATAGTC +>negative3785187-2.2.2.2.1.4 +TTAAAAAGCACCTCAGGGAATAGTCTTAATCCG +>negative3791096-2.2.2 +AACTGCCGGTGGACGGCGGTTTCGACGATAACG +>negative3801966-2.1.1 +TCAACCATATTTACGATCACGTTAAGCCCATCC +>negative3802645-2.1 +ATCGCGCCAGCCGCAGACGCTAACGTTCATTCC +>negative3811523-2.2.2.1.1 +CACAATACTAATGAAGAGAAAGTACGCGACCAT +>negative3819645-2.2.2.2.3 +TCGAATCTGGATACCCGCGTCACCAATATCGAA +>negative3822905-2.2.1.1.3 +GAGTCGCCAGCGTATGTATCTCGTTCCGCCCGT +>negative3827083-2.2.2.2.1 +ACCGGGGAAATGCTGACCAGTTTCCGGGAACAC +>negative3855172-2.2.3.1.3 +CCGGTACTGATGGGTCCGCATACCTTTAACTTT +>negative3864871-2.2.1.1 +CCCTGACCTGGTTACTGATATTCAGAGCAGCTT +>negative3867228-1.1 +TTACCGTTTTGATAAACCAGTGTTACCGCTACG +>negative3913221-1.3 +CGGAGATCCCCAGATCCTGCGCCATCGGCGTCA +>negative3931056-2.2.3.2.1 +ATATTCATCATCAAACTACCGTCGCCGGAAAAA +>negative3932517-2.2.3.2 +CGGCGCATTGCCGACGACCACCACCACACGCAC +>negative3939049-2.1.1.2 +GAGATCATTGCGATTGCCATTGGTTCCGGCGCC +>negative3943311-2.2.1.1.2 +AGGATCGGCACATCCTGGATAGCCATATTATTG +>negative4013058-2.1.1.2 +CGCGCAGGCGCCCAGCCCTCTTTGTCGGCGGAA +>negative4030144-2.2.3.2 +TTCCTTTCTCTTTCGTTTGGTTATTAGTATGCC +>negative4035893-2.1.2 +GTGTTGGGAGTTCGAGTCTCTCCGCCCCTGCCA +>negative4047770-2.2.2.1.2 +TCCTGTTGTAATGTGGGTTCATTCGCACAGATA +>negative4079393-2.2.3.2.1 +ACCAGGATCATGACGCGCCGCTATTGCAGCAGT +>negative4081360-2.2.2.2.1.5 +TGCTGGTGGTTGAGCCGCCCTATGATCCTGAGT +>negative4086376-2.2.2.1.3 +AGGTCTACCATCGCCCGTCGCGTTTCATCGGCG +>negative4101230-2.1.2 +CTTTTATTGCCTCGCAGATCGTTTTTCCTGTCA +>negative4120499-2.1.1.3 +GTCGTACCAGATGAAGCGAAGTGTGTATTTTGT +>negative4136269-2.2.2.2.2 +CCGCTGCTGGAAGATGAAAAAGTGCGCAAAGTG +>negative4136928-2.1.2 +GATTTTGGAGTATCGTGGTCTGGCGAAGCTAAA +>negative4145957-2.1 +CGGTCGTCGTGGTAACGAGATCTTCAATTCTGC +>negative4157035-2.2.2.2.1.2 +CCCAGGACTCCAGGCACATCTGCTCTTCTTCAC +>negative4166216-1.2 +TTTTTGTACTGTGAAACGATGATGCTATACGCC +>negative4178681-2.2.3.2 +TCCACGCCCATCGCCTGTGCGATGCGAGCAAAA +>negative4211912-2.2.1.1.1.1 +GGAGTGCCGAATCAAATTTCCAGCGCATTGTCC +>negative4215208-2.1.2 +TAAGCGCAGCGCCATCAGACATTGATTGGCAAT +>negative4241351-2.2 +GGGATTACGTCCATAACGAATCCATGTTTTTTG +>negative4282538-2.1.3 +TGTCTATCACTAAAGATCAAATCATTGAAGCAG +>negative4286359-2.2.3.2 +TGAAGATCGTGAAGGTCTATCTGGCCGTTAAAC +>negative4297704-2.2.3.1.2 +CGGCTCCGATTCCGGGTGGTTGATGTTGGCGGG +>negative4299778-2.2.3.2.1 +CGTGCATACCGTGCTGGCCGGGTTTGTTGAAGT +>negative4327318-2.1.2 +TTCCCGGCGAACCGTGTGGGTGACGACATCGAA +>negative4445342-2.2.2.2.5 +TCCCGGGATTTTCAGGCAACCCTCCCGGTGATG +>negative4472427-1.3 +GAGCTTTACCCCTGGCGCGATCGCGGACATATT +>negative4499501-1.1 +TTGGCCGCCTTCGGCGCCAGTTCAAACCAGGTT +>negative4516469-2.2.3.1.1 +TTTACTACGCTGCGGCGTATTGTTAACCCTGAT +>negative4525936-2.2.2.2.3 +CGGTAACGGTGTTGTGCTGTCTCCGGCCGCGCT +>negative4540735-2.2.2.2.3 +CGTAATACCGCGCGGGTTTTCACCGTAAGAACC +>negative4542157-2.1.1.1.1 +TGGGTGATGCGTATAGTTGGGTAGGCTACGCGG +>negative4579224-1.1 +TGATGATGGTCGCCATCGTTTTGCTGTTTGTGT +>negative4581051-2.2.1.2 +TGACCCTCTTTACGCTTAAACGCCAGCCCACCG +>negative4642573-1.2 +TACCAGGAAGTGCTGGAAGAGTTTAACGAACAT +>negative4696391-2.2.3.2.1 +TGCCGATATTATTGAGGTTCAACTGCTGGCCGT +>negative4707754-1.3 +CATCGCGAGCAGCCCGCGGGTATTCATCCAACC +>negative4738855-1.1 +CACATCAAAACCCTGCGCGCTAAGCTACGCGCG +>negative4744778-2.2 +CTGCTTTAATTTTTGACCATGAGTTGGTGACGT \ No newline at end of file diff --git a/tests/test_qc.py b/tests/test_qc.py index f9fec9a..212bc14 100644 --- a/tests/test_qc.py +++ b/tests/test_qc.py @@ -4,7 +4,7 @@ from bio_hansel.qc.const import QC from bio_hansel.subtype import Subtype -from bio_hansel.subtyper import subtype_reads_ac +from bio_hansel.subtyper import subtype_reads_ac, subtype_contigs_ac genome_name = 'test' @@ -44,3 +44,22 @@ def test_mixed_tiles(): assert QC.MIXED_SAMPLE_ERROR_2 in st.qc_message assert 'Mixed subtypes found: "1; 2; 2.1"' in st.qc_message assert st.qc_status == QC.FAIL + + +def test_mixed_subtype_positive_negative_tiles_same_target(): + scheme = 'heidelberg' + fasta = 'tests/data/fail-qc-mixed-subtype-pos-neg-tiles.fasta' + st, df = subtype_contigs_ac(fasta_path=fasta, genome_name=genome_name, scheme=scheme) + assert isinstance(st, Subtype) + assert isinstance(df, DataFrame) + assert st.scheme == scheme + assert st.qc_status == QC.FAIL + assert QC.MIXED_SAMPLE_ERROR_2 in st.qc_message + expected_qc_msg = 'FAIL: Mixed Sample Error 2: Mixed subtype detected. ' \ + 'Positive and negative tiles detected for the same ' \ + 'target site ' \ + '"202001; 600783; 1049933; 1193219; 2778621; 2904061; ' \ + '3278067; 3867228; 4499501; 4579224; 4738855; 202001; ' \ + '600783; 1049933; 1193219; 2778621; 2904061; 3278067; ' \ + '3867228; 4499501; 4579224; 4738855" for subtype "1.1".' + assert expected_qc_msg in st.qc_message