diff --git a/README.md b/README.md index e0a7843..47b92d3 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ venv/bin/flask load-genes \ venv/bin/flask load-snv 2 data/basis/vcfs/*.vcf.gz -venv/bin/flask load-qc-metrics +venv/bin/flask load-qc-metrics \ data/basis/qc_metrics/metrics.json.gz ``` diff --git a/bravo_api/__init__.py b/bravo_api/__init__.py index cb9213e..4947321 100644 --- a/bravo_api/__init__.py +++ b/bravo_api/__init__.py @@ -1,5 +1,5 @@ from logging.config import dictConfig -from os import getenv +from os import getenv, getcwd from flask import Flask from flask_cors import CORS from flask_pymongo import PyMongo @@ -43,7 +43,7 @@ def version(): def create_app(test_config=None): - instance_path = getenv('BRAVO_API_INSTANCE_DIR', None) + instance_path = getenv('BRAVO_API_INSTANCE_DIR', getcwd()) app = Flask(__name__, instance_relative_config=True, instance_path=instance_path) @@ -51,6 +51,7 @@ def create_app(test_config=None): app.version = pkg_resources.read_text(__package__, 'VERSION').strip() if test_config is None: + print(getcwd()) app.config.from_object('bravo_api.default_config') app.config.from_envvar('BRAVO_API_CONFIG_FILE', silent=True) else: diff --git a/bravo_api/blueprints/status/status.py b/bravo_api/blueprints/status/status.py index 89b1ffb..9bb8f8e 100644 --- a/bravo_api/blueprints/status/status.py +++ b/bravo_api/blueprints/status/status.py @@ -3,6 +3,7 @@ import sys from flask import Blueprint, Response, current_app, jsonify, make_response + bp = Blueprint('status', __name__) logger = logging.getLogger(__name__) @@ -38,11 +39,33 @@ def usage() -> Response: result = current_app.cache.get('usage') if result is None: result = usage_stats(current_app.mmongo.db) - current_app.cache.set('usage', result, timeout=3600) - logger.debug("Usage result updated") + current_app.cache.set('usage', result, timeout=0) + return make_response(jsonify(result)) + + +@bp.route('/counts', methods=['GET']) +def counts() -> Response: + result = 
current_app.cache.get('counts') + if result is None: + snv_count = count_collection(current_app.mmongo.db.snv) + transcript_count = count_collection(current_app.mmongo.db.transcripts) + gene_count = count_collection(current_app.mmongo.db.genes) + + result = {'snvs': snv_count, 'transcripts': transcript_count, 'genes': gene_count} + + current_app.cache.set('counts', result, timeout=3600) + logger.debug('variant counts updated') return make_response(jsonify(result)) +def count_collection(collection: pymongo.collection.Collection) -> int: + """ + Count (estimate) the number of snv in backing database + """ + result = collection.estimated_document_count() + return result + + def usage_stats(db: pymongo.database.Database) -> dict: """ Given a mongo database, run queries to compile statistics about user usage of API. diff --git a/bravo_api/blueprints/structvar/structvar.py b/bravo_api/blueprints/structvar/structvar.py index 9954c47..ecc4ae1 100644 --- a/bravo_api/blueprints/structvar/structvar.py +++ b/bravo_api/blueprints/structvar/structvar.py @@ -67,7 +67,8 @@ def sv_region(structvars: pymongo.collection.Collection, ]} ]} ]} - } + }, + {'$project': {'_id': 0}} ] cursor = structvars.aggregate(pipeline) diff --git a/tests/mongo_fixtures/snv.json b/tests/mongo_fixtures/snv.json index 1c5b74f..c5f4a72 100644 --- a/tests/mongo_fixtures/snv.json +++ b/tests/mongo_fixtures/snv.json @@ -1,51 +1,305 @@ [ { "_id": { - "$oid": "deadbeefdeadbeef00000004" + "$oid": "60afed33728788f0495b2fc2" }, - "chrom": "77", - "pos": 10510077, - "xpos": { - "$numberLong": "77010510077" + "chrom": "11", + "pos": 5200003, + "xpos": 11005200003, + "stop": 5200003, + "xstop": 11005200003, + "variant_id": "11-5200003-G-A", + "rsids": [ + "rs7933549" + ], + "site_quality": 255, + "filter": [], + "allele_count": 476, + "allele_num": 8344, + "allele_freq": 0.05704699829220772, + "hom_count": 42, + "het_count": 392, + "cadd_phred": 23.399999618530273, + "annotation": { + "region": { + "consequence": [ + 
"missense_variant" + ], + "_consequence": [ + 26 + ], + "hgvs": [ + "p.Ser227Leu" + ] + }, + "genes": [ + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "HGVSp": "p.Ser227Leu", + "HGVSc": "c.680C>T" + } + ], + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "hgvs": [ + "p.Ser227Leu" + ] + } + ] + }, + "avg_dp": 32.438201904296875, + "avg_dp_alt": 32.956199645996094, + "dp_hist": [ + 0, + 1, + 6, + 92, + 513, + 1161, + 1157, + 767, + 317, + 103, + 24, + 14, + 6, + 5, + 3, + 1, + 0, + 1, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 9, + 64, + 106, + 112, + 82, + 38, + 15, + 4, + 3, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0 + ], + "qc_metrics": { + "ABE": [ + 0.5054510235786438, + 0.4960309863090515, + 0.4960309863090515 + ], + "ABZ": [ + 1.2525399923324585, + 0.7614099979400635, + 0.7614099979400635 + ], + "BQZ": [ + -5.947289943695068, + 0.032972101122140884, + 0.032972101122140884 + ], + "CYZ": [ + -1.6105899810791016, + 0.07342389971017838, + 0.07342389971017838 + ], + "FIBC_I": [ + 0.12631599605083466, + 0.9498550295829773, + 0.9498550295829773 + ], + "FIBC_P": [ + 0.1263660043478012, + 0.9494349956512451, + 0.9494349956512451 + ], + "HWE_SLP_I": [ + 5.752950191497803, + 0.9511139988899231, + 0.9511139988899231 + ], + "HWE_SLP_P": [ + 5.7529802322387695, + 0.9509239792823792, + 0.9509239792823792 + ], + "IOR": [ + -0.568792998790741, + 0.713936984539032, + 0.713936984539032 + ], + "NM0": [ + 0.056255001574754715, + 0.03701720014214516, + 0.03701720014214516 + ], + "NM1": [ + 0.060108400881290436, + 0.12085899710655212, + 0.12085899710655212 + ], + "NMZ": [ + -1.0713200569152832, + 0.24252000451087952, + 0.24252000451087952 + ], + "STZ": [ + -0.4767569899559021, + 0.23687200248241425, + 0.23687200248241425 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 
+ ] }, - "stop": 10510077, - "xstop": { - "$numberLong": "77010510077" + "pub_freq": [ + { + "ds": "1000G", + "ALL": 0.0529, + "AFR": 0.1936, + "AMR": 0.013, + "EAS": 0, + "EUR": 0, + "SAS": 0 + }, + { + "ds": "gnomAD", + "ALL": 0.01228, + "AFR": 0.1725, + "AMR": 0.005985, + "ASJ": 0, + "EAS": 5.44e-05, + "FIN": 0, + "NFE": 0.0003524, + "OTH": 0.004568, + "SAS": 0.000196 + } + ] + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fc3" }, - "variant_id": "77-10510077-C-A", + "chrom": "11", + "pos": 5200010, + "xpos": 11005200010, + "stop": 5200010, + "xstop": 11005200010, + "variant_id": "11-5200010-G-A", "rsids": [], - "site_quality": 60, - "filter": [ - "SVM" - ], - "allele_count": 3, - "allele_num": 125568, - "allele_freq": 2.3891399905551225e-05, + "site_quality": 48, + "filter": [], + "allele_count": 1, + "allele_num": 8344, + "allele_freq": 0.00011984699813183397, "hom_count": 0, - "het_count": 3, - "cadd_phred": null, + "het_count": 1, + "cadd_phred": 13.949999809265137, "annotation": { "region": { "consequence": [ - "intergenic_variant" + "missense_variant" ], "_consequence": [ - 1 + 26 + ], + "hgvs": [ + "p.Leu225Phe" ] - } + }, + "genes": [ + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "HGVSp": "p.Leu225Phe", + "HGVSc": "c.673C>T" + } + ], + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "hgvs": [ + "p.Leu225Phe" + ] + } + ] }, - "avg_dp": 4.511340141296387, - "avg_dp_alt": 7.666669845581055, - "avg_gq": 61.90449905395508, - "avg_gq_alt": 39, + "avg_dp": 32.72700119018555, + "avg_dp_alt": 30, "dp_hist": [ - 42293, - 16891, - 3101, - 420, - 66, - 11, + 0, + 0, + 4, + 71, + 468, + 1121, + 1216, + 801, + 314, + 114, + 32, + 13, + 8, + 4, + 2, + 2, 1, + 0, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 0, + 0, 1, 0, 0, @@ -58,19 +312,187 @@ 0, 0, 0, + 0, + 0, 0 ], - "dp_hist_alt": [ - 
1, + "qc_metrics": { + "ABE": [ + 0.6546390056610107, + 0.930429995059967, + 0.9312700033187866 + ], + "ABZ": [ + 1.6711399555206299, + 0.8265910148620605, + 0.8274310231208801 + ], + "BQZ": [ + 1.1909799575805664, + 0.8331549763679504, + 0.8331549763679504 + ], + "CYZ": [ + 0.03093229979276657, + 0.5240039825439453, + 0.5240039825439453 + ], + "FIBC_I": [ + -0.030542699620127678, + 0.017821699380874634, + 0.018584899604320526 + ], + "FIBC_P": [ + -0.333391010761261, + 0.010036599822342396, + 0.010227399878203869 + ], + "HWE_SLP_I": [ + -1.0995699994964525e-05, + 0.7799190282821655, + 0.7807210087776184 + ], + "HWE_SLP_P": [ + -1.5571700714644976e-05, + 0.7798810005187988, + 0.7801100015640259 + ], + "IOR": [ + -0.701645016670227, + 0.5240799784660339, + 0.5240799784660339 + ], + "NM0": [ + 0.47655799984931946, + 0.31006699800491333, + 0.31006699800491333 + ], + "NM1": [ + 0.11029800027608871, + 0.2525950074195862, + 0.2525950074195862 + ], + "NMZ": [ + -0.19527900218963623, + 0.4084869921207428, + 0.4084869921207428 + ], + "STZ": [ + -1.0200200080871582, + 0.07311859726905823, + 0.07311859726905823 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 48, + 0.028316300362348557, + 0.028965000063180923 + ] + } + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fc4" + }, + "chrom": "11", + "pos": 5200016, + "xpos": 11005200016, + "stop": 5200016, + "xstop": 11005200016, + "variant_id": "11-5200016-G-A", + "rsids": [ + "rs548436244" + ], + "site_quality": 255, + "filter": [], + "allele_count": 3, + "allele_num": 8344, + "allele_freq": 0.00035953999031335115, + "hom_count": 1, + "het_count": 1, + "cadd_phred": 0.6639999747276306, + "annotation": { + "region": { + "consequence": [ + "synonymous_variant" + ], + "_consequence": [ + 20 + ], + "hgvs": [ + "c.667C>T" + ] + }, + "genes": [ + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "synonymous_variant" + ], + "_consequence": [ 
+ 20 + ], + "HGVSp": "p.Leu223=", + "HGVSc": "c.667C>T" + } + ], + "consequence": [ + "synonymous_variant" + ], + "_consequence": [ + 20 + ], + "hgvs": [ + "c.667C>T" + ] + } + ] + }, + "avg_dp": 32.80179977416992, + "avg_dp_alt": 42, + "dp_hist": [ + 0, + 0, + 5, + 81, + 464, + 1077, + 1261, + 776, + 325, + 124, + 28, + 14, + 5, + 6, + 3, + 0, 1, 1, 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, 0, 0, 0, 0, 0, + 1, 0, + 1, 0, 0, 0, @@ -82,32 +504,210 @@ 0, 0 ], - "gq_hist": [ - 1, - 0, - 0, + "qc_metrics": { + "ABE": [ + 0.5623700022697449, + 0.7838500142097473, + 0.7839639782905579 + ], + "ABZ": [ + 0.8659890294075012, + 0.6741340160369873, + 0.6742479801177979 + ], + "BQZ": [ + 0.6593019962310791, + 0.7306140065193176, + 0.7306140065193176 + ], + "CYZ": [ + 1.712249994277954, + 0.9497399926185608, + 0.9497399926185608 + ], + "FIBC_I": [ + 0.4959920048713684, + 0.9963359832763672, + 0.9963359832763672 + ], + "FIBC_P": [ + 0.4998610019683838, + 0.9965649843215942, + 0.9965649843215942 + ], + "HWE_SLP_I": [ + 0.3211809992790222, + 0.8400629758834839, + 0.8401010036468506 + ], + "HWE_SLP_P": [ + 0.31913700699806213, + 0.8402150273323059, + 0.8403300046920776 + ], + "IOR": [ + -0.73888099193573, + 0.4657689929008484, + 0.4657689929008484 + ], + "NM0": [ + 0.3496350049972534, + 0.20619000494480133, + 0.20619000494480133 + ], + "NM1": [ + 0.10610300302505493, + 0.24580200016498566, + 0.24580200016498566 + ], + "NMZ": [ + -0.13334600627422333, + 0.4281409978866577, + 0.4281409978866577 + ], + "STZ": [ + -0.11316999793052673, + 0.4290179908275604, + 0.4290570020675659 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 + ] + }, + "pub_freq": [ + { + "ds": "gnomAD", + "ALL": 0.000223, + "AFR": 0.000123, + "AMR": 0.001041, + "ASJ": 0, + "EAS": 0, + "FIN": 0, + "NFE": 0.0001058, + "OTH": 0.0009798, + "SAS": 0 + } + ] + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fc5" + }, + "chrom": "11", + "pos": 5200043, + "xpos": 11005200043, + 
"stop": 5200043, + "xstop": 11005200043, + "variant_id": "11-5200043-C-T", + "rsids": [ + "rs1400349848" + ], + "site_quality": 255, + "filter": [], + "allele_count": 1, + "allele_num": 8344, + "allele_freq": 0.00011984699813183397, + "hom_count": 0, + "het_count": 1, + "cadd_phred": 11.869999885559082, + "annotation": { + "region": { + "consequence": [ + "missense_variant", + "upstream_gene_variant" + ], + "_consequence": [ + 26, + 11 + ], + "hgvs": [ + "p.Ala214Thr" + ] + }, + "genes": [ + { + "name": "ENSG00000224091", + "transcripts": [ + { + "name": "ENST00000418080", + "biotype": "lncRNA", + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + } + ], + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + }, + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "HGVSp": "p.Ala214Thr", + "HGVSc": "c.640G>A" + } + ], + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "hgvs": [ + "p.Ala214Thr" + ] + } + ] + }, + "avg_dp": 33.108299255371094, + "avg_dp_alt": 39, + "dp_hist": [ 0, 1, 4, - 282, - 1172, - 1933, - 2155, - 11995, - 15133, - 7924, - 10023, - 6105, - 2371, - 1928, - 977, - 444, - 336 - ], - "gq_hist_alt": [ + 65, + 445, + 1057, + 1216, + 858, + 355, + 105, + 29, + 15, + 9, + 4, + 5, + 2, 1, 0, 0, + 1 + ], + "dp_hist_alt": [ 0, 0, 0, @@ -116,7 +716,10 @@ 0, 0, 1, - 1, + 0, + 0, + 0, + 0, 0, 0, 0, @@ -128,90 +731,1436 @@ ], "qc_metrics": { "ABE": [ - 0.7013429999351501, - 0.9354339838027954, - 0.9354349970817566 + 0.5895140171051025, + 0.8484200239181519, + 0.8496789932250977 ], "ABZ": [ - 11.212699890136719, - 0.9492700099945068, - 0.9492700099945068 - ], - "AVGDP": [ - 4.511340141296387, - 0.00042815800406970084, - 0.0004281679866835475 + 1.1208399534225464, + 0.735040009021759, + 0.736299991607666 ], "BQZ": [ - -17.79129981994629, - 
0.01255439966917038, - 0.012554500252008438 + -0.3053860068321228, + 0.32006600499153137, + 0.32006600499153137 ], "CYZ": [ - 1.1288000345230103, - 0.8752790093421936, - 0.8752809762954712 - ], - "DP": [ - 283240, - 0.00042815800406970084, - 0.0004281679866835475 + -0.5941309928894043, + 0.2863300144672394, + 0.2863300144672394 ], "FIBC_I": [ - 0.1082649976015091, - 0.7980070114135742, - 0.7980080246925354 + -0.01541849970817566, + 0.03713170066475868, + 0.354449987411499 ], "FIBC_P": [ - 0.005319979973137379, - 0.927482008934021, - 0.927482008934021 + -5.9926900576101616e-05, + 0.5020989775657654, + 0.7904520034790039 ], "HWE_SLP_I": [ - 0.34643200039863586, - 0.8737350106239319, - 0.8737360239028931 + -2.139309981430415e-05, + 0.4964889883995056, + 0.7690050005912781 ], "HWE_SLP_P": [ - -0.252377986907959, - 0.34620898962020874, - 0.34620898962020874 + -2.0764899090863764e-05, + 0.49706199765205383, + 0.7690809965133667 ], "IOR": [ - -0.5560309886932373, - 0.5153970122337341, - 0.5153989791870117 + -0.7243160009384155, + 0.48916199803352356, + 0.48916199803352356 ], "NM0": [ - 0.2963100075721741, - 0.5862969756126404, - 0.5862979888916016 + 0.37397798895835876, + 0.22561399638652802, + 0.22565299272537231 ], "NM1": [ - 1.0928900241851807, - 0.5506500005722046, - 0.5506629943847656 + 0.09031129628419876, + 0.21550099551677704, + 0.21550099551677704 ], "NMZ": [ - 8.484000205993652, - 0.9596289992332458, - 0.9596289992332458 - ], - "QUAL": [ - 60, - 0.03607510030269623, - 0.03690670058131218 + -0.7514899969100952, + 0.2774389982223511, + 0.2774769961833954 ], "STZ": [ - -4.797810077667236, - 0.012951799668371677, - 0.012951799668371677 + 0.6464120149612427, + 0.8243020176887512, + 0.8243780136108398 ], "SVM": [ - -1.7629499435424805, - 0.01810220070183277, - 0.018102599307894707 + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 ] } + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fc6" + }, + "chrom": "11", + "pos": 5200050, + "xpos": 
11005200050, + "stop": 5200050, + "xstop": 11005200050, + "variant_id": "11-5200050-C-T", + "rsids": [ + "rs201823376" + ], + "site_quality": 255, + "filter": [], + "allele_count": 1, + "allele_num": 8344, + "allele_freq": 0.00011984699813183397, + "hom_count": 0, + "het_count": 1, + "cadd_phred": 7.570000171661377, + "annotation": { + "region": { + "consequence": [ + "synonymous_variant", + "upstream_gene_variant" + ], + "_consequence": [ + 20, + 11 + ], + "hgvs": [ + "c.633G>A" + ] + }, + "genes": [ + { + "name": "ENSG00000224091", + "transcripts": [ + { + "name": "ENST00000418080", + "biotype": "lncRNA", + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + } + ], + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + }, + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "synonymous_variant" + ], + "_consequence": [ + 20 + ], + "HGVSp": "p.Leu211=", + "HGVSc": "c.633G>A" + } + ], + "consequence": [ + "synonymous_variant" + ], + "_consequence": [ + 20 + ], + "hgvs": [ + "c.633G>A" + ] + } + ] + }, + "avg_dp": 33.43339920043945, + "avg_dp_alt": 38, + "dp_hist": [ + 0, + 1, + 2, + 58, + 421, + 1022, + 1201, + 875, + 398, + 127, + 32, + 9, + 14, + 3, + 3, + 5, + 0, + 0, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "qc_metrics": { + "ABE": [ + 0.44750601053237915, + 0.14062699675559998, + 0.14196300506591797 + ], + "ABZ": [ + -0.6488519906997681, + 0.2162269949913025, + 0.21752400696277618 + ], + "BQZ": [ + 0, + 0.3658980131149292, + 0.6231489777565002 + ], + "CYZ": [ + 1.227370023727417, + 0.8895210027694702, + 0.8895589709281921 + ], + "FIBC_I": [ + -0.01541849970817566, + 0.03713170066475868, + 0.354449987411499 + ], + "FIBC_P": [ + -5.992699880152941e-05, + 0.4742409884929657, + 0.5020610094070435 + ], + "HWE_SLP_I": [ + 
-2.1393199858721346e-05, + 0.44607698917388916, + 0.4964509904384613 + ], + "HWE_SLP_P": [ + -2.076499913528096e-05, + 0.44611498713493347, + 0.49702298641204834 + ], + "IOR": [ + -0.6763389706611633, + 0.5635780096054077, + 0.5635780096054077 + ], + "NM0": [ + 0.48889198899269104, + 0.3202950060367584, + 0.3202950060367584 + ], + "NM1": [ + 0.08776970207691193, + 0.20981499552726746, + 0.20981499552726746 + ], + "NMZ": [ + -0.0925062969326973, + 0.4395129978656769, + 0.4395129978656769 + ], + "STZ": [ + -1.1186100244522095, + 0.05979999899864197, + 0.05979999899864197 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 + ] + }, + "pub_freq": [ + { + "ds": "1000G", + "ALL": 0.0002, + "AFR": 0.0008, + "AMR": 0, + "EAS": 0, + "EUR": 0, + "SAS": 0 + }, + { + "ds": "gnomAD", + "ALL": 3.984e-06, + "AFR": 6.155e-05, + "AMR": 0, + "ASJ": 0, + "EAS": 0, + "FIN": 0, + "NFE": 0, + "OTH": 0, + "SAS": 0 + } + ] + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fc7" + }, + "chrom": "11", + "pos": 5200078, + "xpos": 11005200078, + "stop": 5200078, + "xstop": 11005200078, + "variant_id": "11-5200078-G-T", + "rsids": [ + "rs148749797" + ], + "site_quality": 255, + "filter": [], + "allele_count": 2, + "allele_num": 8344, + "allele_freq": 0.0002396930067334324, + "hom_count": 0, + "het_count": 2, + "cadd_phred": 18.889999389648438, + "annotation": { + "region": { + "consequence": [ + "missense_variant", + "upstream_gene_variant" + ], + "_consequence": [ + 26, + 11 + ], + "hgvs": [ + "p.Ala202Asp" + ] + }, + "genes": [ + { + "name": "ENSG00000224091", + "transcripts": [ + { + "name": "ENST00000418080", + "biotype": "lncRNA", + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + } + ], + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + }, + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + 
"missense_variant" + ], + "_consequence": [ + 26 + ], + "HGVSp": "p.Ala202Asp", + "HGVSc": "c.605C>A" + } + ], + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "hgvs": [ + "p.Ala202Asp" + ] + } + ] + }, + "avg_dp": 33.902198791503906, + "avg_dp_alt": 33.5, + "dp_hist": [ + 0, + 0, + 2, + 60, + 362, + 967, + 1231, + 904, + 423, + 142, + 37, + 15, + 11, + 8, + 7, + 1, + 1, + 0, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "qc_metrics": { + "ABE": [ + 0.5062609910964966, + 0.5032060146331787, + 0.5032439827919006 + ], + "ABZ": [ + 0.11611299961805344, + 0.44554299116134644, + 0.44554299116134644 + ], + "BQZ": [ + -0.6708790063858032, + 0.2571359872817993, + 0.2571359872817993 + ], + "CYZ": [ + 1.2282500267028809, + 0.8897110223770142, + 0.8897110223770142 + ], + "FIBC_I": [ + -0.010463600046932697, + 0.40963199734687805, + 0.5373610258102417 + ], + "FIBC_P": [ + -0.00015982099284883589, + 0.30258700251579285, + 0.42482098937034607 + ], + "HWE_SLP_I": [ + -8.433649782091379e-05, + 0.2889249920845032, + 0.3564339876174927 + ], + "HWE_SLP_P": [ + -8.306570089189336e-05, + 0.29606199264526367, + 0.39990100264549255 + ], + "IOR": [ + -1.0068600177764893, + 0.14848899841308594, + 0.14848899841308594 + ], + "NM0": [ + 0.5312269926071167, + 0.3575409948825836, + 0.35757899284362793 + ], + "NM1": [ + 0.08758030086755753, + 0.20935699343681335, + 0.20935699343681335 + ], + "NMZ": [ + -0.0012556399451568723, + 0.4612269997596741, + 0.4612269997596741 + ], + "STZ": [ + -1.7651300430297852, + 0.022592000663280487, + 0.022592000663280487 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 + ] + }, + "pub_freq": [ + { + "ds": "1000G", + "ALL": 0.0004, + "AFR": 0, + "AMR": 0, + "EAS": 0, + "EUR": 0.002, + "SAS": 0 + }, + { + "ds": "gnomAD", + "ALL": 0.000571, + "AFR": 0.0001234, + "AMR": 2.893e-05, + "ASJ": 0.0006967, + "EAS": 
0, + "FIN": 0.0002311, + "NFE": 0.001097, + "OTH": 0.0006544, + "SAS": 0 + } + ] + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fc8" + }, + "chrom": "11", + "pos": 5200088, + "xpos": 11005200088, + "stop": 5200088, + "xstop": 11005200088, + "variant_id": "11-5200088-T-TATTGAATCC", + "rsids": [ + "rs574256417" + ], + "site_quality": 255, + "filter": [], + "allele_count": 31, + "allele_num": 8344, + "allele_freq": 0.003715239930897951, + "hom_count": 1, + "het_count": 29, + "cadd_phred": 6.098999977111816, + "annotation": { + "region": { + "consequence": [ + "inframe_insertion", + "upstream_gene_variant" + ], + "_consequence": [ + 28, + 11 + ], + "hgvs": [ + "p.Asn198_Ser199insGlyPheAsn" + ] + }, + "genes": [ + { + "name": "ENSG00000224091", + "transcripts": [ + { + "name": "ENST00000418080", + "biotype": "lncRNA", + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + } + ], + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + }, + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "inframe_insertion" + ], + "_consequence": [ + 28 + ], + "HGVSp": "p.Asn198_Ser199insGlyPheAsn", + "HGVSc": "c.594_595insGGATTCAAT" + } + ], + "consequence": [ + "inframe_insertion" + ], + "_consequence": [ + 28 + ], + "hgvs": [ + "p.Asn198_Ser199insGlyPheAsn" + ] + } + ] + }, + "avg_dp": 34.34519958496094, + "avg_dp_alt": 32.900001525878906, + "dp_hist": [ + 0, + 0, + 2, + 48, + 334, + 895, + 1242, + 929, + 472, + 157, + 49, + 15, + 10, + 8, + 7, + 2, + 1, + 0, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 0, + 4, + 8, + 9, + 5, + 2, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "qc_metrics": { + "ABE": [ + 0.5472049713134766, + 0.7323309779167175, + 0.7323309779167175 + ], + "ABZ": [ + 2.8378100395202637, + 0.9026100039482117, + 0.9026100039482117 + ], + "BQZ": [ + 0, + 0.3658980131149292, + 0.6231489777565002 + ], + "CYZ": [ + 
-0.3053460121154785, + 0.38650599122047424, + 0.38650599122047424 + ], + "FIBC_I": [ + 0.05780189856886864, + 0.8680739998817444, + 0.8680739998817444 + ], + "FIBC_P": [ + 0.058706700801849365, + 0.8687599897384644, + 0.8687599897384644 + ], + "HWE_SLP_I": [ + 0.2966870069503784, + 0.8245689868927002, + 0.8245689868927002 + ], + "HWE_SLP_P": [ + 0.29650598764419556, + 0.8245310187339783, + 0.8245310187339783 + ], + "IOR": [ + -0.3110339939594269, + 0.8726909756660461, + 0.8726909756660461 + ], + "NM0": [ + 0.02396940067410469, + 0.012402700260281563, + 0.012402700260281563 + ], + "NM1": [ + 0.08848919719457626, + 0.2117999941110611, + 0.2117999941110611 + ], + "NMZ": [ + -0.8920080065727234, + 0.260342001914978, + 0.260342001914978 + ], + "STZ": [ + 0.4193579852581024, + 0.7314149737358093, + 0.7314149737358093 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 + ] + }, + "pub_freq": [ + { + "ds": "gnomAD", + "ALL": 0.002212, + "AFR": 6.184e-05, + "AMR": 0, + "ASJ": 0, + "EAS": 5.445e-05, + "FIN": 0, + "NFE": 3.541e-05, + "OTH": 0.0009807, + "SAS": 0.01771 + } + ] + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fc9" + }, + "chrom": "11", + "pos": 5200089, + "xpos": 11005200089, + "stop": 5200089, + "xstop": 11005200089, + "variant_id": "11-5200089-A-G", + "rsids": [ + "rs568258579" + ], + "site_quality": 255, + "filter": [], + "allele_count": 1, + "allele_num": 8344, + "allele_freq": 0.00011984699813183397, + "hom_count": 0, + "het_count": 1, + "cadd_phred": 0.017000000923871994, + "annotation": { + "region": { + "consequence": [ + "synonymous_variant", + "upstream_gene_variant" + ], + "_consequence": [ + 20, + 11 + ], + "hgvs": [ + "c.594T>C" + ] + }, + "genes": [ + { + "name": "ENSG00000224091", + "transcripts": [ + { + "name": "ENST00000418080", + "biotype": "lncRNA", + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + } + ], + "consequence": [ + "upstream_gene_variant" + ], + 
"_consequence": [ + 11 + ] + }, + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "synonymous_variant" + ], + "_consequence": [ + 20 + ], + "HGVSp": "p.Asn198=", + "HGVSc": "c.594T>C" + } + ], + "consequence": [ + "synonymous_variant" + ], + "_consequence": [ + 20 + ], + "hgvs": [ + "c.594T>C" + ] + } + ] + }, + "avg_dp": 34.34830093383789, + "avg_dp_alt": 76, + "dp_hist": [ + 0, + 0, + 2, + 49, + 334, + 899, + 1224, + 938, + 475, + 159, + 48, + 15, + 10, + 8, + 7, + 2, + 1, + 0, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0 + ], + "qc_metrics": { + "ABE": [ + 0.5394219756126404, + 0.7015720009803772, + 0.7015720009803772 + ], + "ABZ": [ + 0.6882290244102478, + 0.6298279762268066, + 0.6299039721488953 + ], + "BQZ": [ + -1.979830026626587, + 0.0913219004869461, + 0.0913219004869461 + ], + "CYZ": [ + -0.46217501163482666, + 0.3309420049190521, + 0.3309420049190521 + ], + "FIBC_I": [ + -0.01541849970817566, + 0.03713170066475868, + 0.354449987411499 + ], + "FIBC_P": [ + -5.9926900576101616e-05, + 0.5020989775657654, + 0.7904520034790039 + ], + "HWE_SLP_I": [ + -2.139309981430415e-05, + 0.4964889883995056, + 0.7690050005912781 + ], + "HWE_SLP_P": [ + -2.0764899090863764e-05, + 0.49706199765205383, + 0.7690809965133667 + ], + "IOR": [ + -0.2783510088920593, + 0.8804380297660828, + 0.8804380297660828 + ], + "NM0": [ + 0.3000580072402954, + 0.16951599717140198, + 0.16951599717140198 + ], + "NM1": [ + 0.08997999876737595, + 0.21481500566005707, + 0.21481500566005707 + ], + "NMZ": [ + 1.3733700513839722, + 0.7300029993057251, + 0.7300029993057251 + ], + "STZ": [ + 1.4134299755096436, + 0.9629449844360352, + 0.9629449844360352 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 + ] + }, + "pub_freq": [ + { + "ds": "gnomAD", + "ALL": 5.214e-05, + "AFR": 0, + "AMR": 
2.893e-05, + "ASJ": 0, + "EAS": 0, + "FIN": 0, + "NFE": 0, + "OTH": 0.0003274, + "SAS": 0.0003385 + } + ] + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fca" + }, + "chrom": "11", + "pos": 5200091, + "xpos": 11005200091, + "stop": 5200091, + "xstop": 11005200091, + "variant_id": "11-5200091-T-C", + "rsids": [ + "rs563721121" + ], + "site_quality": 255, + "filter": [], + "allele_count": 2, + "allele_num": 8344, + "allele_freq": 0.0002396930067334324, + "hom_count": 0, + "het_count": 2, + "cadd_phred": null, + "annotation": { + "region": { + "consequence": [ + "missense_variant", + "upstream_gene_variant" + ], + "_consequence": [ + 26, + 11 + ], + "hgvs": [ + "p.Asn198Asp" + ] + }, + "genes": [ + { + "name": "ENSG00000224091", + "transcripts": [ + { + "name": "ENST00000418080", + "biotype": "lncRNA", + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + } + ], + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + }, + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "HGVSp": "p.Asn198Asp", + "HGVSc": "c.592A>G" + } + ], + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "hgvs": [ + "p.Asn198Asp" + ] + } + ] + }, + "avg_dp": 34.27899932861328, + "avg_dp_alt": 44.5, + "dp_hist": [ + 0, + 0, + 2, + 47, + 342, + 897, + 1242, + 933, + 474, + 148, + 44, + 15, + 12, + 5, + 7, + 2, + 1, + 0, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "qc_metrics": { + "ABE": [ + 0.5362719893455505, + 0.6869180202484131, + 0.6869180202484131 + ], + "ABZ": [ + 0.6531850099563599, + 0.6184930205345154, + 0.6184930205345154 + ], + "BQZ": [ + -3.776240110397339, + 0.049038298428058624, + 0.049038298428058624 + ], + "CYZ": [ + -0.6275069713592529, + 0.27556899189949036, + 
0.27556899189949036 + ], + "FIBC_I": [ + -0.010463600046932697, + 0.40963199734687805, + 0.5373610258102417 + ], + "FIBC_P": [ + -0.00015982099284883589, + 0.30258700251579285, + 0.42482098937034607 + ], + "HWE_SLP_I": [ + -8.433640323346481e-05, + 0.356471985578537, + 0.40066400170326233 + ], + "HWE_SLP_P": [ + -8.306570089189336e-05, + 0.29606199264526367, + 0.39990100264549255 + ], + "IOR": [ + -0.5333170294761658, + 0.750495970249176, + 0.750495970249176 + ], + "NM0": [ + 0.4057430028915405, + 0.25034299492836, + 0.25034299492836 + ], + "NM1": [ + 0.0910836011171341, + 0.21698999404907227, + 0.21698999404907227 + ], + "NMZ": [ + 3.959280014038086, + 0.8379259705543518, + 0.8379259705543518 + ], + "STZ": [ + 0.05351340025663376, + 0.5315600037574768, + 0.5315600037574768 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 255, + 0.23694899678230286, + 1 + ] + } + }, + { + "_id": { + "$oid": "60afed33728788f0495b2fcb" + }, + "chrom": "11", + "pos": 5200118, + "xpos": 11005200118, + "stop": 5200118, + "xstop": 11005200118, + "variant_id": "11-5200118-G-A", + "rsids": [ + "rs151191536" + ], + "site_quality": 70, + "filter": [], + "allele_count": 1, + "allele_num": 8344, + "allele_freq": 0.00011984699813183397, + "hom_count": 0, + "het_count": 1, + "cadd_phred": 18.59000015258789, + "annotation": { + "region": { + "consequence": [ + "missense_variant", + "upstream_gene_variant" + ], + "_consequence": [ + 26, + 11 + ], + "hgvs": [ + "p.Arg189Cys" + ] + }, + "genes": [ + { + "name": "ENSG00000224091", + "transcripts": [ + { + "name": "ENST00000418080", + "biotype": "lncRNA", + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + } + ], + "consequence": [ + "upstream_gene_variant" + ], + "_consequence": [ + 11 + ] + }, + { + "name": "ENSG00000176742", + "transcripts": [ + { + "name": "ENST00000641270", + "biotype": "protein_coding", + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "HGVSp": "p.Arg189Cys", + 
"HGVSc": "c.565C>T" + } + ], + "consequence": [ + "missense_variant" + ], + "_consequence": [ + 26 + ], + "hgvs": [ + "p.Arg189Cys" + ] + } + ] + }, + "avg_dp": 34.299400329589844, + "avg_dp_alt": 32, + "dp_hist": [ + 0, + 0, + 0, + 54, + 320, + 929, + 1237, + 925, + 455, + 160, + 50, + 15, + 9, + 8, + 5, + 1, + 3, + 0, + 0, + 1 + ], + "dp_hist_alt": [ + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "qc_metrics": { + "ABE": [ + 0.6768490076065063, + 0.9456189870834351, + 0.9461910128593445 + ], + "ABZ": [ + 1.9755300283432007, + 0.8582280278205872, + 0.8587999939918518 + ], + "BQZ": [ + 0.655102014541626, + 0.7300029993057251, + 0.7300029993057251 + ], + "CYZ": [ + 0.4657289981842041, + 0.6855059862136841, + 0.6855059862136841 + ], + "FIBC_I": [ + -0.030542999505996704, + 0.008853609673678875, + 0.0154557004570961 + ], + "FIBC_P": [ + -0.3334130048751831, + 0.0010303800227120519, + 0.0035872398875653744 + ], + "HWE_SLP_I": [ + -1.0995900083798915e-05, + 0.770950973033905, + 0.778659999370575 + ], + "HWE_SLP_P": [ + -1.5572399206575938e-05, + 0.7709130048751831, + 0.7725160121917725 + ], + "IOR": [ + -0.8276879787445068, + 0.33193400502204895, + 0.33193400502204895 + ], + "NM0": [ + 0.3692089915275574, + 0.2219890058040619, + 0.2219890058040619 + ], + "NM1": [ + 0.10001400113105774, + 0.23503999412059784, + 0.23503999412059784 + ], + "NMZ": [ + -0.3444179892539978, + 0.3548310101032257, + 0.35486900806427 + ], + "STZ": [ + -0.2361290007829666, + 0.35807499289512634, + 0.35815098881721497 + ], + "SVM": [ + [ + 60 + ], + 0, + 1 + ], + "QUAL": [ + 70, + 0.047893501818180084, + 0.049000199884176254 + ] + }, + "pub_freq": [ + { + "ds": "gnomAD", + "ALL": 0.0001002, + "AFR": 6.259e-05, + "AMR": 8.679e-05, + "ASJ": 0, + "EAS": 0.0001089, + "FIN": 0, + "NFE": 4.451e-05, + "OTH": 0.000164, + "SAS": 0.0004248 + } + ] } ] diff --git a/tests/mongo_fixtures/variants.json b/tests/mongo_fixtures/variants.json deleted file 
mode 100644 index 8c1528d..0000000 --- a/tests/mongo_fixtures/variants.json +++ /dev/null @@ -1,443 +0,0 @@ -[ - { - "_id": { - "$oid": "deadbeefdeadbeef00000007" - }, - "allele_count": 3, - "pos": 10510077, - "genotype_depths": [ - [ - [ - 2.5, - 42293 - ], - [ - 7.5, - 16891 - ], - [ - 12.5, - 3101 - ], - [ - 17.5, - 420 - ], - [ - 22.5, - 66 - ], - [ - 27.5, - 11 - ], - [ - 32.5, - 1 - ], - [ - 37.5, - 1 - ], - [ - 42.5, - 0 - ], - [ - 47.5, - 0 - ], - [ - 52.5, - 0 - ], - [ - 57.5, - 0 - ], - [ - 62.5, - 0 - ], - [ - 67.5, - 0 - ], - [ - 72.5, - 0 - ], - [ - 77.5, - 0 - ], - [ - 82.5, - 0 - ], - [ - 87.5, - 0 - ], - [ - 92.5, - 0 - ], - [ - 97.5, - 0 - ] - ], - [ - [ - 2.5, - 1 - ], - [ - 7.5, - 1 - ], - [ - 12.5, - 1 - ], - [ - 17.5, - 0 - ], - [ - 22.5, - 0 - ], - [ - 27.5, - 0 - ], - [ - 32.5, - 0 - ], - [ - 37.5, - 0 - ], - [ - 42.5, - 0 - ], - [ - 47.5, - 0 - ], - [ - 52.5, - 0 - ], - [ - 57.5, - 0 - ], - [ - 62.5, - 0 - ], - [ - 67.5, - 0 - ], - [ - 72.5, - 0 - ], - [ - 77.5, - 0 - ], - [ - 82.5, - 0 - ], - [ - 87.5, - 0 - ], - [ - 92.5, - 0 - ], - [ - 97.5, - 0 - ] - ] - ], - "quality_metrics": { - "BQZ": -17.79129981994629, - "SVM": -1.7629499435424805, - "FIBC_P": 0.005319979973137379, - "NM0": 0.2963100075721741, - "NM1": 1.0928900241851807, - "CYZ": 1.1288000345230103, - "NMZ": 8.484000205993652, - "IOR": -0.5560309886932373, - "HWE_SLP_I": 0.34643200039863586, - "STZ": -4.797810077667236, - "ABZ": 11.212699890136719, - "FIBC_I": 0.1082649976015091, - "HWE_SLP_P": -0.252377986907959, - "DP": 283240, - "ABE": 0.7013429999351501 - }, - "cadd_phred": null, - "variant_id": "11-11111111-C-A", - "worst_csqidx": 34, - "avggq_alt": 39, - "alt": "A", - "allele_freq": 2.3891399905551225e-05, - "genotype_qualities": [ - [ - [ - 2.5, - 1 - ], - [ - 7.5, - 0 - ], - [ - 12.5, - 0 - ], - [ - 17.5, - 0 - ], - [ - 22.5, - 1 - ], - [ - 27.5, - 4 - ], - [ - 32.5, - 282 - ], - [ - 37.5, - 1172 - ], - [ - 42.5, - 1933 - ], - [ - 47.5, - 2155 - ], - [ - 52.5, - 11995 - ], 
- [ - 57.5, - 15133 - ], - [ - 62.5, - 7924 - ], - [ - 67.5, - 10023 - ], - [ - 72.5, - 6105 - ], - [ - 77.5, - 2371 - ], - [ - 82.5, - 1928 - ], - [ - 87.5, - 977 - ], - [ - 92.5, - 444 - ], - [ - 97.5, - 336 - ] - ], - [ - [ - 2.5, - 1 - ], - [ - 7.5, - 0 - ], - [ - 12.5, - 0 - ], - [ - 17.5, - 0 - ], - [ - 22.5, - 0 - ], - [ - 27.5, - 0 - ], - [ - 32.5, - 0 - ], - [ - 37.5, - 0 - ], - [ - 42.5, - 0 - ], - [ - 47.5, - 0 - ], - [ - 52.5, - 1 - ], - [ - 57.5, - 1 - ], - [ - 62.5, - 0 - ], - [ - 67.5, - 0 - ], - [ - 72.5, - 0 - ], - [ - 77.5, - 0 - ], - [ - 82.5, - 0 - ], - [ - 87.5, - 0 - ], - [ - 92.5, - 0 - ], - [ - 97.5, - 0 - ] - ] - ], - "vep_annotations": [ - { - "AMR_AF": "", - "SYMBOL": "", - "AFR_AF": "", - "Feature": "", - "Codons": "", - "MOTIF_NAME": "", - "DOMAINS": "", - "SIFT": "", - "CDS_position": "", - "CCDS": "", - "worst_csqidx": 34, - "Allele": "A", - "PolyPhen": "", - "MOTIF_SCORE_CHANGE": "", - "IMPACT": "MODIFIER", - "HGVSp": "", - "ENSP": "", - "LoF": "", - "INTRON": "", - "Existing_variation": "", - "HGVSc": "", - "LoF_filter": "", - "MOTIF_POS": "", - "HIGH_INF_POS": "", - "LoF_flags": "", - "UNIPARC": "", - "cDNA_position": "", - "PUBMED": "", - "ALLELE_NUM": "1", - "EAS_AF": "", - "Feature_type": "", - "AF": "", - "HGNC_ID": "", - "SAS_AF": "", - "LoF_info": "", - "SWISSPROT": "", - "FLAGS": "", - "Consequence": "intergenic_variant", - "Protein_position": "", - "Gene": "", - "STRAND": "", - "EUR_AF": "", - "DISTANCE": "", - "PHENO": "", - "SYMBOL_SOURCE": "", - "Amino_acids": "", - "TREMBL": "", - "CLIN_SIG": "", - "HGVS_OFFSET": "", - "BIOTYPE": "", - "HGVS": "", - "EXON": "", - "SOMATIC": "", - "CANONICAL": false - } - ], - "worst_csq_HGVS": "", - "worst_csq_CANONICAL": false, - "avggq": 61.90449905395508, - "ref": "C", - "avgdp_alt": 7.666669845581055, - "xpos": { - "$numberLong": "77010510077" - }, - "site_quality": 60, - "genes": [], - "rsids": [], - "hom_count": 0, - "chrom": "77", - "allele_num": 125568, - "cadd_raw": null, - 
"filter": "", - "avgdp": 4.511340141296387, - "xstop": { - "$numberLong": "77010510077" - }, - "transcripts": [] - } -] diff --git a/tests/status/test_counts.py b/tests/status/test_counts.py new file mode 100644 index 0000000..d44e697 --- /dev/null +++ b/tests/status/test_counts.py @@ -0,0 +1,73 @@ +from bravo_api.blueprints.status import status +from flask import Flask + +app = Flask('dummy') +app.register_blueprint(status.bp) + +# Total expected document counts, set at the time the fixtures in tests/mongo_fixtures/ were created +TOTAL_SNV_EXPECTED = 10 +TOTAL_TRANSCRIPT_EXPECTED = 1 +TOTAL_GENE_EXPECTED = 3 + + +def test_count_collection(mongodb): + snv_result = status.count_collection(mongodb.snv) + trx_result = status.count_collection(mongodb.transcripts) + gene_result = status.count_collection(mongodb.genes) + + assert(snv_result == TOTAL_SNV_EXPECTED) + assert(trx_result == TOTAL_TRANSCRIPT_EXPECTED) + assert(gene_result == TOTAL_GENE_EXPECTED) + + +def test_counts_aggregation(mocker, mongodb): + # Mock PyMongo's database with pytest-mongo's fixtures + pymongo_mock = mocker.Mock() + pymongo_mock.db = mongodb + app.mmongo = pymongo_mock + + # Mock the app's cache to return None (no caching) + cache_mock = mocker.Mock() + cache_mock.get = mocker.Mock(return_value=None) + app.cache = cache_mock + + expected = {'snvs': TOTAL_SNV_EXPECTED, + 'transcripts': TOTAL_TRANSCRIPT_EXPECTED, + 'genes': TOTAL_GENE_EXPECTED} + + with app.test_client() as client: + resp = client.get('/counts') + content = resp.get_json() + + assert(resp.content_type == 'application/json') + assert(content == expected) + + +def test_counts_cached_value_skips_query(mocker): + # Mock the app's cache to return cached value + cached = {'snvs': 300, 'transcripts': 200, 'genes': 100} + + cache_mock = mocker.Mock() + cache_mock.get = mocker.Mock(return_value=cached) + app.cache = cache_mock + + spy = mocker.spy(status, 'count_collection') + + with app.test_client() as client: + client.get('/counts') +
spy.assert_not_called() + + +def test_no_counts_cached_does_query(mocker): + # Mock the app's cache to return None (cache miss) + cache_mock = mocker.Mock() + cache_mock.get = mocker.Mock(return_value=None) + app.cache = cache_mock + + spy = mocker.spy(status, 'count_collection') + + with app.test_client() as client: + client.get('/counts') + + spy.assert_called() diff --git a/tests/status/test_status.py b/tests/status/test_usage_status.py similarity index 100% rename from tests/status/test_status.py rename to tests/status/test_usage_status.py diff --git a/tests/structvar/test_structvar.py b/tests/structvar/test_structvar.py index f68f979..71f960d 100644 --- a/tests/structvar/test_structvar.py +++ b/tests/structvar/test_structvar.py @@ -34,6 +34,13 @@ def test_structvar_spans_roi(mongodb): assert result[0]['pos'] == 718601 +def test_structvar_removes_objectid(mongodb): + result = structvar.sv_region(mongodb.structvar, "1", roi_start=720000, roi_stop=820000) + sv = result[0] + print(sv.keys()) + assert '_id' not in sv.keys() + + def test_structvar_multiple_results(mocker, mongodb): result = structvar.sv_region(mongodb.structvar, "3", roi_start=950000, roi_stop=1400000) assert isinstance(result, list)