From 33920bea636217f8bcf3f8c19601c6f40921504b Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Sun, 1 Mar 2020 14:32:16 +0530 Subject: [PATCH 1/9] Solves Issue_26 using uri filenames. --- pysradb/basedb.py | 5 ++++- pysradb/sradb.py | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pysradb/basedb.py b/pysradb/basedb.py index d84fe7e5..c60c555b 100644 --- a/pysradb/basedb.py +++ b/pysradb/basedb.py @@ -2,6 +2,7 @@ import sys import pandas as pd +import os from .utils import _extract_first_field @@ -24,7 +25,9 @@ def __init__(self, sqlite_file): def open(self): """Open sqlite connection.""" - self.db = sqlite3.connect(self.sqlite_file) + sqlite_file_path = os.getcwd() + check_file = "{}/{}".format(sqlite_file_path, self.sqlite_file) # str(sqlite_file_path)+"/"+str(self.sqlite_file) + self.db = sqlite3.connect('file:{}?mode=rw'.format(check_file), uri=True) self.db.text_factory = str def close(self): diff --git a/pysradb/sradb.py b/pysradb/sradb.py index 80afdf7b..2893be9d 100644 --- a/pysradb/sradb.py +++ b/pysradb/sradb.py @@ -177,7 +177,14 @@ def __init__(self, sqlite_file): """ - super(SRAdb, self).__init__(sqlite_file) + try: + super(SRAdb, self).__init__(sqlite_file) + except: + print( + "{} not a valid SRAmetadb.sqlite file.\n".format(sqlite_file) + + "Please download one using `pysradb metadb`." + ) + sys.exit(1) _verify_srametadb(sqlite_file) self._db_type = "SRA" self.valid_in_acc_type = [ From b7be4d42e518a156dd0320f940ae97bf93744060 Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Tue, 3 Mar 2020 04:24:10 +0530 Subject: [PATCH 2/9] Issue #26 test_case added --- pysradb/basedb.py | 6 ++-- pysradb/cli.py | 2 ++ pysradb/sradb.py | 11 ++----- testingdb.py | 21 ++++++++++++ tests/_test_sradb.py | 77 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 12 deletions(-) create mode 100644 testingdb.py diff --git a/pysradb/basedb.py b/pysradb/basedb.py index c60c555b..d0d2960d 100644 --- a/pysradb/basedb.py +++ b/pysradb/basedb.py @@ -25,9 +25,9 @@ def __init__(self, sqlite_file): def open(self): """Open sqlite connection.""" - sqlite_file_path = os.getcwd() - check_file = "{}/{}".format(sqlite_file_path, self.sqlite_file) # str(sqlite_file_path)+"/"+str(self.sqlite_file) - self.db = sqlite3.connect('file:{}?mode=rw'.format(check_file), uri=True) + # Originally sqlite3.connect(self.sqlite_file) + self.sqlite_file = self.sqlite_file.replace('?','') + self.db = sqlite3.connect('file:{}?mode=rw'.format(self.sqlite_file), uri=True) self.db.text_factory = str def close(self): diff --git a/pysradb/cli.py b/pysradb/cli.py index 90111f08..e3799762 100644 --- a/pysradb/cli.py +++ b/pysradb/cli.py @@ -43,6 +43,8 @@ def _print_save_df(df, saveto=None): to_print = [] for line in to_print_split: to_print.append(line.lstrip()) + print("sajgndsfsdfdsfsdfdfasfadsfasdfsdfsdfsdfsgsgdasljgnasflnsa") + print(sys.getsizeof(to_print_split)) print(("{}".format(os.linesep)).join(to_print)) diff --git a/pysradb/sradb.py b/pysradb/sradb.py index 2893be9d..fc57a964 100644 --- a/pysradb/sradb.py +++ b/pysradb/sradb.py @@ -151,7 +151,7 @@ def _verify_srametadb(filepath): db = BASEdb(filepath) except: print( - "{} not a valid SRAmetadb.sqlite file.\n".format(filepath) + "{} not a valid SRAmetadb.sqlite file or path.\n".format(filepath) + "Please download one using `pysradb metadb`." ) sys.exit(1) @@ -177,15 +177,8 @@ def __init__(self, sqlite_file): """ - try: - super(SRAdb, self).__init__(sqlite_file) - except: - print( - "{} not a valid SRAmetadb.sqlite file.\n".format(sqlite_file) - + "Please download one using `pysradb metadb`." - ) - sys.exit(1) _verify_srametadb(sqlite_file) + super(SRAdb, self).__init__(sqlite_file) self._db_type = "SRA" self.valid_in_acc_type = [ "SRA", diff --git a/testingdb.py b/testingdb.py new file mode 100644 index 00000000..5c56e3a9 --- /dev/null +++ b/testingdb.py @@ -0,0 +1,21 @@ +from pysradb import SRAdb +import os +#db = SRAdb('SRAmetadb.sqlite') + +def test_list_tables(sradb_connection): + fields = sradb_connection.list_fields("sqlite_sequence") + print(fields) + +def test_changed_paths2(): + wrong_filename = "SRAme'tadb2.sql.ite" + path = os.path.join(os.getcwd(), "data", "{}".format(wrong_filename)) + try: + db = SRAdb(path) + except: + pass + assert os.path.isfile(path) == False + #assert os.path.isfile(path) == False + + + +test_changed_paths2() \ No newline at end of file diff --git a/tests/_test_sradb.py b/tests/_test_sradb.py index 78bddf3e..c463d7c7 100644 --- a/tests/_test_sradb.py +++ b/tests/_test_sradb.py @@ -76,6 +76,9 @@ def test_desc_table(sradb_connection): def test_all_row_counts(sradb_connection): assert sradb_connection.all_row_counts().loc["metaInfo", "count"] == 2 +def test_all_row_counts2(sradb_connection): + assert len(sradb_connection.all_row_counts()) == 13 + def test_sra_metadata(sradb_connection): df = sradb_connection.sra_metadata("SRP017942") @@ -148,3 +151,77 @@ def test_strain_type(sradb_connection): "s288c", "s288c", ] + +def test_srp_to_srx(sradb_connection): + assert len(sradb_connection.srp_to_srx("SRP082570")) == 14 + + +def test_srp_to_srr(sradb_connection): + df = sradb_connection.srp_to_srr("SRP091987") + assert sorted(list(df["run_accession"])[:3]) == [ + 'SRR4447104', + 'SRR4447105', + 'SRR4447106' + ] + + +def test_srp_to_gse(sradb_connection): + gse_id = sradb_connection.srp_to_gse("SRP050443").iloc[0,1] + df = sradb_connection.gse_to_gsm(gse_id) + assert("GSM1557451" in df["experiment_alias"].to_list()) + + +def test_gsm_to_gse(sradb_connection): + df = sradb_connection.gsm_to_gse(["GSM1020651","GSM1020664","GSM1020771"]) + assert set(list(df["study_alias"])) == {"GSE41637"} + + +@pytest.mark.xfail(raises=ValueError) +def test_wrong_input_metadata(sradb_connection): + df = sradb_connection.sra_metadata("should_throw_error") + + +def test_search_by_expt_id(sradb_connection): + srx_id = "SRX116363" + df_expt = sradb_connection.search_by_expt_id(srx_id) + sra_id = df_expt["submission_accession"].loc[0] + df = sradb_connection.sra_metadata(sra_id) + connected_srp = sradb_connection.srx_to_srp("SRX116363").iloc[0,1] + assert(srx_id in df["experiment_accession"].to_list()) and (connected_srp == "SRP010374") + + +def test_changed_paths(): + wrong_filename = "SRAmet?adb.?sql?ite" + path = os.path.join(os.getcwd(), "data", "{}".format(wrong_filename)) + wrong_path = os.path.join(os.getcwd(), "data", "SRAmet") + try: + db = SRAdb(path) + except: + pass + assert os.path.isfile(wrong_path) == False + + +def test_changed_paths2(): + wrong_filename = "SRAme'tadb2.sql.ite" + path = os.path.join(os.getcwd(), "data", "{}".format(wrong_filename)) + try: + db = SRAdb(path) + except: + pass + assert os.path.isfile(path) == False + + +def test_wrong_path_exists(): + wrong_filename = "wrongdb.sqlite" + path = os.path.join(os.getcwd(), "data") + wrongfile_path = os.path.join(path, wrong_filename) + with open(wrongfile_path, 'w') as f: + pass + try: + db = SRAdb(wrongfile_path) + assert False + except Exception as e: + assert True + finally: + os.remove(wrongfile_path) + From a21b62ddad246320380ead4c86e50fb4ad0f8dae Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Tue, 3 Mar 2020 04:28:52 +0530 Subject: [PATCH 3/9] Issue #26 test_case added updated --- pysradb/cli.py | 2 -- testingdb.py | 21 --------------------- 2 files changed, 23 deletions(-) delete mode 100644 testingdb.py diff --git a/pysradb/cli.py b/pysradb/cli.py index e3799762..90111f08 100644 --- a/pysradb/cli.py +++ b/pysradb/cli.py @@ -43,8 +43,6 @@ def _print_save_df(df, saveto=None): to_print = [] for line in to_print_split: to_print.append(line.lstrip()) - print("sajgndsfsdfdsfsdfdfasfadsfasdfsdfsdfsdfsgsgdasljgnasflnsa") - print(sys.getsizeof(to_print_split)) print(("{}".format(os.linesep)).join(to_print)) diff --git a/testingdb.py b/testingdb.py deleted file mode 100644 index 5c56e3a9..00000000 --- a/testingdb.py +++ /dev/null @@ -1,21 +0,0 @@ -from pysradb import SRAdb -import os -#db = SRAdb('SRAmetadb.sqlite') - -def test_list_tables(sradb_connection): - fields = sradb_connection.list_fields("sqlite_sequence") - print(fields) - -def test_changed_paths2(): - wrong_filename = "SRAme'tadb2.sql.ite" - path = os.path.join(os.getcwd(), "data", "{}".format(wrong_filename)) - try: - db = SRAdb(path) - except: - pass - assert os.path.isfile(path) == False - #assert os.path.isfile(path) == False - - - -test_changed_paths2() \ No newline at end of file From 7d9db56858ed547117a06fab512ed02800a88651 Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Wed, 4 Mar 2020 23:19:01 +0530 Subject: [PATCH 4/9] test cases updated --- pysradb/basedb.py | 3 +- pysradb/sradb.py | 3 ++ pysradb/sraweb.py | 4 +- tests/{_test_sradb.py => test_sradb.py} | 61 +++++++++---------------- 4 files changed, 29 insertions(+), 42 deletions(-) rename tests/{_test_sradb.py => test_sradb.py} (85%) diff --git a/pysradb/basedb.py b/pysradb/basedb.py index d0d2960d..d98446ae 100644 --- a/pysradb/basedb.py +++ b/pysradb/basedb.py @@ -26,8 +26,7 @@ def __init__(self, sqlite_file): def open(self): """Open sqlite connection.""" # Originally sqlite3.connect(self.sqlite_file) - self.sqlite_file = self.sqlite_file.replace('?','') - self.db = sqlite3.connect('file:{}?mode=rw'.format(self.sqlite_file), uri=True) + self.db = sqlite3.connect('file:{}?mode=ro'.format(self.sqlite_file), uri=True) self.db.text_factory = str def close(self): diff --git a/pysradb/sradb.py b/pysradb/sradb.py index fc57a964..4930e0f0 100644 --- a/pysradb/sradb.py +++ b/pysradb/sradb.py @@ -290,7 +290,10 @@ def sra_metadata( output_columns += ["sample_attribute"] output_columns = [x for x in output_columns if x != in_type] output_columns = unique(output_columns) + print(in_type) + print(output_columns) select_type = [in_type + "_accession"] + output_columns + print(select_type) select_type_sql = (",").join(select_type) sql = ( "SELECT DISTINCT " diff --git a/pysradb/sraweb.py b/pysradb/sraweb.py index 3f942089..738a96c8 100644 --- a/pysradb/sraweb.py +++ b/pysradb/sraweb.py @@ -183,8 +183,10 @@ def get_esummary_response(self, db, term, usehistory="y"): if isinstance(term, list): term = " OR ".join(term) payload += [("term", term)] - + print("gsjkgsdjgbsdkjgskjgdskjgbdskjgsdgjbdskjgsdlgnsdljgndslgndslgndjndslgvdsjglsngsa") + print(OrderedDict(payload)) request = requests.get(self.base_url["esearch"], params=OrderedDict(payload)) + print(request.url) esearch_response = request.json() if "esummaryresult" in esearch_response: print("No result found") diff --git a/tests/_test_sradb.py b/tests/test_sradb.py similarity index 85% rename from tests/_test_sradb.py rename to tests/test_sradb.py index c463d7c7..f1d4d2cb 100644 --- a/tests/_test_sradb.py +++ b/tests/test_sradb.py @@ -5,6 +5,7 @@ import pytest from pysradb import SRAdb from pysradb.filter_attrs import guess_cell_type, guess_tissue_type, guess_strain_type +from sqlite3 import OperationalError @pytest.fixture(scope="module") @@ -96,11 +97,22 @@ def test_search(sradb_connection): df = sradb_connection.search_sra(search_str="breast cancer") assert len(df.index) +def test_search2(sradb_connection): + df = sradb_connection.search_sra('"salivary microbiome" AND "diabetes mellitus"', detailed=True) + assert "SRP241848" in df["study_accession"].to_list() + def test_search_by_expt_id(sradb_connection): df = sradb_connection.search_by_expt_id("SRX1254413") assert df.study_name.tolist()[0] == "GSE73136" +def test_search_by_expt_id2(sradb_connection): + srx_id = "SRX116363" + df_expt = sradb_connection.search_by_expt_id(srx_id) + sra_id = df_expt["submission_accession"].loc[0] + df = sradb_connection.sra_metadata(sra_id) + connected_srp = sradb_connection.srx_to_srp("SRX116363").iloc[0,1] + assert(srx_id in df["experiment_accession"].to_list()) and (connected_srp == "SRP010374") # def test_download_fasp(sradb_connection): # df = sradb_connection.sra_metadata("SRP098789") @@ -176,52 +188,23 @@ def test_gsm_to_gse(sradb_connection): assert set(list(df["study_alias"])) == {"GSE41637"} +def test_srs_to_gsm(sradb_connection): + df = sradb_connection.srs_to_gsm("SRS1757470") + assert "GSM2358940" == df.iloc[0,1] + + @pytest.mark.xfail(raises=ValueError) def test_wrong_input_metadata(sradb_connection): df = sradb_connection.sra_metadata("should_throw_error") -def test_search_by_expt_id(sradb_connection): - srx_id = "SRX116363" - df_expt = sradb_connection.search_by_expt_id(srx_id) - sra_id = df_expt["submission_accession"].loc[0] - df = sradb_connection.sra_metadata(sra_id) - connected_srp = sradb_connection.srx_to_srp("SRX116363").iloc[0,1] - assert(srx_id in df["experiment_accession"].to_list()) and (connected_srp == "SRP010374") - - -def test_changed_paths(): - wrong_filename = "SRAmet?adb.?sql?ite" - path = os.path.join(os.getcwd(), "data", "{}".format(wrong_filename)) - wrong_path = os.path.join(os.getcwd(), "data", "SRAmet") +def test_file_creation(): + """Test to check creation of file if it isn't available""" + path = 'SRAmetadb.sqlite' try: db = SRAdb(path) - except: + except OperationalError: pass - assert os.path.isfile(wrong_path) == False - + assert os.path.isfile(path) == False -def test_changed_paths2(): - wrong_filename = "SRAme'tadb2.sql.ite" - path = os.path.join(os.getcwd(), "data", "{}".format(wrong_filename)) - try: - db = SRAdb(path) - except: - pass - assert os.path.isfile(path) == False - - -def test_wrong_path_exists(): - wrong_filename = "wrongdb.sqlite" - path = os.path.join(os.getcwd(), "data") - wrongfile_path = os.path.join(path, wrong_filename) - with open(wrongfile_path, 'w') as f: - pass - try: - db = SRAdb(wrongfile_path) - assert False - except Exception as e: - assert True - finally: - os.remove(wrongfile_path) From f781f1d44c34fe6242784280f9bd3097e1e44215 Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Wed, 4 Mar 2020 23:22:31 +0530 Subject: [PATCH 5/9] test cases updated --- pysradb/sradb.py | 3 --- pysradb/sraweb.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/pysradb/sradb.py b/pysradb/sradb.py index 4930e0f0..fc57a964 100644 --- a/pysradb/sradb.py +++ b/pysradb/sradb.py @@ -290,10 +290,7 @@ def sra_metadata( output_columns += ["sample_attribute"] output_columns = [x for x in output_columns if x != in_type] output_columns = unique(output_columns) - print(in_type) - print(output_columns) select_type = [in_type + "_accession"] + output_columns - print(select_type) select_type_sql = (",").join(select_type) sql = ( "SELECT DISTINCT " diff --git a/pysradb/sraweb.py b/pysradb/sraweb.py index 738a96c8..d0b2e75b 100644 --- a/pysradb/sraweb.py +++ b/pysradb/sraweb.py @@ -183,10 +183,7 @@ def get_esummary_response(self, db, term, usehistory="y"): if isinstance(term, list): term = " OR ".join(term) payload += [("term", term)] - print("gsjkgsdjgbsdkjgskjgdskjgbdskjgsdgjbdskjgsdlgnsdljgndslgndslgndjndslgvdsjglsngsa") - print(OrderedDict(payload)) request = requests.get(self.base_url["esearch"], params=OrderedDict(payload)) - print(request.url) esearch_response = request.json() if "esummaryresult" in esearch_response: print("No result found") From 952a5c5ab065e01e42c1eea6426da4b4bfc56128 Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Fri, 6 Mar 2020 02:10:06 +0530 Subject: [PATCH 6/9] test_sradb.py changes --- tests/_test_sradb.py | 198 +++++++++++++++++++++++++++++++++++++++++ tests/test_sradb.py | 204 ++----------------------------------------- 2 files changed, 206 insertions(+), 196 deletions(-) create mode 100644 tests/_test_sradb.py diff --git a/tests/_test_sradb.py b/tests/_test_sradb.py new file mode 100644 index 00000000..3da8c062 --- /dev/null +++ b/tests/_test_sradb.py @@ -0,0 +1,198 @@ +"""Tests for sradb.py +""" + +import os +import pytest +from pysradb import SRAdb +from pysradb.filter_attrs import guess_cell_type, guess_tissue_type, guess_strain_type +from sqlite3 import OperationalError + + +@pytest.fixture(scope="module") +def sradb_connection(conf_download_sradb_file): + db_file = conf_download_sradb_file + db = SRAdb(db_file) + return db + + +def test_list_tables(sradb_connection): + sra_tables = sradb_connection.list_tables() + assert sra_tables == [ + "metaInfo", + "submission", + "study", + "sample", + "experiment", + "run", + "sra", + "sra_ft", + "sra_ft_content", + "sra_ft_segments", + "sra_ft_segdir", + "col_desc", + "fastq", + ] + + +def test_list_fields(sradb_connection): + fields = sradb_connection.list_fields("study") + assert fields == [ + "study_ID", + "study_alias", + "study_accession", + "study_title", + "study_type", + "study_abstract", + "broker_name", + "center_name", + "center_project_name", + "study_description", + "related_studies", + "primary_study", + "sra_link", + "study_url_link", + "xref_link", + "study_entrez_link", + "ddbj_link", + "ena_link", + "study_attribute", + "submission_accession", + "sradb_updated", + ] + + +def test_desc_table(sradb_connection): + names = sorted(sradb_connection.desc_table("sra_ft").name.tolist()) + assert names[:7] == [ + "SRR_bamFile", + "SRX_bamFile", + "SRX_fastqFTP", + "adapter_spec", + "anonymized_name", + "base_caller", + "bases", + ] + + +def test_all_row_counts(sradb_connection): + assert sradb_connection.all_row_counts().loc["metaInfo", "count"] == 2 + +def test_all_row_counts2(sradb_connection): + assert len(sradb_connection.all_row_counts()) == 13 + + +def test_sra_metadata(sradb_connection): + df = sradb_connection.sra_metadata("SRP017942") + assert df["experiment_accession"][0] == "SRX217027" + + +def test_sra_metadata2(sradb_connection): + df = sradb_connection.sra_metadata( + "SRP017942", detailed=True, expand_sample_attributes=True + ) + assert "3xflag-gfp" in df["transfected_with"].tolist() + + +def test_search(sradb_connection): + df = sradb_connection.search_sra(search_str="breast cancer") + assert len(df.index) + +def test_search2(sradb_connection): + df = sradb_connection.search_sra('"salivary microbiome" AND "diabetes mellitus"', detailed=True) + assert "SRP241848" in df["study_accession"].to_list() + + +def test_search_by_expt_id(sradb_connection): + df = sradb_connection.search_by_expt_id("SRX1254413") + assert df.study_name.tolist()[0] == "GSE73136" + +def test_search_by_expt_id2(sradb_connection): + srx_id = "SRX116363" + df_expt = sradb_connection.search_by_expt_id(srx_id) + sra_id = df_expt["submission_accession"].loc[0] + df = sradb_connection.sra_metadata(sra_id) + connected_srp = sradb_connection.srx_to_srp("SRX116363").iloc[0,1] + assert(srx_id in df["experiment_accession"].to_list()) and (connected_srp == "SRP010374") + +# def test_download_fasp(sradb_connection): +# df = sradb_connection.sra_metadata("SRP098789") +# df = df[df.experiment_accession == "SRX2536403"] +# sradb_connection.download(df=df, out_dir="data/", skip_confirmation=True) +# assert os.path.isfile("data/SRP098789/SRX2536403/SRR5227288.sra") +# assert os.path.getsize("data/SRP098789/SRX2536403/SRR5227288.sra") +# os.remove("data/SRP098789/SRX2536403/SRR5227288.sra") + + +@pytest.mark.xfail +def test_download_ftp(sradb_connection): + # This happens to fail because of ftp problems + df = sradb_connection.sra_metadata("SRP098789") + df = df[df.experiment_accession == "SRX2536404"] + sradb_connection.download( + df=df, protocol="ftp", out_dir="data/", skip_confirmation=True + ) + assert os.path.isfile("data/SRP098789/SRX2536404/SRR5227289.sra") + assert os.path.getsize("data/SRP098789/SRX2536404/SRR5227289.sra") + os.remove("data/SRP098789/SRX2536404/SRR5227289.sra") + + +def test_tissue_type(sradb_connection): + df = sradb_connection.sra_metadata("SRP016501", detailed=True) + df = df[df.experiment_accession == "SRX196389"] + cell_type = df["sample_attribute"].apply(lambda x: guess_cell_type(x)) + tissue_type = df["sample_attribute"].apply(lambda x: guess_tissue_type(x)) + assert cell_type.tolist() == ["chicken_brain"] + assert tissue_type.tolist() == ["brain"] + + +def test_strain_type(sradb_connection): + df = sradb_connection.sra_metadata("SRP043036", detailed=True) + df = df.sort_values(by="experiment_accession") + strains = df["sample_attribute"].apply(lambda x: guess_strain_type(x)).tolist() + assert strains == [ + "by4741", + "by4741", + "by4741", + "by4741", + "by4741", + "by4741", + "by4741", + "by4741", + "s288c", + "s288c", + "s288c", + "s288c", + ] + +def test_srp_to_srx(sradb_connection): + assert len(sradb_connection.srp_to_srx("SRP082570")) == 14 + + +def test_srp_to_srr(sradb_connection): + df = sradb_connection.srp_to_srr("SRP091987") + assert sorted(list(df["run_accession"])[:3]) == [ + 'SRR4447104', + 'SRR4447105', + 'SRR4447106' + ] + + +def test_srp_to_gse(sradb_connection): + gse_id = sradb_connection.srp_to_gse("SRP050443").iloc[0,1] + df = sradb_connection.gse_to_gsm(gse_id) + assert("GSM1557451" in df["experiment_alias"].to_list()) + + +def test_gsm_to_gse(sradb_connection): + df = sradb_connection.gsm_to_gse(["GSM1020651","GSM1020664","GSM1020771"]) + assert set(list(df["study_alias"])) == {"GSE41637"} + + +def test_srs_to_gsm(sradb_connection): + df = sradb_connection.srs_to_gsm("SRS1757470") + assert "GSM2358940" == df.iloc[0,1] + + +@pytest.mark.xfail(raises=ValueError) +def test_wrong_input_metadata(sradb_connection): + df = sradb_connection.sra_metadata("should_throw_error") \ No newline at end of file diff --git a/tests/test_sradb.py b/tests/test_sradb.py index f1d4d2cb..c27b732e 100644 --- a/tests/test_sradb.py +++ b/tests/test_sradb.py @@ -7,204 +7,16 @@ from pysradb.filter_attrs import guess_cell_type, guess_tissue_type, guess_strain_type from sqlite3 import OperationalError - -@pytest.fixture(scope="module") -def sradb_connection(conf_download_sradb_file): - db_file = conf_download_sradb_file - db = SRAdb(db_file) - return db - - -def test_list_tables(sradb_connection): - sra_tables = sradb_connection.list_tables() - assert sra_tables == [ - "metaInfo", - "submission", - "study", - "sample", - "experiment", - "run", - "sra", - "sra_ft", - "sra_ft_content", - "sra_ft_segments", - "sra_ft_segdir", - "col_desc", - "fastq", - ] - - -def test_list_fields(sradb_connection): - fields = sradb_connection.list_fields("study") - assert fields == [ - "study_ID", - "study_alias", - "study_accession", - "study_title", - "study_type", - "study_abstract", - "broker_name", - "center_name", - "center_project_name", - "study_description", - "related_studies", - "primary_study", - "sra_link", - "study_url_link", - "xref_link", - "study_entrez_link", - "ddbj_link", - "ena_link", - "study_attribute", - "submission_accession", - "sradb_updated", - ] - - -def test_desc_table(sradb_connection): - names = sorted(sradb_connection.desc_table("sra_ft").name.tolist()) - assert names[:7] == [ - "SRR_bamFile", - "SRX_bamFile", - "SRX_fastqFTP", - "adapter_spec", - "anonymized_name", - "base_caller", - "bases", - ] - - -def test_all_row_counts(sradb_connection): - assert sradb_connection.all_row_counts().loc["metaInfo", "count"] == 2 - -def test_all_row_counts2(sradb_connection): - assert len(sradb_connection.all_row_counts()) == 13 - - -def test_sra_metadata(sradb_connection): - df = sradb_connection.sra_metadata("SRP017942") - assert df["experiment_accession"][0] == "SRX217027" - - -def test_sra_metadata2(sradb_connection): - df = sradb_connection.sra_metadata( - "SRP017942", detailed=True, expand_sample_attributes=True - ) - assert "3xflag-gfp" in df["transfected_with"].tolist() - - -def test_search(sradb_connection): - df = sradb_connection.search_sra(search_str="breast cancer") - assert len(df.index) - -def test_search2(sradb_connection): - df = sradb_connection.search_sra('"salivary microbiome" AND "diabetes mellitus"', detailed=True) - assert "SRP241848" in df["study_accession"].to_list() - - -def test_search_by_expt_id(sradb_connection): - df = sradb_connection.search_by_expt_id("SRX1254413") - assert df.study_name.tolist()[0] == "GSE73136" - -def test_search_by_expt_id2(sradb_connection): - srx_id = "SRX116363" - df_expt = sradb_connection.search_by_expt_id(srx_id) - sra_id = df_expt["submission_accession"].loc[0] - df = sradb_connection.sra_metadata(sra_id) - connected_srp = sradb_connection.srx_to_srp("SRX116363").iloc[0,1] - assert(srx_id in df["experiment_accession"].to_list()) and (connected_srp == "SRP010374") - -# def test_download_fasp(sradb_connection): -# df = sradb_connection.sra_metadata("SRP098789") -# df = df[df.experiment_accession == "SRX2536403"] -# sradb_connection.download(df=df, out_dir="data/", skip_confirmation=True) -# assert os.path.isfile("data/SRP098789/SRX2536403/SRR5227288.sra") -# assert os.path.getsize("data/SRP098789/SRX2536403/SRR5227288.sra") -# os.remove("data/SRP098789/SRX2536403/SRR5227288.sra") - - -@pytest.mark.xfail -def test_download_ftp(sradb_connection): - # This happens to fail because of ftp problems - df = sradb_connection.sra_metadata("SRP098789") - df = df[df.experiment_accession == "SRX2536404"] - sradb_connection.download( - df=df, protocol="ftp", out_dir="data/", skip_confirmation=True - ) - assert os.path.isfile("data/SRP098789/SRX2536404/SRR5227289.sra") - assert os.path.getsize("data/SRP098789/SRX2536404/SRR5227289.sra") - os.remove("data/SRP098789/SRX2536404/SRR5227289.sra") - - -def test_tissue_type(sradb_connection): - df = sradb_connection.sra_metadata("SRP016501", detailed=True) - df = df[df.experiment_accession == "SRX196389"] - cell_type = df["sample_attribute"].apply(lambda x: guess_cell_type(x)) - tissue_type = df["sample_attribute"].apply(lambda x: guess_tissue_type(x)) - assert cell_type.tolist() == ["chicken_brain"] - assert tissue_type.tolist() == ["brain"] - - -def test_strain_type(sradb_connection): - df = sradb_connection.sra_metadata("SRP043036", detailed=True) - df = df.sort_values(by="experiment_accession") - strains = df["sample_attribute"].apply(lambda x: guess_strain_type(x)).tolist() - assert strains == [ - "by4741", - "by4741", - "by4741", - "by4741", - "by4741", - "by4741", - "by4741", - "by4741", - "s288c", - "s288c", - "s288c", - "s288c", - ] - -def test_srp_to_srx(sradb_connection): - assert len(sradb_connection.srp_to_srx("SRP082570")) == 14 - - -def test_srp_to_srr(sradb_connection): - df = sradb_connection.srp_to_srr("SRP091987") - assert sorted(list(df["run_accession"])[:3]) == [ - 'SRR4447104', - 'SRR4447105', - 'SRR4447106' - ] - - -def test_srp_to_gse(sradb_connection): - gse_id = sradb_connection.srp_to_gse("SRP050443").iloc[0,1] - df = sradb_connection.gse_to_gsm(gse_id) - assert("GSM1557451" in df["experiment_alias"].to_list()) - - -def test_gsm_to_gse(sradb_connection): - df = sradb_connection.gsm_to_gse(["GSM1020651","GSM1020664","GSM1020771"]) - assert set(list(df["study_alias"])) == {"GSE41637"} - - -def test_srs_to_gsm(sradb_connection): - df = sradb_connection.srs_to_gsm("SRS1757470") - assert "GSM2358940" == df.iloc[0,1] - - -@pytest.mark.xfail(raises=ValueError) -def test_wrong_input_metadata(sradb_connection): - df = sradb_connection.sra_metadata("should_throw_error") - - -def test_file_creation(): - """Test to check creation of file if it isn't available""" - path = 'SRAmetadb.sqlite' +def test_not_valid_file(): + """Test to check for error if file is either not + present or not a valid sqlite file""" + path = 'SRAmetadb.sqlite' try: db = SRAdb(path) + assert False + except SystemExit: + assert os.path.isfile(path) == False except OperationalError: - pass - assert os.path.isfile(path) == False + assert True From a26a89a1af82f5b36115016f8eeaff693c9a1e32 Mon Sep 17 00:00:00 2001 From: Saad Ahmad <45657541+DaasDaham@users.noreply.github.com> Date: Tue, 10 Mar 2020 10:06:58 +0530 Subject: [PATCH 7/9] updated test_not_valid_file() in test_sradb.py removed assert False statement --- tests/test_sradb.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_sradb.py b/tests/test_sradb.py index c27b732e..86fcac4a 100644 --- a/tests/test_sradb.py +++ b/tests/test_sradb.py @@ -13,7 +13,6 @@ def test_not_valid_file(): path = 'SRAmetadb.sqlite' try: db = SRAdb(path) - assert False except SystemExit: assert os.path.isfile(path) == False except OperationalError: From ec87aaacae0ebcbb55940426184d01cf7ee6e942 Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Tue, 10 Mar 2020 12:16:45 +0530 Subject: [PATCH 8/9] black --- pysradb/cli.py | 3 ++- tests/test_sradb.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pysradb/cli.py b/pysradb/cli.py index 90111f08..7f290864 100644 --- a/pysradb/cli.py +++ b/pysradb/cli.py @@ -43,7 +43,8 @@ def _print_save_df(df, saveto=None): to_print = [] for line in to_print_split: to_print.append(line.lstrip()) - print(("{}".format(os.linesep)).join(to_print)) + sys.stdout.write(("{}".format(os.linesep)).join(to_print)) + #print(("{}".format(os.linesep)).join(to_print)) def _check_sradb_file(db): diff --git a/tests/test_sradb.py b/tests/test_sradb.py index c27b732e..deb883cf 100644 --- a/tests/test_sradb.py +++ b/tests/test_sradb.py @@ -13,10 +13,10 @@ def test_not_valid_file(): path = 'SRAmetadb.sqlite' try: db = SRAdb(path) - assert False - except SystemExit: - assert os.path.isfile(path) == False except OperationalError: assert True + except SystemExit: + assert os.path.isfile(path) == False + From 783e017df0bb68d37ae049c32a05f5e61e3fe697 Mon Sep 17 00:00:00 2001 From: DaasDaham Date: Tue, 10 Mar 2020 12:20:26 +0530 Subject: [PATCH 9/9] reformatted with black --- pysradb/basedb.py | 2 +- pysradb/cli.py | 2 +- tests/_test_sradb.py | 31 ++++++++++++++++++++----------- tests/test_sradb.py | 6 ++---- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/pysradb/basedb.py b/pysradb/basedb.py index d98446ae..a1e76514 100644 --- a/pysradb/basedb.py +++ b/pysradb/basedb.py @@ -26,7 +26,7 @@ def __init__(self, sqlite_file): def open(self): """Open sqlite connection.""" # Originally sqlite3.connect(self.sqlite_file) - self.db = sqlite3.connect('file:{}?mode=ro'.format(self.sqlite_file), uri=True) + self.db = sqlite3.connect("file:{}?mode=ro".format(self.sqlite_file), uri=True) self.db.text_factory = str def close(self): diff --git a/pysradb/cli.py b/pysradb/cli.py index 7f290864..2891122d 100644 --- a/pysradb/cli.py +++ b/pysradb/cli.py @@ -44,7 +44,7 @@ def _print_save_df(df, saveto=None): for line in to_print_split: to_print.append(line.lstrip()) sys.stdout.write(("{}".format(os.linesep)).join(to_print)) - #print(("{}".format(os.linesep)).join(to_print)) + # print(("{}".format(os.linesep)).join(to_print)) def _check_sradb_file(db): diff --git a/tests/_test_sradb.py b/tests/_test_sradb.py index 3da8c062..621607a9 100644 --- a/tests/_test_sradb.py +++ b/tests/_test_sradb.py @@ -77,6 +77,7 @@ def test_desc_table(sradb_connection): def test_all_row_counts(sradb_connection): assert sradb_connection.all_row_counts().loc["metaInfo", "count"] == 2 + def test_all_row_counts2(sradb_connection): assert len(sradb_connection.all_row_counts()) == 13 @@ -97,8 +98,11 @@ def test_search(sradb_connection): df = sradb_connection.search_sra(search_str="breast cancer") assert len(df.index) + def test_search2(sradb_connection): - df = sradb_connection.search_sra('"salivary microbiome" AND "diabetes mellitus"', detailed=True) + df = sradb_connection.search_sra( + '"salivary microbiome" AND "diabetes mellitus"', detailed=True + ) assert "SRP241848" in df["study_accession"].to_list() @@ -106,13 +110,17 @@ def test_search_by_expt_id(sradb_connection): df = sradb_connection.search_by_expt_id("SRX1254413") assert df.study_name.tolist()[0] == "GSE73136" + def test_search_by_expt_id2(sradb_connection): srx_id = "SRX116363" df_expt = sradb_connection.search_by_expt_id(srx_id) sra_id = df_expt["submission_accession"].loc[0] df = sradb_connection.sra_metadata(sra_id) - connected_srp = sradb_connection.srx_to_srp("SRX116363").iloc[0,1] - assert(srx_id in df["experiment_accession"].to_list()) and (connected_srp == "SRP010374") + connected_srp = sradb_connection.srx_to_srp("SRX116363").iloc[0, 1] + assert (srx_id in df["experiment_accession"].to_list()) and ( + connected_srp == "SRP010374" + ) + # def test_download_fasp(sradb_connection): # df = sradb_connection.sra_metadata("SRP098789") @@ -164,6 +172,7 @@ def test_strain_type(sradb_connection): "s288c", ] + def test_srp_to_srx(sradb_connection): assert len(sradb_connection.srp_to_srx("SRP082570")) == 14 @@ -171,28 +180,28 @@ def test_srp_to_srx(sradb_connection): def test_srp_to_srr(sradb_connection): df = sradb_connection.srp_to_srr("SRP091987") assert sorted(list(df["run_accession"])[:3]) == [ - 'SRR4447104', - 'SRR4447105', - 'SRR4447106' + "SRR4447104", + "SRR4447105", + "SRR4447106", ] def test_srp_to_gse(sradb_connection): - gse_id = sradb_connection.srp_to_gse("SRP050443").iloc[0,1] + gse_id = sradb_connection.srp_to_gse("SRP050443").iloc[0, 1] df = sradb_connection.gse_to_gsm(gse_id) - assert("GSM1557451" in df["experiment_alias"].to_list()) + assert "GSM1557451" in df["experiment_alias"].to_list() def test_gsm_to_gse(sradb_connection): - df = sradb_connection.gsm_to_gse(["GSM1020651","GSM1020664","GSM1020771"]) + df = sradb_connection.gsm_to_gse(["GSM1020651", "GSM1020664", "GSM1020771"]) assert set(list(df["study_alias"])) == {"GSE41637"} def test_srs_to_gsm(sradb_connection): df = sradb_connection.srs_to_gsm("SRS1757470") - assert "GSM2358940" == df.iloc[0,1] + assert "GSM2358940" == df.iloc[0, 1] @pytest.mark.xfail(raises=ValueError) def test_wrong_input_metadata(sradb_connection): - df = sradb_connection.sra_metadata("should_throw_error") \ No newline at end of file + df = sradb_connection.sra_metadata("should_throw_error") diff --git a/tests/test_sradb.py b/tests/test_sradb.py index c29bbf66..e04e8bf0 100644 --- a/tests/test_sradb.py +++ b/tests/test_sradb.py @@ -7,16 +7,14 @@ from pysradb.filter_attrs import guess_cell_type, guess_tissue_type, guess_strain_type from sqlite3 import OperationalError + def test_not_valid_file(): """Test to check for error if file is either not present or not a valid sqlite file""" - path = 'SRAmetadb.sqlite' + path = "SRAmetadb.sqlite" try: db = SRAdb(path) except SystemExit: assert os.path.isfile(path) == False except OperationalError: assert True - - -