diff --git a/ndexstringloader/ndexloadstring.py b/ndexstringloader/ndexloadstring.py index ac2a8bf..97adb98 100644 --- a/ndexstringloader/ndexloadstring.py +++ b/ndexstringloader/ndexloadstring.py @@ -495,48 +495,75 @@ def run(self): return 0 - def _generate_CX_file(self, file_name, network_name): - new_cx_file = file_name + '.cx' - logger.debug('generating CX file for network {}...'.format(network_name)) + def _init_network_attributes(self): + net_attributes = {} - with open(file_name, 'r') as tsvfile: + net_attributes['name'] = 'STRING - Human Protein Links - High Confidence (Score >= ' \ + + str(self._cutoffscore) + ')' - with open(new_cx_file, "w") as out: - loader = StreamTSVLoader(self._load_plan, self._template) + net_attributes['description'] = '
This network contains high confidence (score >= ' \ + + str(self._cutoffscore) + ') human protein links with combined scores. ' \ + + 'Edge color was mapped to the combined score value using a gradient from light grey ' \ + + '(low Score) to black (high Score).' + + net_attributes['rights'] = 'Attribution 4.0 International (CC BY 4.0)' + + net_attributes['rightsHolder'] = 'STRING CONSORTIUM' + + net_attributes['version'] = self._string_version - description = '
This network contains high confidence (score >= ' \ - + str(self._cutoffscore) + ') human protein links with combined scores. Edge color was mapped to ' \ - + 'the combined score value using a gradient from light grey (low Score) to black (high Score).' + net_attributes['organism'] = 'Homo sapiens (human)' - reference = '

Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, Simonovic M, Santos A, ' \ - + 'Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C.
' \ + net_attributes['networkType'] = ['interactome', 'ppi'] + + net_attributes['reference'] = '

Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, ' \ + + 'Simonovic M, Santos A, Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C.
' \ + 'The STRING database in 2017: quality-controlled protein-protein association networks, ' \ + 'made broadly accessible.
Nucleic Acids Res. 2017 Jan; ' \ + '45:D362-68.
' \ + 'DOI:10.1093/nar/gkw937

' + net_attributes['prov:wasDerivedFrom'] = \ + 'https://stringdb-static.org/download/protein.links.full.v11.0/9606.protein.links.full.v11.0.txt.gz' + + net_attributes['prov:wasGeneratedBy'] = \ + 'ndexstringloader ' \ + + str(ndexstringloader.__version__) + '' + + net_attributes['__iconurl'] = self._iconurl + + return net_attributes + + + + def _generate_CX_file(self, network_attributes): + file_name = self._output_tsv_file_name + new_cx_file = file_name + '.cx' + + logger.debug('generating CX file for network {}...'.format(network_attributes['name'])) + + with open(file_name, 'r') as tsvfile: + + with open(new_cx_file, "w") as out: + loader = StreamTSVLoader(self._load_plan, self._template) + loader.write_cx_network(tsvfile, out, [ - {'n': 'name', 'v': network_name}, - {'n': 'description', 'v': description}, - {'n': 'rights', 'v': 'Attribution 4.0 International (CC BY 4.0)'}, - {'n': 'rightsHolder', 'v': 'STRING CONSORTIUM'}, - {'n': 'version', 'v': self._string_version}, - {'n': 'organism', 'v': 'Human, 9606, Homo sapiens '}, - {'n': 'networkType', 'v': ['interactome', 'ppi'], 'd': 'list_of_string'}, - {'n': 'reference', 'v': reference}, - {'n': 'prov:wasDerivedFrom', 'v': - 'https://stringdb-static.org/download/protein.links.full.v11.0/9606.protein.links.full.v11.0.txt.gz' - }, - {'n': 'prov:wasGeneratedBy', 'v': - 'ndexstringloader ' + str( - ndexstringloader.__version__) + ''}, - - {'n': '__iconurl', 'v': self._iconurl} + {'n': 'name', 'v': network_attributes['name']}, + {'n': 'description', 'v': network_attributes['description']}, + {'n': 'rights', 'v': network_attributes['rights']}, + {'n': 'rightsHolder', 'v': network_attributes['rightsHolder']}, + {'n': 'version', 'v': network_attributes['version']}, + {'n': 'organism', 'v': network_attributes['organism']}, + {'n': 'networkType', 'v': network_attributes['networkType'], 'd': 'list_of_string'}, + {'n': 'reference', 'v': network_attributes['reference']}, + {'n': 'prov:wasDerivedFrom', 'v': 
network_attributes['prov:wasDerivedFrom']}, + {'n': 'prov:wasGeneratedBy', 'v': network_attributes['prov:wasGeneratedBy']}, + {'n': '__iconurl', 'v': network_attributes['__iconurl']} ]) - logger.debug('CX file for network {} generated\n'.format(network_name)) + logger.debug('CX file for network {} generated\n'.format(network_attributes['name'])) return new_cx_file @@ -594,15 +621,15 @@ def get_network_uuid(self, network_name): def load_to_NDEx(self): - file_name = self._output_tsv_file_name - network_name = 'STRING - Human Protein Links - ' \ - 'High Confidence (Score > ' +\ - str(self._cutoffscore) + ')' - if self.create_ndex_connection() is None: return 2 - cx_file_name = self._generate_CX_file(file_name, network_name) + + network_attributes = self._init_network_attributes() + + cx_file_name = self._generate_CX_file(network_attributes) + + network_name = network_attributes['name'] network_id = self.get_network_uuid(network_name) diff --git a/tests/test_ndexloadstring.py b/tests/test_ndexloadstring.py index 9b7f430..c53e4b2 100644 --- a/tests/test_ndexloadstring.py +++ b/tests/test_ndexloadstring.py @@ -10,6 +10,7 @@ import unittest from ndexutil.config import NDExUtilConfig from ndexstringloader.ndexloadstring import NDExSTRINGLoader +import ndexstringloader class Param(object): @@ -36,11 +37,11 @@ def setUp(self): 'conf': None, 'profile': None, 'loadplan': None, - 'stringversion': None, + 'stringversion': '11.0', 'args': None, - 'datadir': None, + 'datadir': tempfile.mkdtemp(), 'cutoffscore': 0.7, - 'iconurl': None + 'iconurl': 'https://home.ndexbio.org/img/STRING-logo.png' } self._args = dotdict(self._args) @@ -51,9 +52,9 @@ def tearDown(self): """Tear down test fixtures, if any.""" @unittest.skip("skip it now - will add later") - def test_parse_config(self): + def test_0010_parse_config(self): - temp_dir = tempfile.mkdtemp() + temp_dir = self._args['datadir'] try: p = Param() p.profile = 'test_conf_section' @@ -76,7 +77,7 @@ def test_parse_config(self): 
shutil.rmtree(temp_dir) @unittest.skip("skip it now - uncomment later") - def test_remove_duplicate_edges(self): + def test_0020_remove_duplicate_edges(self): # some duplicate records in the same format as in STRING 9606.protein.links.full.v11.0.txt duplicate_records = [ @@ -120,14 +121,13 @@ def test_remove_duplicate_edges(self): } } - temp_dir = tempfile.mkdtemp() + temp_dir = self._args['datadir'] temp_file = 'tmp.txt' temp_file_1 = 'tmp1.txt' try: f = os.path.join(temp_dir, temp_file) - self._args.datadir = temp_dir self._full_name_file = f self._output_tsv_file_name = os.path.join(temp_dir, temp_file_1) @@ -178,7 +178,7 @@ def test_remove_duplicate_edges(self): shutil.rmtree(temp_dir) @unittest.skip("skip it now - uncomment later") - def test_exception_on_duplicate_edge_with_different_scores(self): + def test_0030_exception_on_duplicate_edge_with_different_scores(self): # some duplicate records in the same format as in STRING 9606.protein.links.full.v11.0.txt @@ -201,14 +201,13 @@ def test_exception_on_duplicate_edge_with_different_scores(self): for i in range(0, 2): - temp_dir = tempfile.mkdtemp() + temp_dir = self._args['datadir'] temp_file = 'tmp.txt' temp_file_1 = 'tmp1.txt' try: f = os.path.join(temp_dir, temp_file) - self._args.datadir = temp_dir self._full_name_file = f self._output_tsv_file_name = os.path.join(temp_dir, temp_file_1) @@ -242,13 +241,57 @@ def test_exception_on_duplicate_edge_with_different_scores(self): finally: shutil.rmtree(temp_dir) + self._args['datadir'] = tempfile.mkdtemp() - # re-init dudplicates and re-rerun the teast + # re-init duplicates and re-run the test duplicate_records = [ '9606.ENSP00000238651 9606.ENSP00000364486 0 0 0 0 0 0 45 0 0 800 0 0 0 801', '9606.ENSP00000364486 9606.ENSP00000238651 0 0 0 0 0 0 45 0 0 800 0 0 0 800' ] - def test_get_network_uuid(self): - pass + def test_0040_init_network_atributes(self): + net_attributes = {} + + cutoffscore = str(self._args['cutoffscore']) + + net_attributes['name'] = 
'STRING - Human Protein Links - High Confidence (Score >= ' + cutoffscore + ')' + + net_attributes['description'] = '
This network contains high confidence (score >= ' \ + + cutoffscore + ') human protein links with combined scores. ' \ + + 'Edge color was mapped to the combined score value using a gradient from light grey ' \ + + '(low Score) to black (high Score).' + + net_attributes['rights'] = 'Attribution 4.0 International (CC BY 4.0)' + + net_attributes['rightsHolder'] = 'STRING CONSORTIUM' + + net_attributes['version'] = self._args['stringversion'] + + net_attributes['organism'] = 'Homo sapiens (human)' + + net_attributes['networkType'] = ['interactome', 'ppi'] + + net_attributes['reference'] = '

Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, ' \ + + 'Simonovic M, Santos A, Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C.
' \ + + 'The STRING database in 2017: quality-controlled protein-protein association networks, ' \ + + 'made broadly accessible.
Nucleic Acids Res. 2017 Jan; ' \ + + '45:D362-68.
' \ + + 'DOI:10.1093/nar/gkw937

' + + net_attributes['prov:wasDerivedFrom'] = \ + 'https://stringdb-static.org/download/protein.links.full.v11.0/9606.protein.links.full.v11.0.txt.gz' + + net_attributes['prov:wasGeneratedBy'] = \ + 'ndexstringloader ' \ + + str(ndexstringloader.__version__) + '' + + net_attributes['__iconurl'] = self._args['iconurl'] + + + loader = NDExSTRINGLoader(self._args) + + # get network attributes from STRING loader object + network_attributes = loader._init_network_attributes() + + self.assertDictEqual(net_attributes, network_attributes, 'unexpected network properties')