diff --git a/ndexstringloader/ndexloadstring.py b/ndexstringloader/ndexloadstring.py
index ac2a8bf..97adb98 100644
--- a/ndexstringloader/ndexloadstring.py
+++ b/ndexstringloader/ndexloadstring.py
@@ -495,48 +495,75 @@ def run(self):
return 0
- def _generate_CX_file(self, file_name, network_name):
- new_cx_file = file_name + '.cx'
- logger.debug('generating CX file for network {}...'.format(network_name))
+ def _init_network_attributes(self):
+ net_attributes = {}
- with open(file_name, 'r') as tsvfile:
+ net_attributes['name'] = 'STRING - Human Protein Links - High Confidence (Score >= ' \
+ + str(self._cutoffscore) + ')'
- with open(new_cx_file, "w") as out:
- loader = StreamTSVLoader(self._load_plan, self._template)
+ net_attributes['description'] = '
This network contains high confidence (score >= ' \
+ + str(self._cutoffscore) + ') human protein links with combined scores. ' \
+ + 'Edge color was mapped to the combined score value using a gradient from light grey ' \
+ + '(low Score) to black (high Score).'
+
+ net_attributes['rights'] = 'Attribution 4.0 International (CC BY 4.0)'
+
+ net_attributes['rightsHolder'] = 'STRING CONSORTIUM'
+
+ net_attributes['version'] = self._string_version
- description = '
This network contains high confidence (score >= ' \
- + str(self._cutoffscore) + ') human protein links with combined scores. Edge color was mapped to ' \
- + 'the combined score value using a gradient from light grey (low Score) to black (high Score).'
+ net_attributes['organism'] = 'Homo sapiens (human)'
- reference = '
Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, Simonovic M, Santos A, ' \
- + 'Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C.
' \
+ net_attributes['networkType'] = ['interactome', 'ppi']
+
+ net_attributes['reference'] = '
Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, ' \
+ + 'Simonovic M, Santos A, Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C.
' \
+ 'The STRING database in 2017: quality-controlled protein-protein association networks, ' \
+ 'made broadly accessible.
Nucleic Acids Res. 2017 Jan; ' \
+ '45:D362-68.
' \
+ 'DOI:10.1093/nar/gkw937
'
+ net_attributes['prov:wasDerivedFrom'] = \
+ 'https://stringdb-static.org/download/protein.links.full.v11.0/9606.protein.links.full.v11.0.txt.gz'
+
+ net_attributes['prov:wasGeneratedBy'] = \
+ 'ndexstringloader ' \
+ + str(ndexstringloader.__version__) + ''
+
+ net_attributes['__iconurl'] = self._iconurl
+
+ return net_attributes
+
+
+
+ def _generate_CX_file(self, network_attributes):
+ file_name = self._output_tsv_file_name
+ new_cx_file = file_name + '.cx'
+
+ logger.debug('generating CX file for network {}...'.format(network_attributes['name']))
+
+ with open(file_name, 'r') as tsvfile:
+
+ with open(new_cx_file, "w") as out:
+ loader = StreamTSVLoader(self._load_plan, self._template)
+
loader.write_cx_network(tsvfile, out,
[
- {'n': 'name', 'v': network_name},
- {'n': 'description', 'v': description},
- {'n': 'rights', 'v': 'Attribution 4.0 International (CC BY 4.0)'},
- {'n': 'rightsHolder', 'v': 'STRING CONSORTIUM'},
- {'n': 'version', 'v': self._string_version},
- {'n': 'organism', 'v': 'Human, 9606, Homo sapiens '},
- {'n': 'networkType', 'v': ['interactome', 'ppi'], 'd': 'list_of_string'},
- {'n': 'reference', 'v': reference},
- {'n': 'prov:wasDerivedFrom', 'v':
- 'https://stringdb-static.org/download/protein.links.full.v11.0/9606.protein.links.full.v11.0.txt.gz'
- },
- {'n': 'prov:wasGeneratedBy', 'v':
- 'ndexstringloader ' + str(
- ndexstringloader.__version__) + ''},
-
- {'n': '__iconurl', 'v': self._iconurl}
+ {'n': 'name', 'v': network_attributes['name']},
+ {'n': 'description', 'v': network_attributes['description']},
+ {'n': 'rights', 'v': network_attributes['rights']},
+ {'n': 'rightsHolder', 'v': network_attributes['rightsHolder']},
+ {'n': 'version', 'v': network_attributes['version']},
+ {'n': 'organism', 'v': network_attributes['organism']},
+ {'n': 'networkType', 'v': network_attributes['networkType'], 'd': 'list_of_string'},
+ {'n': 'reference', 'v': network_attributes['reference']},
+ {'n': 'prov:wasDerivedFrom', 'v': network_attributes['prov:wasDerivedFrom']},
+ {'n': 'prov:wasGeneratedBy', 'v': network_attributes['prov:wasGeneratedBy']},
+ {'n': '__iconurl', 'v': network_attributes['__iconurl']}
])
- logger.debug('CX file for network {} generated\n'.format(network_name))
+ logger.debug('CX file for network {} generated\n'.format(network_attributes['name']))
return new_cx_file
@@ -594,15 +621,15 @@ def get_network_uuid(self, network_name):
def load_to_NDEx(self):
- file_name = self._output_tsv_file_name
- network_name = 'STRING - Human Protein Links - ' \
- 'High Confidence (Score > ' +\
- str(self._cutoffscore) + ')'
-
if self.create_ndex_connection() is None:
return 2
- cx_file_name = self._generate_CX_file(file_name, network_name)
+
+ network_attributes = self._init_network_attributes()
+
+ cx_file_name = self._generate_CX_file(network_attributes)
+
+ network_name = network_attributes['name']
network_id = self.get_network_uuid(network_name)
diff --git a/tests/test_ndexloadstring.py b/tests/test_ndexloadstring.py
index 9b7f430..c53e4b2 100644
--- a/tests/test_ndexloadstring.py
+++ b/tests/test_ndexloadstring.py
@@ -10,6 +10,7 @@
import unittest
from ndexutil.config import NDExUtilConfig
from ndexstringloader.ndexloadstring import NDExSTRINGLoader
+import ndexstringloader
class Param(object):
@@ -36,11 +37,11 @@ def setUp(self):
'conf': None,
'profile': None,
'loadplan': None,
- 'stringversion': None,
+ 'stringversion': '11.0',
'args': None,
- 'datadir': None,
+ 'datadir': tempfile.mkdtemp(),
'cutoffscore': 0.7,
- 'iconurl': None
+ 'iconurl': 'https://home.ndexbio.org/img/STRING-logo.png'
}
self._args = dotdict(self._args)
@@ -51,9 +52,9 @@ def tearDown(self):
"""Tear down test fixtures, if any."""
@unittest.skip("skip it now - will add later")
- def test_parse_config(self):
+ def test_0010_parse_config(self):
- temp_dir = tempfile.mkdtemp()
+ temp_dir = self._args['datadir']
try:
p = Param()
p.profile = 'test_conf_section'
@@ -76,7 +77,7 @@ def test_parse_config(self):
shutil.rmtree(temp_dir)
@unittest.skip("skip it now - uncomment later")
- def test_remove_duplicate_edges(self):
+ def test_0020_remove_duplicate_edges(self):
# some duplicate records in the same format as in STRING 9606.protein.links.full.v11.0.txt
duplicate_records = [
@@ -120,14 +121,13 @@ def test_remove_duplicate_edges(self):
}
}
- temp_dir = tempfile.mkdtemp()
+ temp_dir = self._args['datadir']
temp_file = 'tmp.txt'
temp_file_1 = 'tmp1.txt'
try:
f = os.path.join(temp_dir, temp_file)
- self._args.datadir = temp_dir
self._full_name_file = f
self._output_tsv_file_name = os.path.join(temp_dir, temp_file_1)
@@ -178,7 +178,7 @@ def test_remove_duplicate_edges(self):
shutil.rmtree(temp_dir)
@unittest.skip("skip it now - uncomment later")
- def test_exception_on_duplicate_edge_with_different_scores(self):
+ def test_0030_exception_on_duplicate_edge_with_different_scores(self):
# some duplicate records in the same format as in STRING 9606.protein.links.full.v11.0.txt
@@ -201,14 +201,13 @@ def test_exception_on_duplicate_edge_with_different_scores(self):
for i in range(0, 2):
- temp_dir = tempfile.mkdtemp()
+ temp_dir = self._args['datadir']
temp_file = 'tmp.txt'
temp_file_1 = 'tmp1.txt'
try:
f = os.path.join(temp_dir, temp_file)
- self._args.datadir = temp_dir
self._full_name_file = f
self._output_tsv_file_name = os.path.join(temp_dir, temp_file_1)
@@ -242,13 +241,57 @@ def test_exception_on_duplicate_edge_with_different_scores(self):
finally:
shutil.rmtree(temp_dir)
+ self._args['datadir'] = tempfile.mkdtemp()
- # re-init dudplicates and re-rerun the teast
+ # re-init duplicates and re-run the test
duplicate_records = [
'9606.ENSP00000238651 9606.ENSP00000364486 0 0 0 0 0 0 45 0 0 800 0 0 0 801',
'9606.ENSP00000364486 9606.ENSP00000238651 0 0 0 0 0 0 45 0 0 800 0 0 0 800'
]
- def test_get_network_uuid(self):
- pass
+ def test_0040_init_network_atributes(self):
+ net_attributes = {}
+
+ cutoffscore = str(self._args['cutoffscore'])
+
+ net_attributes['name'] = 'STRING - Human Protein Links - High Confidence (Score >= ' + cutoffscore + ')'
+
+ net_attributes['description'] = '
This network contains high confidence (score >= ' \
+ + cutoffscore + ') human protein links with combined scores. ' \
+ + 'Edge color was mapped to the combined score value using a gradient from light grey ' \
+ + '(low Score) to black (high Score).'
+
+ net_attributes['rights'] = 'Attribution 4.0 International (CC BY 4.0)'
+
+ net_attributes['rightsHolder'] = 'STRING CONSORTIUM'
+
+ net_attributes['version'] = self._args['stringversion']
+
+ net_attributes['organism'] = 'Homo sapiens (human)'
+
+ net_attributes['networkType'] = ['interactome', 'ppi']
+
+ net_attributes['reference'] = 'Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, ' \
+ + 'Simonovic M, Santos A, Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C.
' \
+ + 'The STRING database in 2017: quality-controlled protein-protein association networks, ' \
+ + 'made broadly accessible.
Nucleic Acids Res. 2017 Jan; ' \
+ + '45:D362-68.
' \
+ + 'DOI:10.1093/nar/gkw937
'
+
+ net_attributes['prov:wasDerivedFrom'] = \
+ 'https://stringdb-static.org/download/protein.links.full.v11.0/9606.protein.links.full.v11.0.txt.gz'
+
+ net_attributes['prov:wasGeneratedBy'] = \
+ 'ndexstringloader ' \
+ + str(ndexstringloader.__version__) + ''
+
+ net_attributes['__iconurl'] = self._args['iconurl']
+
+
+ loader = NDExSTRINGLoader(self._args)
+
+ # get network attributes from STRING loader object
+ network_attributes = loader._init_network_attributes()
+
+ self.assertDictEqual(net_attributes, network_attributes, 'unexpected network properties')