From df55ddea5b2a809813a94606bc8afe1343a623eb Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Fri, 13 Jan 2017 16:25:51 +0100 Subject: [PATCH] upgrade scripts for 3.1.0 version, removed multi in python for now --- README.md | 2 +- pom.xml | 4 ++-- scripts/neo4j2-import-go.py | 23 ++++++++++++++--------- scripts/neo4j2-import-ncbi.py | 28 ++++++++++++++++------------ scripts/uniprot.sh | 12 +++++++++--- 5 files changed, 42 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index e558937..0e2c60a 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ You would need to import NCBI taxonomy, Gene Ontology and UniProt into your Neo4 * Java 1.8 * Maven >= 3.1 -* Compatible with Neo4j 3.0.6 +* Compatible with Neo4j 3.1.0 ## INSTALL diff --git a/pom.xml b/pom.xml index 321bbca..2313926 100644 --- a/pom.xml +++ b/pom.xml @@ -6,10 +6,10 @@ cat.cau.neo4j neo4j-biorelation - 0.2.2 + 0.2.3 - 3.0.6 + 3.1.0 diff --git a/scripts/neo4j2-import-go.py b/scripts/neo4j2-import-go.py index 0cfb430..00bb48c 100644 --- a/scripts/neo4j2-import-go.py +++ b/scripts/neo4j2-import-go.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import py2neo from py2neo.packages.httpstream import http -from py2neo.cypher import cypher_escape from multiprocessing import Pool import httplib @@ -26,8 +25,7 @@ logging.basicConfig(level=logging.ERROR) -graph = py2neo.Graph() -graph.bind("http://localhost:7474/db/data/") +graph = py2neo.Graph("http://localhost:7474/db/data/") relationshipmap={} definition_list={} @@ -38,8 +36,8 @@ label = "GO_TERM" -idxout = graph.cypher.execute("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.acc IS UNIQUE") -idxout = graph.cypher.execute("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE") +idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.acc IS UNIQUE") +idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE") logging.info('adding definitions') reader = csv.reader(open(opts.termdeffile),delimiter="\t") @@ -54,7 +52,7 
@@ def process_statement( statements ): - tx = graph.cypher.begin() + tx = graph.begin() #print statements logging.info('proc sent') @@ -109,7 +107,11 @@ def create_go_term(line): list_statements.append( statements ) -res = p.map( process_statement, list_statements ) + +print len( list_statements ) + +for statements in list_statements : + process_statement( statements ) logging.info('adding relationships') @@ -135,7 +137,10 @@ def create_go_term(line): #We force only one worker, fails if relation p = Pool(1) - list_statements.append( statements ) -res = p.map( process_statement, list_statements ) + +for statements in list_statements : + process_statement( statements ) + +#res = p.map( process_statement, list_statements ) diff --git a/scripts/neo4j2-import-ncbi.py b/scripts/neo4j2-import-ncbi.py index 2a13da0..fa4b50f 100644 --- a/scripts/neo4j2-import-ncbi.py +++ b/scripts/neo4j2-import-ncbi.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import py2neo from py2neo.packages.httpstream import http -from py2neo.cypher import cypher_escape from multiprocessing import Pool import httplib @@ -29,8 +28,7 @@ numiter = 5000 -graph = py2neo.Graph() -graph.bind("http://localhost:7474/db/data/") +graph = py2neo.Graph("http://localhost:7474/db/data/") label = "TAXID" @@ -39,11 +37,11 @@ scientific_list={} names_list={} -idxout = graph.cypher.execute("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE") +idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE") def process_statement( statements ): - tx = graph.cypher.begin() + tx = graph.begin() #print statements logging.info('proc sent') @@ -58,7 +56,7 @@ def process_statement( statements ): poolnum = 4; -p = Pool(poolnum) +p = Pool(processes=poolnum) def create_taxid(line, number): taxid = str(line[0]).strip() @@ -135,12 +133,18 @@ def create_taxid(line, number): statements = [] list_statements.append( statements ) -res = p.map( process_statement, list_statements ) -idxout = graph.cypher.execute("CREATE 
INDEX ON :"+label+"(rank)") +print len( list_statements ) + +for statements in list_statements : + process_statement( statements ) + +# p.map( process_statement, list_statements ) + +idxout = graph.run("CREATE INDEX ON :"+label+"(rank)") # We keep no pool for relationship -tx = graph.cypher.begin() +tx = graph.begin() logging.info('adding relationships') iter = 0 @@ -158,12 +162,12 @@ def create_taxid(line, number): if ( iter > numiter ): tx.process() tx.commit() - tx = graph.cypher.begin() + tx = graph.begin() iter = 0 tx.process() tx.commit() -idxout = graph.cypher.execute("CREATE INDEX ON :"+label+"(scientific_name)") -idxout = graph.cypher.execute("CREATE INDEX ON :"+label+"(name)") +idxout = graph.run("CREATE INDEX ON :"+label+"(scientific_name)") +idxout = graph.run("CREATE INDEX ON :"+label+"(name)") diff --git a/scripts/uniprot.sh b/scripts/uniprot.sh index dd18821..6a398cb 100755 --- a/scripts/uniprot.sh +++ b/scripts/uniprot.sh @@ -1,19 +1,24 @@ # CONFIG parameters NEO4JSHELL=/data/soft/neo4j-community-3.0.6/bin/neo4j-shell -#GOA: ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpa.gz +GOAURL=ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpa.gz GOADIR=/data/db/go/goa -#IDmapping: ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz +IDURL=ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz MAPPINGDIR=/data/db/go/mapping MOMENTDIR=/data/toniher SCRIPTPATH=`pwd` -#Info Uniprot: ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpi.gz +INFOURL=ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpi.gz INFOFILE=goa_uniprot_all.gpi GOAFILE=goa_uniprot_all.gpa +mkdir -p $GOADIR +mkdir -p $MAPPINGDIR + # Let's uncompress all files cd $GOADIR +wget -c -t0 $GOAURL +wget -c -t0 $INFOURL gunzip *gz # Base entries @@ -26,6 +31,7 @@ rm $INFOFILE.base # Creating synonyms in Redis -> TODO, this MUST change cd 
$MAPPINGDIR +wget -c -t0 $IDURL gunzip *gz python $SCRIPTPATH/neo4j2-synonyms-redis.py $MAPPINGDIR/idmapping.dat