Skip to content

Commit

Permalink
upgrade scripts for 3.1.0 version, removed multi in python for now
Browse files Browse the repository at this point in the history
  • Loading branch information
toniher committed Jan 13, 2017
1 parent 2c5da77 commit df55dde
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -10,7 +10,7 @@ You would need to import NCBI taxonomy, Gene Ontology and UniProt into your Neo4

* Java 1.8
* Maven >= 3.1
* Compatible with Neo4j 3.0.6
* Compatible with Neo4j 3.1.0

## INSTALL

Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Expand Up @@ -6,10 +6,10 @@

<groupId>cat.cau.neo4j</groupId>
<artifactId>neo4j-biorelation</artifactId>
<version>0.2.2</version>
<version>0.2.3</version>

<properties>
<neo4j.version>3.0.6</neo4j.version>
<neo4j.version>3.1.0</neo4j.version>
</properties>

<dependencies>
Expand Down
23 changes: 14 additions & 9 deletions scripts/neo4j2-import-go.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
import py2neo
from py2neo.packages.httpstream import http
from py2neo.cypher import cypher_escape
from multiprocessing import Pool

import httplib
Expand All @@ -26,8 +25,7 @@

logging.basicConfig(level=logging.ERROR)

graph = py2neo.Graph()
graph.bind("http://localhost:7474/db/data/")
graph = py2neo.Graph("http://localhost:7474/db/data/")

relationshipmap={}
definition_list={}
Expand All @@ -38,8 +36,8 @@

label = "GO_TERM"

idxout = graph.cypher.execute("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.acc IS UNIQUE")
idxout = graph.cypher.execute("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE")
idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.acc IS UNIQUE")
idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE")

logging.info('adding definitions')
reader = csv.reader(open(opts.termdeffile),delimiter="\t")
Expand All @@ -54,7 +52,7 @@

def process_statement( statements ):

tx = graph.cypher.begin()
tx = graph.begin()

#print statements
logging.info('proc sent')
Expand Down Expand Up @@ -109,7 +107,11 @@ def create_go_term(line):

list_statements.append( statements )

res = p.map( process_statement, list_statements )

print len( list_statements )

for statements in list_statements :
process_statement( statements )


logging.info('adding relationships')
Expand All @@ -135,7 +137,10 @@ def create_go_term(line):

#We force only one worker, fails if relation
p = Pool(1)

list_statements.append( statements )
res = p.map( process_statement, list_statements )

for statements in list_statements :
process_statement( statements )

#res = p.map( process_statement, list_statements )

28 changes: 16 additions & 12 deletions scripts/neo4j2-import-ncbi.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
import py2neo
from py2neo.packages.httpstream import http
from py2neo.cypher import cypher_escape
from multiprocessing import Pool

import httplib
Expand Down Expand Up @@ -29,8 +28,7 @@

numiter = 5000

graph = py2neo.Graph()
graph.bind("http://localhost:7474/db/data/")
graph = py2neo.Graph("http://localhost:7474/db/data/")

label = "TAXID"

Expand All @@ -39,11 +37,11 @@
scientific_list={}
names_list={}

idxout = graph.cypher.execute("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE")
idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE")

def process_statement( statements ):

tx = graph.cypher.begin()
tx = graph.begin()

#print statements
logging.info('proc sent')
Expand All @@ -58,7 +56,7 @@ def process_statement( statements ):

poolnum = 4;

p = Pool(poolnum)
p = Pool(processes=poolnum)

def create_taxid(line, number):
taxid = str(line[0]).strip()
Expand Down Expand Up @@ -135,12 +133,18 @@ def create_taxid(line, number):
statements = []

list_statements.append( statements )
res = p.map( process_statement, list_statements )

idxout = graph.cypher.execute("CREATE INDEX ON :"+label+"(rank)")
print len( list_statements )

for statements in list_statements :
process_statement( statements )

# p.map( process_statement, list_statements )

idxout = graph.run("CREATE INDEX ON :"+label+"(rank)")

# We do not use a pool for relationships
tx = graph.cypher.begin()
tx = graph.begin()

logging.info('adding relationships')
iter = 0
Expand All @@ -158,12 +162,12 @@ def create_taxid(line, number):
if ( iter > numiter ):
tx.process()
tx.commit()
tx = graph.cypher.begin()
tx = graph.begin()

iter = 0

tx.process()
tx.commit()

idxout = graph.cypher.execute("CREATE INDEX ON :"+label+"(scientific_name)")
idxout = graph.cypher.execute("CREATE INDEX ON :"+label+"(name)")
idxout = graph.run("CREATE INDEX ON :"+label+"(scientific_name)")
idxout = graph.run("CREATE INDEX ON :"+label+"(name)")
12 changes: 9 additions & 3 deletions scripts/uniprot.sh
@@ -1,19 +1,24 @@
# CONFIG parameters
# Paths and download URLs used by the UniProt / GOA import; adjust to the local setup.

# NOTE(review): this path still points at a neo4j-community-3.0.6 install — confirm it is intended.
NEO4JSHELL=/data/soft/neo4j-community-3.0.6/bin/neo4j-shell
#GOA: ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpa.gz
# No whitespace is allowed around '=' in a shell assignment: with a space after
# it, GOAURL would be set to empty and the URL itself would be run as a command.
GOAURL=ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpa.gz
GOADIR=/data/db/go/goa
#IDmapping: ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz
IDURL=ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz
MAPPINGDIR=/data/db/go/mapping
MOMENTDIR=/data/toniher
SCRIPTPATH=`pwd`

#Info Uniprot: ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpi.gz
INFOURL=ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/goa_uniprot_all.gpi.gz
INFOFILE=goa_uniprot_all.gpi
GOAFILE=goa_uniprot_all.gpa

mkdir -p "$GOADIR"
mkdir -p "$MAPPINGDIR"

# Download the GOA files (-c resumes partial downloads, -t0 retries without limit)
# and uncompress them. Stop if the directory is unreachable so that gunzip never
# runs against whatever directory the script was started from.
cd "$GOADIR" || exit 1
wget -c -t0 "$GOAURL"
wget -c -t0 "$INFOURL"
gunzip *gz

# Base entries
Expand All @@ -26,6 +31,7 @@ rm $INFOFILE.base
# Creating synonyms in Redis -> TODO, this MUST change

cd $MAPPINGDIR
wget -c -t0 $IDURL
gunzip *gz

python $SCRIPTPATH/neo4j2-synonyms-redis.py $MAPPINGDIR/idmapping.dat
Expand Down

0 comments on commit df55dde

Please sign in to comment.