Skip to content

Commit

Permalink
Merge pull request #279 from TomConlin/master
Browse files Browse the repository at this point in the history
Static analysis refactoring
  • Loading branch information
TomConlin committed Mar 14, 2016
2 parents ab47e5d + 76fcde9 commit 382ba00
Show file tree
Hide file tree
Showing 93 changed files with 8,600 additions and 5,030 deletions.
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ NOSE = nosetests
### Tests
###

test: BioGrid-fetch BioGrid-test ClinVar-fetch ClinVar-test \
test: BioGrid-fetch BioGrid-test \
ncbi-fetch ncbi-test Panther-fetch Panther-test ucscBands-fetch ucscBands-test

BioGrid-fetch:
Expand All @@ -18,11 +18,11 @@ BioGrid-fetch:
BioGrid-test:
$(NOSE) --with-coverage --cover-package=dipper tests/test_biogrid.py

ClinVar-fetch:
$(DIPPER_BIN) --sources clinvar --no_verify --fetch_only
#ClinVar-fetch:
# $(DIPPER_BIN) --sources clinvar --no_verify --fetch_only

ClinVar-test:
$(NOSE) --with-coverage --cover-package=dipper tests/test_clinvar.py
#ClinVar-test:
# $(NOSE) --with-coverage --cover-package=dipper tests/test_clinvar.py

GeneReviews-fetch:
$(DIPPER_BIN) --sources genereviews --no_verify --fetch_only
Expand Down Expand Up @@ -64,4 +64,4 @@ kegg-fetch:
$(DIPPER_BIN) --sources kegg --no_verify --fetch_only

kegg-test:
$(NOSE) --with-coverage --cover-package=dipper tests/test_kegg.py
$(NOSE) --with-coverage --cover-package=dipper tests/test_kegg.py
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,12 @@ import Dipper as a python package, write your own wrapper, and add your own sour
* OMIA (non-laboratory animal phenotypes)
* Wormbase (genes, alleles, phenotypes)
* FlyBase (genotype, phenotype)


```
hpoa,zfin,omim,biogrid,mgi,impc,panther,ncbigene,ucscbands,
ctd,genereviews,eom,coriell,clinvar,monochrom,kegg,animalqtldb,
ensembl,hgnc,orphanet,omia,flybase,mmrrc,wormbase,mpd,gwascatalog,go
```
* Don't see a parser you want? Feel free to request a new one, or you could contribute a Source parser to our suite!
Please see our [best-practices documentation](sources/README.md) for details on writing new Source parsers
using Dipper code, and make a Pull request.
Expand Down Expand Up @@ -151,4 +156,3 @@ For more information regarding identifiers terminology and notation, see McMurry

More detailed identifier documentation for Monarch is a work in progress, available [here:](https://docs.google.com/document/d/1jJHM0c358T5h2W2qLbpm9fGNcOsTSfhMPmmXQhI8n9Q/edit)
Please feel free to pose any questions or concerns to info@monarchinitiative.org.

120 changes: 75 additions & 45 deletions dipper.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,33 @@
#!/usr/bin/env python3

__author__ = 'nlw'

import argparse
import logging
import unittest
import importlib
import time

# TODO PYLINT not finding imports
# Unable to import 'tests.test_general'
# No name 'utils' in module 'dipper'
# Invalid constant name "test_suite"
from tests.test_general import GeneralGraphTestCase
from dipper.utils.TestUtils import TestUtils

__author__ = 'nlw'

test_suite = unittest.TestLoader().loadTestsFromTestCase(GeneralGraphTestCase)


def main():
source_to_class_map = {
'hpoa': 'HPOAnnotations', # ~3 min
'hpoa': 'HPOAnnotations', # ~3 min
'zfin': 'ZFIN',
'omim': 'OMIM', # full file takes ~15 min, due to required throttling
'biogrid': 'BioGrid', # interactions file takes <10 minutes
'mgi': 'MGI',
'impc': 'IMPC',
'panther': 'Panther', # this takes a very long time, ~1hr to map 7 species-worth of associations
# Panther takes ~1hr to map 7 species-worth of associations
'panther': 'Panther',
'ncbigene': 'NCBIGene', # takes about 4 minutes to process 2 species
'ucscbands': 'UCSCBands',
'ctd': 'CTD',
Expand All @@ -38,7 +43,7 @@ def main():
'orphanet': 'Orphanet',
'omia': 'OMIA',
'flybase': 'FlyBase',
'mmrrc' : 'MMRRC',
'mmrrc': 'MMRRC',
'wormbase': 'WormBase',
'mpd': 'MPD',
'gwascatalog': 'GWASCatalog',
Expand All @@ -48,51 +53,73 @@ def main():

logger = logging.getLogger(__name__)

parser = argparse.ArgumentParser(description='Dipper: Data Ingestion'
' Pipeline for SciGraph',
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-s', '--sources', type=str, required=True,
help='comma separated list of sources')
parser.add_argument('-l', '--limit', type=int, help='limit number of rows')
parser.add_argument('--parse_only', action='store_true',
help='parse files without writing'),
parser.add_argument('--fetch_only', action='store_true',
help='fetch sources without parsing')
parser = argparse.ArgumentParser(
description='Dipper: Data Ingestion Pipeline for SciGraph',
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
'-s', '--sources', type=str, required=True,
help='comma separated list of sources')
parser.add_argument(
'-l', '--limit', type=int,
help='limit number of rows')
parser.add_argument(
'--parse_only', action='store_true',
help='parse files without writing')
parser.add_argument(
'--fetch_only', action='store_true',
help='fetch sources without parsing')
parser.add_argument('-f', '--force', action='store_true',
help='force re-download of files')
parser.add_argument('--no_verify', help='ignore the verification step',
action='store_true')
parser.add_argument(
'--no_verify',
help='ignore the verification step', action='store_true')
parser.add_argument('--query', help='enter in a sparql query', type=str)
parser.add_argument('-q', '--quiet', help='turn off info logging',
action="store_true")
parser.add_argument('--debug', help='turn on debug logging',
action="store_true")
parser.add_argument('--skip_tests', help='skip any testing', action="store_true")

# BNodes can't be visualized in Protege, so you can materialize them for testing purposes with this flag
parser.add_argument('-nb', '--no_bnodes', help="convert blank nodes into identified nodes", action="store_true")

# TODO this preconfiguration should probably live in the conf.json, and the same filter be applied to all sources
parser.add_argument('-t', '--taxon', type=str,
help='Add a taxon constraint on a source. Enter 1+ NCBITaxon numbers, comma delimited\n'
'Implemented taxa per source\n'
'NCBIGene: 9606,10090,7955\n'
'Panther: 9606,10090,10116,7227,7955,6239,8355\n'
'BioGrid: 9606,10090,10116,7227,7955,6239,8355\n'
'UCSCBands: 9606\n'
'GO: 9606,10090,10116,7227,7955,6239,9615,9823,9031,9913')
parser.add_argument('-o', '--test_only', help='only process and output the pre-configured test subset',
action="store_true")

parser.add_argument('--format', help='serialization format: turtle (default), xml, n3, nt, raw', type=str)

parser.add_argument(
'-q', '--quiet',
help='turn off info logging', action="store_true")
parser.add_argument(
'--debug', help='turn on debug logging', action="store_true")
parser.add_argument(
'--skip_tests', help='skip any testing', action="store_true")

# BNodes can't be visualized in Protege,
# so you can materialize them for testing purposes with this flag
parser.add_argument(
'-nb', '--no_bnodes',
help="convert blank nodes into identified nodes", action="store_true")

# TODO this preconfiguration should probably live in the conf.json,
# and the same filter be applied to all sources
parser.add_argument(
'-t', '--taxon', type=str,
help='Add a taxon constraint on a source. Enter 1+ NCBITaxon numbers,'
' comma delimited\n'
'Implemented taxa per source\n'
'NCBIGene: 9606,10090,7955\n'
'Panther: 9606,10090,10116,7227,7955,6239,8355\n'
'BioGrid: 9606,10090,10116,7227,7955,6239,8355\n'
'UCSCBands: 9606\n'
'GO: 9606,10090,10116,7227,7955,6239,9615,9823,9031,9913')
parser.add_argument(
'-o', '--test_only',
help='only process and output the pre-configured test subset',
action="store_true")

parser.add_argument(
'--format',
help='serialization format: turtle (default), xml, n3, nt, raw',
type=str)

args = parser.parse_args()
tax_ids = None
if args.taxon is not None:
tax_ids = list(map(int, args.taxon.split(',')))
# TODO PYLINT Used builtin function 'map'. DONE?
# Using a list comprehension can be clearer.
# tax_ids = list(map(int, args.taxon.split(',')))
tax_ids = [int(t) for t in args.taxon.split(',')]

taxa_supported = ['Panther', 'NCBIGene', 'BioGrid', 'UCSCBands', 'GeneOntology']
taxa_supported = [
'Panther', 'NCBIGene', 'BioGrid', 'UCSCBands', 'GeneOntology']

formats_supported = ['xml', 'n3', 'turtle', 'nt', 'ttl', 'raw']

Expand Down Expand Up @@ -134,7 +161,8 @@ def main():
if args.format == 'ttl':
args.format = 'turtle'
else:
logger.error("You have specified an invalid serializer: %s", args.format)
logger.error(
"You have specified an invalid serializer: %s", args.format)

exit(0)
else:
Expand Down Expand Up @@ -168,7 +196,8 @@ def main():
if (args.no_verify or args.skip_tests) is not True:
suite = mysource.getTestSuite()
if suite is None:
logger.warn("No tests configured for this source: %s", source)
logger.warning(
"No tests configured for this source: %s", source)
else:
unittest.TextTestRunner(verbosity=2).run(suite)
else:
Expand All @@ -187,7 +216,8 @@ def main():

# status = mysource.verify()
# if status is not True:
# logger.error('Source %s did not pass verification tests.', source)
# logger.error(
# 'Source %s did not pass verification tests.', source)
# exit(1)
# else:
# logger.info('skipping verification step')
Expand Down
24 changes: 16 additions & 8 deletions dipper/config.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
__author__ = 'nicole'

import json
import os.path
import logging

__author__ = 'nicole'

logger = logging.getLogger(__name__)

#read configuration file
# read configuration file
conf = {}

#load the configuration file, if it exists.
#it isn't required, but may be for some sources
'''
Load the configuration file 'conf.json', if it exists.
it isn't always required, but may be for some sources.
'''

if os.path.exists(os.path.join(os.path.dirname(__file__), 'conf.json')):
with open(os.path.join(os.path.dirname(__file__),
'conf.json')) as json_file:
with open(
os.path.join(os.path.dirname(__file__),
'conf.json')) as json_file:
conf = json.load(json_file)
logger.debug("Finished loading config")
else:
logger.warning("'conf.json' not found in '%s'", os.path.dirname(__file__))
logger.warning("Sources that depend on 'conf.json' will fail")


def get_config():
return conf
return conf
23 changes: 15 additions & 8 deletions dipper/curie_map.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
__author__ = 'nicole'

import yaml
import os.path
import logging
import yaml

__author__ = 'nicole'

logger = logging.getLogger(__name__)

#read configuration file
# read configuration file
curie_map = None

#load the curie mapping file, if it exists.
#it isn't required, but is necessary for most resources
'''
Load the curie mapping file 'curie_map.yaml', if it exists.
it isn't required, but is necessary for most resources
'''

if os.path.exists(os.path.join(os.path.dirname(__file__), 'curie_map.yaml')):
with open(os.path.join(os.path.dirname(__file__),
'curie_map.yaml')) as yaml_file:
with open(os.path.join(os.path.dirname(__file__), 'curie_map.yaml')) \
as yaml_file:
curie_map = yaml.load(yaml_file)
logger.debug("Finished loading curie maps: %s", curie_map)
else:
logger.debug(
"Cannot find 'curie_map.yaml' in %s",
os.path.dirname(__file__))


def get():
Expand Down
36 changes: 36 additions & 0 deletions dipper/generate_callgraph.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#! /bin/bash
# Generate a pycallgraph PNG command for every Python source file in the
# tree, mirroring the source directory layout under ${outdir}.
#
# Requires
# pip3 install pycallgraph
# [yum|apt-get] install graphviz

# relative path to where the output goes
outdir="./callgraph"

rm -fr "${outdir}"

# Use `find -print0 | read -d ''` instead of `for src in $(find ...)`:
# the unquoted command substitution word-splits, so any filename containing
# whitespace would be mangled. The original `-exec echo {} \;` was also
# redundant — find prints matching paths by default.
find . -type f -name "*.py" -print0 | while IFS= read -r -d '' src; do

    oldpth="${src%/*}"            # drop all from last slash on
    basefn="${src##*/}"           # drop all up to & inc last slash
    newpth="${outdir}${oldpth#.}" # drop leading dot
    rtname="${basefn%.py}"        # drop ".py" suffix

    # create the mirror directory under ${outdir} if it doesn't exist yet
    # (mkdir -p is a no-op when it already does)
    mkdir -p "${newpth}"

    echo "pycallgraph graphviz --output-file=\"${newpth}/${rtname}.png\" -- \"${src}\""
    echo ""
done
Loading

0 comments on commit 382ba00

Please sign in to comment.