Skip to content

Commit

Permalink
Split out argparse and running, first go
Browse files Browse the repository at this point in the history
  • Loading branch information
irl committed Oct 18, 2016
1 parent 22cb382 commit 3c18981
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 50 deletions.
53 changes: 7 additions & 46 deletions pathspider/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,7 @@

plugins = load("pathspider.plugins", subclasses=PluggableSpider)

def job_feeder(inputfile, spider):
    """Read jobs from a CSV file and hand each one to the spider.

    Every non-empty CSV row is passed to ``spider.add_job()`` as a list,
    with the second column (the port number) converted to ``int``.
    ``spider.shutdown()`` is called once feeding is over, whether it
    finished normally or was aborted by an error.

    :param inputfile: path of the CSV file to read jobs from
    :param spider: object providing ``add_job(row)`` and ``shutdown()``
    :raises OSError: if ``inputfile`` cannot be opened
    :raises ValueError: if a row's second column is not an integer
    :raises IndexError: if a non-empty row has fewer than two columns
    """
    logger = logging.getLogger("feeder")
    try:
        # newline='' lets the csv module do its own newline handling, so
        # newlines embedded in quoted fields are read back unchanged.
        with open(inputfile, newline='') as fp:
            logger.debug("job_feeder: started")
            reader = csv.reader(fp, delimiter=',', quotechar='"')
            for row in reader:
                if len(row) > 0:
                    # port numbers should be integers
                    row[1] = int(row[1])
                    spider.add_job(row)

        logger.info("job_feeder: all jobs added, waiting for spider to finish")
    finally:
        # Always request shutdown: if an unreadable file or malformed row
        # aborted the feeder without this call, the spider would never be
        # told that no more jobs are coming.
        spider.shutdown()
        logger.debug("job_feeder: stopped")

def run_pathspider():
def handle_args():
class SubcommandHelpFormatter(argparse.RawDescriptionHelpFormatter):
def _format_action(self, action):
parts = super()._format_action(action)
Expand All @@ -47,7 +32,7 @@ def _format_action(self, action):
'paths.'), formatter_class=SubcommandHelpFormatter)
parser.add_argument('-s', '--standalone', action='store_true', help='''run in
standalone mode. this is the default mode (and currently the only supported
mode). in the future, mplane will be supported as a mode of operation.''')
mode). in the future, mplane will be supported as a mode of operation.''', default=True)
parser.add_argument('-i', '--interface', help='''the interface to use for the observer''', default="eth0")
parser.add_argument('-w', '--workers', type=int, help='''number of workers to use''', default=100)
parser.add_argument('--input', default='/dev/stdin', metavar='INPUTFILE', help='''a file
Expand Down Expand Up @@ -87,34 +72,10 @@ def _format_action(self, action):
# Run a utility function
sys.exit(args.func(args))

try:
if hasattr(args, "spider"):
spider = args.spider(args.workers, "int:" + args.interface, args)
else:
logger.error("Plugin not found! Cannot continue.")
logger.error("Use --help to list all plugins.")
sys.exit(1)

logger.info("activating spider...")

spider.start()

logger.debug("starting job feeder...")
threading.Thread(target=job_feeder, args=(args.input, spider)).start()

with open(args.output, 'w') as outputfile:
logger.info("opening output file "+args.output)
while True:
result = spider.outqueue.get()
if result == SHUTDOWN_SENTINEL:
logger.info("output complete")
break
outputfile.write(json.dumps(result) + "\n")
logger.debug("wrote a result")
spider.outqueue.task_done()

except KeyboardInterrupt:
logger.error("Received keyboard interrupt, dying now.")
if args.standalone:
# we're running in standalone mode
from pathspider.standalone import run_standalone
run_standalone(args)

if __name__ == "__main__":
run_pathspider()
handle_args()
59 changes: 59 additions & 0 deletions pathspider/standalone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@

import csv
import logging
import json
import sys
import threading

from straight.plugin import load

from pathspider.base import PluggableSpider
from pathspider.base import SHUTDOWN_SENTINEL


def job_feeder(inputfile, spider):
    """Read jobs from a CSV file and hand each one to the spider.

    Every non-empty CSV row is passed to ``spider.add_job()`` as a list,
    with the second column (the port number) converted to ``int``.
    ``spider.shutdown()`` is called once feeding is over, whether it
    finished normally or was aborted by an error.

    :param inputfile: path of the CSV file to read jobs from
    :param spider: object providing ``add_job(row)`` and ``shutdown()``
    :raises OSError: if ``inputfile`` cannot be opened
    :raises ValueError: if a row's second column is not an integer
    :raises IndexError: if a non-empty row has fewer than two columns
    """
    logger = logging.getLogger("feeder")
    try:
        # newline='' lets the csv module do its own newline handling, so
        # newlines embedded in quoted fields are read back unchanged.
        with open(inputfile, newline='') as fp:
            logger.debug("job_feeder: started")
            reader = csv.reader(fp, delimiter=',', quotechar='"')
            for row in reader:
                if len(row) > 0:
                    # port numbers should be integers
                    row[1] = int(row[1])
                    spider.add_job(row)

        logger.info("job_feeder: all jobs added, waiting for spider to finish")
    finally:
        # Always request shutdown: run_standalone() blocks on the spider's
        # output queue until it sees the shutdown sentinel, so a feeder
        # that died on an unreadable file or malformed row without this
        # call would leave it waiting.
        spider.shutdown()
        logger.debug("job_feeder: stopped")

def run_standalone(args):
    """Run the spider selected in ``args`` and write its results as JSON lines.

    Instantiates ``args.spider`` with ``args.workers``, the interface URI
    ``"int:" + args.interface`` and ``args`` itself, starts it, and launches
    ``job_feeder`` on ``args.input`` in a separate thread. Every result
    taken from ``spider.outqueue`` is written to ``args.output`` as one
    JSON document per line until ``SHUTDOWN_SENTINEL`` is received.

    Exits with status 1 if ``args`` has no ``spider`` attribute. A
    ``KeyboardInterrupt`` raised while running is logged rather than
    propagated.

    :param args: parsed command-line arguments (reads ``spider``,
                 ``workers``, ``interface``, ``input`` and ``output``)
    """
    log = logging.getLogger("pathspider")

    try:
        # Guard clause: without a plugin there is nothing to run.
        if not hasattr(args, "spider"):
            log.error("Plugin not found! Cannot continue.")
            log.error("Use --help to list all plugins.")
            sys.exit(1)

        observer_uri = "int:" + args.interface
        spider = args.spider(args.workers, observer_uri, args)

        log.info("activating spider...")

        spider.start()

        log.debug("starting job feeder...")
        feeder = threading.Thread(target=job_feeder,
                                  args=(args.input, spider))
        feeder.start()

        with open(args.output, 'w') as sink:
            log.info("opening output file "+args.output)
            results = spider.outqueue
            while True:
                item = results.get()
                if item == SHUTDOWN_SENTINEL:
                    # The sentinel marks the end of the result stream.
                    log.info("output complete")
                    break
                line = json.dumps(item) + "\n"
                sink.write(line)
                log.debug("wrote a result")
                results.task_done()

    except KeyboardInterrupt:
        log.error("Received keyboard interrupt, dying now.")
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
# pip to create the appropriate form of executable for the target platform.
entry_points={
'console_scripts': [
'pathspider=pathspider.run:run_pathspider',
'pathspider=pathspider.run:handle_args',
],
},
)
6 changes: 3 additions & 3 deletions tests/test_job_feeder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

import pathspider.run
from pathspider.standalone import job_feeder

class FakeSpider:
def __init__(self):
Expand All @@ -22,7 +22,7 @@ def test_job_feeder_webtest():
['2001:67c:10ec:36c2::61', 80, 'ecn.ethz.ch', '5'],
['139.133.1.4', 80, 'abdn.ac.uk', '6']]

pathspider.run.job_feeder("examples/webtest.csv", spider)
job_feeder("examples/webtest.csv", spider)
assert spider.was_shutdown
assert spider.jobs == expected_jobs

Expand All @@ -36,7 +36,7 @@ def test_job_feeder_webtest_newline():
['2001:67c:10ec:36c2::61', 80, 'ecn.ethz.ch', '5'],
['139.133.1.4', 80, 'abdn.ac.uk', '6']]

pathspider.run.job_feeder("tests/testdata/webtest_newline.csv", spider)
job_feeder("tests/testdata/webtest_newline.csv", spider)
assert spider.was_shutdown
assert spider.jobs == expected_jobs

0 comments on commit 3c18981

Please sign in to comment.