Skip to content

Commit

Permalink
Adds discarding connection capability to sync spider
Browse files: browse the repository at this point in the history
  • Loading branch information
ana-cc authored and irl committed Oct 23, 2017
1 parent 64c7eec commit 5464783
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions pathspider/sync.py
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
from pathspider.helpers.http import connect_https
from pathspider.helpers.dns import connect_dns_tcp
from pathspider.helpers.dns import connect_dns_udp

from pathspider.base import CONN_DISCARD

class SynchronizedSpider(Spider):
# pylint: disable=W0223
@@ -138,24 +138,29 @@ def worker(self, worker_number):
self.pre_connect(job)

conns = []
should_discard = False

for config in range(0, len(self.configurations)):
# Wait for configuration
self.__semaphores[config][0].acquire()

# Connect in configuration
conn = self._connect_wrapper(job, config)
if 'spdr_state' in conn:
if conn['spdr_state'] == CONN_DISCARD:
should_discard = True
conns.append(conn)

# Wait for next configuration
self.__semaphores[(config + 1) % len(
self.configurations)][1].release()

# Save job record for combiner
self.jobtab[jobId] = job
if not should_discard:
# Save job record for combiner
self.jobtab[jobId] = job

# Finish connections and pass on for merging
self._finalise_conns(job, jobId, conns)
# Finish connections and pass on for merging
self._finalise_conns(job, jobId, conns)

self.__logger.debug("job complete: " + repr(job))
self.jobqueue.task_done()
Expand Down

0 comments on commit 5464783

Please sign in to comment.