Skip to content

Commit

Permalink
Downloading now works through threading.
Browse files Browse the repository at this point in the history
  • Loading branch information
thoppe committed Aug 10, 2015
1 parent 34cca97 commit f8d51c3
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 5 deletions.
10 changes: 6 additions & 4 deletions ping_demo.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from tor_spiders import tor_request_pool


if __name__ == "__main__":

T = tor_request_pool(3)
T = tor_request_pool(2)

url = 'https://api.ipify.org?format=json'

for x in range(10):
url = 'https://api.ipify.org?format=json'
r = T.get(url)
T.put(url)

for r in T:
print r.text
39 changes: 38 additions & 1 deletion tor_spiders/tor_spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
import json
import itertools

import Queue
import threading
import time

def get_IP_address(session):
url = 'https://api.ipify.org?format=json'
r = session.get(url)
Expand Down Expand Up @@ -89,7 +93,9 @@ def __init__(self, n=2,
port = PORT_START + k
args.append( (port, local_storage) )

ITR = itertools.imap(_generate_tor_req, args)
ITR = itertools.imap(_generate_tor_req, args)
self.Q = Queue.Queue()
self.result = Queue.Queue()

for k,proc in enumerate(ITR):
self.T.append(proc)
Expand All @@ -99,5 +105,36 @@ def __init__(self, n=2,
def get(self, url, params=None):
    """Fetch ``url`` through the next entry produced by ``self.workers``.

    ``self.workers`` is advanced by one step on every call and the request
    is delegated to the object it yields.
    NOTE(review): presumably ``self.workers`` cycles over the Tor sessions
    built in ``__init__`` -- confirm against the constructor.

    Args:
        url: Address to request.
        params: Optional query parameters, forwarded unchanged.

    Returns:
        Whatever the selected worker's ``get(url, params)`` returns.
    """
    # The builtin next() behaves like the old ``.next()`` method on
    # Python 2.6+ and keeps working on Python 3, where that method is gone.
    worker = next(self.workers)
    return worker.get(url, params)

def put(self, url, params=None):
    """Schedule ``url`` for a later download.

    The request is stored on ``self.Q`` as a ``(url, params)`` tuple;
    nothing is fetched at this point.

    Args:
        url: Address to request.
        params: Optional query parameters to send with the request.
    """
    job = (url, params)
    self.Q.put(job)

def session_worker(self, queue):
    """Drain ``queue``, downloading each job and storing the response.

    Intended as a thread target: the loop pulls ``(url, params)`` tuples
    until the queue reports empty, then returns.

    Args:
        queue: Queue of ``(url, params)`` jobs to consume. The previous
            implementation ignored this argument and always read
            ``self.Q``; ``download_queue`` passes ``self.Q`` here, so the
            behaviour for that caller is unchanged.
    """
    while True:
        # Only the non-blocking fetch is guarded, so a Queue.Empty that
        # escaped from the download itself cannot be mistaken for
        # "no more work".
        try:
            url, params = queue.get(False)
        except Queue.Empty:
            return
        self.result.put(self.get(url, params))

def download_queue(self):
    """Start one ``session_worker`` thread per entry in ``self.T``.

    Every thread is handed ``self.Q`` as its job queue.

    Returns:
        list: The started ``threading.Thread`` objects, so callers can
        wait for them. Earlier versions returned ``None``.
    """
    workers = []
    for _ in self.T:
        thread = threading.Thread(target=self.session_worker,
                                  args=(self.Q,))
        thread.start()
        workers.append(thread)
    return workers


def __iter__(self):
    """Run all queued downloads and yield responses as they complete.

    Results come back in completion order, not submission order.

    The loop is driven by worker liveness, not by ``self.Q`` being empty.
    The job queue empties the moment the last job is picked up, while that
    download may still be in flight, so stopping on an empty job queue
    could silently drop the final responses. Watching the threads also
    means iteration ends if every worker has died with jobs still queued.
    """
    workers = self.download_queue()

    while any(thread.is_alive() for thread in workers):
        try:
            # Wait briefly instead of spinning; wake as soon as a result
            # is available.
            yield self.result.get(True, 0.1)
        except Queue.Empty:
            pass

    # Every worker has exited, so anything still buffered is final.
    while True:
        try:
            yield self.result.get(False)
        except Queue.Empty:
            return


_local_session = requesocks.session()
_local_IP = get_IP_address(_local_session)

0 comments on commit f8d51c3

Please sign in to comment.