
Added a function that allows a maximum number of requests per second. #2

Open
conradlee wants to merge 5 commits

2 participants

Conrad Lee, Pete Warden
Conrad Lee

I still need to test this out. I would have tested before doing anything on GitHub, but I don't yet have the hang of GitHub and somehow went straight to editing the code before checking it out.

Pete Warden (Owner)
Commits on Jun 25, 2011

  1. Conrad Lee authored
  2. Fixed a bug I introduced in startrequest() caused by too few arguments when calling waitforoutstandingrequeststodropbelow.

     conradlee authored
  3. fixed a minor bug in the finish requests command

     Conrad Lee authored
  4. Minor changes

     Conrad Lee authored
Showing with 26 additions and 7 deletions.
  1. +24 −6 pyparallelcurl.py
  2. +2 −1  test.py
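
The patch below implements the per-second cap as a fixed-window counter: it tracks how many requests have started during the current wall-clock second, and once that budget is spent it sleeps until the second ticks over. As a standalone illustration of the same pattern, here is a minimal sketch; the PerSecondThrottle name and wait() method are illustrative, not part of the patch:

import time

class PerSecondThrottle:
    # Fixed-window limiter: allow at most max_per_second calls in any
    # given wall-clock second, mirroring the counters the patch adds.

    def __init__(self, max_per_second=1):
        self.max_per_second = max_per_second
        self.last_used_second = 0
        self.requests_this_second = 0

    def wait(self):
        # Budget for this second is spent: sleep until the clock ticks over.
        if self.requests_this_second >= self.max_per_second:
            while int(time.time()) == self.last_used_second:
                time.sleep(0.01)
        now = int(time.time())
        if now == self.last_used_second:
            self.requests_this_second += 1
        else:
            self.last_used_second = now
            self.requests_this_second = 1

# Example: at most 2 calls per second.
# throttle = PerSecondThrottle(2)
# for i in range(10):
#     throttle.wait()
#     do_request(i)   # hypothetical request function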
pyparallelcurl.py (30 changed lines, file mode 100755 → 100644)
@@ -56,12 +56,17 @@ class ParallelCurl:
     outstanding_requests = {}
     multi_handle = None

-    def __init__(self, in_max_requests = 10, in_options = {}):
+    def __init__(self, in_max_requests = 10, in_options = {}, max_requests_per_second = 1):
         self.max_requests = in_max_requests
         self.options = in_options
         self.outstanding_requests = {}
         self.multi_handle = pycurl.CurlMulti()
+
+        # Variables for rate limiting
+        self.max_requests_per_second = max_requests_per_second
+        self.last_used_second = 0
+        self.requests_this_second = 0

     # Ensure all the requests finish nicely
     def __del__(self):
@@ -83,7 +88,7 @@ def setoptions(self, in_options):
     def startrequest(self, url, callback, user_data = {}, post_fields=None):
         if self.max_requests > 0:
-            self.waitforoutstandingrequeststodropbelow(self.max_requests)
+            self.waitforoutstandingrequeststodropbelow(self.max_requests, self.max_requests_per_second)

         ch = pycurl.Curl()
         for option, value in self.options.items():
@@ -97,7 +102,13 @@ def startrequest(self, url, callback, user_data = {}, post_fields=None):
             ch.setopt(pycurl.POSTFIELDS, post_fields)

         self.multi_handle.add_handle(ch)
-
+
+        if int(time.time()) == self.last_used_second:
+            self.requests_this_second += 1
+        else:
+            self.last_used_second = int(time.time())
+            self.requests_this_second = 1
+
         self.outstanding_requests[ch] = {
             'handle': ch,
             'result_buffer': result_buffer,
@@ -111,7 +122,7 @@ def startrequest(self, url, callback, user_data = {}, post_fields=None):
     # You *MUST* call this function at the end of your script. It waits for any running requests
     # to complete, and calls their callback functions
     def finishallrequests(self):
-        self.waitforoutstandingrequeststodropbelow(1)
+        self.waitforoutstandingrequeststodropbelow(1, self.max_requests_per_second)

     # Checks to see if any of the outstanding requests have finished
     def checkforcompletedrequests(self):
@@ -170,10 +181,17 @@ def checkforcompletedrequests(self):
                 break

     # Blocks until there's less than the specified number of requests outstanding
-    def waitforoutstandingrequeststodropbelow(self, max):
+    def waitforoutstandingrequeststodropbelow(self, max_simultaneous, max_per_second):
         while True:
+
+            if self.requests_this_second >= max_per_second:
+                while int(time.time()) == self.last_used_second:
+                    time.sleep(0.01)
+                self.requests_this_second = 0
+                self.last_used_second = int(time.time())
+
             self.checkforcompletedrequests()
-            if len(self.outstanding_requests) < max:
+            if len(self.outstanding_requests) < max_simultaneous:
                 break
             time.sleep(0.01)
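
For context, here is a hedged usage sketch of the patched API; the constructor signature, startrequest(), and finishallrequests() come from the diff above, while the URLs, callback body, and pycurl options are illustrative placeholders (Python 2, matching test.py):

import pycurl
from pyparallelcurl import ParallelCurl

def on_request_done(content, url, ch, user_data):
    # ch is the pycurl handle; user_data is whatever was passed to startrequest()
    print "Fetched '"+url+"': "+str(len(content or ""))+" bytes"

curl_options = {pycurl.FOLLOWLOCATION: 1}  # placeholder options

# At most 10 requests in flight, and at most 2 new requests started per second
parallel_curl = ParallelCurl(10, curl_options, max_requests_per_second = 2)

for url in ["http://example.com/a", "http://example.com/b"]:  # placeholder URLs
    parallel_curl.startrequest(url, on_request_done)

# Required: blocks until every outstanding request has completed
parallel_curl.finishallrequests()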
test.py (3 changed lines)
@@ -27,8 +27,9 @@ def on_request_done(content, url, ch, search):
         print "Fetch error "+str(httpcode)+" for '"+url+"'"
         return
+
     responseobject = json.loads(content)
-    if 'responseData' not in responseobject or 'results' not in responseobject['responseData']:
+    if 'responseData' not in responseobject or not responseobject['responseData'] or 'results' not in responseobject['responseData']:
         print "No results found for '"+search+"'"
         return
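
The test.py change guards against a subtle crash: if the response JSON contains "responseData": null, json.loads maps that value to None, and the old condition then evaluated 'results' in None, which raises a TypeError. A small illustration under a hypothetical payload (Python 2, as in test.py):

import json

# A response whose "responseData" key is present but null (hypothetical payload):
responseobject = json.loads('{"responseData": null}')

print 'responseData' in responseobject     # True, so the old check passed...
# ...and then crashed on the second clause, because 'in' cannot be applied to None:
#   'results' in responseobject['responseData']
#   TypeError: argument of type 'NoneType' is not iterable

# The patched condition short-circuits on the falsy None value instead:
if 'responseData' not in responseobject or not responseobject['responseData'] or 'results' not in responseobject['responseData']:
    print "No results found"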