Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Implemented rate limit. HOURS_LIMIT restricts how frequently a record…

… can be processed. If a record was last processed more than HOURS_LIMIT hours ago, the record is not processed. Records that have not been previously processed are not affected by this and are processed normally.
  • Loading branch information...
commit d356135b1dbda8ab6f6d898727313a9e289cd551 1 parent c818a5c
@nbprashanth authored
View
6 WebParse.py
@@ -50,6 +50,7 @@ def getRecords(self):
method=record.getElementsByTagName('method')[0].firstChild.data
pkgName=record.getElementsByTagName('pkgName')[0].firstChild.data
processed=record.getElementsByTagName('processed')[0].firstChild.data
+ updated=record.getElementsByTagName('updated-at')[0].firstChild.data
branch=str(branch).strip()
error=str(error).strip()
@@ -59,8 +60,9 @@ def getRecords(self):
method=str(method).strip()
pkgName=str(pkgName).strip()
processed=str(processed).strip()
+ updated=str(updated).strip()
- recordList.append({'branch':branch, 'error':error, 'errorMsg':errorMessage, 'id':id, 'url':url, 'method':method, 'pkgname':pkgName, 'processed':processed})
+ recordList.append({'branch':branch, 'error':error, 'errorMsg':errorMessage, 'id':id, 'url':url, 'method':method, 'pkgname':pkgName, 'processed':processed, 'updated-at':updated})
return recordList
@@ -86,4 +88,4 @@ def updateRecord(self, param, value, id):
putData='record['+param+']='+valueString
request = urllib2.Request(self.url+':'+self.port+'/records/'+id+'.xml', data=putData)
request.get_method = lambda: 'PUT'
- url = opener.open(request)
View
BIN  WebParse.pyc
Binary file not shown
View
91 WebParse.py~
@@ -0,0 +1,91 @@
+'''
+Created on Aug 2, 2012
+
+@author: Prashanth
+'''
+
+import urllib2
+import urllib
+from xml.dom.minidom import parse, parseString
+
class WebParse(object):
    '''
    Small client for the records web service.

    Reads the record list from <url>:<port>/records.xml and writes single
    fields back with an HTTP PUT to <url>:<port>/records/<id>.xml.
    '''

    # (XML tag, key in the returned dict) for every field that is read
    # unconditionally from a <record> element.
    _FIELDS = (
        ('branch', 'branch'),
        ('error', 'error'),
        ('id', 'id'),
        ('info', 'url'),
        ('method', 'method'),
        ('pkgName', 'pkgname'),
        ('processed', 'processed'),
        ('updated-at', 'updated-at'),
    )

    def __init__(self, url, port):
        '''
        Store the service location.

        url  -- base URL as a string, e.g. 'http://localhost'
        port -- port as a string (it is concatenated into the URL)
        '''
        self.url = url
        self.port = port

    def getRecords(self):
        '''
        Fetch /records.xml and return the records as a list of dicts.

        Each dict has the keys 'branch', 'error', 'errorMsg', 'id', 'url',
        'method', 'pkgname', 'processed' and 'updated-at'; every value is a
        stripped string.

        Returns None when the request fails with urllib2.URLError (the
        reason is printed) or when the response body is empty.
        '''
        try:
            response = urllib2.urlopen(self.url + ':' + self.port + '/records.xml')
            try:
                data = response.read()
            finally:
                # Release the connection even if read() fails.
                response.close()
        except urllib2.URLError as e:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2.
            print('URL Error - ' + str(e.reason))
            return None

        if not data:
            return None

        return self.parseRecords(data)

    @classmethod
    def parseRecords(cls, data):
        '''
        Parse a records XML document (a string) into a list of dicts.

        Same result format as getRecords(). 'errorMsg' is taken from the
        <errorMessage> element only when the record's <error> text is
        'True'; otherwise it is ''.
        '''
        recordList = []
        for record in parseString(data).getElementsByTagName('record'):
            entry = dict((key, cls._text(record, tag)) for tag, key in cls._FIELDS)
            if entry['error'] == 'True':
                entry['errorMsg'] = cls._text(record, 'errorMessage')
            else:
                entry['errorMsg'] = ''
            recordList.append(entry)
        return recordList

    @staticmethod
    def _text(record, tag):
        '''Return the stripped text of the first <tag> under record, or ''.'''
        nodes = record.getElementsByTagName(tag)
        # A missing element or one without a text child (e.g. <branch/>)
        # yields '' instead of raising IndexError / AttributeError.
        if not nodes or nodes[0].firstChild is None:
            return ''
        return str(nodes[0].firstChild.data).strip()

    @staticmethod
    def _formatValue(value):
        '''
        Turn a field value into the text that is sent to the service.

        None becomes 'N/A', an empty list becomes '-', a list is joined
        with ' ,' and anything else is passed through unchanged.
        '''
        if value is None:
            return 'N/A'
        if isinstance(value, list):
            if not value:
                return '-'
            # join() leaves no trailing separator residue; the previous
            # manual loop left a stray space after the last element.
            return ' ,'.join('%s' % item for item in value)
        return value

    def updateRecord(self, param, value, id):
        '''
        Set one field of a record on the service.

        param -- field name, sent as record[<param>]
        value -- new value; see _formatValue for how None and lists are sent
        id    -- record id as a string (it is concatenated into the URL;
                 the name shadows the builtin but is part of the interface)
        '''
        # %-formatting accepts non-string values where '+' raised TypeError.
        # NOTE(review): the value is sent without URL-encoding, so '&' or '='
        # inside it will be misread as form syntax -- confirm whether the
        # service needs it encoded.
        putData = 'record[%s]=%s' % (param, self._formatValue(value))

        request = urllib2.Request(self.url + ':' + self.port + '/records/' + id + '.xml', data=putData)
        # urllib2 only issues GET/POST on its own; override the verb. It must
        # be the real HTTP method 'PUT' ('UPDATE' is not an HTTP method).
        request.get_method = lambda: 'PUT'

        opener = urllib2.build_opener(urllib2.HTTPHandler)
        # Close the response so the connection is not leaked.
        opener.open(request).close()
View
16 main.py
@@ -4,7 +4,8 @@
from Upstream import HTTPLS, FTPLS, Google, Launchpad, SVNLS, Trac,\
SubdirHTTPLS, DualHTTPLS, Custom, SF
import time
-from optparse import OptionParser
+from datetime import datetime, timedelta
+from time import gmtime, strftime
THREAD_LIMIT = 2
QUEUE_LIMIT = 50
@@ -12,6 +13,8 @@
PORT = '3000'
THREAD_WAIT = 5
+HOURS_LIMIT=1
+
# On an average, no of records processed is : 1 for every THREAD_WAIT seconds
jobs = Queue.Queue(QUEUE_LIMIT)
@@ -33,8 +36,16 @@
id=record['id']
processed=record['processed']
branch=record['branch']
+ updated=record['updated-at']
+
+ date_object = datetime.strptime(updated, '%Y-%m-%dT%H:%M:%SZ')
+ curr_dt=datetime.strptime(strftime('%Y-%m-%dT%H:%M:%SZ', gmtime()), '%Y-%m-%dT%H:%M:%SZ')
- inputlist_ori.append([pkgname, method, url, id, processed, branch])
+ if (processed=='true' and ((curr_dt-date_object)-timedelta(hours=HOURS_LIMIT)).seconds/3600<=0) or processed=='false':
+ print 'Processing ' + pkgname + ' ; Time difference : ' + str((timedelta(hours=HOURS_LIMIT)-(curr_dt-date_object)).seconds/3600)
+ inputlist_ori.append([pkgname, method, url, id, processed, branch])
+ else:
+ print 'Not processing ' + pkgname + ' ; Time difference : ' + str((curr_dt-date_object).seconds/3600) +'H '+str(((curr_dt-date_object).seconds-3600)/60) +'M'
def main(inputlist):
for x in xrange(THREAD_LIMIT):
@@ -120,6 +131,7 @@ def process(self, pkgname, method, url, id, branch):
wp.updateRecord('latest_ver', ver, id)
wp.updateRecord('loc', loc, id)
+
def run(self):
while 1:
try:
View
17 main.py~
@@ -4,7 +4,8 @@ from WebParse import WebParse
from Upstream import HTTPLS, FTPLS, Google, Launchpad, SVNLS, Trac,\
SubdirHTTPLS, DualHTTPLS, Custom, SF
import time
-from optparse import OptionParser
+from datetime import datetime, timedelta
+from time import gmtime, strftime
THREAD_LIMIT = 2
QUEUE_LIMIT = 50
@@ -12,6 +13,8 @@ URL = 'http://localhost'
PORT = '3000'
THREAD_WAIT = 5
+HOURS_LIMIT=24
+
# On an average, no of records processed is : 1 for every THREAD_WAIT seconds
jobs = Queue.Queue(QUEUE_LIMIT)
@@ -28,14 +31,21 @@ if records==None:
for record in records:
pkgname=record['pkgname']
- print pkgname
method=record['method']
url=record['url']
id=record['id']
processed=record['processed']
branch=record['branch']
+ updated=record['updated-at']
+
+ date_object = datetime.strptime(updated, '%Y-%m-%dT%H:%M:%SZ')
+ curr_dt=datetime.strptime(strftime('%Y-%m-%dT%H:%M:%SZ', gmtime()), '%Y-%m-%dT%H:%M:%SZ')
-inputlist_ori.append([pkgname, method, url, id, processed, branch])
+ if (processed=='true' and ((curr_dt-date_object)-timedelta(hours=1)).seconds/3600<=0) or processed=='false':
+ print 'Processing ' + pkgname + ' ; Time difference : ' + str((timedelta(hours=HOURS_LIMIT)-(curr_dt-date_object)).seconds/3600)
+ inputlist_ori.append([pkgname, method, url, id, processed, branch])
+ else:
+ print 'Not processing ' + pkgname + ' ; Time difference : ' + str((curr_dt-date_object).seconds/3600) +'H '+str(((curr_dt-date_object).seconds-3600)/60) +'M'
def main(inputlist):
for x in xrange(THREAD_LIMIT):
@@ -121,6 +131,7 @@ class workerbee(threading.Thread):
wp.updateRecord('latest_ver', ver, id)
wp.updateRecord('loc', loc, id)
+
def run(self):
while 1:
try:
Please sign in to comment.
Something went wrong with that request. Please try again.