Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Modified the sample script to resolve custom URLs containing proper regex components.

  • Loading branch information...
commit 00a3a295b68cc7a587eda4b7805b5ca18e1c3c98 1 parent 05b9741
N.B.Prashanth authored
Showing with 63 additions and 44 deletions.
  1. +63 −44 New Scripts/CustomUrl.py
View
107 New Scripts/CustomUrl.py
@@ -1,48 +1,67 @@
-'''
-Created on Jul 2, 2012
+#!/usr/bin/env python
-@author: N.B
-'''
+from urlparse import urlparse
+from BeautifulSoup import BeautifulSoup
+import urllib2
+import re
-# Test module to handle custom urls with regex
def getLatestVersion(versions):
    """Return the entry of *versions* with the highest dotted numeric version.

    Each entry is split on '.' and compared as a tuple of integers, so
    '1.10' ranks above '1.9' and '2.0' above '1.99.99'.  The previous
    digit-weighting scheme (fixed powers of ten per component) broke as
    soon as any component reached two digits.  Entries that do not parse
    as dotted integers rank below every numeric version, matching the old
    behaviour of scoring them 0.  Raises ValueError if *versions* is
    empty (as max() on an empty sequence did before).
    """
    def _version_key(version):
        try:
            # Tuple comparison orders component-wise: (1, 10) > (1, 9).
            return (1,) + tuple(int(part) for part in version.split('.'))
        except ValueError:
            # Unparseable entries sort lowest.
            return (0,)
    # max() returns the first maximal element, the same tie-breaking as
    # the old sums.index(max(sums)).
    return max(versions, key=_version_key)
+
def cleanLinks(links, regex):
    """Filter *links* to those matching *regex* and return the newest one.

    Each matching link is kept with its last character stripped
    (presumably a trailing '/' from a directory listing -- TODO confirm
    against the pages being scraped), then the highest version among the
    survivors is chosen via getLatestVersion().
    """
    # Compile once instead of re.compile() on every iteration.
    pattern = re.compile(regex)
    matching = []
    for link in links:
        # Test the match explicitly.  The old code caught a bare except
        # around .group(0), which also silently hid genuine regex errors.
        if pattern.search(link) is not None:
            matching.append(link[:-1])
    return getLatestVersion(matching)
+
def parseRegex(path, regex):
    """Fetch the directory listing at *path* and return the latest
    version among its anchor hrefs that match *regex*.

    Network, parse and empty-listing errors propagate to the caller
    (handled in the __main__ loop).
    """
    data = urllib2.urlopen(path).read()
    soup = BeautifulSoup(data)
    # NOTE: the old soup.prettify() call discarded its return value and
    # had no effect, so it is gone.
    links = []
    for link in soup.findAll('a'):
        # Anchors without an href would make link['href'] raise KeyError;
        # use .get() and skip them instead.
        href = link.get('href')
        if href is not None:
            links.append(href)
    return cleanLinks(links, regex)
+
+if __name__=='__main__':
+
+ sampleUrl='http://pan.rebelbase.com/download/releases/((\d\.*)+)/source/pan-((\d\.*)+).tar.bz2'
+ parsedUrl=urlparse(sampleUrl)
+
+ downloadPath=parsedUrl.scheme + '://' + parsedUrl.netloc
+
+ for folder in parsedUrl.path[1:].split('/'):
+ if folder.find('(')>=0 or folder.find(')')>=0:
+ #print 'Working on ' + downloadPath + ' for ' + folder
+ try:
+ latestVer=parseRegex(downloadPath, folder)
+ downloadPath=downloadPath+'/'+latestVer
+ except:
+ print 'unable to parse Regex'
+ downloadPath=None
+ break
+ else:
+ downloadPath=downloadPath+'/'+folder
+
+ if downloadPath:
+ print downloadPath
-a='http://pan.rebelbase.com/download/releases/(.*)/source/pan-(.*).tar.bz2'
-b=a.split('://')[1].split('/')
-m=''
-import urllib2
-import BeautifulSoup
-
-for x in b:
- if x.find('(')>=0:
- prefix=None
- suffix=None
-# print m
-
- if(len(x[:x.find('(')])>0):
- prefix=x[:x.find('(')]
-
- if(len(x[x.find(')'):])>1):
- suffix=x[x.find(')')+1:]
-
-# print prefix, suffix
-
- data=urllib2.urlopen('http:/'+m).read()
- soup=BeautifulSoup.BeautifulSoup(data)
- soup.prettify()
- links=[]
- for link in soup.findAll('a'):
- href=str(link['href'])
- if prefix or suffix:
- if href.find(prefix)==0 or href.find(suffix)==len(href)-len(suffix):
- links.append(href)
- else:
- if href[0].isdigit():
- links.append(href[:len(href)-1])
-
- print links
-
- upstream=Upstream.Upstream()
- m=m+'/'+upstream.getLatestVersion(links)[:len(upstream.getLatestVersion(links))]
- else:
- m=m+'/'+x
Please sign in to comment.
Something went wrong with that request. Please try again.