This repository has been archived by the owner on Oct 25, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
template.py
executable file
·103 lines (81 loc) · 2.97 KB
/
template.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/python
from BeautifulSoup import BeautifulSoup
from mako.template import Template
from optparse import OptionParser
from urlparse import urljoin
import logging
import re
import os
import pycurl
import time
import common
# Regexp to extract the product name and version from a tarball URL found
# in the download page, e.g. ".../foo-1.2.tar.gz" -> ('foo', '1.2').
LINK_RE = re.compile(r'^.*/(?P<product>.+)-(?P<version>.+)\.tar\.gz$')
class ForgeProduct(object):
    """A forge product and the set of release versions discovered for it.

    Attributes:
      name: product name string.
      latest: ForgeVersion for the first release seen, or None.
      others: list of ForgeVersion for every other release seen.
    """

    def __init__(self, name):
        self.name = name
        self.latest = None
        self.others = []

    def __repr__(self):
        other_reprs = ';\n'.join('%r' % version for version in self.others)
        return ('name: %s;\nlatest: {%r};\nothers: {%s};\n' %
                (self.name, self.latest, other_reprs))
class ForgeVersion(object):
    """A single downloadable release of a product.

    Attributes:
      url: absolute URL of the release tarball.
      version: version string parsed from the tarball file name.
    """

    def __init__(self, url, version):
        self.url = url
        self.version = version

    def __repr__(self):
        return 'url: %s; version: %s' % (self.url, self.version)
def ParseForgeHTML(page_fn, page_url):
    """Extract product/version data from a saved forge HTML page.

    Args:
      page_fn: path of the HTML file on disk.
      page_url: URL the page was fetched from; used to absolutize links.

    Returns:
      dict mapping product name -> ForgeProduct. The first matching link
      seen for a product becomes its .latest; later links are appended to
      .others (appears to assume the page lists the newest release first
      -- TODO confirm against the forge page layout).
    """
    with open(page_fn, 'r') as fd:
        soup = BeautifulSoup(fd)
    products = {}
    for anchor in soup.findAll('a'):
        href = anchor['href']
        match = LINK_RE.match(href)
        if not match:
            continue
        name = match.group('product')
        release = ForgeVersion(urljoin(page_url, href),
                               match.group('version'))
        if name in products:
            products[name].others.append(release)
        else:
            product = ForgeProduct(name)
            product.latest = release
            products[name] = product
    return products
def main():
    """Scrape a forge download page and render a Mako template from it."""
    # Parsing command line.
    parser = common.CreateOptionsParser()
    parser.add_option('-p', '--forge_page', dest='forge_page',
                      help='forge page to scrape.')
    parser.add_option('-i', '--input', dest='input_filename',
                      help='read data from FILENAME')
    parser.add_option('-o', '--output', dest='output_filename',
                      help='write data to FILENAME')
    options = common.ApplyOptionsParser(parser)
    # BUG FIX: the original did `raise Error(...)` with `Error` undefined,
    # which would surface as a NameError instead of the intended message.
    # parser.error() is the optparse idiom: prints the message and exits.
    if not options.forge_page:
        parser.error('You need to specify a forge page (--forge_page).')
    if not options.input_filename:
        parser.error('You need to specify an input filename (--input).')
    # Download the forge page if necessary (honors the offline/cache flags
    # that common.ApplyOptionsParser presumably sets up -- see common.py).
    forge_page_fn = common.CacheCurl('scrape.html',
                                     options.forge_page,
                                     options.online,
                                     options.cache_dir)
    # Parse the forge page.
    data = ParseForgeHTML(forge_page_fn, options.forge_page)
    # Render the template. (The original also bound data['oasis'] and
    # data['oasis-doc'] to unused locals; dropped as dead code.)
    mytemplate = Template(filename=options.input_filename)
    output = mytemplate.render(oasis=data['oasis'], oasis_doc=data['oasis-doc'])
    if options.output_filename:
        # open() instead of the Python-2-only file() builtin; same behavior.
        with open(options.output_filename, 'w') as fd:
            fd.write(output)
    else:
        print(output)
# Script entry point.
if __name__ == '__main__':
    main()