/
main.py
70 lines (61 loc) · 1.98 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from __future__ import print_function
import io
import os
import time
from .processor import Processor
def _output_name(href):
    """Return the file name under which the stylesheet at ``href`` is saved.

    The name is the last path segment of ``href`` with any ``#fragment`` and
    ``?query`` suffix removed, so cache-busting URLs such as
    ``style.css?v=3`` do not produce names containing ``?`` (which is not a
    valid file-name character on Windows). If no usable segment is left
    (empty href), ``'stylesheet.css'`` is returned so the caller never tries
    to open the output directory itself as a file.
    """
    path = href.split('#', 1)[0].split('?', 1)[0]
    name = path.rstrip('/').split('/')[-1]
    return name or 'stylesheet.css'


def run(args):
    """Process ``args.url`` and report/write the before and after CSS.

    ``args`` is the parsed command line; the attributes read here are
    ``verbose``, ``phantomjs_path``, ``phantomjs``, ``url`` and
    ``outputdir``.

    For every entry in the processor's ``inlines`` the URL, line number and
    the before/after text are printed. For every entry in ``links`` two
    files are written into ``args.outputdir``: ``<name>`` holding the
    ``after`` text and ``before_<name>`` holding the ``before`` text,
    followed by a short size summary on stdout.

    Returns 0.
    """
    options = {'debug': args.verbose}
    # An explicit executable path takes precedence over the bare flag.
    if args.phantomjs_path:
        options['phantomjs'] = args.phantomjs_path
    elif args.phantomjs:
        options['phantomjs'] = True

    p = Processor(**options)
    t0 = time.time()
    p.process(args.url)
    t1 = time.time()
    print('TOTAL TIME ', t1 - t0)

    for inline in p.inlines:
        print('ON', inline.url)
        print('AT line', inline.line)
        print('BEFORE '.ljust(79, '-'))
        print(inline.before)
        print('AFTER '.ljust(79, '-'))
        print(inline.after)
        print()

    output_dir = args.outputdir
    if not os.path.isdir(output_dir):
        # makedirs (not mkdir) so a nested --outputdir whose parents do not
        # exist yet is created instead of raising.
        os.makedirs(output_dir)

    for link in p.links:
        print('FOR', link.href)
        orig_name = _output_name(link.href)
        before_name = 'before_' + orig_name
        # Explicit UTF-8: the locale default encoding may be unable to
        # represent non-ASCII characters found in stylesheets.
        with io.open(os.path.join(output_dir, orig_name), 'w',
                     encoding='utf-8') as f:
            f.write(link.after)
        with io.open(os.path.join(output_dir, before_name), 'w',
                     encoding='utf-8') as f:
            f.write(link.before)
        print('Files written to', output_dir)
        print()
        print(
            '(from %d to %d saves %d)' %
            (len(link.before), len(link.after),
             len(link.before) - len(link.after))
        )

    return 0
def main():
    """Parse the command line and pass the result to ``run``.

    Accepted arguments: a positional ``url``, plus the options
    ``--outputdir`` (default ``./output``), ``-v``/``--verbose``,
    ``--phantomjs`` and ``--phantomjs-path`` (default empty string).

    Returns whatever ``run`` returns.
    """
    # Kept as a function-local import, as in the original.
    from argparse import ArgumentParser

    cli = ArgumentParser()
    cli.add_argument(
        'url',
        type=str,
        help='URL to process',
    )
    cli.add_argument(
        '--outputdir',
        action='store',
        default='./output',
        help='directory where to put output (default ./output)',
    )
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='increase output verbosity',
    )
    cli.add_argument(
        '--phantomjs',
        action='store_true',
        help='Use PhantomJS to download the source',
    )
    cli.add_argument(
        '--phantomjs-path',
        action='store',
        default='',
        help='Where is the phantomjs executable',
    )

    return run(cli.parse_args())