/
website.py
133 lines (110 loc) · 4.54 KB
/
website.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from datetime import datetime
from twisted.web import resource, static
from twisted.application.service import IServiceCollection
from scrapy.utils.misc import load_object
from .interfaces import IPoller, IEggStorage, ISpiderScheduler
from . import webservice
class Root(resource.Resource):
def __init__(self, config, app):
resource.Resource.__init__(self)
self.debug = config.getboolean('debug', False)
self.runner = config.get('runner')
logsdir = config.get('logs_dir')
itemsdir = config.get('items_dir')
self.app = app
self.putChild('', Home(self))
self.putChild('logs', static.File(logsdir, 'text/plain'))
self.putChild('items', static.File(itemsdir, 'text/plain'))
self.putChild('jobs', Jobs(self))
services = config.items('services', ())
for servName, servClsName in services:
servCls = load_object(servClsName)
self.putChild(servName, servCls(self))
self.update_projects()
def update_projects(self):
self.poller.update_projects()
self.scheduler.update_projects()
@property
def launcher(self):
app = IServiceCollection(self.app, self.app)
return app.getServiceNamed('launcher')
@property
def scheduler(self):
return self.app.getComponent(ISpiderScheduler)
@property
def eggstorage(self):
return self.app.getComponent(IEggStorage)
@property
def poller(self):
return self.app.getComponent(IPoller)
class Home(resource.Resource):
def __init__(self, root):
resource.Resource.__init__(self)
self.root = root
def render_GET(self, txrequest):
vars = {
'projects': ', '.join(self.root.scheduler.list_projects()),
}
return """
<html>
<head><title>Scrapyd</title></head>
<body>
<h1>Scrapyd</h1>
<p>Available projects: <b>%(projects)s</b></p>
<ul>
<li><a href="/jobs">Jobs</a></li>
<li><a href="/items/">Items</li>
<li><a href="/logs/">Logs</li>
<li><a href="http://doc.scrapy.org/en/latest/topics/scrapyd.html">Documentation</a></li>
</ul>
<h2>How to schedule a spider?</h2>
<p>To schedule a spider you need to use the API (this web UI is only for
monitoring)</p>
<p>Example using <a href="http://curl.haxx.se/">curl</a>:</p>
<p><code>curl http://localhost:6800/schedule.json -d project=default -d spider=somespider</code></p>
<p>For more information about the API, see the <a href="http://doc.scrapy.org/en/latest/topics/scrapyd.html">Scrapyd documentation</a></p>
</body>
</html>
""" % vars
class Jobs(resource.Resource):
def __init__(self, root):
resource.Resource.__init__(self)
self.root = root
def render(self, txrequest):
s = "<html><head><title>Scrapyd</title></title>"
s += "<body>"
s += "<h1>Jobs</h1>"
s += "<p><a href='..'>Go back</a></p>"
s += "<table border='1'>"
s += "<th>Project</th><th>Spider</th><th>Job</th><th>PID</th><th>Runtime</th><th>Log</th><th>Items</th>"
s += "<tr><th colspan='7' style='background-color: #ddd'>Pending</th></tr>"
for project, queue in self.root.poller.queues.items():
for m in queue.list():
s += "<tr>"
s += "<td>%s</td>" % project
s += "<td>%s</td>" % str(m['name'])
s += "<td>%s</td>" % str(m['_job'])
s += "</tr>"
s += "<tr><th colspan='7' style='background-color: #ddd'>Running</th></tr>"
for p in self.root.launcher.processes.values():
s += "<tr>"
for a in ['project', 'spider', 'job', 'pid']:
s += "<td>%s</td>" % getattr(p, a)
s += "<td>%s</td>" % (datetime.now() - p.start_time)
s += "<td><a href='/logs/%s/%s/%s.log'>Log</a></td>" % (p.project, p.spider, p.job)
s += "<td><a href='/items/%s/%s/%s.jl'>Items</a></td>" % (p.project, p.spider, p.job)
s += "</tr>"
s += "<tr><th colspan='7' style='background-color: #ddd'>Finished</th></tr>"
for p in self.root.launcher.finished:
s += "<tr>"
for a in ['project', 'spider', 'job']:
s += "<td>%s</td>" % getattr(p, a)
s += "<td></td>"
s += "<td>%s</td>" % (p.end_time - p.start_time)
s += "<td><a href='/logs/%s/%s/%s.log'>Log</a></td>" % (p.project, p.spider, p.job)
s += "<td><a href='/items/%s/%s/%s.jl'>Items</a></td>" % (p.project, p.spider, p.job)
s += "</tr>"
s += "</table>"
s += "</body>"
s += "</html>"
return s