Permalink
Browse files

update

  • Loading branch information...
sahib committed Jun 22, 2012
1 parent 486d6ef commit 20ec22ffc15a310544015705815cdbe0288ec453
Showing with 1,322 additions and 1,252 deletions.
  1. +42 −42 _modules/{ → archive}/cli/cmdparser.html
  2. +28 −34 _modules/{ → archive}/cmanager/crawlmanager.html
  3. +44 −61 _modules/{ → archive}/cmanager/intervalmanager.html
  4. +22 −22 _modules/{ → archive}/config/reader.html
  5. +30 −30 _modules/{ → archive}/crawler/cleaner.html
  6. +42 −43 _modules/{ → archive}/crawler/crawljob.html
  7. +32 −33 _modules/{ → archive}/crawler/dbgen.html
  8. +28 −26 _modules/{ → archive}/crawler/extractor.html
  9. +54 −52 _modules/{ → archive}/crawler/filter.html
  10. +34 −33 _modules/{ → archive}/crawler/git.html
  11. +21 −21 _modules/{ → archive}/crawler/rsync.html
  12. +28 −39 _modules/{ → archive}/crawler/wget.html
  13. +22 −22 _modules/{ → archive}/crawler/xmlgen.html
  14. +22 −28 _modules/{ → archive}/crawler/xmlreader.html
  15. +25 −25 _modules/{ → archive}/dbrecover/pickle_recover.html
  16. +27 −37 _modules/{ → archive}/dbrecover/recover.html
  17. +26 −26 _modules/{ → archive}/dbrecover/repair.html
  18. +28 −36 _modules/{ → archive}/dbrecover/xml_recover.html
  19. +30 −32 _modules/{ → archive}/javadapter/server.html
  20. +28 −24 _modules/{ → archive}/util/filelock.html
  21. +23 −20 _modules/{ → archive}/util/files.html
  22. +39 −27 _modules/{ → archive}/util/paths.html
  23. +26 −40 _modules/{ → archive}/util/times.html
  24. +23 −23 _modules/index.html
  25. +1 −1 _sources/cleaner.txt
  26. +1 −1 _sources/cli.txt
  27. +2 −2 _sources/config.txt
  28. +2 −2 _sources/crawlstuff.txt
  29. +1 −1 _sources/dbgen.txt
  30. +1 −1 _sources/extractor.txt
  31. +1 −1 _sources/filter.txt
  32. +1 −1 _sources/git.txt
  33. +6 −3 _sources/index.txt
  34. +2 −2 _sources/intervalmanager.txt
  35. +46 −3 _sources/intro.txt
  36. +16 −15 _sources/javadapter.txt
  37. +6 −6 _sources/recover.txt
  38. +2 −2 _sources/rsync.txt
  39. +4 −4 _sources/utils.txt
  40. +1 −1 _sources/wget.txt
  41. +2 −2 _sources/xml2metadata.txt
  42. +8 −8 cleaner.html
  43. +16 −16 cli.html
  44. +4 −4 config.html
  45. +24 −24 crawlstuff.html
  46. +19 −19 dbgen.html
  47. +7 −7 extractor.html
  48. +8 −8 filter.html
  49. +142 −140 genindex.html
  50. +25 −25 git.html
  51. +23 −6 index.html
  52. +36 −36 intervalmanager.html
  53. +48 −3 intro.html
  54. +29 −28 javadapter.html
  55. BIN objects.inv
  56. +15 −25 py-modindex.html
  57. +23 −23 recover.html
  58. +3 −3 rsync.html
  59. +1 −1 searchindex.js
  60. +56 −36 utils.html
  61. +8 −8 wget.html
  62. +8 −8 xml2metadata.html

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -7,38 +7,38 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
- <title>cmanager.crawlmanager &mdash; Webarchiv 1.0 documentation</title>
+ <title>archive.cmanager.crawlmanager &mdash; Webarchiv 1.0 documentation</title>
- <link rel="stylesheet" href="../../_static/nature.css" type="text/css" />
- <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+ <link rel="stylesheet" href="../../../_static/nature.css" type="text/css" />
+ <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
- URL_ROOT: '../../',
+ URL_ROOT: '../../../',
VERSION: '1.0',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true
};
</script>
- <script type="text/javascript" src="../../_static/jquery.js"></script>
- <script type="text/javascript" src="../../_static/underscore.js"></script>
- <script type="text/javascript" src="../../_static/doctools.js"></script>
- <link rel="top" title="Webarchiv 1.0 documentation" href="../../index.html" />
- <link rel="up" title="Module code" href="../index.html" />
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
+ <link rel="top" title="Webarchiv 1.0 documentation" href="../../../index.html" />
+ <link rel="up" title="Module code" href="../../index.html" />
</head>
<body>
<div class="related">
<h3>Navigation</h3>
<ul>
<li class="right" style="margin-right: 10px">
- <a href="../../genindex.html" title="General Index"
+ <a href="../../../genindex.html" title="General Index"
accesskey="I">index</a></li>
<li class="right" >
- <a href="../../py-modindex.html" title="Python Module Index"
+ <a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
- <li><a href="../../index.html">Webarchiv 1.0 documentation</a> &raquo;</li>
- <li><a href="../index.html" accesskey="U">Module code</a> &raquo;</li>
+ <li><a href="../../../index.html">Webarchiv 1.0 documentation</a> &raquo;</li>
+ <li><a href="../../index.html" accesskey="U">Module code</a> &raquo;</li>
</ul>
</div>
@@ -47,7 +47,7 @@ <h3>Navigation</h3>
<div class="bodywrapper">
<div class="body">
- <h1>Source code for cmanager.crawlmanager</h1><div class="highlight"><pre>
+ <h1>Source code for archive.cmanager.crawlmanager</h1><div class="highlight"><pre>
<span class="c">#!/usr/bin/env python</span>
<span class="c"># encoding: utf-8</span>
@@ -58,9 +58,10 @@ <h1>Source code for cmanager.crawlmanager</h1><div class="highlight"><pre>
<span class="n">__author__</span> <span class="o">=</span> <span class="s">&#39;Christoph Piechula&#39;</span>
<span class="kn">import</span> <span class="nn">multiprocessing.pool</span> <span class="kn">as</span> <span class="nn">mpool</span>
-<span class="kn">import</span> <span class="nn">util.files</span> <span class="kn">as</span> <span class="nn">utl</span>
-<span class="kn">import</span> <span class="nn">config.reader</span> <span class="kn">as</span> <span class="nn">config</span>
-<span class="kn">import</span> <span class="nn">crawler.crawljob</span> <span class="kn">as</span> <span class="nn">job</span>
+<span class="kn">import</span> <span class="nn">archive.util.files</span> <span class="kn">as</span> <span class="nn">utl</span>
+<span class="kn">import</span> <span class="nn">archive.config.reader</span> <span class="kn">as</span> <span class="nn">config</span>
+<span class="kn">import</span> <span class="nn">archive.crawler.crawljob</span> <span class="kn">as</span> <span class="nn">job</span>
+
<span class="kn">import</span> <span class="nn">threading</span>
<span class="kn">import</span> <span class="nn">logging</span>
@@ -89,7 +90,7 @@ <h1>Source code for cmanager.crawlmanager</h1><div class="highlight"><pre>
<span class="k">return</span> <span class="n">ident</span>
-<div class="viewcode-block" id="CrawlerManager"><a class="viewcode-back" href="../../crawlstuff.html#cmanager.crawlmanager.CrawlerManager">[docs]</a><span class="k">class</span> <span class="nc">CrawlerManager</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
+<div class="viewcode-block" id="CrawlerManager"><a class="viewcode-back" href="../../../crawlstuff.html#archive.cmanager.crawlmanager.CrawlerManager">[docs]</a><span class="k">class</span> <span class="nc">CrawlerManager</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Crawljob manager, threadpool which launches a pool of</span>
<span class="sd"> threads</span>
@@ -103,7 +104,7 @@ <h1>Source code for cmanager.crawlmanager</h1><div class="highlight"><pre>
<span class="bp">self</span><span class="o">.</span><span class="n">__urls</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">urls</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__pool</span> <span class="o">=</span> <span class="n">mpool</span><span class="o">.</span><span class="n">ThreadPool</span><span class="p">(</span><span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s">&#39;crawler.maxInst&#39;</span><span class="p">))</span>
-<div class="viewcode-block" id="CrawlerManager.start"><a class="viewcode-back" href="../../crawlstuff.html#cmanager.crawlmanager.CrawlerManager.start">[docs]</a> <span class="k">def</span> <span class="nf">start</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+<div class="viewcode-block" id="CrawlerManager.start"><a class="viewcode-back" href="../../../crawlstuff.html#archive.cmanager.crawlmanager.CrawlerManager.start">[docs]</a> <span class="k">def</span> <span class="nf">start</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Starts threadpool with max number of instances</span>
@@ -118,14 +119,14 @@ <h1>Source code for cmanager.crawlmanager</h1><div class="highlight"><pre>
<span class="bp">self</span><span class="o">.</span><span class="n">__done_callback</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__done_callback</span> <span class="o">=</span> <span class="bp">None</span>
</div>
-<div class="viewcode-block" id="CrawlerManager.register_done"><a class="viewcode-back" href="../../crawlstuff.html#cmanager.crawlmanager.CrawlerManager.register_done">[docs]</a> <span class="k">def</span> <span class="nf">register_done</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">):</span>
+<div class="viewcode-block" id="CrawlerManager.register_done"><a class="viewcode-back" href="../../../crawlstuff.html#archive.cmanager.crawlmanager.CrawlerManager.register_done">[docs]</a> <span class="k">def</span> <span class="nf">register_done</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Register method for callback function which is</span>
<span class="sd"> triggered after work is done</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__done_callback</span> <span class="o">=</span> <span class="n">func</span>
</div>
-<div class="viewcode-block" id="CrawlerManager.shutdown"><a class="viewcode-back" href="../../crawlstuff.html#cmanager.crawlmanager.CrawlerManager.shutdown">[docs]</a> <span class="k">def</span> <span class="nf">shutdown</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
+<div class="viewcode-block" id="CrawlerManager.shutdown"><a class="viewcode-back" href="../../../crawlstuff.html#archive.cmanager.crawlmanager.CrawlerManager.shutdown">[docs]</a> <span class="k">def</span> <span class="nf">shutdown</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Shuts down all currently running crawljobs</span>
<span class="sd"> and joins/closes the pool</span>
@@ -134,14 +135,7 @@ <h1>Source code for cmanager.crawlmanager</h1><div class="highlight"><pre>
<span class="n">job</span><span class="o">.</span><span class="n">shutdown</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">__pool</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
- <span class="bp">self</span><span class="o">.</span><span class="n">__pool</span><span class="o">.</span><span class="n">join</span><span class="p">()</span>
-
-<span class="c">###########################################################################</span>
-<span class="c"># unittest #</span>
-<span class="c">###########################################################################</span>
-</div></div>
-<span class="k">if</span> <span class="n">__name__</span> <span class="o">==</span> <span class="s">&#39;__main__&#39;</span><span class="p">:</span>
- <span class="n">cm</span> <span class="o">=</span> <span class="n">CrawlerManager</span><span class="p">(</span><span class="n">utl</span><span class="o">.</span><span class="n">unique_items_from_file</span><span class="p">(</span><span class="s">&#39;url.txt&#39;</span><span class="p">))</span>
+ <span class="bp">self</span><span class="o">.</span><span class="n">__pool</span><span class="o">.</span><span class="n">join</span><span class="p">()</span></div></div>
</pre></div>
</div>
@@ -151,7 +145,7 @@ <h1>Source code for cmanager.crawlmanager</h1><div class="highlight"><pre>
<div class="sphinxsidebarwrapper">
<div id="searchbox" style="display: none">
<h3>Quick search</h3>
- <form class="search" action="../../search.html" method="get">
+ <form class="search" action="../../../search.html" method="get">
<input type="text" name="q" />
<input type="submit" value="Go" />
<input type="hidden" name="check_keywords" value="yes" />
@@ -170,13 +164,13 @@ <h3>Quick search</h3>
<h3>Navigation</h3>
<ul>
<li class="right" style="margin-right: 10px">
- <a href="../../genindex.html" title="General Index"
+ <a href="../../../genindex.html" title="General Index"
>index</a></li>
<li class="right" >
- <a href="../../py-modindex.html" title="Python Module Index"
+ <a href="../../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
- <li><a href="../../index.html">Webarchiv 1.0 documentation</a> &raquo;</li>
- <li><a href="../index.html" >Module code</a> &raquo;</li>
+ <li><a href="../../../index.html">Webarchiv 1.0 documentation</a> &raquo;</li>
+ <li><a href="../../index.html" >Module code</a> &raquo;</li>
</ul>
</div>
<div class="footer">
Oops, something went wrong.

0 comments on commit 20ec22f

Please sign in to comment.