Skip to content
Browse files

New sitemap generator (Fix #395)

  • Loading branch information...
1 parent 74f19e0 · commit 0751122eeb7fd87233257537a9f861a6db399e3d · @ralsina committed Mar 18, 2013
Showing 3 changed files with 46 additions and 2,171 deletions.
  1. +1 −0 CHANGES.txt
  2. +45 −42 nikola/plugins/task_sitemap/__init__.py
  3. +0 −2,129 nikola/plugins/task_sitemap/sitemap_gen.py
View
1 CHANGES.txt
@@ -9,6 +9,7 @@ Features
* New HIDE_UNTRANSLATED_POSTS option (does nothing yet)
* New EXTRA_HEAD_DATA option, which adds extra things in <HEAD> (Issue #385)
* Moved translations to transifex.com
+* New custom sitemap generator (Issue #395)
Bugfixes
--------
View
87 nikola/plugins/task_sitemap/__init__.py
@@ -22,72 +22,75 @@
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-from __future__ import print_function, absolute_import
+from __future__ import print_function, absolute_import, unicode_literals
+import codecs
+import datetime
import os
-import sys
-import tempfile
+try:
+ from urlparse import urljoin
+except ImportError:
+ from urllib.parse import urljoin
from nikola.plugin_categories import LateTask
from nikola.utils import config_changed
-from nikola.plugins.task_sitemap import sitemap_gen
+
+header = """<?xml version="1.0" encoding="UTF-8"?>
+<urlset
+ xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
+ http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
+"""
+
+url_format = """ <url>
+ <loc>{0}</loc>
+ <lastmod>{1}</lastmod>
+ <priority>0.5000</priority>
+ </url>
+"""
class Sitemap(LateTask):
- """Copy theme assets into output."""
+ """Generate google sitemap."""
name = "sitemap"
def gen_tasks(self):
- if sys.version_info[0] == 3:
- print("sitemap generation is not available for python 3")
- yield {
- 'basename': 'sitemap',
- 'name': 'sitemap',
- 'actions': [],
- }
- return
"""Generate Google sitemap."""
kw = {
"base_url": self.site.config["BASE_URL"],
"site_url": self.site.config["SITE_URL"],
"output_folder": self.site.config["OUTPUT_FOLDER"],
+ "mapped_extensions": self.site.config.get('MAPPED_EXTENSIONS', ['.html', '.htm'])
}
- output_path = os.path.abspath(kw['output_folder'])
- sitemap_path = os.path.join(output_path, "sitemap.xml.gz")
+ output_path = kw['output_folder']
+ sitemap_path = os.path.join(output_path, "sitemap.xml")
def sitemap():
- # Generate config
- config_data = """<?xml version="1.0" encoding="UTF-8"?>
- <site
- base_url="{0}"
- store_into="{1}"
- verbose="1" >
- <directory path="{2}" url="{3}" />
- <filter action="drop" type="wildcard" pattern="*~" />
- <filter action="drop" type="regexp" pattern="/\.[^/]*" />
- </site>""".format(kw["site_url"], sitemap_path, output_path,
- kw["base_url"])
- config_file = tempfile.NamedTemporaryFile(delete=False)
- config_file.write(config_data.encode('utf8'))
- config_file.close()
+ with codecs.open(sitemap_path, 'wb+', 'utf8') as outf:
+ output = kw['output_folder']
+ base_url = kw['base_url']
+ mapped_exts = kw['mapped_extensions']
+ outf.write(header)
+ for root, dirs, files in os.walk(output):
+ path = os.path.relpath(root, output)
+ path = path.replace(os.sep, '/')
+ lastmod = datetime.datetime.fromtimestamp(os.stat(root).st_mtime).isoformat()
+ outf.write(url_format.format(urljoin(base_url, path), lastmod))
+ for fname in files:
+ if os.path.splitext(fname)[-1] in mapped_exts:
+ real_path = os.path.join(root, fname)
+ path = os.path.relpath(real_path, output)
+ path = path.replace(os.sep, '/')
+ lastmod = datetime.datetime.fromtimestamp(os.stat(real_path).st_mtime).isoformat()
+ outf.write(url_format.format(urljoin(base_url, path), lastmod))
- # Generate sitemap
- sitemap = sitemap_gen.CreateSitemapFromFile(config_file.name, True)
- if not sitemap:
- sitemap_gen.output.Log('Configuration file errors -- exiting.',
- 0)
- else:
- sitemap.Generate()
- sitemap_gen.output.Log('Number of errors: {0}'.format(
- sitemap_gen.output.num_errors), 1)
- sitemap_gen.output.Log('Number of warnings: {0}'.format(
- sitemap_gen.output.num_warns), 1)
- os.unlink(config_file.name)
+ outf.write("</urlset>")
yield {
"basename": "sitemap",
- "name": os.path.join(kw['output_folder'], "sitemap.xml.gz"),
+ "name": sitemap_path,
"targets": [sitemap_path],
"actions": [(sitemap,)],
"uptodate": [config_changed(kw)],
View
2,129 nikola/plugins/task_sitemap/sitemap_gen.py
0 additions, 2,129 deletions not shown because the diff is too large. Please use a local Git client to view these changes.

0 comments on commit 0751122

Please sign in to comment.
Something went wrong with that request. Please try again.