Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using
pagination). Patch from Julian Bez.

The docs patch here could probably do with some rewording.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information
mtredinnick committed Jul 26, 2008
1 parent 12ec9e1 commit 516aa7b
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 9 deletions.
3 changes: 2 additions & 1 deletion AUTHORS
Expand Up @@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better:
Esdras Beleza <linux@esdrasbeleza.com>
Chris Bennett <chrisrbennett@yahoo.com>
James Bennett
Ben Godfrey <http://aftnn.org>
Julian Bez
Arvis Bickovskis <viestards.lists@gmail.com>
Paul Bissex <http://e-scribe.com/>
Simon Blanchard
Expand Down Expand Up @@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better:
glin@seznam.cz
martin.glueck@gmail.com
Artyom Gnilov <boobsd@gmail.com>
Ben Godfrey <http://aftnn.org>
GomoX <gomo@datafull.com>
Guilherme Mesquita Gondim <semente@taurinus.org>
Mario Gonzalez <gonzalemario@gmail.com>
Expand Down
16 changes: 13 additions & 3 deletions django/contrib/sitemaps/__init__.py
@@ -1,4 +1,4 @@
from django.core import urlresolvers
from django.core import urlresolvers, paginator
import urllib

PING_URL = "http://www.google.com/webmasters/tools/ping"
Expand Down Expand Up @@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL):
urllib.urlopen("%s?%s" % (ping_url, params))

class Sitemap:
# This limit is defined by Google. See the index documentation at
# http://sitemaps.org/protocol.php#index.
limit = 50000

def __get(self, name, obj, default=None):
try:
attr = getattr(self, name)
Expand All @@ -49,11 +53,17 @@ def items(self):
def location(self, obj):
return obj.get_absolute_url()

def get_urls(self):
def _get_paginator(self):
if not hasattr(self, "paginator"):
self.paginator = paginator.Paginator(self.items(), self.limit)
return self.paginator
paginator = property(_get_paginator)

def get_urls(self, page=1):
from django.contrib.sites.models import Site
current_site = Site.objects.get_current()
urls = []
for item in self.items():
for item in self.paginator.page(page).object_list:
loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
url_info = {
'location': loc,
Expand Down
24 changes: 19 additions & 5 deletions django/contrib/sitemaps/views.py
Expand Up @@ -3,14 +3,22 @@
from django.contrib.sites.models import Site
from django.core import urlresolvers
from django.utils.encoding import smart_str
from django.core.paginator import EmptyPage, PageNotAnInteger

def index(request, sitemaps):
current_site = Site.objects.get_current()
sites = []
protocol = request.is_secure() and 'https' or 'http'
for section in sitemaps.keys():
for section, site in sitemaps.items():
if callable(site):
pages = site().paginator.num_pages
else:
pages = site.paginator.num_pages
sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
if pages > 1:
for page in range(2, pages+1):
sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
return HttpResponse(xml, mimetype='application/xml')

Expand All @@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None):
maps.append(sitemaps[section])
else:
maps = sitemaps.values()
page = request.GET.get("p", 1)
for site in maps:
if callable(site):
urls.extend(site().get_urls())
else:
urls.extend(site.get_urls())
try:
if callable(site):
urls.extend(site().get_urls(page))
else:
urls.extend(site.get_urls(page))
except EmptyPage:
raise Http404("Page %s empty" % page)
except PageNotAnInteger:
raise Http404("No page '%s'" % page)
xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
return HttpResponse(xml, mimetype='application/xml')
4 changes: 4 additions & 0 deletions docs/sitemaps.txt
Expand Up @@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references
both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
classes and the ``sitemaps`` dict don't change at all.

If one of your sitemaps is going to have more than 50,000 URLs, you should
create an index file. That sitemap will then be split into pages, and the
index will list each page as a separate entry.

Pinging Google
==============

Expand Down

0 comments on commit 516aa7b

Please sign in to comment.