Skip to content

Commit

Permalink
feat(autosplit): enable auto-split on generate mode
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoPennec committed Nov 19, 2019
1 parent c841cbe commit 8860eaa
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 22 deletions.
10 changes: 10 additions & 0 deletions README.md
Expand Up @@ -14,6 +14,7 @@

- Module based on the awesome **[sitemap.js](https://github.com/ekalinin/sitemap.js) package** ❤️
- Create **sitemap** or **sitemap index**
- Auto-split over 50,000 URLs
- Automatically add the static routes to each sitemap
- Works with **all modes** (universal, spa, generate)
- For **Nuxt 2.x** and higher
Expand Down Expand Up @@ -271,6 +272,15 @@ Add a trailing slash to each route URL (eg. `/page/1` => `/page/1/`)

> **notice:** To avoid [duplicate content](https://support.google.com/webmasters/answer/66359) detection from crawlers, you have to configure an HTTP 301 redirect between the 2 URLs (see [redirect-module](https://github.com/nuxt-community/redirect-module) or [nuxt-trailingslash-module](https://github.com/WilliamDASILVA/nuxt-trailingslash-module)).
### `size` (optional) - number

- Default: 50,000

Set the maximum number of URLs contained in each sitemap.
If your site contains more URLs than this maximum, the module will split them across multiple sitemaps and create a sitemap index that references them.

For more information about the default limit of 50,000 URLs, please read the official [Sitemap.org - FAQ](https://www.sitemaps.org/faq.html).

### `defaults` (optional) - object

- Default: `{}`
Expand Down
9 changes: 6 additions & 3 deletions lib/builder.js
Expand Up @@ -3,6 +3,7 @@ const { join } = require('path')
const { URL } = require('url')

const isHTTPS = require('is-https')
const chunk = require('lodash.chunk')
const sm = require('sitemap')

const logger = require('./logger')
Expand All @@ -14,7 +15,7 @@ const logger = require('./logger')
* @param {Array} routes
* @param {String} base
* @param {Request} req
* @returns {Sitemap} sitemap instance
* @returns {Sitemap[]} List of sitemap instance
*/
function createSitemap(options, routes, base = null, req = null) {
const sitemapConfig = {}
Expand Down Expand Up @@ -64,8 +65,10 @@ function createSitemap(options, routes, base = null, req = null) {
// Set urls and ensure they are unique
sitemapConfig.urls = [...new Set(routes)]

// Create sitemap instance
return sm.createSitemap(sitemapConfig)
// Create list of sitemap instance splitted by the limit number of URLs per sitemap
const chunks = options.size > 0 ? chunk(sitemapConfig.urls, options.size) : [sitemapConfig.urls]
const sitemaps = chunks.map(urls => sm.createSitemap({ ...sitemapConfig, urls }))
return sitemaps
}

/**
Expand Down
69 changes: 54 additions & 15 deletions lib/generator.js
Expand Up @@ -45,27 +45,62 @@ async function generateSitemap(options, globalCache, nuxtInstance) {
// Generate sitemap.xml
const routes = await cache.routes.get('routes')
const base = nuxtInstance.options.router.base
const sitemap = await createSitemap(options, routes, base)
const xmlFilePath = path.join(nuxtInstance.options.generate.dir, options.path)
fs.outputFileSync(xmlFilePath, sitemap.toXML())
logger.success('Generated', getPathname(nuxtInstance.options.generate.dir, xmlFilePath))
const sitemaps = await createSitemap(options, routes, base)
const isAutoSplit = sitemaps.length > 1
const splitSitemaps = []

// Generate sitemap.xml.gz
if (options.gzip) {
const gzipFilePath = path.join(nuxtInstance.options.generate.dir, options.pathGzip)
fs.outputFileSync(gzipFilePath, sitemap.toGzip())
logger.success('Generated', getPathname(nuxtInstance.options.generate.dir, gzipFilePath))
sitemaps.forEach((sitemap, index) => {
let pathIndexed
let pathGzipIndexed

if (isAutoSplit) {
// Configure a split sitemap with an incrementally indexed filename
pathIndexed = options.path.replace('.xml', `-${index + 1}.xml`)
pathGzipIndexed = options.pathGzip.replace('.xml.gz', `-${index + 1}.xml.gz`)
splitSitemaps.push({
path: pathIndexed,
gzip: options.gzip
})
}

// Generate sitemap.xml
const xmlFilePath = path.join(nuxtInstance.options.generate.dir, pathIndexed || options.path)
fs.outputFileSync(xmlFilePath, sitemap.toXML())
logger.success('Generated', getPathname(nuxtInstance.options.generate.dir, xmlFilePath))

// Generate sitemap.xml.gz
if (options.gzip) {
const gzipFilePath = path.join(nuxtInstance.options.generate.dir, pathGzipIndexed || options.pathGzip)
fs.outputFileSync(gzipFilePath, sitemap.toGzip())
logger.success('Generated', getPathname(nuxtInstance.options.generate.dir, gzipFilePath))
}
})

if (isAutoSplit) {
// Create sitemap index only if auto-split is detected
generateSitemapIndex(
{
path: options.path,
hostname: options.hostname,
gzip: options.gzip,
sitemaps: splitSitemaps
},
globalCache,
nuxtInstance,
false
)
}
}

/**
* Generate a sitemapindex file
*
* @param {Object} options
* @param {Object} globalCache
* @param {Nuxt} nuxtInstance
* @param {Object} options
* @param {Object} globalCache
* @param {Nuxt} nuxtInstance
* @param {boolean} cascading
*/
async function generateSitemapIndex(options, globalCache, nuxtInstance) {
async function generateSitemapIndex(options, globalCache, nuxtInstance, cascading = true) {
// Init options
options = setDefaultSitemapIndexOptions(options)

Expand All @@ -84,8 +119,12 @@ async function generateSitemapIndex(options, globalCache, nuxtInstance) {
logger.success('Generated', getPathname(nuxtInstance.options.generate.dir, gzipFilePath))
}

// Generate linked sitemaps
await Promise.all(options.sitemaps.map(sitemapOptions => generateSitemaps(sitemapOptions, globalCache, nuxtInstance)))
if (cascading) {
// Generate linked sitemaps
await Promise.all(
options.sitemaps.map(sitemapOptions => generateSitemaps(sitemapOptions, globalCache, nuxtInstance))
)
}
}

/**
Expand Down
6 changes: 4 additions & 2 deletions lib/middleware.js
Expand Up @@ -65,7 +65,8 @@ function registerSitemap(options, globalCache, nuxtInstance) {
try {
// Init sitemap
const routes = await cache.routes.get('routes')
const gzip = await createSitemap(options, routes, base, req).toGzip()
// TODO: support auto-split feature
const gzip = await createSitemap(options, routes, base, req)[0].toGzip()
// Send http response
res.setHeader('Content-Type', 'application/x-gzip')
res.setHeader('Content-Encoding', 'gzip')
Expand All @@ -90,7 +91,8 @@ function registerSitemap(options, globalCache, nuxtInstance) {
try {
// Init sitemap
const routes = await cache.routes.get('routes')
const xml = await createSitemap(options, routes, base, req).toXML()
// TODO: support auto-split feature
const xml = await createSitemap(options, routes, base, req)[0].toXML()
// Send http response
res.setHeader('Content-Type', 'application/xml')
res.end(xml)
Expand Down
1 change: 1 addition & 0 deletions lib/options.js
Expand Up @@ -24,6 +24,7 @@ function setDefaultSitemapOptions(options, nuxtInstance) {
xmlNs: undefined,
xslUrl: undefined,
trailingSlash: false,
size: 50000,
defaults: {}
}

Expand Down
1 change: 1 addition & 0 deletions package.json
Expand Up @@ -53,6 +53,7 @@
"consola": "^2.11.0",
"fs-extra": "^8.1.0",
"is-https": "^1.0.0",
"lodash.chunk": "^4.2.0",
"lodash.unionby": "^4.8.0",
"minimatch": "^3.0.4",
"sitemap": "^4.1.1"
Expand Down
1 change: 1 addition & 0 deletions test/fixture/nuxt.config.js
Expand Up @@ -15,6 +15,7 @@ module.exports = {
xmlNs: 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
// xslUrl: 'sitemap.xsl',
trailingSlash: true,
size: 5,
defaults: {
changefreq: 'daily',
priority: 1
Expand Down
2 changes: 2 additions & 0 deletions test/module.test.js
Expand Up @@ -517,3 +517,5 @@ describe('sitemapindex - generate mode', () => {
expect(xml).toMatchSnapshot()
})
})

// TODO: describe('sitemap - generate mode with auto-split', () => { ... }
9 changes: 7 additions & 2 deletions yarn.lock
Expand Up @@ -4029,7 +4029,7 @@ eslint-ast-utils@^1.0.0:
lodash.get "^4.4.2"
lodash.zip "^4.2.0"

eslint-config-prettier@^6.6.0:
eslint-config-prettier@latest:
version "6.6.0"
resolved "https://registry.yarnpkg.com/eslint-config-prettier/-/eslint-config-prettier-6.6.0.tgz#4e039f65af8245e32d8fba4a2f5b83ed7186852e"
integrity sha512-6RGaj7jD+HeuSVHoIT6A0WkBhVEk0ULg74kp2FAWIwkYrOERae0TjIO09Cw33oN//gJWmt7aFhVJErEVta7uvA==
Expand Down Expand Up @@ -6429,7 +6429,7 @@ lines-and-columns@^1.1.6:
resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.1.6.tgz#1c00c743b433cd0a4e80758f7b64a57440d9ff00"
integrity sha1-HADHQ7QzzQpOgHWPe2SldEDZ/wA=

lint-staged@^9.4.3:
lint-staged@latest:
version "9.4.3"
resolved "https://registry.yarnpkg.com/lint-staged/-/lint-staged-9.4.3.tgz#f55ad5f94f6e105294bfd6499b23142961f7b982"
integrity sha512-PejnI+rwOAmKAIO+5UuAZU9gxdej/ovSEOAY34yMfC3OS4Ac82vCBPzAWLReR9zCPOMqeVwQRaZ3bUBpAsaL2Q==
Expand Down Expand Up @@ -6586,6 +6586,11 @@ lodash.camelcase@^4.1.1:
resolved "https://registry.yarnpkg.com/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz#b28aa6288a2b9fc651035c7711f65ab6190331a6"
integrity sha1-soqmKIorn8ZRA1x3EfZathkDMaY=

lodash.chunk@^4.2.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/lodash.chunk/-/lodash.chunk-4.2.0.tgz#66e5ce1f76ed27b4303d8c6512e8d1216e8106bc"
integrity sha1-ZuXOH3btJ7QwPYxlEujRIW6BBrw=

lodash.defaultsdeep@^4.6.0:
version "4.6.1"
resolved "https://registry.yarnpkg.com/lodash.defaultsdeep/-/lodash.defaultsdeep-4.6.1.tgz#512e9bd721d272d94e3d3a63653fa17516741ca6"
Expand Down

0 comments on commit 8860eaa

Please sign in to comment.