Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ shlex = "1"
tempfile = "3.0"
toml = "0.5.1"
topological-sort = "0.1.0"
url = "2.2.2"

# Watch feature
notify = { version = "4.0", optional = true }
Expand Down
3 changes: 3 additions & 0 deletions guide/src/format/configuration/renderers.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ edit-url-template = "https://github.com/rust-lang/mdBook/edit/master/guide/{path
site-url = "/example-book/"
cname = "myproject.rs"
input-404 = "not-found.md"
sitemap = true
```

The following configuration options are available:
Expand Down Expand Up @@ -162,6 +163,8 @@ The following configuration options are available:
This string will be written to a file named CNAME in the root of your site, as
required by GitHub Pages (see [*Managing a custom domain for your GitHub Pages
site*][custom domain]).
- **sitemap:** Set to `true` to generate a `sitemap.xml` file in the root of your
site. When enabled, `site-url` must be set to an absolute URL.

[custom domain]: https://docs.github.com/en/github/working-with-github-pages/managing-a-custom-domain-for-your-github-pages-site

Expand Down
4 changes: 4 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,9 @@ pub struct HtmlConfig {
/// The mapping from old pages to new pages/URLs to use when generating
/// redirects.
pub redirect: HashMap<String, String>,
/// Whether to enable sitemap generation. If this is set, `site_url` must be an
/// absolute URL.
pub sitemap: bool,
}

impl Default for HtmlConfig {
Expand Down Expand Up @@ -571,6 +574,7 @@ impl Default for HtmlConfig {
cname: None,
livereload_url: None,
redirect: HashMap::new(),
sitemap: false,
}
}
}
Expand Down
123 changes: 123 additions & 0 deletions src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ use crate::renderer::html_handlebars::helpers;
use crate::renderer::{RenderContext, Renderer};
use crate::theme::{self, playground_editor, Theme};
use crate::utils;
use url::Url;

use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};

use crate::utils::fs::get_404_output_file;
Expand Down Expand Up @@ -431,6 +433,119 @@ impl HtmlHandlebars {

Ok(())
}

fn generate_sitemap<'a>(
&self,
destination: &Path,
site_url: &str,
items: impl Iterator<Item = &'a BookItem>,
) -> Result<()> {
if destination.exists() {
// sanity check to avoid accidentally overwriting a real file.
let msg = format!(
"Not generating \"{}\" because it already exists. Are you sure you want to generate a sitemap?",
destination.display(),
);
return Err(Error::msg(msg));
}

let mut site_url = Url::parse(site_url).with_context(|| {
format!(
"output.html.site-url (\"{}\") is not a valid absolute URL",
site_url
)
})?;
// The URL must end with a slash if it doesn't already, otherwise it isn't considered a
// directory for the purpose of joining!
if !site_url.path().ends_with('/') {
site_url.set_path(&format!("{}/", site_url.path()));
}

let sitemap = BufWriter::new(
File::create(&destination).with_context(|| "Failed to create sitemap file")?,
);
self.write_sitemap(sitemap, &site_url, items)
.with_context(|| "Error writing to sitemap file")
}

fn write_sitemap<'a>(
&self,
mut sitemap: impl Write,
site_url: &Url,
items: impl Iterator<Item = &'a BookItem>,
) -> Result<()> {
writeln!(sitemap, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
writeln!(
sitemap,
"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">",
)?;

for (i, path) in items
.filter_map(|item| match item {
BookItem::Chapter(ch) if !ch.is_draft_chapter() => Some(ch.path.as_ref().unwrap()),
_ => None,
})
.enumerate()
{
// No joke, this is in the spec
if i == 50_000 {
warn!("Sitemaps must not provide more than 50,000 URLs; consider using an sitemap index instead");
}

let path = path.with_extension("html");
let path = path
.to_str()
.expect("Path should be valid UTF-8 from prior processing");
let url = site_url
.join(path)
.with_context(|| format!("Failed to join {} with site_url", path))?;

writeln!(sitemap, "\t<url>")?;
writeln!(sitemap, "\t\t<loc>{}</loc>", xml_escapes(&url.to_string()))?;
// TODO: lastmod from src file modification time?
writeln!(sitemap, "\t</url>")?;
}

writeln!(sitemap, "</urlset>")?;

// TODO: check that the fils isn't moer than 50 MiB (that's what the spec says)

sitemap.flush()?;
Ok(())
}
}

/// Replaces the five XML special characters (`&`, `'`, `"`, `>`, `<`) in `s` with
/// their predefined entities.
///
/// Returns the input borrowed and unchanged when it contains none of them, so the
/// common case does not allocate.
fn xml_escapes(s: &str) -> Cow<'_, str> {
    /// Maps a character to its XML entity, or `None` if it needs no escaping.
    fn entity_for(c: char) -> Option<&'static str> {
        match c {
            '&' => Some("&amp;"),
            '\'' => Some("&apos;"),
            '"' => Some("&quot;"),
            '>' => Some("&gt;"),
            '<' => Some("&lt;"),
            _ => None,
        }
    }

    // Nothing to escape: hand the original slice back untouched.
    let first = match s.find(|c: char| entity_for(c).is_some()) {
        Some(idx) => idx,
        None => return Cow::Borrowed(s),
    };

    // The result is at least as long as the input; this is a lower bound only.
    let mut out = String::with_capacity(s.len());
    // Everything before the first special character is copied verbatim.
    out.push_str(&s[..first]);
    for c in s[first..].chars() {
        match entity_for(c) {
            Some(entity) => out.push_str(entity),
            None => out.push(c),
        }
    }

    Cow::Owned(out)
}

// TODO(mattico): Remove some time after the 0.1.8 release
Expand Down Expand Up @@ -571,6 +686,14 @@ impl Renderer for HtmlHandlebars {
self.emit_redirects(&ctx.destination, &handlebars, &html_config.redirect)
.context("Unable to emit redirects")?;

// Sitemap generation is opt-in and needs `site-url` to build each page's URL.
if html_config.sitemap {
    let site_url = html_config
        .site_url
        .as_ref()
        .ok_or_else(|| Error::msg("site-url must be set to generate a sitemap"))?;
    self.generate_sitemap(&destination.join("sitemap.xml"), site_url, book.iter())?;
}

// Copy all remaining files, avoid a recursive copy from/to the book build dir
utils::fs::copy_files_except_ext(&src_dir, destination, true, Some(&build_dir), &["md"])?;

Expand Down