Skip to content

Commit

Permalink
fix(seo): add missing sitemaps (#11234)
Browse files Browse the repository at this point in the history
* chore(build): support non-docs sitemaps

* feat(blog): build sitemap

* feat(build): build sitemap index universally

* feat(curriculum): build sitemap

* feat(spas): build sitemap

* fixup! feat(build): build sitemap index universally

* refactor(build): extract writeSitemap()

* refactor(build): extract buildSitemap()

* fix(cloud-function): set gzip encoding only for existing sitemaps

* enhance(sitemaps): use BASE_URL
  • Loading branch information
caugner authored Jun 5, 2024
1 parent ee3f07f commit 4c78143
Show file tree
Hide file tree
Showing 11 changed files with 153 additions and 49 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/prod-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -274,15 +274,15 @@ jobs:
du -sh client/build
# Generate sitemap index file
yarn build --sitemap-index
# Build the blog
yarn build:blog
# Build the curriculum
yarn build:curriculum
# Generate sitemap index file
yarn build --sitemap-index
# Generate whatsdeployed files.
yarn tool whatsdeployed --output client/build/_whatsdeployed/code.json
yarn tool whatsdeployed $CONTENT_ROOT --output client/build/_whatsdeployed/content.json
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/stage-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -290,15 +290,15 @@ jobs:
du -sh client/build
# Generate sitemap index file
yarn build --sitemap-index
# Build the blog
yarn build:blog
# Build the curriculum
yarn build:curriculum
# Generate sitemap index file
yarn build --sitemap-index
# Generate whatsdeployed files.
yarn tool whatsdeployed --output client/build/_whatsdeployed/code.json
yarn tool whatsdeployed $CONTENT_ROOT --output client/build/_whatsdeployed/content.json
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/xyz-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,15 @@ jobs:
du -sh client/build
# Generate sitemap index file
yarn build --sitemap-index
# Build the blog
yarn build:blog
# Build the curriculum
yarn build:curriculum
# Generate sitemap index file
yarn build --sitemap-index
# Generate whatsdeployed files.
yarn tool whatsdeployed --output client/build/_whatsdeployed/code.json
yarn tool whatsdeployed $CONTENT_ROOT --output client/build/_whatsdeployed/content.json
Expand Down
27 changes: 27 additions & 0 deletions build/blog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import { extractSections } from "./extract-sections.js";
import { HydrationData } from "../libs/types/hydration.js";
import { DEFAULT_LOCALE } from "../libs/constants/index.js";
import { memoize } from "../content/utils.js";
import { buildSitemap } from "./sitemaps.js";

const READ_TIME_FILTER = /[\w<>.,!?]+/;
const HIDDEN_CODE_BLOCK_MATCH = /```.*hidden[\s\S]*?```/g;
Expand Down Expand Up @@ -495,3 +496,29 @@ export async function buildAuthors(options: { verbose?: boolean }) {
}
}
}

/**
 * Builds the sitemap for the blog.
 *
 * Every post returned by `allPostFrontmatter()` becomes one entry whose slug
 * is the post slug plus a trailing slash and whose modification date is the
 * post's `date` as an ISO string. An additional entry with an empty slug
 * (the blog index) carries the most recent of those dates.
 *
 * @param options.verbose - When set, logs the path of the written sitemap.
 */
export async function buildBlogSitemap(options: { verbose?: boolean }) {
  const frontmatters = await allPostFrontmatter();

  const postEntries = frontmatters.map(({ slug, date }) => {
    return {
      slug: `${slug}/`,
      modified: new Date(date).toISOString(),
    };
  });

  // ISO timestamps sort chronologically as strings; descending order puts
  // the newest date first (undefined when there are no posts).
  const datesNewestFirst = postEntries
    .map((entry) => entry.modified)
    .sort((left, right) => right.localeCompare(left));
  const blogIndexEntry = {
    slug: "",
    modified: datesNewestFirst.at(0),
  };

  const allEntries = [blogIndexEntry, ...postEntries];
  const writtenPath = await buildSitemap(allEntries, {
    slugPrefix: `/${DEFAULT_LOCALE}/blog/`,
    pathSuffix: [DEFAULT_LOCALE, "blog"],
  });

  if (!options.verbose) {
    return;
  }
  console.log("Wrote", writtenPath);
}
2 changes: 2 additions & 0 deletions build/build-blog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import {
buildBlogFeed,
buildBlogIndex,
buildBlogPosts,
buildBlogSitemap,
} from "./blog.js";

await buildBlogIndex({ verbose: true });
await buildBlogPosts({ verbose: true });
await buildAuthors({ verbose: true });
await buildBlogFeed({ verbose: true });
await buildBlogSitemap({ verbose: true });
5 changes: 3 additions & 2 deletions build/build-curriculum.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env node
import { buildCurriculum } from "./curriculum.js";
import { buildCurriculum, buildCurriculumSitemap } from "./curriculum.js";

buildCurriculum({ verbose: true });
await buildCurriculum({ verbose: true });
await buildCurriculumSitemap({ verbose: true });
47 changes: 15 additions & 32 deletions build/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
import zlib from "node:zlib";

import chalk from "chalk";
import cliProgress from "cli-progress";
Expand All @@ -28,10 +27,11 @@ import {
} from "./index.js";
import { Doc, DocMetadata, Flaws } from "../libs/types/document.js";
import SearchIndex from "./search-index.js";
import { makeSitemapXML, makeSitemapIndexXML } from "./sitemaps.js";
import { makeSitemapIndexXML, buildSitemap } from "./sitemaps.js";
import { humanFileSize } from "./utils.js";
import { initSentry } from "./sentry.js";
import { macroRenderTimes } from "../kumascript/src/render.js";
import { fdir } from "fdir";

const { program } = caporal;
const { prompt } = inquirer;
Expand Down Expand Up @@ -341,17 +341,10 @@ async function buildDocuments(
}

for (const [locale, docs] of Object.entries(docPerLocale)) {
const sitemapDir = path.join(
BUILD_OUT_ROOT,
"sitemaps",
locale.toLowerCase()
);
fs.mkdirSync(sitemapDir, { recursive: true });
const sitemapFilePath = path.join(sitemapDir, "sitemap.xml.gz");
fs.writeFileSync(
sitemapFilePath,
zlib.gzipSync(makeSitemapXML(locale, docs))
);
await buildSitemap(docs, {
slugPrefix: `/${locale}/docs/`,
pathSuffix: [locale],
});
}

searchIndex.sort();
Expand Down Expand Up @@ -514,33 +507,23 @@ program
if (!options.quiet) {
console.log(chalk.yellow("Building sitemap index file..."));
}
const sitemapsBuilt = [];
const locales = [];
for (const locale of VALID_LOCALES.keys()) {
const sitemapFilePath = path.join(
BUILD_OUT_ROOT,
"sitemaps",
locale,
"sitemap.xml.gz"
);
if (fs.existsSync(sitemapFilePath)) {
sitemapsBuilt.push(sitemapFilePath);
locales.push(locale);
}
}

const sitemapsBuilt = new fdir()
.filter((p) => p.endsWith("/sitemap.xml.gz"))
.withFullPaths()
.crawl(path.join(BUILD_OUT_ROOT, "sitemaps"))
.sync()
.sort()
.map((fp) => fp.replace(BUILD_OUT_ROOT, ""));
const sitemapIndexFilePath = path.join(BUILD_OUT_ROOT, "sitemap.xml");
fs.writeFileSync(
sitemapIndexFilePath,
makeSitemapIndexXML(
sitemapsBuilt.map((fp) => fp.replace(BUILD_OUT_ROOT, ""))
)
makeSitemapIndexXML(sitemapsBuilt)
);

if (!options.quiet) {
console.log(
chalk.green(
`Sitemap index file built with locales: ${locales.join(", ")}.`
`Wrote sitemap index referencing ${sitemapsBuilt.length} sitemaps:\n${sitemapsBuilt.map((s) => `- ${s}`).join("\n")}`
)
);
}
Expand Down
23 changes: 23 additions & 0 deletions build/curriculum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import { memoize, slugToFolder } from "../content/utils.js";
// @ts-ignore
import { renderHTML } from "../ssr/dist/main.js";
import { CheerioAPI } from "cheerio";
import { buildSitemap } from "./sitemaps.js";

export const allFiles = memoize(async () => {
const api = new fdir()
Expand Down Expand Up @@ -428,3 +429,25 @@ function setCurriculumTypes($: CheerioAPI) {
}
});
}

/**
 * Builds the sitemap for the curriculum.
 *
 * Walks the tree returned by `buildCurriculumIndex()` breadth-first and emits
 * one sitemap entry per node, using the node's `url` as the slug. No slug
 * prefix is passed to `buildSitemap()`, so each `url` is used as-is.
 *
 * @param options.verbose - When set, logs the path of the written sitemap.
 */
export async function buildCurriculumSitemap(options: { verbose?: boolean }) {
  // Work on a copy: the previous implementation shift()ed entries off the
  // array returned by buildCurriculumIndex(), emptying it for any other
  // holder of that array.
  const queue = [...(await buildCurriculumIndex())];
  const items: { slug: string }[] = [];

  // A cursor replaces repeated shift() calls; visiting order is unchanged
  // (parents first, then their children in the order they were queued).
  for (let cursor = 0; cursor < queue.length; cursor++) {
    const current = queue[cursor];
    items.push({
      slug: current.url,
    });
    if (current.children) {
      queue.push(...current.children);
    }
  }

  const sitemapFilePath = await buildSitemap(items, {
    pathSuffix: [DEFAULT_LOCALE, "curriculum"],
  });

  if (options.verbose) {
    console.log("Wrote", sitemapFilePath);
  }
}
47 changes: 42 additions & 5 deletions build/sitemaps.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,38 @@
export function makeSitemapXML(
locale: string,
docs: { slug: string; modified: string }[]
import { join } from "node:path";
import { mkdir, writeFile } from "node:fs/promises";
import { gzipSync } from "node:zlib";

import { BASE_URL, BUILD_OUT_ROOT } from "../libs/env/index.js";

const SITEMAP_BASE_URL = BASE_URL.replace(/\/$/, "");

interface SitemapEntry {
slug: string;
modified?: string;
}

/**
 * Renders the given entries as sitemap XML and writes the result to disk.
 *
 * @param entries - The sitemap entries to render.
 * @param options.slugPrefix - Passed to `makeSitemapXML()` as the prefix for
 *   every entry's slug. Defaults to the empty string.
 * @param options.pathSuffix - Path segments passed on to `writeSitemap()`.
 *   Defaults to no segments.
 * @returns The path returned by `writeSitemap()`.
 */
export async function buildSitemap(
  entries: SitemapEntry[],
  options: { slugPrefix?: string; pathSuffix?: string[] }
) {
  const { slugPrefix = "", pathSuffix = [] } = options;

  const sitemapXml = makeSitemapXML(slugPrefix, entries);
  return await writeSitemap(sitemapXml, ...pathSuffix);
}

function makeSitemapXML(prefix: string, docs: SitemapEntry[]) {
const sortedDocs = docs.slice().sort((a, b) => a.slug.localeCompare(b.slug));

// Based on https://support.google.com/webmasters/answer/183668?hl=en
return [
'<?xml version="1.0" encoding="UTF-8"?>',
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
...sortedDocs.map((doc) => {
const loc = `<loc>https://developer.mozilla.org/${locale}/docs/${doc.slug}</loc>`;
const loc = `<loc>${SITEMAP_BASE_URL}${prefix}${doc.slug}</loc>`;
const modified = doc.modified
? `<lastmod>${doc.modified.toString().split("T")[0]}</lastmod>`
: "";
Expand All @@ -30,11 +53,25 @@ export function makeSitemapIndexXML(paths: string[]) {
...sortedPaths.map((path) => {
return (
"<sitemap>" +
`<loc>https://developer.mozilla.org${path}</loc>` +
`<loc>${SITEMAP_BASE_URL}${path}</loc>` +
`<lastmod>${new Date().toISOString().split("T")[0]}</lastmod>` +
"</sitemap>"
);
}),
"</sitemapindex>",
].join("\n");
}

/**
 * Gzips the given XML and writes it as `sitemap.xml.gz` into
 * `<BUILD_OUT_ROOT>/sitemaps/<...paths>`, creating the directory if needed.
 *
 * @param xml - The sitemap XML to compress and write.
 * @param paths - Directory segments below `sitemaps`; each one is lowercased.
 * @returns The path of the written file.
 */
async function writeSitemap(xml: string, ...paths: string[]) {
  const segments = paths.map((segment) => segment.toLowerCase());
  const targetDir = join(BUILD_OUT_ROOT, "sitemaps", ...segments);
  await mkdir(targetDir, { recursive: true });

  const targetFile = join(targetDir, "sitemap.xml.gz");
  const compressed = gzipSync(xml);
  await writeFile(targetFile, compressed);

  return targetFile;
}
31 changes: 31 additions & 0 deletions build/spas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import { getSlugByBlogPostUrl, splitSections } from "./utils.js";
import { findByURL } from "../content/document.js";
import { buildDocument } from "./index.js";
import { findPostBySlug } from "./blog.js";
import { buildSitemap } from "./sitemaps.js";

const FEATURED_ARTICLES = [
"blog/learn-javascript-console-methods/",
Expand Down Expand Up @@ -113,6 +114,9 @@ export async function buildSPAs(options: {
verbose?: boolean;
}) {
let buildCount = 0;
const sitemap: {
url: string;
}[] = [];

// The URL isn't very important as long as it triggers the right route in the <App/>
const locale = DEFAULT_LOCALE;
Expand Down Expand Up @@ -195,6 +199,12 @@ export async function buildSPAs(options: {
if (options.verbose) {
console.log("Wrote", filePath);
}

if (!noIndexing && !onlyFollow) {
sitemap.push({
url,
});
}
}
}
}
Expand Down Expand Up @@ -260,6 +270,10 @@ export async function buildSPAs(options: {
}
const filePathContext = path.join(outPath, "index.json");
fs.writeFileSync(filePathContext, JSON.stringify(context));

sitemap.push({
url,
});
}
}

Expand Down Expand Up @@ -354,6 +368,10 @@ export async function buildSPAs(options: {
console.log("Wrote", filePath);
}

sitemap.push({
url,
});

// Also, dump the recent pull requests to a file so the data can be fetched
// during client-side rendering.
const filePathContext = path.join(outPath, "index.json");
Expand All @@ -365,6 +383,19 @@ export async function buildSPAs(options: {
}
}

// Sitemap.
const sitemapFilePath = await buildSitemap(
sitemap.map(({ url }) => ({
slug: url,
modified: "",
})),
{ pathSuffix: ["misc"] }
);

if (!options.quiet) {
console.log("Wrote", sitemapFilePath);
}

if (!options.quiet) {
console.log(`Built ${buildCount} SPA related files`);
}
Expand Down
2 changes: 1 addition & 1 deletion cloud-function/src/headers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export function withContentResponseHeaders(
res.setHeader("X-Robots-Tag", "noindex, nofollow");
}

if (req.url?.endsWith("/sitemap.xml.gz")) {
if (res.statusCode === 200 && req.url?.endsWith("/sitemap.xml.gz")) {
res.setHeader("Content-Type", "application/xml");
res.setHeader("Content-Encoding", "gzip");
}
Expand Down

0 comments on commit 4c78143

Please sign in to comment.