Export static site #9
Comments
Oh, this would be such an awesome addition. Maybe something like this:

```js
// scripts/export.js
const exporter = require('sapper/export');
const axios = require('axios');

const exportOpts = {
  getPages: async () => {
    const { data: posts } = await axios.get('https://jsonplaceholder.typicode.com/posts');
    return [
      '/',
      {
        page: '/blog',
        getParams: async () => ({
          posts: posts.map(post => ({ id: post.id, title: post.title }))
        }),
        // Array of "child" pages?
        children: posts.map(post => ({
          page: '/:slug',
          getParams: async () => ({
            post,
            // Used to give a value to :slug
            slug: post.id
          })
        }))
      },
      {
        page: '/repl',
        // Optional values for automatic sitemap.xml generation?
        index: false,
        lastModified: '2018-01-04',
        priority: 0.5
      }
    ];
  },
  dist: 'dist',
  // Optional automatic sitemap.xml generation?
  sitemap: true
};

exporter.export(exportOpts).then(() => {
  console.log('done');
});
```
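To make the semantics of the proposed `getPages` tree concrete, here is a small sketch of how an exporter might flatten it into a flat list of URLs to render. `collectRoutes` is a hypothetical helper written for illustration, not part of any real Sapper API:

```js
// Hypothetical: flatten the proposed getPages tree into concrete URLs.
// Sitemap-related fields (index, lastModified, priority) are ignored here.
async function collectRoutes(pages, base = '') {
  const routes = [];
  for (const entry of pages) {
    // Plain strings are exported as-is.
    if (typeof entry === 'string') {
      routes.push(base + entry);
      continue;
    }
    // Resolve params up front so :param segments can be substituted.
    const params = entry.getParams ? await entry.getParams() : {};
    routes.push((base + entry.page).replace(/:(\w+)/g, (_, key) => params[key]));
    if (entry.children) {
      // Child pages are resolved relative to this page's path.
      routes.push(...(await collectRoutes(entry.children, base + entry.page)));
    }
  }
  return routes;
}
```

Running this over the `getPages` result above would yield something like `['/', '/blog', '/blog/1', …, '/repl']`, which an exporter could then render and write into `dist`.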
I had a thought about this — I had assumed you would need config for this to work, but what if instead we started at the root page and crawled the rendered app for links, exporting each page we find? I haven't studied Next and Nuxt's implementations; it's possible they're already doing something similar.
I've created a hacky but workable implementation of this idea using node-spider. You can see the implementation running on Netlify here. I feel like there should be a cleaner way to extract all these routes and URLs without doing what I've done (combing through links, src attributes, and manually parsing service-worker.js), but it's probably a lot more work to do this nicely in the middle stages of generation. Hopefully this serves as a good starting point. I'll try to clean this up tomorrow and merge it into master, but for now I've added an "extract" config. Here's my extract.js:

```js
const fs = require('fs');
const app = require('express')();
const compression = require('compression');
const sapper = require('sapper');
const static = require('serve-static');

// New requirements.
const Spider = require('node-spider');
const path = require('path');

const { PORT = 3000 } = process.env;

// TODO(freedmand): un-hardcode
const outputDir = 'dist';

// File manipulation functions:
// TODO(freedmand): use established packages or move into separate file.

/**
 * Deletes the specified directory if it exists.
 * @param {string} dir The directory to delete.
 */
function rmdir(dir) {
  const list = fs.readdirSync(dir);
  for (const f of list) {
    const filename = path.join(dir, f);
    const stat = fs.statSync(filename);
    if (filename == '.' || filename == '..') {
      continue;
    } else if (stat.isDirectory()) {
      rmdir(filename);
    } else {
      fs.unlinkSync(filename);
    }
  }
  fs.rmdirSync(dir);
}

/**
 * Creates parent directories recursively for the given file path if they do not
 * exist.
 * @param {string} filePath The file path to clear the way for.
 */
function makeDirectories(filePath) {
  const dirname = path.dirname(filePath);
  if (fs.existsSync(dirname)) return true;
  makeDirectories(dirname);
  fs.mkdirSync(dirname);
}

/**
 * Writes the specified contents to the specified file asynchronously.
 * @param {string} fn The file name to write to.
 * @param {string} contents The contents to write to the file in binary.
 */
function writeFile(fn, contents) {
  makeDirectories(fn);
  const stream = fs.createWriteStream(fn, { encoding: 'binary' });
  stream.once('open', () => {
    stream.write(contents);
    stream.end();
  });
}

// Specialized URL functions.
const prefix = `http://localhost:${PORT}`;

/**
 * Returns the full URL of the specified relative path in the server.
 * @param {string} url The path for which to get the complete URL.
 * @return {string} The full URL.
 */
function getFullUrl(url) {
  return `${prefix}${url}`;
}

/**
 * Returns the extension on the URL or '' if there is none.
 * @param {string} url The URL.
 * @return {string} The URL's extension or the empty string if the URL has no
 *   extension.
 */
function getExtension(url) {
  const splits = url.split('.');
  let extension = splits[splits.length - 1].trim();
  // Assume that an extension cannot be longer than 10 characters.
  if (!/^[a-zA-Z0-9]+$/.test(extension) || extension.length > 10) {
    // Clear the extension if it is not alphanumeric or is long enough to
    // signify it may just be a hash value or something.
    extension = '';
  }
  return extension;
}

/**
 * Returns the relative path for the specified URL, adding index.html if the URL
 * ends in `/`.
 * @param {string} url The URL for which to retrieve the relative path.
 * @return {string} A URL that starts with / that is relative to the server
 *   root. The URL will add index.html if it ends with `/`.
 */
function relativePath(url) {
  if (url.startsWith(prefix)) return relativePath(url.substr(prefix.length));
  if (url.endsWith('/')) url += 'index.html';
  if (getExtension(url) == '') url += '/index.html';
  if (url.startsWith('/')) return url;
  throw new Error('Bad url');
}

/**
 * Returns the Sapper API route for the specified URL path.
 * @param {string} url The absolute or relative URL.
 * @return {string} The URL with /api/ in front.
 */
function apiPath(url) {
  if (url.startsWith(prefix)) {
    return `${prefix}/api${url.substr(prefix.length)}`;
  }
  return `/api${url}`;
}

/**
 * Returns whether the specified URL is on the server or an external link.
 * @param {string} url The URL.
 * @return {boolean} True if the URL is on the server.
 */
function filter(url) {
  return url.startsWith('/') || url.startsWith(getFullUrl('/'));
}

// Normal server.js stuff.

// this allows us to do e.g. `fetch('/api/blog')` on the server
const fetch = require('node-fetch');
global.fetch = (url, opts) => {
  if (url[0] === '/') url = getFullUrl(url);
  return fetch(url, opts);
};

app.use(compression({ threshold: 0 }));
app.use(static('assets'));
app.use(sapper());
// Clean the output directory: remove it if it already exists, then recreate it
// empty. (Guarding the rmdir avoids a crash on the first run, when the output
// directory does not exist yet.)
if (fs.existsSync(outputDir)) rmdir(outputDir);
fs.mkdirSync(outputDir);
// The crux of the scrape: chaining the traditional server call with a web
// scraper. The program automatically exits after all the static pages have been
// scraped from the server that are accessible from the root page (`/`).
let server;
new Promise((resolve) => {
  server = app.listen(PORT, () => {
    console.log(`listening on port ${PORT}`);
    resolve();
  });
}).then(() => {
  return new Promise((resolve, reject) => {
    const spider = new Spider({
      // Most of these config options are copied from node-spider's usage
      // example.
      concurrent: 5,
      delay: 0,
      logs: process.stderr,
      allowDuplicates: false,
      catchErrors: true,
      addReferrer: false,
      xhr: false,
      keepAlive: false,
      error: (err, url) => {
        console.error(`ERROR ${err} at ${url}`);
        reject();
      },
      // Called when there are no more requests
      done: () => {
        console.log('Done!');
        resolve();
      },
      headers: { 'user-agent': 'node-spider' },
      // Use a binary encoding to preserve image files.
      encoding: 'binary'
    });

    // The primary logic to handle a scraped page.
    const handleRequest = (doc) => {
      // Only deal with the page if it is on the server.
      if (!filter(doc.url)) return;

      // Grab the page's relative path and write the page contents to a local
      // file.
      const relPath = relativePath(doc.url);
      console.log(`GOT ${relPath}`); // page url
      writeFile(path.join(outputDir, relPath), doc.res.body);

      /**
       * Resolves and checks if a given URL is local; if so, adds it to the
       * scraping queue.
       * @param {string} url The URL to process.
       */
      const process = (url) => {
        // Remove trailing hash if relevant.
        url = url.split('#')[0];
        // Resolve URL relative to server root.
        url = doc.resolve(url);
        // Crawl more if the URL is on the server.
        if (filter(url)) spider.queue(url, handleRequest);
      };

      const extension = getExtension(doc.url);
      if (extension == 'html' || extension == '') {
        // Grab src and href attributes from html pages.
        doc.$('[src]').each((i, elem) => {
          process(doc.$(elem).attr('src'));
        });
        doc.$('[href]').each((i, elem) => {
          process(doc.$(elem).attr('href'));
        });
      }

      // TODO(freedmand): find a cleaner way to do this.
      if (doc.url.endsWith('/service-worker.js')) {
        // Grab to_cache URLs from the service-worker. This is messy since the
        // contents of service-worker.js could be prone to change on
        // minification.
        const match = /const to_cache = (\[.*?;)/.exec(doc.res.body);
        // Use a hacky eval to process the array.
        const additionalUrls = eval(match[1]);
        for (const additionalUrl of additionalUrls) {
          // Queue the resulting URLs.
          spider.queue(doc.resolve(additionalUrl), handleRequest);
        }
      }

      if (relPath.endsWith('/index.html') && !relPath.startsWith('/api/')) {
        // Attempt to grab the /api/ version of a page that seems to be a basic
        // route.
        spider.queue(apiPath(doc.url), handleRequest);
      }
    };

    // Start crawling with the document root and the service worker.
    spider.queue(getFullUrl('/'), handleRequest);
    spider.queue(getFullUrl('/service-worker.js'), handleRequest);
  });
}).then(() => {
  server.close();
});
```
For reference, here's how […]. I don't really have much context on […].
Most tools like this take a config with a "routes" definition array, similar to what @EmilTholin described. I reckon Sapper should (begin to) include some CLI tasks baked in, so that devs don't have to redefine the base server setup themselves.
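For comparison, the "routes definition array" pattern in other tools looks roughly like the sketch below; a built-in CLI export task could read a file like this instead of each project re-implementing its own export server. The file name and option names here are made up for illustration and are not an actual Sapper API:

```js
// sapper.export.config.js (hypothetical): a routes-definition array of the
// kind other static-site tools accept. A built-in export task could read this,
// start the app server, render each route, and write the output to `dist`.
const fetch = require('node-fetch');

module.exports = {
  dist: 'dist',
  routes: [
    // Static routes are plain strings.
    '/',
    '/about',
    // Dynamic routes are functions resolved from data at export time.
    async () => {
      const posts = await fetch('https://jsonplaceholder.typicode.com/posts')
        .then(res => res.json());
      return posts.map(post => `/blog/${post.id}`);
    }
  ]
};
```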
Hi, I'm just learning Svelte/Sapper, so I have practically no experience with it, but I've been using static site generators like Jekyll, Hugo, Gatsby, etc., so I can recommend some design decisions which imo should be baked in, if you decide to build this. From a user standpoint, the following is important (imo):
Please let me know if I forgot something 😄
✋ Based on @Rich-Harris' and @freedmand's ideas I built an exporter prototype which can be pulled into an existing Sapper project. Check out the README for details on how it works. ☠️ sapper-pirate-export ☠️
Is there still room for a "config-like" export within Sapper, or is that best pursued in a separate package? Imagine that I have a WordPress site and I want to get all the pages and posts as data via the WordPress REST API, then bake a lean and mean static Sapper site, and finally host it on Netlify. In that case it would be great to have a data-driven export config of sorts, so I don't have to keep the file system of pages up to date when I, for example, change the permalink of a WordPress page in the CMS.
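As a sketch of what that could look like, reusing the `getPages` shape proposed at the top of this thread (the endpoints are the standard WordPress REST API, but the config shape itself is hypothetical and not a real Sapper feature):

```js
// scripts/export-config.js (hypothetical): drive the export entirely from the
// WordPress REST API, so permalinks live in the CMS rather than the file system.
const fetch = require('node-fetch');

const WP = 'https://example.com/wp-json/wp/v2';

module.exports = {
  dist: 'dist',
  getPages: async () => {
    const [pages, posts] = await Promise.all([
      fetch(`${WP}/pages?per_page=100`).then(res => res.json()),
      fetch(`${WP}/posts?per_page=100`).then(res => res.json())
    ]);
    return [
      '/',
      // WordPress pages become top-level routes; the slug comes from the API,
      // so renaming a permalink in the CMS only changes the next export.
      ...pages.map(page => ({
        page: '/:slug',
        getParams: async () => ({ slug: page.slug, content: page.content.rendered })
      })),
      {
        page: '/blog',
        getParams: async () => ({
          posts: posts.map(post => ({ slug: post.slug, title: post.title.rendered }))
        }),
        children: posts.map(post => ({
          page: '/:slug',
          getParams: async () => ({ slug: post.slug, content: post.content.rendered })
        }))
      }
    ];
  }
};
```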
This current export solution is experimental, so I would definitely imagine there's room for a config-like export. In the first comment of #88, @Rich-Harris alluded to having more options in the CLI when exporting the site, which would necessitate a more detailed CLI help interface. In addition to being able to specify options via the command line, I agree it would be nice to have a Sapper export config file down the road.
For static sites (e.g. svelte.technology), it might be nice to have an equivalent of `next export`. Could be done like so:

That would run `server.js` in a separate process and basically just `curl` those URLs into `dist`, and copy over the `assets` folder and the files generated by the compiler.
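A minimal sketch of that flow in Node, assuming a standard server.js that listens on PORT and a hard-coded list of URLs (everything here is illustrative rather than an actual Sapper feature):

```js
// export.js (sketch): start server.js in a child process, fetch a known list
// of URLs into dist/, then copy the assets folder across.
const { spawn, execSync } = require('child_process');
const fs = require('fs');
const path = require('path');
const fetch = require('node-fetch');

const PORT = 3000;
const urls = ['/', '/guide', '/repl']; // hard-coded for the sketch

const server = spawn('node', ['server.js'], {
  env: { ...process.env, PORT: String(PORT) },
  stdio: 'inherit'
});

// Give the server a moment to start, then "curl" each URL into dist/.
setTimeout(async () => {
  for (const url of urls) {
    const res = await fetch(`http://localhost:${PORT}${url}`);
    const file = path.join('dist', url, 'index.html');
    fs.mkdirSync(path.dirname(file), { recursive: true });
    fs.writeFileSync(file, await res.text());
  }
  // Copy static assets alongside the rendered pages (cp -r kept for brevity).
  execSync('cp -r assets/. dist/');
  server.kill();
}, 1000);
```

In practice a crawler (as in the extract.js above) or a routes config would replace the hard-coded URL list.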