Skip to content

Commit

Permalink
scraping and static site [WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
yurique committed Sep 17, 2020
1 parent 50883f2 commit d3e7f25
Show file tree
Hide file tree
Showing 18 changed files with 1,167 additions and 490 deletions.
2 changes: 2 additions & 0 deletions package.json
Expand Up @@ -16,6 +16,7 @@
"express-prometheus-middleware": "^0.9.6",
"greenlock": "^4.0.4",
"greenlock-express": "^4.0.3",
"html-minifier": "^4.0.0",
"lodash": "^4.17.20",
"mime-types": "^2.1.27",
"pretty-bytes": "^5.4.1",
Expand All @@ -32,6 +33,7 @@
"@types/etag": "^1.8.0",
"@types/express": "^4.17.8",
"@types/express-http-proxy": "^1.6.1",
"@types/html-minifier": "^4.0.0",
"@types/lodash": "^4.14.161",
"@types/mime-types": "^2.1.0",
"@types/node": "^14.6.4",
Expand Down
44 changes: 35 additions & 9 deletions src/app.ts
@@ -1,10 +1,10 @@
import express from 'express'
import express, { Request, Response } from 'express'
import errorHandler from 'errorhandler'
import path from 'path'
import proxy from 'express-http-proxy'
import promMid from 'express-prometheus-middleware'
import { yellow, red, cyan } from 'chalk'
import config, { Route } from './config'
import config, { envConfig, Route } from './config'
import cache from './cache'
import preRender from './pre-render'
import { proxyRoute } from './routes/proxy-route'
Expand All @@ -20,6 +20,10 @@ const app = express()
// Turn on the Express `etag` application setting.
app.enable('etag')
// Register the middleware returned by express-prometheus-middleware.
app.use(promMid())
// NOTE(review): `errorhandler` is registered here, ahead of the routes defined further
// down. Express error-handling middleware is normally registered after the routes it
// should cover — confirm this placement is intentional.
app.use(errorHandler())
// Log the method and URL of every incoming request, then hand over to the next middleware.
app.use((req, res, next) => {
console.log(`[app] request: ${req.method} ${req.url}`)
next()
})

const shortRouteDescription = (route: Route): string => {
switch (route.type) {
Expand Down Expand Up @@ -73,11 +77,6 @@ app.post('/__ssr/admin/clear-cache', async (req, res) => {
if (!config.preRender) {
return res.status(200).send(`Cache cleared. Pre-rendering has not been run: pre-render is not configured.`)
}
if (config.preRenderPaths.length === 0) {
return res
.status(200)
.send(`Cache cleared. Pre-rendering has not been run: pre-render path are not configured.`)
}
await preRender()
return res.status(200).send(`Cache cleared. Pre-rendering has been run.`)
}
Expand Down Expand Up @@ -133,9 +132,36 @@ config.routes.forEach((route) => {
if (req.header('User-Agent') === config.userAgent) {
req.url = req.originalUrl
if (config.log.selfRequests) {
console.log(`[app] self request: ${req.originalUrl}, proxying -> ${route.target}${req.url}`)
console.log(`[app] self request: ${cyan(req.originalUrl)} proxying -> ${route.target}${req.url}`)
}
return proxy(route.target)(req, res, next)
return proxy(route.target, {
  /**
   * Post-processes the upstream response for self requests.
   *
   * - 301–303 responses carrying a usable `Location` header are answered with an empty
   *   body and a `Location` rewritten to `http://<envConfig.hostname>:<config.httpPort>`,
   *   keeping the original path, query and hash.
   * - Every other response is passed through unchanged; non-2xx statuses are logged
   *   when `config.log.selfRequests` is on.
   *
   * NOTE(review): 307/308 redirects are not rewritten — confirm whether that is intended.
   */
  userResDecorator: (proxyRes: Response, proxyResData: any, userReq: Request, userRes: Response) => {
    const status = proxyRes.statusCode
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- proxyRes is typed as an express Response here, which has no `headers`
    const location: unknown = (proxyRes as any).headers?.location

    if (status >= 301 && status <= 303 && typeof location === 'string' && location !== '') {
      let locationUrl: URL | undefined
      try {
        // The base is only a placeholder so that relative targets such as `/login`
        // parse instead of throwing; protocol, hostname and port are overwritten below.
        locationUrl = new URL(location, 'http://localhost')
      } catch {
        // Unparsable Location: fall through and forward the upstream response untouched.
        locationUrl = undefined
      }

      if (locationUrl) {
        if (config.log.selfRequests) {
          console.log(`[app] self request: ${cyan(req.originalUrl)} proxy redirect: ${status} ${location}`)
        }
        userRes.status(status)
        locationUrl.hostname = envConfig.hostname
        locationUrl.protocol = 'http'
        locationUrl.port = String(config.httpPort)
        userRes.setHeader('location', locationUrl.toString())
        return ''
      }
    }

    if (!(status >= 200 && status < 300)) {
      if (config.log.selfRequests) {
        console.log(`[app] self request: ${cyan(req.originalUrl)} proxy non-200 response: ${status}`)
      }
    }
    return proxyResData
  },
})(req, res, next)
}

return pageRoute(route, req, res)
Expand Down
51 changes: 49 additions & 2 deletions src/cache.ts
@@ -1,10 +1,12 @@
import etag from 'etag'
import { Request } from 'puppeteer-core'
import config, { runtimeConfig } from './config'

/**
 * One cached response, as produced by `Cache.set` / `Cache.setAsset`.
 */
export interface CacheEntry {
/** Response body bytes. */
content: Buffer
/** ETag value; the writers in this file compute it from the content with `etag()`. */
etag: string
/** HTTP status code stored alongside the content. */
status: number
/** Redirect target; `cachePageRenderResult` sets it on the 302 entries it records, otherwise it is omitted. */
location?: URL
}

export class Cache {
Expand Down Expand Up @@ -33,26 +35,48 @@ export class Cache {
return undefined
}

set(url: string, content: string, status: number): CacheEntry {
/**
 * Builds a cache entry for a rendered page and stores it under `url` when caching is
 * active (`this.enabled` or `runtimeConfig.cacheEverything`).
 *
 * @param url - key the page is stored under
 * @param content - page body; converted to a Buffer and used to compute the ETag
 * @param status - HTTP status to store with the entry
 * @param location - optional redirect target to store with the entry
 * @returns the entry that was built, whether or not it was stored
 */
set(url: string, content: string, status: number, location?: URL): CacheEntry {
  const entry: CacheEntry = { content: Buffer.from(content), etag: etag(content), status, location }
  const locationSuffix = entry.location ? ` location: ${entry.location}` : ''

  const cachingActive = this.enabled || runtimeConfig.cacheEverything
  if (!cachingActive) {
    // This message is emitted regardless of config.log.cache.
    console.log(`[cache] NOT caching page ${url} status: ${entry.status}${locationSuffix}`)
    return entry
  }

  this.cache.set(url, entry)
  if (config.log.cache) {
    console.log(`[cache] cached page ${url} status: ${entry.status}${locationSuffix}`)
  }
  return entry
}

setAsset(url: string, buffer: Buffer, status: number): CacheEntry {
/**
 * Builds a cache entry for an asset and stores it under `url` in the assets cache when
 * caching is active (`this.enabled` or `runtimeConfig.cacheEverything`).
 *
 * @param url - key the asset is stored under
 * @param buffer - asset bytes; stored as-is and used to compute the ETag
 * @param status - HTTP status to store with the entry
 * @param location - optional redirect target to store with the entry
 * @returns the entry that was built, whether or not it was stored
 */
setAsset(url: string, buffer: Buffer, status: number, location?: URL): CacheEntry {
  const entry: CacheEntry = { content: buffer, etag: etag(buffer), status, location }
  const locationSuffix = entry.location ? ` location: ${entry.location}` : ''

  const cachingActive = this.enabled || runtimeConfig.cacheEverything
  if (!cachingActive) {
    // This message is emitted regardless of config.log.cache.
    console.log(`[cache] NOT caching asset ${url} status: ${entry.status}${locationSuffix}`)
    return entry
  }

  this.assetsCache.set(url, entry)
  if (config.log.cache) {
    console.log(`[cache] cached asset ${url} status: ${entry.status}${locationSuffix}`)
  }
  return entry
}
Expand All @@ -66,4 +90,27 @@ export class Cache {

// Module-level Cache instance, constructed with `config.enableCache`; it is the module's default export.
const cache: Cache = new Cache(config.enableCache)

/**
 * Stores the result of rendering a page in the page cache.
 *
 * For every request in `redirects`, an empty 302 entry is cached under that request's
 * URL, pointing at the URL currently reported by the request's frame. The rendered HTML
 * is then cached under the last such frame URL, or under `url` when there were no
 * usable redirects.
 *
 * @param url - URL that was asked to be rendered
 * @param html - rendered markup
 * @param status - HTTP status to store with the rendered page
 * @param location - accepted for caller compatibility; not read by this function
 * @param redirects - redirect requests observed while rendering, if any
 * @returns the cache entry created for the rendered page
 */
export const cachePageRenderResult = ({
  url,
  html,
  status,
  redirects,
}: {
  url: string
  html: string
  status: number
  location?: string
  redirects?: Request[]
}): CacheEntry => {
  let endUrl = url
  for (const redirect of redirects ?? []) {
    const from = redirect.url()
    // frame() may return null (request not tied to a frame); without a target URL
    // there is nothing meaningful to record, so skip the hop instead of crashing.
    const to = redirect.frame()?.url()
    if (!to) {
      console.log(`[render-page] ${from} -> (no frame url, redirect not cached)`)
      continue
    }
    console.log(`[render-page] ${from} -> ${to}`)
    cache.set(from, '', 302, new URL(to))
    endUrl = to
  }
  return cache.set(endUrl, html, status)
}

export default cache

0 comments on commit d3e7f25

Please sign in to comment.