Skip to content

Commit

Permalink
feat: add goodbits (#187)
Browse files Browse the repository at this point in the history
  • Loading branch information
vasco-santos committed Sep 2, 2022
1 parent 3d0cbe0 commit 9b8bae8
Show file tree
Hide file tree
Showing 13 changed files with 380 additions and 98 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/cron-goodbits-sync.yml
@@ -0,0 +1,36 @@
# Syncs the goodbits list into the edge-gateway KV store, once per target
# environment in the matrix below.
name: Cron sync goodbits list

on:
  schedule:
    # Minute 13 of hours 0, 5, 10, 15 and 20.
    - cron: '13 0,5,10,15,20 * * *'
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      # Path filters are glob patterns matched against changed file paths, so
      # a bare directory name does not match the files inside it; `/**` does.
      - 'packages/edge-gateway/**'

jobs:
  update:
    name: Sync goodbits with KV store
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Each value is passed to the job as ENV and selects the wrangler
        # environment used by the sync script.
        env: ['staging', 'production']
    timeout-minutes: 5
    steps:
      - uses: actions/checkout@v2
      - uses: pnpm/action-setup@v2.0.1
        with:
          version: 6.32.x
      - uses: actions/setup-node@v2
        with:
          node-version: 16
          cache: 'pnpm'
      - run: pnpm install
      - name: Run job
        env:
          ENV: ${{ matrix.env }}
          GH_TOKEN: ${{ secrets.GH_TOKEN }}
          CF_API_TOKEN: ${{ secrets.CF_GATEWAY_TOKEN }}
        run: pnpm --filter cron start:goodbits-sync
2 changes: 1 addition & 1 deletion .github/workflows/cron-metrics.yml
Expand Up @@ -36,4 +36,4 @@ jobs:
STAGING_RO_DATABASE_CONNECTION: ${{ secrets.STAGING_DATABASE_CONNECTION }} # no replica for staging
PROD_DATABASE_CONNECTION: ${{ secrets.PROD_DATABASE_CONNECTION }}
PROD_RO_DATABASE_CONNECTION: ${{ secrets.PROD_RO_DATABASE_CONNECTION }}
run: pnpm --filter cron start
run: pnpm --filter cron start:metrics
8 changes: 6 additions & 2 deletions packages/cron/package.json
Expand Up @@ -7,15 +7,19 @@
"type": "module",
"scripts": {
"start": "run-s start:*",
"start:metrics": "NODE_TLS_REJECT_UNAUTHORIZED=0 node src/bin/metrics.js"
"start:metrics": "NODE_TLS_REJECT_UNAUTHORIZED=0 node src/bin/metrics.js",
"start:goodbits-sync": "node src/bin/goodbits.js"
},
"author": "Vasco Santos",
"license": "(Apache-2.0 OR MIT)",
"dependencies": {
"@web-std/fetch": "4.1.0",
"debug": "^4.3.1",
"dotenv": "^9.0.2",
"iterable-ndjson": "^1.1.0",
"p-settle": "^5.0.0",
"pg": "^8.7.1"
"pg": "^8.7.1",
"toml": "^3.0.0"
},
"devDependencies": {
"npm-run-all": "^4.1.5"
Expand Down
13 changes: 13 additions & 0 deletions packages/cron/src/bin/goodbits.js
@@ -0,0 +1,13 @@
#!/usr/bin/env node

import { sync } from '../jobs/goodbits.js'
import { envConfig } from '../lib/env.js'

/**
 * Entry point of the goodbits sync job.
 *
 * Runs the sync against the environment named by the ENV environment
 * variable, falling back to 'dev' when it is unset or empty.
 */
async function main() {
  const env = process.env.ENV || 'dev'
  await sync({ env })
}

envConfig()
main()
143 changes: 143 additions & 0 deletions packages/cron/src/jobs/goodbits.js
@@ -0,0 +1,143 @@
import fetch, { Headers } from '@web-std/fetch'
import path from 'path'
import toml from 'toml'
// @ts-ignore no types in module
import ndjson from 'iterable-ndjson'

/**
* @typedef {{ id: string, title: string }} Namespace
* @typedef {{ name: string, metadata: any }} Key
* @typedef {{ key: string, value: any, metadata?: any }} BulkWritePair
*/

// URLs of the NDJSON goodbits lists; sync() fetches and writes each one in order.
const GOODBITS_SOURCES = [
'https://raw.githubusercontent.com/nftstorage/goodbits/main/list.ndjson',
]

// import.meta.url is this module's URL (not a plain filesystem path); the two
// dirname() calls strip the file name and its parent directory from it.
const rootDir = path.dirname(path.dirname(import.meta.url))
// Location of the edge-gateway wrangler.toml, resolved relative to rootDir.
// sync() hands this value to getWranglerToml(), which loads it with fetch().
const wranglerConfigPath = path.join(
rootDir,
'../../edge-gateway/wrangler.toml'
)

/**
 * Sync every goodbits source list into the GOODBITSLIST KV namespace of the
 * given wrangler environment.
 *
 * Reads the CF_API_TOKEN and GH_TOKEN environment variables, then looks up the
 * Cloudflare account id and the KV namespace id in the `env.<env>` section of
 * the edge-gateway wrangler.toml. Each list entry is stored under its CID with
 * `{ tags }` as the value.
 *
 * @param {{ env: string } } opts
 */
export async function sync({ env }) {
  const cloudflareToken = mustGetEnv('CF_API_TOKEN')
  const githubToken = mustGetEnv('GH_TOKEN')

  // Resolve the wrangler section for the requested environment.
  const parsedToml = await getWranglerToml(wranglerConfigPath)
  const envSection = parsedToml.env[env]
  if (!envSection) {
    throw new Error(`missing wrangler configuration for env: ${env}`)
  }
  console.log(`🧩 using wrangler config: ${wranglerConfigPath}`)

  const { account_id: accountId } = envSection
  if (!accountId) {
    throw new Error(`missing Cloudflare account_id in env: ${env}`)
  }
  console.log(`🏕 using env: ${env} (${accountId})`)

  // Locate the KV namespace bound as GOODBITSLIST in that environment.
  const namespace = (envSection.kv_namespaces || []).find(
    ({ binding }) => binding === 'GOODBITSLIST'
  )
  if (!namespace) {
    throw new Error('missing binding in kv_namespaces: GOODBITSLIST')
  }
  console.log(`🪢 using KV binding: GOODBITSLIST (${namespace.id})`)

  for (const sourceUrl of GOODBITS_SOURCES) {
    console.log(`🦴 fetching ${sourceUrl}`)
    const entries = await getGoodbitsList(sourceUrl, githubToken)

    // Collect the whole list before writing so the entry count can be logged.
    const pairs = []
    for await (const entry of entries) {
      pairs.push({ key: entry.cid, value: { tags: entry.tags } })
    }

    console.log(`📝 writing ${pairs.length} entries`)
    await writeKVMulti(cloudflareToken, accountId, namespace.id, pairs)
  }

  console.log('✅ Done')
}

/**
 * Write key/value pairs to a Cloudflare KV namespace through the bulk
 * endpoint, sending them in sequential batches of at most 10000 pairs.
 *
 * Every value is JSON-encoded before being sent. The first batch whose
 * response reports `success: false` aborts the write with an error built from
 * the first reported error, when there is one.
 *
 * @param {string} apiToken Cloudflare API token
 * @param {string} accountId Cloudflare account ID
 * @param {string} nsId KV namespace ID
 * @param {Array<BulkWritePair>} kvs
 * @returns {Promise<void>}
 */
async function writeKVMulti(apiToken, accountId, nsId, kvs) {
  const endpoint = `https://api.cloudflare.com/client/v4/accounts/${accountId}/storage/kv/namespaces/${nsId}/bulk`
  const batchSize = 10000

  // Encode all values up front; `value` keeps its position among the keys.
  const encoded = kvs.map((pair) =>
    Object.assign({}, pair, { value: JSON.stringify(pair.value) })
  )

  let offset = 0
  while (offset < encoded.length) {
    const batch = encoded.slice(offset, offset + batchSize)
    offset += batchSize

    const res = await fetch(endpoint, {
      method: 'PUT',
      headers: {
        Authorization: `Bearer ${apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(batch),
    })

    const { success, errors } = await res.json()
    if (success) {
      continue
    }

    const firstError = Array.isArray(errors) && errors[0]
    const message = firstError
      ? `${firstError.code}: ${firstError.message}`
      : 'failed to write to KV'
    throw new Error(message)
  }
}

/**
 * Download a goodbits list and parse it as NDJSON.
 *
 * The request is authenticated with the given GitHub token and asks
 * intermediaries not to serve a cached copy.
 *
 * @param {string} url
 * @param {string} ghToken
 */
async function getGoodbitsList(url, ghToken) {
  const headerValues = {
    authorization: `token ${ghToken}`,
    'cache-control': 'no-cache',
    pragma: 'no-cache',
  }
  const headers = new Headers()
  for (const [name, value] of Object.entries(headerValues)) {
    headers.append(name, value)
  }

  const res = await fetch(url, { headers })
  if (res.ok) {
    return ndjson.parse(await res.text())
  }
  throw new Error(`unexpected status fetching goodbits list: ${res.status}`)
}

/**
 * Fetch a wrangler.toml and return its parsed contents.
 *
 * @param {string} url location of the wrangler.toml, as accepted by fetch()
 * @throws {Error} when the response status is not OK
 */
async function getWranglerToml(url) {
  const response = await fetch(url)
  if (response.ok) {
    const source = await response.text()
    return toml.parse(source)
  }
  throw new Error(
    `unexpected status fetching wrangler.toml: ${response.status}`
  )
}

/**
 * Read a required environment variable.
 *
 * @param {string} key name of the environment variable
 * @returns {string} the variable's value
 * @throws {Error} when the variable is unset or empty
 */
function mustGetEnv(key) {
  const value = process.env[key]
  // The guard must fire when the value is MISSING; the previous check was
  // inverted, so it threw for every configured variable and returned
  // undefined for absent ones.
  if (!value) {
    throw new Error(`missing environment variable: ${key}`)
  }
  return value
}
1 change: 1 addition & 0 deletions packages/edge-gateway/package.json
Expand Up @@ -16,6 +16,7 @@
"dependencies": {
"itty-router": "^2.4.5",
"multiformats": "^9.6.4",
"p-retry": "^5.0.0",
"toucan-js": "^2.6.1"
},
"devDependencies": {
Expand Down
1 change: 1 addition & 0 deletions packages/edge-gateway/src/bindings.d.ts
Expand Up @@ -11,6 +11,7 @@ export interface EnvInput {
LOKI_TOKEN?: string
EDGE_GATEWAY: Fetcher
GATEWAY_HOSTNAME: string
GOODBITSLIST: KVNamespace
}

export interface EnvTransformed {
Expand Down
40 changes: 38 additions & 2 deletions packages/edge-gateway/src/gateway.js
@@ -1,5 +1,9 @@
/* eslint-env serviceworker, browser */

import pRetry from 'p-retry'

const GOODBITS_BYPASS_TAG = 'https://nftstorage.link/tags/bypass-default-csp'

/**
* Handle gateway requests
*
Expand All @@ -24,7 +28,17 @@ export async function gatewayGet(request, env) {
},
})

return getTransformedResponseWithCustomHeaders(response)
// Validation layer - CSP bypass
const resourceCid = decodeURIComponent(response.headers.get('etag') || '')
const goodbitsTags = await getTagsFromGoodbitsList(
env.GOODBITSLIST,
resourceCid
)
if (goodbitsTags.includes(GOODBITS_BYPASS_TAG)) {
return response
}

return getTransformedResponseWithCspHeaders(response)
}

/**
Expand All @@ -33,7 +47,7 @@ export async function gatewayGet(request, env) {
*
* @param {Response} response
*/
function getTransformedResponseWithCustomHeaders(response) {
function getTransformedResponseWithCspHeaders(response) {
const clonedResponse = new Response(response.body, response)

clonedResponse.headers.set(
Expand All @@ -43,3 +57,25 @@ function getTransformedResponseWithCustomHeaders(response) {

return clonedResponse
}

/**
 * Look up a CID in the goodbits list and return the tags stored for it.
 *
 * Resolves to an empty array when no datastore or CID is given, when the CID
 * has no entry, or when the entry's `tags` field is not an array.
 *
 * @param {KVNamespace} datastore
 * @param {string} cid
 */
async function getTagsFromGoodbitsList(datastore, cid) {
  if (datastore && cid) {
    // TODO: Remove once https://github.com/nftstorage/nftstorage.link/issues/51 is fixed
    const rawEntry = await pRetry(() => datastore.get(cid), { retries: 5 })

    if (rawEntry) {
      const parsedEntry = JSON.parse(rawEntry)
      if (Array.isArray(parsedEntry.tags)) {
        return parsedEntry.tags
      }
    }
  }

  return []
}
18 changes: 18 additions & 0 deletions packages/edge-gateway/test/content_security_policy.spec.js
@@ -0,0 +1,18 @@
import { test, getMiniflare } from './utils/setup.js'

// Per-test setup: every test gets its own context object holding a Miniflare
// instance, so state written by one test is not shared through t.context.
test.beforeEach((t) => {
// Create a new Miniflare environment for each test
t.context = {
mf: getMiniflare(),
}
})

// Requests a CID through a subdomain-style URL (<cid>.ipfs.localhost) and
// checks that the expected body text comes back.
test('Gets content from binding', async (t) => {
const { mf } = t.context

const response = await mf.dispatchFetch(
'https://bafkreidyeivj7adnnac6ljvzj2e3rd5xdw3revw4da7mx2ckrstapoupoq.ipfs.localhost:8787'
)
// Wait for work the worker registered via waitUntil() before asserting.
await response.waitUntil()
t.is(await response.text(), 'Hello nftstorage.link! 😎')
})
50 changes: 50 additions & 0 deletions packages/edge-gateway/test/index.spec.js
Expand Up @@ -15,4 +15,54 @@ test('Gets content from binding', async (t) => {
)
await response.waitUntil()
t.is(await response.text(), 'Hello nftstorage.link! 😎')

// CSP
const csp = response.headers.get('content-security-policy') || ''
t.true(csp.includes("default-src 'self' 'unsafe-inline' 'unsafe-eval'"))
t.true(csp.includes('blob: data'))
t.true(csp.includes("form-action 'self' ; navigate-to 'self';"))
})

// A CID whose goodbits entry carries the bypass-default-csp tag must be served
// without a content-security-policy header.
test('Gets content with no csp header when goodbits csp bypass tag exists', async (t) => {
const { mf } = t.context
const cid = 'bafkreidyeivj7adnnac6ljvzj2e3rd5xdw3revw4da7mx2ckrstapoupor'

// add the CID to the goodbits list
// (the stored value is a JSON string with a `tags` array)
const goodbitsListKv = await mf.getKVNamespace('GOODBITSLIST')
await goodbitsListKv.put(
cid,
JSON.stringify({
tags: ['https://nftstorage.link/tags/bypass-default-csp'],
})
)

const response = await mf.dispatchFetch(`https://${cid}.ipfs.localhost:8787`)
await response.waitUntil()
t.is(await response.text(), 'Hello nftstorage.link! 😎')

// CSP does not exist
const csp = response.headers.get('content-security-policy')
t.falsy(csp)
})

// A CID that is in the goodbits list but only has an unrelated tag must still
// receive the content-security-policy header.
test('Gets content with csp header when goodbits csp bypass tag does not exist', async (t) => {
const { mf } = t.context
const cid = 'bafkreidyeivj7adnnac6ljvzj2e3rd5xdw3revw4da7mx2ckrstapoupos'

// add the CID to the goodbits list
// (with a tag other than the bypass tag)
const goodbitsListKv = await mf.getKVNamespace('GOODBITSLIST')
await goodbitsListKv.put(
cid,
JSON.stringify({
tags: ['foo-bar-tag'],
})
)

const response = await mf.dispatchFetch(`https://${cid}.ipfs.localhost:8787`)
await response.waitUntil()
t.is(await response.text(), 'Hello nftstorage.link! 😎')

// CSP exists
const csp = response.headers.get('content-security-policy')
t.truthy(csp)
})
8 changes: 7 additions & 1 deletion packages/edge-gateway/test/scripts/gateway.js
Expand Up @@ -7,7 +7,13 @@ export default {
// We need to perform requests as path based per localhost subdomain resolution
const subdomainCid = getCidFromSubdomainUrl(reqUrl)
if (subdomainCid) {
return new Response('Hello nftstorage.link! 😎')
const headers = new Headers({
etag: subdomainCid,
})
return new Response('Hello nftstorage.link! 😎', {
headers,
status: 200,
})
}

throw new Error('no cid in request')
Expand Down

0 comments on commit 9b8bae8

Please sign in to comment.