Skip to content

Commit

Permalink
mirror: embed GraphQL schema into SQL
Browse files Browse the repository at this point in the history
Summary:
This commit augments the `Mirror` constructor to turn the provided
GraphQL schema into a SQL schema, with which it initializes the backing
database. The schema is roughly as originally described in #622, with
some changes (primarily: we omit `WITHOUT ROWID`; we add indexes; we
store `total_count` on connections; and we use milliseconds instead of
seconds for epoch time).

Test Plan:
Unit tests included, with full coverage; run `yarn unit`.

wchargin-branch: mirror-sql-schema
  • Loading branch information
wchargin committed Sep 18, 2018
1 parent fa81c4e commit 3c5daf8
Show file tree
Hide file tree
Showing 2 changed files with 245 additions and 20 deletions.
235 changes: 215 additions & 20 deletions src/graphql/mirror.js
Expand Up @@ -3,6 +3,7 @@
import type Database from "better-sqlite3";
import stringify from "json-stable-stringify";

import dedent from "../util/dedent";
import * as Schema from "./schema";

/**
Expand Down Expand Up @@ -35,6 +36,39 @@ export class Mirror {
this._initialize();
}

/**
* Embed the GraphQL schema into the database, initializing it for use
* as a mirror.
*
* We store the data for an object across three tables, depending on
* the field type:
*
* - Connections are stored in the `connections` table.
* - Node references are stored in the `links` table.
* - Primitive data is stored in a type-specific data table.
*
* We refer to node and primitive data together as "own data", because
* this is the data that can be queried uniformly for all elements of
* a type; querying connection data, in contrast, requires the
* object-specific end cursor.
*
* The aforementioned tables are each keyed by object ID. An object's
* metadata appears in the `objects` table, including the object's
* typename and the last update for the object's own-data. Each
* connection has its own last update value, because connections can
* be updated independently of each other and of own-data.
*
* Note that any object in the database should have entries in the
* `connections` and `links` tables even if the object has never been
* updated.
*
* This method should only be invoked once, at construction time.
*
* If the database has already been initialized with the same schema
* and version, no action is taken and no error is thrown. If the
* database has been initialized with a different schema or version,
* the database is left unchanged, and an error is thrown.
*/
_initialize() {
// The following version number must be updated if there is any
// change to the way in which a GraphQL schema is mapped to a SQL
Expand All @@ -43,26 +77,170 @@ export class Mirror {
// it requires bumping the version, bump it: requiring some extra
// one-time cache resets is okay; doing the wrong thing is not.
const blob = stringify({version: "MIRROR_v1", schema: this._schema});
// We store the metadata in a singleton table `meta`, whose unique row
// has primary key `0`. Only the first ever insert will succeed; we
// are locked into the first schema.
this._db
.prepare(
"CREATE TABLE IF NOT EXISTS meta\n" +
"(zero INTEGER PRIMARY KEY, schema TEXT NOT NULL)"
)
.run();
this._db
.prepare("INSERT OR IGNORE INTO meta (zero, schema) VALUES (0, ?)")
.run(blob);
const result = this._db
.prepare("SELECT COUNT(1) AS n FROM meta WHERE schema = ?")
.get(blob);
if (result.n !== 1) {
throw new Error(
"Database already populated with incompatible schema or version"
);
}
const db = this._db;
_inTransaction(db, () => {
// We store the metadata in a singleton table `meta`, whose unique row
// has primary key `0`. Only the first ever insert will succeed; we
// are locked into the first schema.
db.prepare(
dedent`\
CREATE TABLE IF NOT EXISTS meta (
zero INTEGER PRIMARY KEY,
schema TEXT NOT NULL
)
`
).run();

const existingBlob: string | void = db
.prepare("SELECT schema FROM meta")
.pluck()
.get();
if (existingBlob === blob) {
// Already set up; nothing to do.
return;
} else if (existingBlob !== undefined) {
throw new Error(
"Database already populated with incompatible schema or version"
);
}
db.prepare("INSERT INTO meta (zero, schema) VALUES (0, ?)").run(blob);

// First, create those tables that are independent of the schema.
const structuralTables = [
// Time is stored in milliseconds since 1970-01-01T00:00Z, with
// ECMAScript semantics (leap seconds ignored, exactly 86.4M ms
// per day, etc.).
//
// We use milliseconds rather than seconds because (a) this
// simplifies JavaScript interop to a simple `+new Date()` and
// `new Date(value)`, and (b) this avoids a lurking Year 2038
// problem by surfacing >32-bit values immediately. (We have
// over 200,000 years before the number of milliseconds since
// epoch is more than `Number.MAX_SAFE_INTEGER`.)
dedent`\
CREATE TABLE updates (
rowid INTEGER PRIMARY KEY,
time_epoch_millis INTEGER NOT NULL
)
`,
dedent`\
CREATE TABLE objects (
id TEXT NOT NULL PRIMARY KEY,
typename TEXT NOT NULL,
last_update INTEGER,
FOREIGN KEY(last_update) REFERENCES updates(rowid)
)
`,
dedent`\
CREATE TABLE links (
rowid INTEGER PRIMARY KEY,
parent_id TEXT NOT NULL,
fieldname TEXT NOT NULL,
child_id TEXT,
UNIQUE(parent_id, fieldname),
FOREIGN KEY(parent_id) REFERENCES objects(id),
FOREIGN KEY(child_id) REFERENCES objects(id)
)
`,
dedent`\
CREATE UNIQUE INDEX idx_links__parent_id__fieldname
ON links (parent_id, fieldname)
`,
dedent`\
CREATE TABLE connections (
rowid INTEGER PRIMARY KEY,
object_id TEXT NOT NULL,
fieldname TEXT NOT NULL,
last_update INTEGER,
-- Each of the below fields must be NULL if the connection
-- has never been updated.
total_count INTEGER,
has_next_page BOOLEAN,
-- The end cursor may be NULL if no items are in the connection;
-- this is a consequence of GraphQL and the Relay pagination spec.
-- (It may also be NULL if the connection was never updated.)
end_cursor TEXT,
CHECK((last_update IS NULL) = (total_count IS NULL)),
CHECK((last_update IS NULL) = (has_next_page IS NULL)),
CHECK((last_update IS NULL) <= (end_cursor IS NULL)),
UNIQUE(object_id, fieldname),
FOREIGN KEY(object_id) REFERENCES objects(id),
FOREIGN KEY(last_update) REFERENCES updates(rowid)
)
`,
dedent`\
CREATE UNIQUE INDEX idx_connections__object_id__fieldname
ON connections (object_id, fieldname)
`,
dedent`\
CREATE TABLE connection_entries (
rowid INTEGER PRIMARY KEY,
connection_id INTEGER NOT NULL,
idx INTEGER NOT NULL, -- impose an ordering
child_id TEXT NOT NULL,
UNIQUE(connection_id, idx),
FOREIGN KEY(connection_id) REFERENCES connections(rowid),
FOREIGN KEY(child_id) REFERENCES objects(id)
)
`,
dedent`\
CREATE INDEX idx_connection_entries__connection_id
ON connection_entries (connection_id)
`,
];
for (const sql of structuralTables) {
db.prepare(sql).run();
}

// Then, create primitive-data tables, which depend on the schema.
const schema = this._schema;
for (const typename of Object.keys(schema)) {
const nodeType = schema[typename];
switch (nodeType.type) {
case "UNION":
// Unions exist at the type level only; they have no physical
// representation.
break;
case "OBJECT": {
if (!isSqlSafe(typename)) {
throw new Error("invalid object type name: " + typename);
}
const primitiveFieldNames: Schema.Fieldname[] = [];
for (const fieldname of Object.keys(nodeType.fields)) {
const field = nodeType.fields[fieldname];
switch (field.type) {
case "ID": // handled separately
break;
case "NODE": // goes in `links` table
break;
case "CONNECTION": // goes in `connections` table
break;
case "PRIMITIVE":
if (!isSqlSafe(fieldname)) {
throw new Error("invalid field name: " + fieldname);
}
primitiveFieldNames.push(fieldname);
break;
// istanbul ignore next
default:
throw new Error((field.type: empty));
}
}
const tableName = `"data_${typename}"`;
const tableSpec = [
"id TEXT NOT NULL PRIMARY KEY",
...primitiveFieldNames.map((fieldname) => `"${fieldname}"`),
"FOREIGN KEY(id) REFERENCES objects(id)",
].join(", ");
db.prepare(`CREATE TABLE ${tableName} (${tableSpec})`).run();
break;
}
// istanbul ignore next
default:
throw new Error((nodeType.type: empty));
}
}
});
}
}

Expand Down Expand Up @@ -103,3 +281,20 @@ export function _inTransaction<R>(db: Database, fn: () => R): R {
}
}
}

/*
 * Conservative allowlist check for text that has to be spliced directly
 * into SQL source because the position (for example, a table or column
 * name) cannot be filled by a bound parameter of a prepared statement.
 *
 * Returns `true` exactly when `token` contains no character other than
 * ASCII letters, ASCII digits, and underscores; such a token may be
 * safely included in a SQL identifier. (The empty string contains no
 * disallowed character, so it is accepted too.) Returns `false`
 * otherwise. A `false` result does not prove that the token is
 * dangerous: the check deliberately rejects more than strictly needed.
 *
 * Examples: "col" yields `true`, while "'); DROP TABLE objects; --"
 * yields `false`.
 */
function isSqlSafe(token) {
  // `search` reports the index of the first disallowed character, or
  // -1 when every character is on the allowlist.
  const firstUnsafeIndex = token.search(/[^A-Za-z0-9_]/);
  return firstUnsafeIndex === -1;
}
30 changes: 30 additions & 0 deletions src/graphql/mirror.test.js
Expand Up @@ -112,6 +112,36 @@ describe("graphql/mirror", () => {
expect(() => new Mirror(db, schema0)).not.toThrow();
expect(fs.readFileSync(filename).toJSON()).toEqual(data);
});

it("rejects a schema with SQL-unsafe type name", () => {
  // The object type's name contains hyphens, which are not permitted in
  // names that get interpolated into SQL, so construction must fail.
  const unsafeTypeSchema = Schema.schema({
    "Non-Word-Characters": Schema.object({id: Schema.id()}),
  });
  const db = new Database(":memory:");
  const construct = () => new Mirror(db, unsafeTypeSchema);
  expect(construct).toThrow("invalid object type name");
});

it("rejects a schema with SQL-unsafe field name", () => {
  // The type name `A` is fine; the hyphenated primitive field name is
  // what must be rejected.
  const unsafeFieldSchema = Schema.schema({
    A: Schema.object({
      id: Schema.id(),
      "Non-Word-Characters": Schema.primitive(),
    }),
  });
  const db = new Database(":memory:");
  const construct = () => new Mirror(db, unsafeFieldSchema);
  expect(construct).toThrow("invalid field name");
});

it("allows specifying a good schema after rejecting one", () => {
  // Both constructions share one database: the failed first attempt
  // must not prevent a later attempt with a valid schema.
  const rejectedSchema = Schema.schema({
    A: Schema.object({
      id: Schema.id(),
      "Non-Word-Characters": Schema.primitive(),
    }),
  });
  const db = new Database(":memory:");
  const constructRejected = () => new Mirror(db, rejectedSchema);
  const constructAccepted = () => new Mirror(db, buildGithubSchema());
  expect(constructRejected).toThrow("invalid field name");
  expect(constructAccepted).not.toThrow();
});
});
});

Expand Down

0 comments on commit 3c5daf8

Please sign in to comment.