Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Grammar Include Support #457

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
text=lf
*.js linguist-vendored eol=lf
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"files.eol": "\n"
}
2 changes: 1 addition & 1 deletion packages/ohm-js/dist/ohm-grammar.js

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions packages/ohm-js/extras/extractExamples.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@
});

semantics.addOperation('examples', {
Document(includesIter, grammarIter)
{

Check failure on line 77 in packages/ohm-js/extras/extractExamples.js

View workflow job for this annotation

GitHub Actions / build (16.x)

Opening curly brace does not appear on the same line as controlling statement

Check failure on line 77 in packages/ohm-js/extras/extractExamples.js

View workflow job for this annotation

GitHub Actions / build (18.x)

Opening curly brace does not appear on the same line as controlling statement

Check failure on line 77 in packages/ohm-js/extras/extractExamples.js

View workflow job for this annotation

GitHub Actions / build (20.x)

Opening curly brace does not appear on the same line as controlling statement
includesIter.examples();

Check failure on line 78 in packages/ohm-js/extras/extractExamples.js

View workflow job for this annotation

GitHub Actions / build (16.x)

Trailing spaces not allowed

Check failure on line 78 in packages/ohm-js/extras/extractExamples.js

View workflow job for this annotation

GitHub Actions / build (18.x)

Trailing spaces not allowed

Check failure on line 78 in packages/ohm-js/extras/extractExamples.js

View workflow job for this annotation

GitHub Actions / build (20.x)

Trailing spaces not allowed
LiamRiddell marked this conversation as resolved.
Show resolved Hide resolved
return grammarIter.examples();
},
Includes(includesIter) {
return includesIter.children.flatMap(c => c.examples());
},
Include(_, _la, relativePathNode, _ra) {
return null;
},
LiamRiddell marked this conversation as resolved.
Show resolved Hide resolved
Grammars(grammarIter) {
return grammarIter.children.flatMap(c => c.examples());
},
Expand Down
68 changes: 66 additions & 2 deletions packages/ohm-js/src/buildGrammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import * as errors from './errors.js';
import {Grammar} from './Grammar.js';
import * as pexprs from './pexprs.js';
import { validateOption } from './util.js';

Check failure on line 7 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (16.x)

There should be no space after '{'

Check failure on line 7 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (16.x)

There should be no space before '}'

Check failure on line 7 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (18.x)

There should be no space after '{'

Check failure on line 7 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (18.x)

There should be no space before '}'

Check failure on line 7 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (20.x)

There should be no space after '{'

Check failure on line 7 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (20.x)

There should be no space before '}'

const superSplicePlaceholder = Object.create(pexprs.PExpr.prototype);

Expand All @@ -19,16 +20,79 @@
// `tree`, which is the concrete syntax tree of a user-written grammar.
// The grammar will be assigned into `namespace` under the name of the grammar
// as specified in the source.
export function buildGrammar(match, namespace, optOhmGrammarForTesting) {
export function buildGrammar(match, namespace, options, optOhmGrammarForTesting) {
const builder = new Builder();
let decl;
let currentRuleName;
let currentRuleFormals;
let overriding = false;
const metaGrammar = optOhmGrammarForTesting || ohmGrammar;

Check failure on line 30 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (16.x)

Trailing spaces not allowed

Check failure on line 30 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (18.x)

Trailing spaces not allowed

Check failure on line 30 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (20.x)

Trailing spaces not allowed
const fetchGrammarInternal = (path) => {

Check failure on line 31 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (16.x)

Unexpected parentheses around single function argument

Check failure on line 31 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (18.x)

Unexpected parentheses around single function argument

Check failure on line 31 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (20.x)

Unexpected parentheses around single function argument
if (!validateOption(options, 'fetchGrammar', 'function'))
{

Check failure on line 33 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (16.x)

Opening curly brace does not appear on the same line as controlling statement

Check failure on line 33 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (18.x)

Opening curly brace does not appear on the same line as controlling statement

Check failure on line 33 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (20.x)

Opening curly brace does not appear on the same line as controlling statement
throw new Error("Missing option 'fetchGrammar' of type `function` when trying to include.");
}

const grammarContent = options.fetchGrammar(path);

if (typeof grammarContent !== "string")

Check failure on line 39 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (16.x)

Strings must use singlequote

Check failure on line 39 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (18.x)

Strings must use singlequote

Check failure on line 39 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (20.x)

Strings must use singlequote
{

Check failure on line 40 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (16.x)

Opening curly brace does not appear on the same line as controlling statement

Check failure on line 40 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (18.x)

Opening curly brace does not appear on the same line as controlling statement

Check failure on line 40 in packages/ohm-js/src/buildGrammar.js

View workflow job for this annotation

GitHub Actions / build (20.x)

Opening curly brace does not appear on the same line as controlling statement
throw new Error(`Expected string from 'fetchGrammar' function, but got ${typeof(grammarContent)}`);
}

return grammarContent.trim();
}

// Re-parses the grammar source with every resolved include substituted in, and
// then visits the new match to build the grammars.
//
// `includes` is an array of `[includeSourceText, fetchedContent]` pairs.
// Throws a grammar syntax error if the substituted source fails to parse.
const rematchInput = includes => {
  let modifiedGrammarSource = match.input;

  for (const [sourceString, fileContent] of includes) {
    // Always substitute the include directive, even with an empty string, so
    // that the re-matched source no longer contains it (prevents an infinite
    // loop). A replacer function is used so that `$` sequences in the fetched
    // content are inserted literally instead of being interpreted as
    // replacement patterns (`$&`, `$$`, `$1`, ...).
    modifiedGrammarSource = modifiedGrammarSource.replace(sourceString, () => fileContent);
  }

  // Match with the same grammar that `helpers` was created from.
  const newMatch = metaGrammar.match(modifiedGrammarSource);

  if (newMatch.failed()) {
    throw errors.grammarSyntaxError(newMatch);
  }

  helpers(newMatch).visit();
};

// A visitor that produces a Grammar instance from the CST.
const helpers = metaGrammar.createSemantics().addOperation('visit', {
Document(includesNode, grammarsNode)
{
const resolvedIncludes = includesNode.visit();

// We need to rebuild the grammar match with the resolved includes substituted.
// Note: It's important we prevent any deeper visits in this tree as it's now pointless.
if (resolvedIncludes.length > 0) {
rematchInput(resolvedIncludes);
LiamRiddell marked this conversation as resolved.
Show resolved Hide resolved
return;
}

return grammarsNode.visit();
},
Includes(includesIter) {
const resolvedIncludes = [];

includesIter.children.flatMap(c => {
resolvedIncludes.push(c.visit());
})

return resolvedIncludes;
},
Include(_, _la, relativePathNode, _ra) {
return [
this.sourceString,
fetchGrammarInternal(relativePathNode.sourceString)
];
},
Grammars(grammarIter) {
return grammarIter.children.map(c => c.visit());
},
Expand All @@ -45,7 +109,6 @@
namespace[grammarName] = g;
return g;
},

SuperGrammar(_, n) {
const superGrammarName = n.visit();
if (superGrammarName === 'null') {
Expand Down Expand Up @@ -234,5 +297,6 @@
return this.sourceString;
},
});

return helpers(match).visit();
}
26 changes: 19 additions & 7 deletions packages/ohm-js/src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,27 @@ import * as util from './util.js';
import './semanticsDeferredInit.js'; // TODO: Clean this up.
Grammar.initApplicationParser(ohmGrammar, buildGrammar);

// Defaults that `grammars()` merges beneath the caller-supplied options.
// `fetchGrammar` has no default implementation, so it is left undefined here.
const DEFAULT_OPTIONS = {fetchGrammar: undefined};

// Returns true if `obj` is a Node.js Buffer. Detection goes through the
// `isBuffer` static on the value's constructor, so no direct reference to
// `Buffer` is needed. Null and undefined are reported as non-buffers instead
// of throwing a TypeError, which lets the caller raise its own error.
const isBuffer = obj =>
  obj !== null &&
  obj !== undefined &&
  !!obj.constructor &&
  typeof obj.constructor.isBuffer === 'function' &&
  obj.constructor.isBuffer(obj);

function compileAndLoad(source, namespace) {
const m = ohmGrammar.match(source, 'Grammars');
function compileAndLoad(source, namespace, options) {
const m = ohmGrammar.match(source);

if (m.failed()) {
throw errors.grammarSyntaxError(m);
}
return buildGrammar(m, namespace);

return buildGrammar(m, namespace, options);
}

export function grammar(source, optNamespace) {
const ns = grammars(source, optNamespace);
export function grammar(source, optNamespace, options = {}) {
const ns = grammars(source, optNamespace, options);

// Ensure that the source contained no more than one grammar definition.
const grammarNames = Object.keys(ns);
Expand All @@ -42,8 +48,10 @@ export function grammar(source, optNamespace) {
return ns[grammarNames[0]]; // Return the one and only grammar.
}

export function grammars(source, optNamespace) {
export function grammars(source, optNamespace, options = {}) {

const ns = Object.create(optNamespace || {});

if (typeof source !== 'string') {
// For convenience, detect Node.js Buffer objects and automatically call toString().
if (isBuffer(source)) {
Expand All @@ -54,7 +62,11 @@ export function grammars(source, optNamespace) {
);
}
}
compileAndLoad(source, ns);

const mergedOptions = Object.assign({}, DEFAULT_OPTIONS, options);

compileAndLoad(source, ns, mergedOptions);

return ns;
}

Expand Down
17 changes: 16 additions & 1 deletion packages/ohm-js/src/ohm-grammar.ohm
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
Ohm {

Document
= Includes Grammars

Includes
= Include*

Include
= include "'" relativeFilePath "'"

Grammars
= Grammar*

Expand Down Expand Up @@ -61,6 +70,12 @@ Ohm {
| terminal -- terminal
| "(" Alt ")" -- paren

include
= "include"

relativeFilePath
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for choosing to allow these specific characters? Maybe I'm wrong, but it seems like it may be overly restrictive.

An alternative would be for Ohm to be completely agnostic about paths, and accept any character inside the quotes.

Copy link
Author

@LiamRiddell LiamRiddell Aug 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is following security standards to always use an allowlist when you know the expected content format. If you want to accept the risk and open it to any character between the quotes we can do that.

= ~"'" (letter | digit | "-" | "\\" | "/" | ".")+

ruleDescr (a rule description)
= "(" ruleDescrText ")"

Expand Down Expand Up @@ -121,4 +136,4 @@ Ohm {
operator = "<:" | "=" | ":=" | "+=" | "*" | "+" | "?" | "~" | "&"

punctuation = "<" | ">" | "," | "--"
}
}
14 changes: 14 additions & 0 deletions packages/ohm-js/src/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,17 @@ export const uniqueId = (() => {
let idCounter = 0;
return prefix => '' + prefix + idCounter++;
})();

// Returns true if `options` has an own property named `optionName` whose value
// satisfies `typeof value === expectedType`; returns false otherwise.
//
// A null or undefined `options` is treated as "option not provided" rather
// than raising a TypeError.
export function validateOption(options, optionName, expectedType) {
  if (options === null || options === undefined) {
    return false;
  }

  // Call through Object.prototype so the check also works for objects without
  // a prototype and cannot be shadowed by an own `hasOwnProperty` property.
  if (!Object.prototype.hasOwnProperty.call(options, optionName)) {
    return false;
  }

  return typeof options[optionName] === expectedType;
}
68 changes: 59 additions & 9 deletions packages/ohm-js/test/test-ohm-syntax.js
Original file line number Diff line number Diff line change
Expand Up @@ -1382,22 +1382,22 @@ describe('bootstrap', test => {
const ns = ohm.grammars(ohmGrammarSource);

test('it can recognize arithmetic grammar', t => {
assertSucceeds(t, ns.Ohm.match(arithmeticGrammarSource, 'Grammar'));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were these changes necessary, or is this an unrelated cleanup?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we added a new Document rule to the grammar, the default rule is now Document. I feel the tests should validate against the new grammar as a whole rather than a sub-rule (e.g. Grammar) of the main grammar, hence the change and the resulting fixes to the array accesses below.

assertSucceeds(t, ns.Ohm.match(arithmeticGrammarSource));
});

test('it can recognize itself', t => {
assertSucceeds(t, ns.Ohm.match(ohmGrammarSource, 'Grammar'));
assertSucceeds(t, ns.Ohm.match(ohmGrammarSource));
});

test('it can produce a grammar that works', t => {
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource, 'Grammar'), {}, ns.Ohm);
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource), {}, {}, ns.Ohm);
assertSucceeds(
t,
g.match(ohmGrammarSource, 'Grammar'),
g[0].match(ohmGrammarSource),
'Ohm grammar can recognize itself',
);
const Arithmetic = buildGrammar(g.match(arithmeticGrammarSource, 'Grammar'), {}, g);
const s = Arithmetic.createSemantics().addAttribute('v', {
const Arithmetic = buildGrammar(g[0].match(arithmeticGrammarSource), {}, {}, g[0]);
const s = Arithmetic[0].createSemantics().addAttribute('v', {
exp(expr) {
return expr.v;
},
Expand Down Expand Up @@ -1438,13 +1438,63 @@ describe('bootstrap', test => {
return this.sourceString;
},
});
t.is(s(Arithmetic.match('10*(2+123)-4/5')).v, 1249.2);
t.is(s(Arithmetic[0].match('10*(2+123)-4/5')).v, 1249.2);
});

test('full bootstrap!', t => {
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource, 'Grammar'), {}, ns.Ohm);
const gPrime = buildGrammar(g.match(ohmGrammarSource, 'Grammar'), {}, g);
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource, 'Grammar'), {}, {}, ns.Ohm);
const gPrime = buildGrammar(g.match(ohmGrammarSource, 'Grammar'), {}, {}, g);
gPrime.namespaceName = g.namespaceName; // make their namespaceName properties the same
compareGrammars(t, g, gPrime);
});
});

// Tests for `include` directives. Each test supplies a `fetchGrammar` option
// that maps an include path to grammar source text.
describe('include', test => {
  test('include', t => {
    const source = `
include 'test.ohm'
`;
    const g = ohm.grammar(source, {}, {
      fetchGrammar: () => 'G { X = "G" }',
    });

    assertSucceeds(t, g.match('G'));
  });

  test('multiple', t => {
    const source = `
include 'file-a.ohm'
include 'file-b.ohm'
`;
    const grammars = ohm.grammars(source, {}, {
      fetchGrammar: path => {
        switch (path) {
          case 'file-a.ohm':
            return 'FileA { A = "A" }';

          case 'file-b.ohm':
            return 'FileB { B = "B" }';

          default:
            return '';
        }
      },
    });

    assertSucceeds(t, grammars.FileA.match('A'));
    assertSucceeds(t, grammars.FileB.match('B'));
  });

  test('supergrammar', t => {
    const source = `
include 'supergrammar.ohm'

ChildGrammar <: SuperGrammar {
S += "C"
}
`;
    // `ohm.grammars` returns a namespace of grammars, not a single grammar.
    const ns = ohm.grammars(source, {}, {
      fetchGrammar: () => 'SuperGrammar { S = "S" }',
    });

    assertSucceeds(t, ns.ChildGrammar.match('C'));
    assertSucceeds(t, ns.ChildGrammar.match('S'));
  });
});
2 changes: 2 additions & 0 deletions ~/.gitconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[core]
autocrlf = false
Loading