Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Grammar Include Support #457

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
text=lf
*.js linguist-vendored eol=lf
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"files.eol": "\n"
}
2 changes: 1 addition & 1 deletion packages/ohm-js/dist/ohm-grammar.js

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions packages/ohm-js/extras/extractExamples.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ const semantics = grammars.OhmWithExamples.createSemantics().addOperation('hasEx
});

semantics.addOperation('examples', {
Document(_, grammarsNode)
{
return grammarsNode.examples();
},
Grammars(grammarIter) {
return grammarIter.children.flatMap(c => c.examples());
},
Expand Down
47 changes: 44 additions & 3 deletions packages/ohm-js/src/buildGrammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import * as common from './common.js';
import * as errors from './errors.js';
import {Grammar} from './Grammar.js';
import * as pexprs from './pexprs.js';
import {validateOption} from './util.js';

const superSplicePlaceholder = Object.create(pexprs.PExpr.prototype);

Expand All @@ -15,20 +16,61 @@ function namespaceHas(ns, name) {
return false;
}

// Matches `source` against the Ohm meta-grammar and, on success, hands the
// match to `buildGrammar` together with `namespace` and `options`.
// Throws the error produced by `errors.grammarSyntaxError` when the match fails.
export function compileAndLoad(source, namespace, options) {
  const matchResult = ohmGrammar.match(source);
  const parsedOk = !matchResult.failed();

  if (parsedOk) {
    return buildGrammar(matchResult, namespace, options);
  }

  throw errors.grammarSyntaxError(matchResult);
}

// Returns a Grammar instance (i.e., an object with a `match` method) for
// `tree`, which is the concrete syntax tree of a user-written grammar.
// The grammar will be assigned into `namespace` under the name of the grammar
// as specified in the source.
export function buildGrammar(match, namespace, optOhmGrammarForTesting) {
export function buildGrammar(match, namespace, options, optOhmGrammarForTesting) {
const builder = new Builder();
let decl;
let currentRuleName;
let currentRuleFormals;
let overriding = false;
const metaGrammar = optOhmGrammarForTesting || ohmGrammar;

// Resolves the source text for an `include` by calling the user-supplied
// `options.fetchGrammar(path)` callback and returns it with surrounding
// whitespace trimmed.
// Throws an Error if no `fetchGrammar` function was provided, or if the
// callback returns anything other than a string.
const fetchGrammarInternal = path => {
  // `options` itself may be missing when the caller passed none; treat that
  // the same as a missing `fetchGrammar` so the user sees the intended message.
  if (options == null || !validateOption(options, 'fetchGrammar', 'function')) {
    throw new Error("Missing option 'fetchGrammar' of type `function` when trying to include.");
  }

  const grammarContent = options.fetchGrammar(path);

  if (typeof grammarContent !== 'string') {
    throw new Error(`Expected string from 'fetchGrammar' function, but got ${typeof grammarContent}`);
  }

  // An all-whitespace result becomes the empty string.
  return grammarContent.trim();
};

// A visitor that produces a Grammar instance from the CST.
const helpers = metaGrammar.createSemantics().addOperation('visit', {
Document(includesNode, grammarsNode)
{
includesNode.visit();
return grammarsNode.visit();
},
Includes(includesIter) {
includesIter.children.flatMap(c => c.visit());
},
Include(_, _la, relativePathNode, _ra) {
const fileContent = fetchGrammarInternal(relativePathNode.sourceString);

if (fileContent)
{
compileAndLoad(fileContent, namespace, options);
}
},
Grammars(grammarIter) {
return grammarIter.children.map(c => c.visit());
},
Expand All @@ -45,7 +87,6 @@ export function buildGrammar(match, namespace, optOhmGrammarForTesting) {
namespace[grammarName] = g;
return g;
},

SuperGrammar(_, n) {
const superGrammarName = n.visit();
if (superGrammarName === 'null') {
Expand All @@ -57,7 +98,6 @@ export function buildGrammar(match, namespace, optOhmGrammarForTesting) {
decl.withSuperGrammar(namespace[superGrammarName]);
}
},

Rule_define(n, fs, d, _, b) {
currentRuleName = n.visit();
currentRuleFormals = fs.children.map(c => c.visit())[0] || [];
Expand Down Expand Up @@ -234,5 +274,6 @@ export function buildGrammar(match, namespace, optOhmGrammarForTesting) {
return this.sourceString;
},
});

return helpers(match).visit();
}
28 changes: 15 additions & 13 deletions packages/ohm-js/src/main.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import ohmGrammar from '../dist/ohm-grammar.js';
import {buildGrammar} from './buildGrammar.js';
import {buildGrammar,compileAndLoad} from './buildGrammar.js';
import * as common from './common.js';
import * as errors from './errors.js';
import {Grammar} from './Grammar.js';
import * as pexprs from './pexprs.js';
import * as util from './util.js';
Expand All @@ -11,21 +10,18 @@ import * as util from './util.js';
import './semanticsDeferredInit.js'; // TODO: Clean this up.
Grammar.initApplicationParser(ohmGrammar, buildGrammar);

// Baseline option values; `grammars()` merges the caller's options over these.
// `fetchGrammar` is left undefined here, i.e. there is no default callback.
const DEFAULT_OPTIONS = {
fetchGrammar: undefined
};

// Duck-type check for Buffer-like objects: true when the value's constructor
// exposes an `isBuffer` function that accepts the value.
// Returns false for null/undefined instead of throwing, so callers can report
// their own error for unsupported input.
const isBuffer = obj =>
  obj != null &&
  !!obj.constructor &&
  typeof obj.constructor.isBuffer === 'function' &&
  obj.constructor.isBuffer(obj);

// Matches `source` against the Ohm meta-grammar starting at the `Grammars`
// rule, throws the error from `errors.grammarSyntaxError` if the match fails,
// and otherwise passes the match and `namespace` to `buildGrammar`.
function compileAndLoad(source, namespace) {
const m = ohmGrammar.match(source, 'Grammars');
if (m.failed()) {
throw errors.grammarSyntaxError(m);
}
return buildGrammar(m, namespace);
}

export function grammar(source, optNamespace) {
const ns = grammars(source, optNamespace);
export function grammar(source, optNamespace, options = {}) {
const ns = grammars(source, optNamespace, options);

// Ensure that the source contained no more than one grammar definition.
const grammarNames = Object.keys(ns);
Expand All @@ -42,8 +38,10 @@ export function grammar(source, optNamespace) {
return ns[grammarNames[0]]; // Return the one and only grammar.
}

export function grammars(source, optNamespace) {
export function grammars(source, optNamespace, options = {}) {

const ns = Object.create(optNamespace || {});

if (typeof source !== 'string') {
// For convenience, detect Node.js Buffer objects and automatically call toString().
if (isBuffer(source)) {
Expand All @@ -54,7 +52,11 @@ export function grammars(source, optNamespace) {
);
}
}
compileAndLoad(source, ns);

const mergedOptions = Object.assign({}, DEFAULT_OPTIONS, options);

compileAndLoad(source, ns, mergedOptions);

return ns;
}

Expand Down
17 changes: 16 additions & 1 deletion packages/ohm-js/src/ohm-grammar.ohm
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
Ohm {

Document
= Includes Grammars

Includes
= Include*

Include
= include "'" relativeFilePath "'"

Grammars
= Grammar*

Expand Down Expand Up @@ -61,6 +70,12 @@ Ohm {
| terminal -- terminal
| "(" Alt ")" -- paren

include
= "include"

relativeFilePath
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for choosing to allow these specific characters? Maybe I'm wrong, but it seems like it may be overly restrictive.

An alternative would be for Ohm to be completely agnostic about paths, and accept any character inside the quotes.

Copy link
Author

@LiamRiddell LiamRiddell Aug 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is following security standards to always use an allowlist when you know the expected content format. If you want to accept the risk and open it to any character between the quotes we can do that.

= ~"'" (letter | digit | "-" | "\\" | "/" | ".")+

ruleDescr (a rule description)
= "(" ruleDescrText ")"

Expand Down Expand Up @@ -121,4 +136,4 @@ Ohm {
operator = "<:" | "=" | ":=" | "+=" | "*" | "+" | "?" | "~" | "&"

punctuation = "<" | ">" | "," | "--"
}
}
14 changes: 14 additions & 0 deletions packages/ohm-js/src/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,17 @@ export const uniqueId = (() => {
let idCounter = 0;
return prefix => '' + prefix + idCounter++;
})();

// Returns true only when `options` has an *own* property `optionName` whose
// `typeof` is exactly `expectedType`; returns false in every other case,
// including when `options` is null or undefined.
export function validateOption(options, optionName, expectedType) {
  // No options object at all simply means the option was not provided.
  if (options == null) {
    return false;
  }

  // Go through Object.prototype so that prototype-less objects, and objects
  // that shadow `hasOwnProperty`, are handled without throwing.
  if (!Object.prototype.hasOwnProperty.call(options, optionName)) {
    return false;
  }

  return typeof options[optionName] === expectedType;
}
68 changes: 59 additions & 9 deletions packages/ohm-js/test/test-ohm-syntax.js
Original file line number Diff line number Diff line change
Expand Up @@ -1382,22 +1382,22 @@ describe('bootstrap', test => {
const ns = ohm.grammars(ohmGrammarSource);

test('it can recognize arithmetic grammar', t => {
assertSucceeds(t, ns.Ohm.match(arithmeticGrammarSource, 'Grammar'));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were these changes necessary, or is this an unrelated cleanup?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we added a new Document rule to the grammar, the default rule is now Document. I feel the tests should be validating against the new grammar and not a sub-rule (e.g. Grammar) of the main grammar, hence the change and the resulting fixes to array accesses below.

assertSucceeds(t, ns.Ohm.match(arithmeticGrammarSource));
});

test('it can recognize itself', t => {
assertSucceeds(t, ns.Ohm.match(ohmGrammarSource, 'Grammar'));
assertSucceeds(t, ns.Ohm.match(ohmGrammarSource));
});

test('it can produce a grammar that works', t => {
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource, 'Grammar'), {}, ns.Ohm);
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource), {}, {}, ns.Ohm);
assertSucceeds(
t,
g.match(ohmGrammarSource, 'Grammar'),
g[0].match(ohmGrammarSource),
'Ohm grammar can recognize itself',
);
const Arithmetic = buildGrammar(g.match(arithmeticGrammarSource, 'Grammar'), {}, g);
const s = Arithmetic.createSemantics().addAttribute('v', {
const Arithmetic = buildGrammar(g[0].match(arithmeticGrammarSource), {}, {}, g[0]);
const s = Arithmetic[0].createSemantics().addAttribute('v', {
exp(expr) {
return expr.v;
},
Expand Down Expand Up @@ -1438,13 +1438,63 @@ describe('bootstrap', test => {
return this.sourceString;
},
});
t.is(s(Arithmetic.match('10*(2+123)-4/5')).v, 1249.2);
t.is(s(Arithmetic[0].match('10*(2+123)-4/5')).v, 1249.2);
});

test('full bootstrap!', t => {
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource, 'Grammar'), {}, ns.Ohm);
const gPrime = buildGrammar(g.match(ohmGrammarSource, 'Grammar'), {}, g);
const g = buildGrammar(ns.Ohm.match(ohmGrammarSource, 'Grammar'), {}, {}, ns.Ohm);
const gPrime = buildGrammar(g.match(ohmGrammarSource, 'Grammar'), {}, {}, g);
gPrime.namespaceName = g.namespaceName; // make their namespaceName properties the same
compareGrammars(t, g, gPrime);
});
});

// Exercises the `include` directive. Each case supplies a stub `fetchGrammar`
// callback that returns grammar source for the requested path.
describe('include', test => {
  test('include', t => {
    const source = `
include 'test.ohm'
`;
    const fetchGrammar = (path) => `G { X = "G" }`;
    const included = ohm.grammar(source, {}, {fetchGrammar});

    assertSucceeds(t, included.match('G'));
  });

  test('multiple', t => {
    // Serves two distinct files; any other path yields an empty source.
    const fetchGrammar = (path) => {
      if (path === "file-a.ohm") {
        return 'FileA { A = "A" }';
      }
      if (path === "file-b.ohm") {
        return 'FileB { B = "B" }';
      }
      return "";
    };
    const source = `
include 'file-a.ohm'
include 'file-b.ohm'
`;
    const loaded = ohm.grammars(source, {}, {fetchGrammar});

    assertSucceeds(t, loaded.FileA.match('A'));
    assertSucceeds(t, loaded.FileB.match('B'));
  });

  test('supergrammar', t => {
    // The included file provides the supergrammar that the inline grammar extends.
    const fetchGrammar = (path) => 'SuperGrammar { S = "S" }';
    const source = `
include 'supergrammar.ohm'

ChildGrammar <: SuperGrammar {
S += "C"
}
`;
    const loaded = ohm.grammars(source, {}, {fetchGrammar});
    const child = loaded.ChildGrammar;

    assertSucceeds(t, child.match('C'));
    assertSucceeds(t, child.match('S'));
  });
});
2 changes: 2 additions & 0 deletions ~/.gitconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[core]
autocrlf = false