Skip to content

Commit

Permalink
more tests, add configurable well known prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
nbittich committed Jan 27, 2024
1 parent 38178e1 commit 768a5fc
Show file tree
Hide file tree
Showing 15 changed files with 277 additions and 110 deletions.
29 changes: 2 additions & 27 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ test-case = "3.3.1"
env_logger = "0.11.1"
tortank = "0.20.0"
wasm-bindgen = "0.2.90"
graph-rdfa-processor = "0.1.2"
graph-rdfa-processor = { path = "./lib-rdfa" }

[profile.release]
opt-level = 'z' # Optimize for size.
Expand Down
59 changes: 47 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,58 @@
# graph-rdfa-processor
# RDFa processor library (WIP)

### RDFa processor library (WIP)
Rust and WebAssembly library that extracts RDF statements (in N-Triples format)
from an HTML file, based on its RDFa annotations.

- covers [RDFa 1.1 Primer - Third Edition](https://www.w3.org/TR/rdfa-primer/).
- used [RDFa/Play](https://rdfa.info/play/) for comparing.
- WIP:
## Examples

```rust
use graph_rdfa_processor::RdfaGraph;
let html = r#"
<div prefix="foaf: http://xmlns.com/foaf/0.1/" about="http://www.example.org/#somebody" rel="foaf:knows">
<p about="http://danbri.org/foaf.rdf#danbri" typeof="foaf:Person" property="foaf:name">Dan Brickley</p>
</div>
"#;
let base = "http://example.com";
let well_known_prefix = Some("http://example.org/.well-known");

RdfaGraph::parse_str(html, base, well_known_prefix).unwrap();

```

```js
<script type="module">
import init, {html_to_rdfa} from "./rdfa-wasm/pkg/rdfa_wasm.js";

async function run() {
await init();
let html =`
<div prefix="foaf: http://xmlns.com/foaf/0.1/" about="http://www.example.org/#somebody" rel="foaf:knows">
<p about="http://danbri.org/foaf.rdf#danbri" typeof="foaf:Person" property="foaf:name">Dan Brickley</p>
</div>
`;
let base = "http://example.com";
let well_known_prefix = "http://example.org/.well-known";
let res = html_to_rdfa(html, base, well_known_prefix);

}
run();
</script>

```

- covers:

- [RDFa 1.1 Primer - Third Edition](https://www.w3.org/TR/rdfa-primer/)
- [RDFa Core](https://www.w3.org/TR/rdfa-core/)
- [Earl-Reports](https://rdfa.info/earl-reports/#RDFa-rdfa1.1-tests-for-html5)

- used [RDFa/Play](https://rdfa.info/play/) for comparing.
- [Demo](https://nbittich.github.io/graph-rdfa-processor/)

### WIP

The plan is to write as many tests as possible and fix the bugs along the way.
### WIP

Once enough tests have been implemented, a full refactoring will be needed before
doing anything else.
The plan is to write as many tests as possible and fix the bugs along the way.

### KNOWN ISSUES
### KNOWN ISSUES

- White space not preserved. See test 0329.
- White space not preserved. See test 0329.
92 changes: 72 additions & 20 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,64 @@
form.classList.remove("d-none");

const text_area = document.querySelector("#html");
text_area.value = `<!DOCTYPE html>
<html prefix="foaf: http://xmlns.com/foaf/0.1/">
<head>
<title>Test 0083</title>
</head>
<body>
<div about="http://www.example.org/#somebody" rel="foaf:knows">
<p property="foaf:name">Ivan Herman</p>
<p rel="foaf:mailbox" resource="mailto:ivan@w3.org">mailto:ivan@w3.org</p>
<p about="http://danbri.org/foaf.rdf#danbri" typeof="foaf:Person" property="foaf:name">Dan Brickley</p>
</div>
</body>
</html>
`;
text_area.value = `
<!DOCTYPE html>
<html prefix="foaf: http://xmlns.com/foaf/0.1/">
<head>
<title>Test 0083</title>
</head>
<body>
<div about="http://www.example.org/#somebody" rel="foaf:knows">
<p property="foaf:name">Ivan Herman</p>
<p rel="foaf:mailbox" resource="mailto:ivan@w3.org">mailto:ivan@w3.org</p>
<p about="http://danbri.org/foaf.rdf#danbri" typeof="foaf:Person" property="foaf:name">Dan Brickley</p>
</div>
</body>
</html>
`;

form.addEventListener("submit", (e) => {
e.preventDefault();
const data = new FormData(e.target);
let res = html_to_rdfa(data.get("html"), data.get("base"));
let res = html_to_rdfa(
data.get("html") || "",
data.get("base") || "",
data.get("wellKnownPrefix") || "",
);
const out = document.querySelector("pre");
out.innerText = res;
});
// Wire the "open an issue" link: clicking it opens a pre-filled GitHub issue
// in a new tab, built from the current values of the playground form.
const issueLink = document.querySelector("#issueLink");
issueLink.onclick = (e) => {
  // Prevent the link's default navigation; the issue URL is opened below instead.
  e.preventDefault();
  const data = new FormData(form);
  const params = new URLSearchParams();
  params.append("title", "RDFa processing bug");

  // Markdown body of the issue. Each section reports its own form field;
  // the well-known prefix section must read "wellKnownPrefix", not "base".
  params.append(
    "body",
    `
### Base:
\`${data.get("base") || ""}\`
### Well Known Prefix:
\`${data.get("wellKnownPrefix") || ""}\`
### Html:
\`\`\`html
${data.get("html") || ""}
\`\`\`
`,
  );

  // A temporary, detached anchor is used to open the URL in a new tab.
  const a = document.createElement("a");
  a.href = `https://github.com/nbittich/graph-rdfa-processor/issues/new?${params.toString()}`;
  a.target = "_blank";
  a.click();
};
}
run();
</script>
Expand All @@ -48,11 +84,11 @@
<h1>RDFa Playground</h1>
<p>
This is a poc of an rdfa library written in rust. If you find any bug,
please open an issue.
please <a href="#issueLink" id="issueLink">open an issue.</a>
</p>
<hr />
<div class="row">
<div class="col-4 ms-1">
<div class="d-flex justify-content-center">
<div class="col-3 me-1 border p-2">
<form action="" id="rdfaForm">
<div class="mb-2 align-items-center">
<div class="col-auto">
Expand All @@ -63,6 +99,20 @@ <h1>RDFa Playground</h1>
value="http://example.com" />
</div>
</div>
<div class="mb-2 align-items-center">
<div class="col-auto">
<label for="wellKnownPrefix" class="col-form-label">Well Known prefix</label>
</div>
<div class="col-auto">
<input required type="url" id="wellKnownPrefix" name="wellKnownPrefix" class="form-control"
aria-describedby="wellKnownPrefixHelp" value="http://example.com/.well-known" />
<div id="wellKnownPrefixHelp" class="form-text">
Well known prefix for
<a target="_blank" href="https://www.w3.org/2011/rdf-wg/wiki/Skolemisation">Skolemisation</a>
</div>
</div>
</div>

<div class="mb-2 align-items-center">
<div class="col-auto">
<label for="html" class="col-form-label">Html</label>
Expand All @@ -72,10 +122,12 @@ <h1>RDFa Playground</h1>
aria-describedby="Html content"></textarea>
</div>
</div>
<button type="submit" class="btn btn-outline-danger">Submit</button>
<div class="d-grid gap-2">
<button type="submit" class="btn btn-outline-danger">Submit</button>
</div>
</form>
</div>
<div class="col-7 pt-5 bg-light">
<div class="col-8 pt-5 bg-light border">
<pre id="out"></pre>
</div>
</div>
Expand Down
15 changes: 15 additions & 0 deletions lib-rdfa/examples/earl_html5/example0035.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd">
<html version="HTML+RDFa 1.1" prefix="foaf: http://xmlns.com/foaf/0.1/">
<head>
<title>Test 0035</title>
</head>
<body>
<div>
<img about="http://sw-app.org/mic.xhtml#i"
rel="foaf:img"
src="http://sw-app.org/img/mic_2007_01.jpg"
href="http://sw-app.org/img/mic_2006_03.jpg"
alt="A photo depicting Michael" />
</div>
</body>
</html>
1 change: 1 addition & 0 deletions lib-rdfa/examples/earl_html5/example0035.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<http://sw-app.org/mic.xhtml#i> <http://xmlns.com/foaf/0.1/img> <http://sw-app.org/img/mic_2007_01.jpg>.
18 changes: 18 additions & 0 deletions lib-rdfa/examples/earl_html5/example0037.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@


<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd">
<html version="HTML+RDFa 1.1" prefix="foaf: http://xmlns.com/foaf/0.1/">
<head>
<title>Test 0037</title>
</head>
<body>
<div>
<img about="http://sw-app.org/mic.xhtml#i"
rel="foaf:img"
src="http://sw-app.org/img/mic_2007_01.jpg"
href="http://sw-app.org/img/mic_2006_03.jpg"
resource="http://sw-app.org/mic.xhtml#photo"
alt="A photo depicting Michael" />
</div>
</body>
</html>
5 changes: 5 additions & 0 deletions lib-rdfa/examples/earl_html5/example0037.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@


<http://sw-app.org/mic.xhtml#i> <http://xmlns.com/foaf/0.1/img> <http://sw-app.org/mic.xhtml#photo>.


15 changes: 15 additions & 0 deletions lib-rdfa/examples/earl_html5/example0039.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd">
<html version="HTML+RDFa 1.1" prefix="foaf: http://xmlns.com/foaf/0.1/">
<head>
<title>Test 0039</title>
</head>
<body>
<div>
<img about="http://sw-app.org/mic.xhtml#i"
rev="foaf:depicts"
src="http://sw-app.org/img/mic_2007_01.jpg"
href="http://sw-app.org/img/mic_2006_03.jpg"
alt="A photo depicting Michael" />
</div>
</body>
</html>
1 change: 1 addition & 0 deletions lib-rdfa/examples/earl_html5/example0039.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<http://sw-app.org/img/mic_2007_01.jpg> <http://xmlns.com/foaf/0.1/depicts> <http://sw-app.org/mic.xhtml#i>.
13 changes: 11 additions & 2 deletions lib-rdfa/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,28 @@ impl<'a> RdfaGraph<'a> {
initial_context: Context<'a>,
) -> Result<RdfaGraph<'a>, Box<dyn Error>> {
let mut triples = vec![];
let well_known_prefix = initial_context.well_known_prefix.clone();
traverse_element(input, None, initial_context, &mut triples, &mut vec![])?;

triples = copy_pattern(triples)?;

Ok(RdfaGraph(triples.into_iter().collect()))
Ok(RdfaGraph {
statements: triples.into_iter().collect(),
well_known_prefix,
})
}
// temporary thing
pub fn parse_str(html: &'a str, base: &'a str) -> Result<String, Box<dyn Error>> {
pub fn parse_str(
html: &'a str,
base: &'a str,
well_known_prefix: Option<&'a str>,
) -> Result<String, Box<dyn Error>> {
let document = scraper::Html::parse_document(html);
let root = document.root_element();

let root_ctx = Context {
base,
well_known_prefix,
..Default::default()
};
RdfaGraph::parse(&root, root_ctx).map(|g| g.to_string())
Expand Down
Loading

0 comments on commit 768a5fc

Please sign in to comment.