Skip to content

Commit 953704c

Browse files
authored
Merge pull request #1263 from Julow/merge-sherlodoc
Merge Sherlodoc
2 parents fb9200a + 8b484be commit 953704c

File tree

138 files changed

+7057
-24
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

138 files changed

+7057
-24
lines changed

.github/workflows/build.yml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ jobs:
2828
send-coverage: true
2929
# Mdx tests Mdx tests
3030
run-mdx: true
31+
test-sherlodoc: true
3132
- os: macos-latest
3233
ocaml-compiler: "5.2"
3334
# - os: windows-latest
@@ -53,11 +54,20 @@ jobs:
5354
opam-pin: ${{ matrix.run-mdx != true }}
5455

5556
- name: Install dependencies
56-
run: opam install --deps-only --with-test ./odoc.opam ./odoc-parser.opam
57+
run: |
58+
opam install --deps-only --with-test ./odoc.opam ./odoc-parser.opam \
59+
${{ matrix.test-sherlodoc && './sherlodoc.opam' }}
5760
5861
- name: dune runtest
62+
if: matrix.test-sherlodoc == true
63+
# Run all tests
5964
run: opam exec -- dune runtest
6065

66+
- name: dune runtest
67+
if: matrix.test-sherlodoc != true
68+
# Run the tests for odoc and odoc-parser only
69+
run: opam exec -- dune runtest -p odoc,odoc-parser
70+
6171
- name: Mdx tests
6272
if: matrix.run-mdx == true
6373
run: |

dune-project

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
"Leo White <leo@lpw25.net>"
1919
"Lubega Simon <lubegasimon73@gmail.com>"
2020
"Paul-Elliot Angl\195\168s d'Auriac <paul-elliot@tarides.com>"
21-
"Thomas Refis <trefis@janestreet.com>")
21+
"Thomas Refis <trefis@janestreet.com>"
22+
"Arthur Wendling <art.wendling@gmail.com>"
23+
"Emile Trotignon <emile.trotignon@gmail.com>")
2224

2325
(maintainers
2426
"Daniel B\195\188nzli <daniel.buenzli@erratique.ch>"
@@ -29,3 +31,7 @@
2931
(cram enable)
3032

3133
(using mdx 0.3)
34+
35+
; Sherlodoc
36+
(using menhir 2.1)
37+
(using directory-targets 0.1)

odoc-driver.opam

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ depends: [
4646
"cmdliner"
4747
"sexplib"
4848
"ppx_sexp_conv"
49+
"sherlodoc"
4950
]
5051

5152
build: [

sherlodoc.opam

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# opam package description for sherlodoc, the search engine for OCaml
# documentation that lives alongside odoc in this repository.
opam-version: "2.0"
version: "dev"
synopsis: "Search engine for OCaml documentation"
maintainer: ["art.wendling@gmail.com"]
authors: ["Arthur Wendling" "Emile Trotignon"]
license: "MIT"
homepage: "https://github.com/ocaml/odoc"
doc: "https://ocaml.github.io/odoc/"
bug-reports: "https://github.com/ocaml/odoc/issues"
depends: [
  "dune" {>= "3.5"}
  # OCaml releases are numbered 4.08, not 4.0.8. Under opam's version
  # ordering "4.0.8" sorts before "4.01", so that spelling excluded nothing.
  "ocaml" {>= "4.08"}
  "odoc" {>= "3.0.0"}
  "base64" {>= "3.5.1"}
  "bigstringaf" {>= "0.9.1"}
  "js_of_ocaml" {>= "5.6.0"}
  "brr" {>= "0.0.6"}
  "cmdliner" {>= "1.2.0"}
  "decompress" {>= "1.5.3"}
  "fpath" {>= "0.7.3"}
  "lwt" {>= "5.7.0"}
  "menhir" {>= "20230608"}
  "ppx_blob" {>= "0.7.2"}
  "tyxml" {>= "4.6.0"}
  "result" {>= "1.5"}
  "odig" {with-test}
  "base" {with-test & = "v0.16.3"}
  "alcotest" {with-test}
]
# Optional dependency: the `ancient` database backend is used only when the
# package is installed.
depopts: [
  "ancient" {>= "0.9.1"}
]
build: [
  ["dune" "subst"] {dev}
  [
    "dune"
    "build"
    "-p"
    name
    "-j"
    jobs
    "@install"
    "@runtest" {with-test}
    "@doc" {with-doc}
  ]
]
dev-repo: "git+https://github.com/ocaml/odoc.git"

sherlodoc/.ocamlformat

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
version = 0.26.1
2+
profile = janestreet
3+
let-binding-spacing = compact
4+
sequence-style = separator
5+
doc-comments = after-when-possible
6+
exp-grouping = preserve
7+
break-cases = toplevel
8+
cases-exp-indent = 4
9+
cases-matching-exp-indent = normal

sherlodoc/LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2022 Arthur Wendling, Tarides
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

sherlodoc/README.md

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
**Try it online at [doc.sherlocode.com](https://doc.sherlocode.com) !**
2+
3+
Sherlodoc is a search engine for OCaml documentation (inspired by [Hoogle](https://hoogle.haskell.org/)), which allows you to search through OCaml libraries by names and approximate type signatures:
4+
5+
- Search by name: [`list map`](https://doc.sherlocode.com/?q=list%20map)
6+
- Search inside documentation comments: [`raise Not_found`](https://doc.sherlocode.com/?q=raise%20Not_found)
7+
- Fuzzy type search is introduced with a colon, e.g. [`: map -> list`](https://doc.sherlocode.com/?q=%3A%20map%20-%3E%20list)
8+
- Search by name and type with a colon separator [`Bogue : Button.t`](https://doc.sherlocode.com/?q=Bogue%20%3A%20Button.t)
9+
- An underscore `_` can be used as a wildcard in type queries: [`(int -> _) -> list -> _`](https://doc.sherlocode.com/?q=(int%20-%3E%20_)%20-%3E%20list%20-%3E%20_)
10+
- Type search supports products and reordering of function arguments: [`array -> ('a * int -> bool) -> array`](https://doc.sherlocode.com/?q=%3A%20array%20-%3E%20(%27a%20*%20int%20-%3E%20bool)%20-%3E%20array)
11+
12+
## Local usage
13+
14+
First, install sherlodoc and odig:
15+
16+
```bash
17+
$ opam pin add 'https://github.com/art-w/sherlodoc.git' # optional
18+
19+
$ opam install sherlodoc odig
20+
```
21+
22+
[Odig](https://erratique.ch/software/odig) can generate the odoc documentation of your current switch with:
23+
24+
```bash
25+
$ odig odoc # followed by `odig doc` to browse your switch documentation
26+
```
27+
28+
Which sherlodoc can then index to create a search database:
29+
30+
```bash
31+
# name your sherlodoc database
32+
$ export SHERLODOC_DB=/tmp/sherlodoc.marshal
33+
34+
# if you are using OCaml 4, we recommend the `ancient` database format:
35+
$ opam install ancient
36+
$ export SHERLODOC_DB=/tmp/sherlodoc.ancient
37+
38+
# index all odoc files generated by odig for your current switch:
39+
$ sherlodoc index $(find $OPAM_SWITCH_PREFIX/var/cache/odig/odoc -name '*.odocl' | grep -v __)
40+
```
41+
42+
Enjoy searching from the command-line or run the webserver:
43+
44+
```bash
45+
$ sherlodoc search "map : list"
46+
$ sherlodoc search # interactive cli
47+
48+
$ opam install dream
49+
$ sherlodoc serve # webserver at http://localhost:1234
50+
```
51+
52+
The different commands support a `--help` argument for more details/options.
53+
54+
In particular, sherlodoc supports three different file formats for its database, which can be specified either in the filename extension or through the `--format=` flag:
55+
- `ancient` for fast database loading using mmap, but is only compatible with OCaml 4.
56+
- `marshal` for when ancient is unavailable, with slower database opening.
57+
- `js` for integration with odoc static html documentation for client-side search without a server.
58+
59+
## Integration with Odoc
60+
61+
Odoc 2.4.0 adds a search bar inside the statically generated html documentation. [Integration with dune is in progress](https://github.com/ocaml/dune/pull/9772), you can try it inside a fresh opam switch with: (warning! this will recompile any installed package that depends on dune!)
62+
63+
```bash
64+
$ opam pin https://github.com/emileTrotignon/dune.git#search-odoc-new
65+
66+
$ dune build @doc # in your favorite project
67+
```
68+
69+
Otherwise, manual integration with odoc requires adding the flags `--search-uri sherlodoc.js --search-uri db.js` to every call of `odoc html-generate` to activate the search bar. You'll also need to generate a search database `db.js` and provide the `sherlodoc.js` dependency (a version of the sherlodoc search engine with odoc support, compiled to javascript):
70+
71+
```bash
72+
$ sherlodoc index --db=_build/default/_doc/_html/YOUR_LIB/db.js \
73+
$(find _build/default/_doc/_odocls/YOUR_LIB -name '*.odocl' | grep -v __)
74+
75+
$ sherlodoc js > _build/default/_doc/_html/sherlodoc.js
76+
```
77+
78+
## How it works
79+
80+
The sherlodoc database uses [Suffix Trees](https://en.wikipedia.org/wiki/Suffix_tree) to search for substrings in value names, documentation and types. During indexation, the suffix trees are compressed to state machine automata. The children of every node are also sorted, such that a sub-tree can be used as a priority queue during search enumeration.
81+
82+
To rank the search results, sherlodoc computes a static evaluation of each candidate during indexation. This static scoring biases the search to favor short names, short types, the presence of documentation, etc. When searching, a dynamic evaluation dependent on the user query is used to adjust the static ordering of the results:
83+
84+
- How similar is the result name to the search query? (to e.g. prefer results which respect the case: [`map`](https://doc.sherlocode.com/?q=map) vs [`Map`](https://doc.sherlocode.com/?q=Map))
85+
- How similar are the types? (using a tree diff algorithm, as for example [`('a -> 'b -> 'a) -> 'a -> 'b list -> 'a`](https://doc.sherlocode.com/?q=(%27a%20-%3E%20%27b%20-%3E%20%27a)%20-%3E%20%27a%20-%3E%20%27b%20list%20-%3E%20%27a) and [`('a -> 'b -> 'b) -> 'a list -> 'b -> 'b`](https://doc.sherlocode.com/?q=(%27a%20-%3E%20%27b%20-%3E%20%27b)%20-%3E%20%27a%20list%20-%3E%20%27b%20-%3E%20%27b) are isomorphic yet point to `fold_left` and `fold_right` respectively)
86+
87+
For fuzzy type search, sherlodoc aims to provide good results without requiring a precise search query, on the basis that the user doesn't know the exact type of the things they are looking for (e.g. [`string -> file_descr`](https://doc.sherlocode.com/?q=string%20-%3E%20file_descr) is incomplete but should still point in the right direction). In particular when exploring a package documentation, the common question "how do I produce a value of type `foo`" can be answered with the query `: foo` (and "which functions consume a value of type `bar`" with `: bar -> _`). This should also work when the type can only be produced indirectly through a callback (for example [`: Eio.Switch.t`](https://doc.sherlocode.com/?q=%3A%20Eio.Switch.t) has no direct constructor). To achieve this, sherlodoc performs a type decomposition based on the polarity of each term: A value produced by a function is said to be positive, while an argument consumed by a function is negative. This simplifies away the tree shape of types, allowing their indexation in the suffix trees. The cardinality of each value type is also indexed, to e.g. differentiate between [`list -> list`](https://doc.sherlocode.com/?q=list%20-%3E%20list) and [`list -> list -> list`](https://doc.sherlocode.com/?q=list%20-%3E%20list%20-%3E%20list).
88+
89+
While the polarity search results are satisfying, sherlodoc offers very limited support for polymorphic variables, type aliases and true type isomorphisms. You should check out the extraordinary [Dowsing](https://github.com/Drup/dowsing) project for this!
90+
91+
And if you speak French, a more detailed [presentation of Sherlodoc](https://www.irill.org/videos/OUPS/2023-03/wendling.html) (and [Sherlocode](https://sherlocode.com)) was given at the [OCaml Users in PariS (OUPS)](https://oups.frama.io/) in March 2023.

sherlodoc/cli/dune

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
(ocamllex unescape)
2+
3+
(executable
4+
(name main)
5+
(public_name sherlodoc)
6+
(package sherlodoc)
7+
(libraries
8+
cmdliner
9+
index
10+
query
11+
db_store
12+
unix
13+
(select
14+
serve.ml
15+
from
16+
(www -> serve.available.ml)
17+
(!www -> serve.unavailable.ml)))
18+
(preprocess
19+
(pps ppx_blob))
20+
(preprocessor_deps ../jsoo/sherlodoc.js))

sherlodoc/cli/main.ml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
(* Pick the database backend to use.

   An explicitly requested [db_format] always wins. Otherwise the extension
   of [db_filename], with its leading dot removed, is looked up in
   [Db_store.available_backends]. When no backend matches, an error listing
   the known backend names is printed on stderr and the program exits with
   status 1. *)
let guess_db_format db_format db_filename =
  let from_extension () =
    let dotted = Filename.extension db_filename in
    let ext =
      match String.length dotted with
      | 0 -> dotted
      | n -> String.sub dotted 1 (n - 1)
    in
    match List.assoc_opt ext Db_store.available_backends with
    | Some backend -> backend
    | None ->
      let known = List.map fst Db_store.available_backends in
      Format.fprintf
        Format.err_formatter
        "Unknown db format extension %S (expected: %s)@."
        ext
        (String.concat ", " known) ;
      exit 1
  in
  match db_format with
  | Some db_format -> db_format
  | None -> from_extension ()
17+
18+
open Cmdliner
19+
20+
(* Optional [--format] argument, also settable through the SHERLODOC_FORMAT
   environment variable. Accepted values are the names listed in
   [Db_store.available_backends]; the result is [None] when unspecified. *)
let db_format =
  let backend = Arg.enum Db_store.available_backends in
  let env = Cmd.Env.info "SHERLODOC_FORMAT" ~doc:"Database format" in
  let format_info = Arg.info [ "format" ] ~docv:"DB_FORMAT" ~env in
  Arg.(value & opt (some backend) None format_info)
27+
28+
(* Required database file name given as [--db] or [-o], or through the
   SHERLODOC_DB environment variable. It is parsed as a plain string, so the
   file does not have to exist yet. *)
let db_filename =
  let env = Cmd.Env.info "SHERLODOC_DB" ~doc:"The database to query" in
  let db_info = Arg.info [ "db"; "o" ] ~docv:"DB" ~env in
  Arg.(required & opt (some string) None db_info)
34+
35+
(* Required database path given as [--db], or through the SHERLODOC_DB
   environment variable. Unlike [db_filename] it uses Cmdliner's [file]
   converter, which is documented to reject paths that do not exist. *)
let db_path =
  let env = Cmd.Env.info "SHERLODOC_DB" ~doc:"The database to query" in
  let db_info = Arg.info [ "db" ] ~docv:"DB" ~env in
  Arg.(required & opt (some file) None db_info)
41+
42+
(* Build the term for a sub-command that needs a database.

   [fn] is a term producing a function of the backend and the database file
   name; [db_path] is the term producing that file name. The backend passed
   to the function is the one resolved by [guess_db_format] from the
   [--format] argument and the file name. *)
let with_db fn db_path =
  let resolve_and_call action requested_format filename =
    action (guess_db_format requested_format filename) filename
  in
  Term.(const resolve_and_call $ fn $ db_format $ db_path)
48+
49+
(* [sherlodoc search]: query a database, given by [db_path], from the
   command line. *)
let cmd_search =
  let term = with_db Search.term db_path in
  Cmd.v (Cmd.info "search" ~doc:"Command-line search") term
52+
53+
(* [sherlodoc index]: build a database from odocl files. The database
   location comes from [db_filename]. *)
let cmd_index =
  let term = with_db Index.term db_filename in
  Cmd.v (Cmd.info "index" ~doc:"Index odocl files to create a Sherlodoc database") term
57+
58+
(* [sherlodoc serve]: web server front-end over a database given by
   [db_path]. *)
let cmd_serve =
  let term = with_db Serve.term db_path in
  Cmd.v (Cmd.info "serve" ~doc:"Webserver interface") term
62+
63+
(* [sherlodoc js [FILE]]: write the JavaScript build of sherlodoc, embedded
   in the executable through the [%blob] extension, to [FILE]. When no file
   is given it is written to standard output. *)
let cmd_jsoo =
  let doc = "For dune/odoc integration, sherlodoc compiled as javascript" in
  let info = Cmd.info "js" ~doc in
  let target =
    let doc = "Name of the file to create" in
    (* The positional argument is an output file name, so the help shows it
       as FILE rather than QUERY. The empty-string default stands for
       "write to stdout". *)
    Arg.(value & pos 0 string "" & info [] ~docv:"FILE" ~doc)
  in
  let emit_js_dep filename =
    let close, h = if filename = "" then false, stdout else true, open_out filename in
    output_string h [%blob "../jsoo/sherlodoc.js"] ;
    (* Only a channel opened here is closed; stdout is left open. *)
    if close then close_out h
  in
  Cmd.v info Term.(const emit_js_dep $ target)
76+
77+
(* Top-level [sherlodoc] command grouping the search, index, serve and js
   sub-commands. *)
let cmd =
  let subcommands = [ cmd_search; cmd_index; cmd_serve; cmd_jsoo ] in
  Cmd.group (Cmd.info "sherlodoc" ~version:"0.2" ~doc:"Sherlodoc") subcommands

(* Program entry point: evaluate the command line and exit with the status
   returned by Cmdliner. *)
let () = exit (Cmd.eval cmd)

0 commit comments

Comments
 (0)