diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala
index 6a037301..a36375f6 100644
--- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala
+++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala
@@ -44,17 +44,19 @@ final case class IndexSemanticdbCommand(
@Inline() app: Application = Application.default
) extends Command {
def sourceroot: Path = AbsolutePath.of(app.env.workingDirectory)
- def isProtobufFormat: Boolean =
- IndexSemanticdbCommand.isProtobufFormat(output)
def absoluteTargetroots: List[Path] =
targetroot.map(AbsolutePath.of(_, app.env.workingDirectory))
def run(): Int = {
val reporter = new ConsoleLsifSemanticdbReporter(app)
- val format =
- if (isProtobufFormat)
- LsifOutputFormat.PROTOBUF
- else
- LsifOutputFormat.JSON
+    // The output format is derived from the extension of the `output` filename;
+    // the extension-to-format mapping is LsifOutputFormat.fromFilename.
+    val outputFilename = output.getFileName.toString
+    val format = LsifOutputFormat.fromFilename(outputFilename)
+    if (format == LsifOutputFormat.UNKNOWN) {
+      // Report the problem and exit with a non-zero status. The message lists
+      // every extension that fromFilename recognizes.
+      app.error(
+        s"unknown output format for filename '$outputFilename'. " +
+          "Supported file extensions are `*.lsif`, `*.lsif-protobuf`, " +
+          "`*.lsif-typed` and `*.lsif-typed.ndjson`"
+      )
+      return 1
+    }
val packages =
absoluteTargetroots
.iterator
diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala
index 529466f8..f2bbc586 100644
--- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala
+++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala
@@ -42,6 +42,7 @@ import moped.cli.Command
import moped.cli.CommandParser
import moped.reporters.Input
import moped.reporters.Position
+import moped.reporters.Reporter
import org.scalameta.ascii.layout.prefs.LayoutPrefsImpl
@CommandName("snapshot-lsif")
@@ -65,7 +66,11 @@ case class SnapshotLsifCommand(
for {
inputPath <- input
in = AbsolutePath.of(inputPath, sourceroot)
- doc <- SnapshotLsifCommand.parseTextDocument(in, sourceroot)
+ if Files.isRegularFile(in) || {
+ app.error(s"no such file: $in")
+ false
+ }
+ doc <- SnapshotLsifCommand.parseTextDocument(in, sourceroot, app.reporter)
} {
val docPath = AbsolutePath
.of(Paths.get(doc.getUri), sourceroot)
@@ -86,16 +91,21 @@ case class SnapshotLsifCommand(
object SnapshotLsifCommand {
private val jsonParser = JsonFormat.parser().ignoringUnknownFields()
- def parseTextDocument(input: Path, sourceroot: Path): List[TextDocument] = {
- parseSemanticdb(input, parseInput(input), sourceroot)
+ def parseTextDocument(
+ input: Path,
+ sourceroot: Path,
+ reporter: Reporter
+ ): List[TextDocument] = {
+ parseSemanticdb(input, parseInput(input), sourceroot, reporter)
}
def parseSemanticdb(
input: Path,
objects: mutable.Buffer[LsifObject],
- sourceroot: Path
+ sourceroot: Path,
+ reporter: Reporter
): List[TextDocument] = {
- val lsif = new IndexedLsif(input, objects, sourceroot)
+ val lsif = new IndexedLsif(input, objects, sourceroot, reporter)
lsif
.ranges
.iterator
@@ -169,7 +179,8 @@ object SnapshotLsifCommand {
class IndexedLsif(
val path: Path,
val objects: mutable.Buffer[LsifObject],
- val sourceroot: Path
+ val sourceroot: Path,
+ val reporter: Reporter
) {
val documents = mutable.Map.empty[Int, TextDocument.Builder]
val next = mutable.Map.empty[Int, Int]
@@ -432,24 +443,28 @@ object SnapshotLsifCommand {
case "document" =>
val relativeFile = Paths.get(URI.create(o.getUri))
val absoluteFile = sourceroot.resolve(relativeFile)
- val text =
- new String(
- Files.readAllBytes(absoluteFile),
- StandardCharsets.UTF_8
- )
- val relativeUri = sourceroot
- .relativize(absoluteFile)
- .iterator()
- .asScala
- .mkString("/")
- val language = Language
- .values()
- .find(_.name().compareToIgnoreCase(o.getLanguage) == 0)
- .getOrElse(Language.UNKNOWN_LANGUAGE)
- textDocument(o.getId)
- .setUri(relativeUri)
- .setLanguage(language)
- .setText(text)
+ if (!Files.isRegularFile(absoluteFile)) {
+ reporter.warning(s"no such file: $absoluteFile")
+ } else {
+ val text =
+ new String(
+ Files.readAllBytes(absoluteFile),
+ StandardCharsets.UTF_8
+ )
+ val relativeUri = sourceroot
+ .relativize(absoluteFile)
+ .iterator()
+ .asScala
+ .mkString("/")
+ val language = Language
+ .values()
+ .find(_.name().compareToIgnoreCase(o.getLanguage) == 0)
+ .getOrElse(Language.UNKNOWN_LANGUAGE)
+ textDocument(o.getId)
+ .setUri(relativeUri)
+ .setLanguage(language)
+ .setText(text)
+ }
case "definitionResult" =>
isDefinitionResult += o.getId()
case "hoverResult" =>
diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java
index 13306372..2c97a2a1 100644
--- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java
+++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java
@@ -6,6 +6,25 @@
*
The Protobuf format is experimental and currently only exists as a proof-of-concept.
*/
public enum LsifOutputFormat {
- JSON,
- PROTOBUF
+ GRAPH_NDJSON,
+ GRAPH_PROTOBUF,
+ TYPED_PROTOBUF,
+ TYPED_NDJSON,
+ UNKNOWN;
+
+ public boolean isTyped() {
+ return this == TYPED_NDJSON || this == TYPED_PROTOBUF;
+ }
+
+ public boolean isNewlineDelimitedJSON() {
+ return this == GRAPH_NDJSON || this == TYPED_NDJSON;
+ }
+
+ public static LsifOutputFormat fromFilename(String name) {
+ if (name.endsWith(".lsif")) return GRAPH_NDJSON;
+ if (name.endsWith(".lsif-protobuf")) return GRAPH_PROTOBUF;
+ if (name.endsWith(".lsif-typed")) return TYPED_PROTOBUF;
+ if (name.endsWith(".lsif-typed.ndjson")) return TYPED_NDJSON;
+ return UNKNOWN;
+ }
}
diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java
index eee4f6dc..2a27f3ea 100644
--- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java
+++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java
@@ -49,10 +49,10 @@ public void writeLsifObject(LsifObject.Builder object) {
b.output.reset();
try {
switch (options.format) {
- case PROTOBUF:
+ case GRAPH_PROTOBUF:
object.buildPartial().writeTo(b.output);
break;
- case JSON:
+ case GRAPH_NDJSON:
default:
jsonPrinter.appendTo(object, b.writer);
b.writer.flush();
@@ -69,7 +69,9 @@ public void flush() throws IOException {
byte[] bytes = buffer.poll();
while (bytes != null) {
out.write(bytes);
- out.write(NEWLINE);
+ if (options.format.isNewlineDelimitedJSON()) {
+ out.write(NEWLINE);
+ }
bytes = buffer.poll();
}
out.flush();
diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java
index 84728c3a..6f50fb45 100644
--- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java
+++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java
@@ -7,14 +7,18 @@
import com.sourcegraph.semanticdb_javac.Semanticdb.SymbolOccurrence;
import com.sourcegraph.semanticdb_javac.Semanticdb.SymbolOccurrence.Role;
import com.sourcegraph.semanticdb_javac.SemanticdbSymbols;
+import lib.codeintel.lsif_typed.LsifTyped;
import java.io.IOException;
+import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
/** The core logic that converts SemanticDB into LSIF. */
public class LsifSemanticdb {
@@ -49,6 +53,114 @@ private void run() throws IOException {
return;
}
options.reporter.startProcessing(files.size());
+ if (options.format.isTyped()) {
+ runTyped(files, packages);
+ } else {
+ runGraph(files, packages);
+ }
+ writer.build();
+ options.reporter.endProcessing();
+ }
+
+ /**
+ * Emits the index in the LSIF Typed format: the metadata message is written first, then
+ * every element produced by filesStream(files) is passed to processTypedDocument.
+ */
+ private void runTyped(List files, PackageTable packages) {
+ writer.emitTyped(typedMetadata());
+ filesStream(files).forEach(document -> processTypedDocument(document, packages));
+ }
+
+ private String typedSymbol(String symbol, Package pkg) {
+ if (symbol.startsWith("local")) {
+ return "local " + symbol.substring("local".length());
+ }
+ return "semanticdb maven " + pkg.repoName() + " " + pkg.version() + " " + symbol;
+ }
+
+ /**
+ * Converts every SemanticDB text document parsed from `path` into an LSIF Typed `Document`
+ * and emits each one wrapped in its own `Index` message.
+ *
+ * @param path file handed to parseTextDocument
+ * @param packages table used to look up the package of each symbol
+ */
+ private void processTypedDocument(Path path, PackageTable packages) {
+ for (LsifTextDocument doc : parseTextDocument(path).collect(Collectors.toList())) {
+ // Documents without any symbol occurrence are skipped entirely.
+ if (doc.semanticdb.getOccurrencesCount() == 0) {
+ continue;
+ }
+
+ // The document URI is converted to a Path and re-expressed relative to the sourceroot,
+ // with the path segments joined by "/".
+ Path absolutePath = Paths.get(URI.create(doc.semanticdb.getUri()));
+ String relativePath =
+ StreamSupport.stream(options.sourceroot.relativize(absolutePath).spliterator(), false)
+ .map(p -> p.getFileName().toString())
+ .collect(Collectors.joining("/"));
+ LsifTyped.Document.Builder tdoc =
+ LsifTyped.Document.newBuilder().setRelativePath(relativePath);
+ for (SymbolOccurrence occ : doc.sortedSymbolOccurrences()) {
+ // Only the Definition bit is ever set here; all other occurrences keep role 0.
+ int role = 0;
+ if (occ.getRole() == Role.DEFINITION) {
+ role |= LsifTyped.SymbolRole.Definition_VALUE;
+ }
+ // Single-line ranges are encoded with three elements (line, start character, end
+ // character); multi-line ranges use four (start line, start character, end line,
+ // end character).
+ boolean isSingleLineRange = occ.getRange().getStartLine() == occ.getRange().getEndLine();
+ Iterable range =
+ isSingleLineRange
+ ? Arrays.asList(
+ occ.getRange().getStartLine(),
+ occ.getRange().getStartCharacter(),
+ occ.getRange().getEndCharacter())
+ : Arrays.asList(
+ occ.getRange().getStartLine(),
+ occ.getRange().getStartCharacter(),
+ occ.getRange().getEndLine(),
+ occ.getRange().getEndCharacter());
+ // Symbols for which no package is found fall back to Package.EMPTY.
+ Package pkg = packages.packageForSymbol(occ.getSymbol()).orElse(Package.EMPTY);
+ tdoc.addOccurrences(
+ LsifTyped.Occurrence.newBuilder()
+ .addAllRange(range)
+ .setSymbol(typedSymbol(occ.getSymbol(), pkg))
+ .setSymbolRoles(role));
+ }
+ Symtab symtab = new Symtab(doc.semanticdb);
+ for (SymbolInformation info : doc.semanticdb.getSymbolsList()) {
+ Package pkg = packages.packageForSymbol(info.getSymbol()).orElse(Package.EMPTY);
+ LsifTyped.SymbolInformation.Builder tinfo =
+ LsifTyped.SymbolInformation.newBuilder().setSymbol(typedSymbol(info.getSymbol(), pkg));
+
+ // Every overridden symbol that is not ignored becomes an implementation relationship.
+ // It is also marked as a reference relationship only when `info` is a method symbol.
+ for (String overriddenSymbol : info.getOverriddenSymbolsList()) {
+ if (isIgnoredOverriddenSymbol(overriddenSymbol)) {
+ continue;
+ }
+ Package overriddenSymbolPkg =
+ packages.packageForSymbol(overriddenSymbol).orElse(Package.EMPTY);
+ tinfo.addRelationships(
+ LsifTyped.Relationship.newBuilder()
+ .setSymbol(typedSymbol(overriddenSymbol, overriddenSymbolPkg))
+ .setIsImplementation(true)
+ .setIsReference(SemanticdbSymbols.isMethod(info.getSymbol())));
+ }
+ // Documentation entry 1 (when a signature exists): the formatted signature inside a
+ // fenced code block tagged with the lowercased document language.
+ if (info.hasSignature()) {
+ String language =
+ doc.semanticdb.getLanguage().toString().toLowerCase(Locale.ROOT).intern();
+ String signature = new SignatureFormatter(info, symtab).formatSymbol();
+ tinfo.addDocumentation("```" + language + "\n" + signature + "\n```");
+ }
+ // Documentation entry 2: the symbol's own documentation message, when non-empty.
+ String documentation = info.getDocumentation().getMessage();
+ if (!documentation.isEmpty()) {
+ tinfo.addDocumentation(documentation);
+ }
+ tdoc.addSymbols(tinfo);
+ }
+ // Each document is emitted as a separate Index message holding exactly one document.
+ writer.emitTyped(LsifTyped.Index.newBuilder().addDocuments(tdoc).build());
+ }
+ }
+
+  /**
+   * Builds the Index message that carries only the metadata of this run: protocol version,
+   * project root URI (from the sourceroot), UTF-8 text encoding and the tool info (name,
+   * version and arguments) taken from the options.
+   */
+  private LsifTyped.Index typedMetadata() {
+    LsifTyped.ToolInfo.Builder toolInfo =
+        LsifTyped.ToolInfo.newBuilder()
+            .setName(options.toolInfo.getName())
+            .setVersion(options.toolInfo.getVersion())
+            .addAllArguments(options.toolInfo.getArgsList());
+    LsifTyped.Metadata.Builder metadata =
+        LsifTyped.Metadata.newBuilder()
+            .setVersion(LsifTyped.ProtocolVersion.UnspecifiedProtocolVersion)
+            .setProjectRoot(options.sourceroot.toUri().toString())
+            .setTextDocumentEncoding(LsifTyped.TextEncoding.UTF8)
+            .setToolInfo(toolInfo);
+    return LsifTyped.Index.newBuilder().setMetadata(metadata).build();
+  }
+
+ private void runGraph(List files, PackageTable packages) {
writer.emitMetaData();
int projectId = writer.emitProject(options.language);
@@ -57,11 +169,7 @@ private void run() throws IOException {
filesStream(files)
.flatMap(d -> processPath(d, isExportedSymbol, packages))
.collect(Collectors.toList());
-
writer.emitContains(projectId, documentIds);
-
- writer.build();
- options.reporter.endProcessing();
}
private Stream filesStream(List files) {
@@ -170,16 +278,22 @@ private Integer processDocumentUnsafe(
// Overrides
if (symbolInformation.getOverriddenSymbolsCount() > 0) {
- int[] overriddenReferenceResultIds = new int[symbolInformation.getOverriddenSymbolsCount()];
+ List overriddenReferenceResultIds =
+ new ArrayList<>(symbolInformation.getOverriddenSymbolsCount());
for (int i = 0; i < symbolInformation.getOverriddenSymbolsCount(); i++) {
String overriddenSymbol = symbolInformation.getOverriddenSymbols(i);
+ if (isIgnoredOverriddenSymbol(overriddenSymbol)) {
+ continue;
+ }
ResultIds overriddenIds = results.getOrInsertResultSet(overriddenSymbol);
- overriddenReferenceResultIds[i] = overriddenIds.referenceResult;
+ overriddenReferenceResultIds.add(overriddenIds.referenceResult);
writer.emitReferenceResultsItemEdge(
- overriddenIds.referenceResult, new int[] {rangeId}, doc.id);
+ overriddenIds.referenceResult, Collections.singletonList(rangeId), doc.id);
+ }
+ if (overriddenReferenceResultIds.size() > 0) {
+ writer.emitReferenceResultsItemEdge(
+ ids.referenceResult, overriddenReferenceResultIds, doc.id);
}
- writer.emitReferenceResultsItemEdge(
- ids.referenceResult, overriddenReferenceResultIds, doc.id);
}
}
writer.emitContains(doc.id, new ArrayList<>(rangeIds));
@@ -214,4 +328,10 @@ private Semanticdb.TextDocuments textDocumentsParseFrom(Path semanticdbPath) thr
return Semanticdb.TextDocuments.parseFrom(bytes);
}
}
+
+ private boolean isIgnoredOverriddenSymbol(String symbol) {
+ // Skip java/lang/Object# since it's the parent of all classes
+ // making it noisy for "find implementations" results.
+ return symbol.equals("java/lang/Object#");
+ }
}
diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java
index fbed9f47..c1d0222b 100644
--- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java
+++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java
@@ -6,6 +6,8 @@
import com.sourcegraph.lsif_protocol.LsifPosition;
import com.sourcegraph.semanticdb_javac.Semanticdb;
import com.sourcegraph.semanticdb_javac.SemanticdbSymbols;
+import lib.codeintel.lsif_typed.LsifTyped;
+
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.nio.file.Files;
@@ -15,6 +17,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
/** High-level utility methods to write LSIF vertex/edge objects into the LSIF output stream. */
@@ -36,6 +39,10 @@ public LsifWriter(LsifSemanticdbOptions options) throws IOException {
this.options = options;
}
+ public void emitTyped(LsifTyped.Index index) {
+ this.output.write(index.toByteArray());
+ }
+
public void emitMetaData() {
emitObject(
lsifVertex("metaData")
@@ -139,12 +146,11 @@ public void emitItem(int outV, int inV, int document) {
emitObject(lsifEdge("item").setOutV(outV).addInVs(inV).setDocument(document));
}
- public void emitReferenceResultsItemEdge(int outV, int[] inVs, int document) {
- List ints = Arrays.stream(inVs).boxed().collect(Collectors.toList());
+ public void emitReferenceResultsItemEdge(int outV, Iterable inVs, int document) {
emitObject(
lsifEdge("item")
.setOutV(outV)
- .addAllInVs(ints)
+ .addAllInVs(inVs)
.setDocument(document)
.setProperty("referenceResults"));
}
diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java
index 0327bf02..8dd7994a 100644
--- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java
+++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java
@@ -1,8 +1,31 @@
package com.sourcegraph.lsif_semanticdb;
public abstract class Package {
+ /**
+ * Placeholder package whose repoName() and version() both return ".". Intended as a
+ * fallback value for symbols that have no known package.
+ */
+ public static final Package EMPTY =
+ new Package() {
+ @Override
+ public String repoName() {
+ return ".";
+ }
+
+ @Override
+ public String version() {
+ return ".";
+ }
+ };
public abstract String repoName();
public abstract String version();
+
+ /**
+ * Renders this package as the string "maven", the repo name and the version, separated by
+ * single spaces. The repo name and the version are each passed through encode first.
+ */
+ public final String lsifTypedEncoding() {
+ return "maven " + encode(repoName()) + " " + encode(version());
+ }
+
+  /**
+   * Wraps {@code value} in backticks when it contains a space and returns it unchanged
+   * otherwise.
+   *
+   * <p>NOTE(review): backticks already present in {@code value} are not escaped -- confirm
+   * that is acceptable for the consumers of this encoding.
+   */
+  private String encode(String value) {
+    return value.contains(" ") ? "`" + value + "`" : value;
+  }
}
diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java
index 77cdf151..55d2f594 100644
--- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java
+++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java
@@ -19,6 +19,7 @@
import java.util.function.Function;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
+import java.util.regex.Pattern;
public class PackageTable implements Function {
@@ -30,8 +31,6 @@ public class PackageTable implements Function {
private static final PathMatcher JAR_PATTERN =
FileSystems.getDefault().getPathMatcher("glob:**.jar");
- private static final PathMatcher CLASS_PATTERN =
- FileSystems.getDefault().getPathMatcher("glob:**.class");
public PackageTable(LsifSemanticdbOptions options, LsifWriter writer) throws IOException {
this.writer = writer;
@@ -51,10 +50,6 @@ public void writeMonikerPackage(int monikerId, Package pkg) {
writer.emitPackageInformationEdge(monikerId, pkgId);
}
- public void writeImportedSymbol(String symbol, int monikerId) {
- packageForSymbol(symbol).ifPresent(pkg -> writeMonikerPackage(monikerId, pkg));
- }
-
public Optional packageForSymbol(String symbol) {
return SymbolDescriptor.toplevel(symbol)
.flatMap(
@@ -65,6 +60,7 @@ public Optional packageForSymbol(String symbol) {
}
private Optional packageForClassfile(String classfile) {
+
Package result = byClassfile.get(classfile);
if (result != null) return Optional.of(result);
if (!javaVersion.isJava8 && isJrtClassfile(classfile)) return Optional.of(javaVersion.pkg);
diff --git a/lsif-semanticdb/src/main/protobuf/lsif-typed.proto b/lsif-semanticdb/src/main/protobuf/lsif-typed.proto
new file mode 100644
index 00000000..9469a81b
--- /dev/null
+++ b/lsif-semanticdb/src/main/protobuf/lsif-typed.proto
@@ -0,0 +1,366 @@
+// An index contains one or more pieces of information about a given piece of
+// source code or software artifact. Complementary information can be merged
+// together from multiple sources to provide a unified code intelligence
+// experience.
+//
+// A program producing a file of this format is an "indexer" and may operate
+// somewhere on the spectrum between precision, such as indexes produced by
+// compiler-backed indexers, and heuristics, such as indexes produced by local
+// syntax-directed analysis for scope rules.
+
+syntax = "proto3";
+
+package lib.codeintel.lsif_typed;
+
+option go_package = "github.com/sourcegraph/sourcegraph/lib/codeintel/lsif_typed/";
+
+// Index represents a complete LSIF index for a workspace that is rooted at a
+// single directory. An Index message payload can have a large memory footprint
+// and it's therefore recommended to emit and consume an Index payload one field
+// value at a time. To permit streaming consumption of an Index payload, the
+// `metadata` field must appear at the start of the stream and must only appear
+// once in the stream. Other field values may appear in any order.
+message Index {
+ // Metadata about this index.
+ Metadata metadata = 1;
+ // Documents that belong to this index.
+ repeated Document documents = 2;
+ // (optional) Symbols that are referenced from this index but are defined in
+ // an external package (a separate `Index` message). Leave this field empty
+ // if you assume the external package will get indexed separately. If the
+ // external package won't get indexed for some reason then you can use this
+ // field to provide hover documentation for those external symbols.
+ repeated SymbolInformation external_symbols = 3;
+}
+
+message Metadata {
+ // Which version of this protocol was used to generate this index?
+ ProtocolVersion version = 1;
+ // Information about the tool that produced this index.
+ ToolInfo tool_info = 2;
+ // URI-encoded absolute path to the root directory of this index. All
+ // documents in this index must appear in a subdirectory of this root
+ // directory.
+ string project_root = 3;
+ // Text encoding of the source files on disk that are referenced from
+ // `Document.relative_path`.
+ TextEncoding text_document_encoding = 4;
+}
+
+enum ProtocolVersion {
+ UnspecifiedProtocolVersion = 0;
+}
+
+enum TextEncoding {
+ UnspecifiedTextEncoding = 0;
+ UTF8 = 1;
+ UTF16 = 2;
+}
+
+message ToolInfo {
+ // Name of the indexer that produced this index.
+ string name = 1;
+ // Version of the indexer that produced this index.
+ string version = 2;
+ // Command-line arguments that were used to invoke this indexer.
+ repeated string arguments = 3;
+}
+
+// Document defines the metadata about a source file on disk.
+message Document {
+ // (Required) Path to the text document relative to the directory supplied in
+ // the associated `Metadata.project_root`. Not URI-encoded. This value should
+ // not begin with a directory separator.
+ string relative_path = 1;
+ // Occurrences that appear in this file.
+ repeated Occurrence occurrences = 2;
+ // Symbols that are defined within this document.
+ repeated SymbolInformation symbols = 3;
+}
+
+// Symbol is similar to a URI, it identifies a class, method, or a local
+// variable. `SymbolInformation` contains rich metadata about symbols such as
+// the docstring.
+//
+// Symbol has a standardized string representation, which can be used
+// interchangeably with `Symbol`. The syntax for Symbol is the following:
+// ```
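+//   <symbol>               ::= <scheme> ' ' <package> ' ' { <descriptor> } | 'local ' <local-id>
+//   <package>              ::= <manager> ' ' <package-name> ' ' <version>
+//   <scheme>               ::= any UTF-8, escape spaces with double space.
+//   <manager>              ::= same as above
+//   <package-name>         ::= same as above
+//   <version>              ::= same as above
+//   <descriptor>           ::= <package> | <type> | <term> | <method> | <type-parameter> | <parameter> | <meta>
+//   <package>              ::= <name> '/'
+//   <type>                 ::= <name> '#'
+//   <term>                 ::= <name> '.'
+//   <meta>                 ::= <name> ':'
+//   <method>               ::= <name> '(' <method-disambiguator> ').'
+//   <type-parameter>       ::= '[' <name> ']'
+//   <parameter>            ::= '(' <name> ')'
+//   <name>                 ::= <identifier>
+//   <method-disambiguator> ::= <simple-identifier>
+//   <identifier>           ::= <simple-identifier> | <escaped-identifier>
+//   <simple-identifier>    ::= { <identifier-character> }
+//   <identifier-character> ::= '_' | '+' | '-' | '$' | ASCII letter or digit
+//   <escaped-identifier>   ::= '`' { <escaped-character> } '`'
+//   <escaped-character>    ::= any UTF-8 character, escape backticks with double backtick.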
+// ```
+message Symbol {
+ string scheme = 1;
+ Package package = 2;
+ repeated Descriptor descriptors = 3;
+}
+
+message Package {
+ string manager = 1;
+ string name = 2;
+ string version = 3;
+}
+
+message Descriptor {
+ enum Suffix {
+ UnspecifiedSuffix = 0;
+ Package = 1;
+ Type = 2;
+ Term = 3;
+ Method = 4;
+ TypeParameter = 5;
+ Parameter = 6;
+ // Can be used for any purpose.
+ Meta = 7;
+ Local = 8;
+ }
+ string name = 1;
+ string disambiguator = 2;
+ Suffix suffix = 3;
+}
+
+// SymbolInformation defines metadata about a symbol, such as the symbol's
+// docstring or what package it's defined in.
+message SymbolInformation {
+ // Identifier of this symbol, which can be referenced from `Occurrence.symbol`.
+ // The string must be formatted according to the grammar in `Symbol`.
+ string symbol = 1;
+ // (optional, but strongly recommended) The markdown-formatted documentation
+ // for this symbol. This field is repeated to allow different kinds of
+ // documentation. For example, it's nice to include both the signature of a
+ // method (parameters and return type) along with the accompanying docstring.
+ repeated string documentation = 3;
+ // (optional) Relationships to other symbols (e.g., implements, type definition).
+ repeated Relationship relationships = 4;
+}
+
+message Relationship {
+ string symbol = 1;
+ // When resolving "Find references", this field documents what other symbols
+ // should be included together with this symbol. For example, consider the
+ // following TypeScript code that defines two symbols `Animal#sound()` and
+ // `Dog#sound()`:
+ // ```ts
+ // interface Animal {
+ // ^^^^^^ definition Animal#
+ // sound(): string
+ // ^^^^^ definition Animal#sound()
+ // }
+ // class Dog implements Animal {
+ // ^^^ definition Dog#, implementation_symbols = Animal#
+ // public sound(): string { return "woof" }
+ // ^^^^^ definition Dog#sound(), references_symbols = Animal#sound(), implementation_symbols = Animal#sound()
+ // }
+ // const animal: Animal = new Dog()
+ // ^^^^^^ reference Animal#
+ // console.log(animal.sound())
+ // ^^^^^ reference Animal#sound()
+ // ```
+ // Doing "Find references" on the symbol `Animal#sound()` should return
+ // references to the `Dog#sound()` method as well. Vice-versa, doing "Find
+ // references" on the `Dog#sound()` method should include references to the
+ // `Animal#sound()` method as well.
+ bool is_reference = 2;
+ // Similar to `references_symbols` but for "Go to implementation".
+ // It's common for the `implementation_symbols` and `references_symbols` fields
+ // to have the same values but that's not always the case.
+ // In the TypeScript example above, observe that `implementation_symbols` has
+ // the value `"Animal#"` for the "Dog#" symbol while `references_symbols` is
+ // empty. When requesting "Find references" on the "Animal#" symbol we don't
+ // want to include references to "Dog#" even if "Go to implementation" on the
+ // "Animal#" symbol should navigate to the "Dog#" symbol.
+ bool is_implementation = 3;
+ // Similar to `references_symbols` but for "Go to type definition".
+ bool is_type_definition = 4;
+}
+
+// SymbolRole declares what "role" a symbol has in an occurrence. A role is
+// encoded as a bitmask where each bit represents a different role. For example,
+// to determine if the `Import` role is set, test whether the second bit of the
+// enum value is set. In pseudo-code, this can be implemented with the
+// logic: `const isImportRole = (role.value & SymbolRole.Import.value) > 0`.
+enum SymbolRole {
+ UnspecifiedSymbolRole = 0;
+ // Is the symbol defined here? If not, then this is a symbol reference.
+ Definition = 0x1;
+ // Is the symbol imported here?
+ Import = 0x2;
+ // Is the symbol written here?
+ WriteAccess = 0x4;
+ // Is the symbol read here?
+ ReadAccess = 0x8;
+ // Is the symbol in generated code?
+ Generated = 0x10;
+ // Is the symbol in test code?
+ Test = 0x20;
+}
+
+enum SyntaxKind {
+ UnspecifiedSyntaxKind = 0;
+
+ // `+`, `*`, etc.
+ Operator = 1;
+
+ // Comment, including comment markers and text
+ Comment = 2;
+
+ // `;` `.` `,`
+ PunctuationDelimiter = 3;
+ // (), {}, [] when used syntactically
+ PunctuationBracket = 4;
+ // `{}` within a string.
+ PunctuationSpecial = 5;
+
+ // `if`, `else`, `return`, `class`, etc.
+ Keyword = 6;
+
+ // non-specific variables, function calls, etc.
+ // In general, prefer more specific identifier kinds if possible.
+ Identifier = 7;
+ // Identifiers builtin to the language: `min`, `print` in Python.
+ BuiltinIdentifier = 8;
+ // Identifiers representing `null`-like values: `None` in Python, `nil` in Go.
+ NullIdentifier = 9;
+ // `xyz` in `const xyz = "hello"`
+ ConstantIdentifier = 10;
+ // `var X = "hello"` in Go
+ MutableGlobalIdentifier = 11;
+ // both parameter definition and references
+ ParameterIdentifier = 12;
+ // identifiers for variable definitions and references within a local scope
+ LocalIdentifier = 13;
+ // Used when an identifier shadows some other identifier within the scope
+ ShadowedIdentifier = 14;
+ // `package main`
+ ModuleIdentifier = 15;
+ // Macro references only, not definitions
+ MacroIdentifier = 16;
+
+ // Literal strings: "Hello, world!"
+ StringLiteral = 17;
+ // ".*" in a string regex
+ StringLiteralRegex = 18;
+ // "\t", "\n"
+ StringLiteralEscape = 19;
+ // datetimes within strings, special words within a string
+ StringLiteralSpecial = 20;
+ // "key" in { "key": "value" }
+ StringLiteralKey = 21;
+ // 'c' or similar, in languages that differentiate strings and characters
+ CharacterLiteral = 22;
+ // Literal numbers, both floats and integers
+ NumericLiteral = 23;
+ // `true`, `false`
+ BooleanLiteral = 24;
+
+ // Function definition only.
+ // Included because many editors highlight a function definition differently
+ // from function calls.
+ FunctionDefinition = 25;
+ // Macro definition only.
+ // Included because many editors highlight a macro definition differently
+ // from macro usages.
+ MacroDefinition = 26;
+
+ // non-builtin types, including namespaces
+ TypeIdentifier = 27;
+ // builtin types only, such as `str` for Python or `int` in Go
+ BuiltinTypeIdentifier = 28;
+
+ // Python decorators, c-like __attribute__
+ AttributeIdentifier = 29;
+
+ // Used for XML-like tags
+ Tag = 30;
+ // Attribute name in XML-like tags
+ TagAttribute = 31;
+ // Delimiters for XML-like tags
+ TagDelimiter = 32;
+}
+
+// Occurrence associates a source position with a symbol and/or highlighting
+// information.
+message Occurrence {
+ // Source position of this occurrence. Must be exactly three or four
+ // elements:
+ //
+ // - Four elements: `[startLine, startCharacter, endLine, endCharacter]`
+ // - Three elements: `[startLine, startCharacter, endCharacter]`. The end line
+ // is inferred to have the same value as the start line.
+ //
+ // Line numbers and characters are always 0-based. Make sure to increment the
+ // line/character values before displaying them in an editor-like UI because
+ // editors conventionally use 1-based numbers.
+ //
+ // Historical note: the original draft of this schema had a `Range` message
+ // type with `start` and `end` fields of type `Position`, mirroring LSP.
+ // Benchmarks revealed that this encoding was inefficient and that we could
+ // reduce the total payload size of an index by 50% by using `repeated int32`
+ // instead. The `repeated int32` encoding is admittedly more embarrassing to
+ // work with in some programming languages but we hope the performance
+ // improvements make up for it.
+ repeated int32 range = 1;
+ // (optional) The symbol that appears at this position. See
+ // `SymbolInformation.symbol` for how to format symbols as strings.
+ string symbol = 2;
+ // (optional) Bitmask for what `SymbolRole` apply to this occurrence. See
+ // `SymbolRole` for how to read and write this field.
+ int32 symbol_roles = 3;
+ // (optional) Markdown-formatted documentation for this specific range. If
+ // empty, the `SymbolInformation.documentation` field is used instead. One example
+ // where this field might be useful is when the symbol represents a generic
+ // function (with abstract type parameters such as `List<T>`) and at this
+ // occurrence we know the exact values (such as `List<String>`).
+ repeated string override_documentation = 4;
+ // (optional) What syntax highlighting class should be used for this range?
+ SyntaxKind syntax_kind = 5;
+ // Diagnostics that have been reported for this specific range.
+ repeated Diagnostic diagnostics = 6;
+}
+
+// Represents a diagnostic, such as a compiler error or warning, which should be
+// reported for a document.
+message Diagnostic {
+ // Should this diagnostic be reported as an error, warning, info, or hint?
+ Severity severity = 1;
+ // Code of this diagnostic, which might appear in the user interface.
+ string code = 2;
+ // Message of this diagnostic.
+ string message = 3;
+ // Human-readable string describing the source of this diagnostic, e.g.
+ // 'typescript' or 'super lint'.
+ string source = 4;
+ repeated DiagnosticTag tags = 5;
+}
+
+enum Severity {
+ UnspecifiedSeverity = 0;
+ Error = 1;
+ Warning = 2;
+ Information = 3;
+ Hint = 4;
+}
+
+enum DiagnosticTag {
+ UnspecifiedDiagnosticTag = 0;
+ Unnecessary = 1;
+ Deprecated = 2;
+}
diff --git a/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java b/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java
index f4e68205..d2677cb0 100644
--- a/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java
+++ b/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java
@@ -35,6 +35,10 @@ public static boolean isGlobal(String symbol) {
return !isLocal(symbol);
}
+  /** Returns true when the symbol ends with the method descriptor suffix {@code ")."}. */
+  public static boolean isMethod(String symbol) {
+    final String methodSuffix = ").";
+    return symbol.endsWith(methodSuffix);
+  }
+
/**
* A SemanticDB symbol is composed from a list of "descriptors".
*
diff --git a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java
index a50102cb..f88287c7 100644
--- a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java
+++ b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java
@@ -108,10 +108,12 @@ private void emitSymbolInformation(Symbol sym, JCTree tree) {
case ENUM:
case CLASS:
builder.setKind(Kind.CLASS);
+ builder.addAllOverriddenSymbols(semanticdbParentSymbols(sym, new ArrayList<>()));
break;
case INTERFACE:
case ANNOTATION_TYPE:
builder.setKind(Kind.INTERFACE);
+ builder.addAllOverriddenSymbols(semanticdbParentSymbols(sym, new ArrayList<>()));
break;
case FIELD:
builder.setKind(Kind.FIELD);
@@ -381,6 +383,31 @@ private int semanticdbSymbolInfoProperties(Symbol sym) {
return properties;
}
+  /**
+   * Collects the SemanticDB symbols of the transitive supertypes of {@code sym} (its
+   * superclass chain and all implemented or extended interfaces) into {@code result}.
+   *
+   * <p>Each supertype is recorded at most once, even when it is reachable through several
+   * paths (for example an interface inherited both directly and via the superclass).
+   *
+   * @param sym the symbol whose parents are collected; non-class symbols contribute nothing
+   * @param result accumulator that receives the parent symbols, in discovery order
+   * @return the same {@code result} list, to allow call chaining
+   */
+  private List<String> semanticdbParentSymbols(Symbol sym, List<String> result) {
+    if (!(sym instanceof Symbol.ClassSymbol)) {
+      return result;
+    }
+    Symbol.ClassSymbol csym = (Symbol.ClassSymbol) sym;
+    if (csym.getSuperclass() != Type.noType) {
+      semanticdbParentSymbol(csym.getSuperclass().tsym, result);
+    }
+    for (Type iType : csym.getInterfaces()) {
+      semanticdbParentSymbol(iType.tsym, result);
+    }
+    return result;
+  }
+
+  /**
+   * Records the SemanticDB symbol of a single parent type and then recurses into that
+   * parent's own supertypes. Null symbols and symbols that format to
+   * {@code SemanticdbSymbols.NONE} are ignored.
+   */
+  private void semanticdbParentSymbol(Symbol sym, List<String> result) {
+    if (sym == null) {
+      return;
+    }
+    String ssym = semanticdbSymbol(sym);
+    // A symbol that is already recorded has had its own parents visited as well, so
+    // skipping it avoids duplicate entries without losing any ancestor.
+    if (Objects.equals(ssym, SemanticdbSymbols.NONE) || result.contains(ssym)) {
+      return;
+    }
+    result.add(ssym);
+    semanticdbParentSymbols(sym, result);
+  }
+
private List semanticdbOverrides(Symbol sym) {
ArrayList overriddenSymbols = new ArrayList<>();
Set overriddenMethods = javacTypes.getOverriddenMethods(sym);
diff --git a/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala b/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala
index 47e7d249..88cf240d 100644
--- a/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala
+++ b/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala
@@ -8,6 +8,7 @@ import com.sourcegraph.io.DeleteVisitor
import com.sourcegraph.lsif_java.LsifJava
import com.sourcegraph.lsif_java.commands.SnapshotLsifCommand
import com.sourcegraph.lsif_java.commands.SnapshotLsifCommand.IndexedLsif
+import moped.reporters.ConsoleReporter
import moped.testkit.FileLayout
class LsifGraphSnapshotGenerator extends SnapshotGenerator {
@@ -140,7 +141,8 @@ class LsifGraphSnapshotGenerator extends SnapshotGenerator {
sourceroot
)
val objects = SnapshotLsifCommand.parseInput(outputLsif)
- val lsif = new IndexedLsif(outputLsif, objects, sourceroot)
+ val reporter = ConsoleReporter(System.out)
+ val lsif = new IndexedLsif(outputLsif, objects, sourceroot, reporter)
runSuccessfully(
List(
"snapshot-lsif",