diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala index 6a037301..a36375f6 100644 --- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala +++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala @@ -44,17 +44,19 @@ final case class IndexSemanticdbCommand( @Inline() app: Application = Application.default ) extends Command { def sourceroot: Path = AbsolutePath.of(app.env.workingDirectory) - def isProtobufFormat: Boolean = - IndexSemanticdbCommand.isProtobufFormat(output) def absoluteTargetroots: List[Path] = targetroot.map(AbsolutePath.of(_, app.env.workingDirectory)) def run(): Int = { val reporter = new ConsoleLsifSemanticdbReporter(app) - val format = - if (isProtobufFormat) - LsifOutputFormat.PROTOBUF - else - LsifOutputFormat.JSON + val outputFilename = output.getFileName.toString + val format = LsifOutputFormat.fromFilename(outputFilename) + if (format == LsifOutputFormat.UNKNOWN) { + app.error( + s"unknown output format for filename '$outputFilename'. 
" + + s"Supported file extension are `*.lsif`, `*.lsif-typed'" + ) + return 1 + } val packages = absoluteTargetroots .iterator diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala index 529466f8..f2bbc586 100644 --- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala +++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/SnapshotLsifCommand.scala @@ -42,6 +42,7 @@ import moped.cli.Command import moped.cli.CommandParser import moped.reporters.Input import moped.reporters.Position +import moped.reporters.Reporter import org.scalameta.ascii.layout.prefs.LayoutPrefsImpl @CommandName("snapshot-lsif") @@ -65,7 +66,11 @@ case class SnapshotLsifCommand( for { inputPath <- input in = AbsolutePath.of(inputPath, sourceroot) - doc <- SnapshotLsifCommand.parseTextDocument(in, sourceroot) + if Files.isRegularFile(in) || { + app.error(s"no such file: $in") + false + } + doc <- SnapshotLsifCommand.parseTextDocument(in, sourceroot, app.reporter) } { val docPath = AbsolutePath .of(Paths.get(doc.getUri), sourceroot) @@ -86,16 +91,21 @@ case class SnapshotLsifCommand( object SnapshotLsifCommand { private val jsonParser = JsonFormat.parser().ignoringUnknownFields() - def parseTextDocument(input: Path, sourceroot: Path): List[TextDocument] = { - parseSemanticdb(input, parseInput(input), sourceroot) + def parseTextDocument( + input: Path, + sourceroot: Path, + reporter: Reporter + ): List[TextDocument] = { + parseSemanticdb(input, parseInput(input), sourceroot, reporter) } def parseSemanticdb( input: Path, objects: mutable.Buffer[LsifObject], - sourceroot: Path + sourceroot: Path, + reporter: Reporter ): List[TextDocument] = { - val lsif = new IndexedLsif(input, objects, sourceroot) + val lsif = new IndexedLsif(input, objects, sourceroot, reporter) lsif .ranges .iterator @@ -169,7 +179,8 @@ object 
SnapshotLsifCommand { class IndexedLsif( val path: Path, val objects: mutable.Buffer[LsifObject], - val sourceroot: Path + val sourceroot: Path, + val reporter: Reporter ) { val documents = mutable.Map.empty[Int, TextDocument.Builder] val next = mutable.Map.empty[Int, Int] @@ -432,24 +443,28 @@ object SnapshotLsifCommand { case "document" => val relativeFile = Paths.get(URI.create(o.getUri)) val absoluteFile = sourceroot.resolve(relativeFile) - val text = - new String( - Files.readAllBytes(absoluteFile), - StandardCharsets.UTF_8 - ) - val relativeUri = sourceroot - .relativize(absoluteFile) - .iterator() - .asScala - .mkString("/") - val language = Language - .values() - .find(_.name().compareToIgnoreCase(o.getLanguage) == 0) - .getOrElse(Language.UNKNOWN_LANGUAGE) - textDocument(o.getId) - .setUri(relativeUri) - .setLanguage(language) - .setText(text) + if (!Files.isRegularFile(absoluteFile)) { + reporter.warning(s"no such file: $absoluteFile") + } else { + val text = + new String( + Files.readAllBytes(absoluteFile), + StandardCharsets.UTF_8 + ) + val relativeUri = sourceroot + .relativize(absoluteFile) + .iterator() + .asScala + .mkString("/") + val language = Language + .values() + .find(_.name().compareToIgnoreCase(o.getLanguage) == 0) + .getOrElse(Language.UNKNOWN_LANGUAGE) + textDocument(o.getId) + .setUri(relativeUri) + .setLanguage(language) + .setText(text) + } case "definitionResult" => isDefinitionResult += o.getId() case "hoverResult" => diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java index 13306372..2c97a2a1 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputFormat.java @@ -6,6 +6,25 @@ *

The Protobuf format is experimental and currently only exists as a proof-of-concept. */ public enum LsifOutputFormat { - JSON, - PROTOBUF + GRAPH_NDJSON, + GRAPH_PROTOBUF, + TYPED_PROTOBUF, + TYPED_NDJSON, + UNKNOWN; + + public boolean isTyped() { + return this == TYPED_NDJSON || this == TYPED_PROTOBUF; + } + + public boolean isNewlineDelimitedJSON() { + return this == GRAPH_NDJSON || this == TYPED_NDJSON; + } + + public static LsifOutputFormat fromFilename(String name) { + if (name.endsWith(".lsif")) return GRAPH_NDJSON; + if (name.endsWith(".lsif-protobuf")) return GRAPH_PROTOBUF; + if (name.endsWith(".lsif-typed")) return TYPED_PROTOBUF; + if (name.endsWith(".lsif-typed.ndjson")) return TYPED_NDJSON; + return UNKNOWN; + } } diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java index eee4f6dc..2a27f3ea 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifOutputStream.java @@ -49,10 +49,10 @@ public void writeLsifObject(LsifObject.Builder object) { b.output.reset(); try { switch (options.format) { - case PROTOBUF: + case GRAPH_PROTOBUF: object.buildPartial().writeTo(b.output); break; - case JSON: + case GRAPH_NDJSON: default: jsonPrinter.appendTo(object, b.writer); b.writer.flush(); @@ -69,7 +69,9 @@ public void flush() throws IOException { byte[] bytes = buffer.poll(); while (bytes != null) { out.write(bytes); - out.write(NEWLINE); + if (options.format.isNewlineDelimitedJSON()) { + out.write(NEWLINE); + } bytes = buffer.poll(); } out.flush(); diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java index 84728c3a..6f50fb45 100644 --- 
a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java @@ -7,14 +7,18 @@ import com.sourcegraph.semanticdb_javac.Semanticdb.SymbolOccurrence; import com.sourcegraph.semanticdb_javac.Semanticdb.SymbolOccurrence.Role; import com.sourcegraph.semanticdb_javac.SemanticdbSymbols; +import lib.codeintel.lsif_typed.LsifTyped; import java.io.IOException; +import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.stream.StreamSupport; /** The core logic that converts SemanticDB into LSIF. */ public class LsifSemanticdb { @@ -49,6 +53,114 @@ private void run() throws IOException { return; } options.reporter.startProcessing(files.size()); + if (options.format.isTyped()) { + runTyped(files, packages); + } else { + runGraph(files, packages); + } + writer.build(); + options.reporter.endProcessing(); + } + + private void runTyped(List files, PackageTable packages) { + writer.emitTyped(typedMetadata()); + filesStream(files).forEach(document -> processTypedDocument(document, packages)); + } + + private String typedSymbol(String symbol, Package pkg) { + if (symbol.startsWith("local")) { + return "local " + symbol.substring("local".length()); + } + return "semanticdb maven " + pkg.repoName() + " " + pkg.version() + " " + symbol; + } + + private void processTypedDocument(Path path, PackageTable packages) { + for (LsifTextDocument doc : parseTextDocument(path).collect(Collectors.toList())) { + if (doc.semanticdb.getOccurrencesCount() == 0) { + continue; + } + + Path absolutePath = Paths.get(URI.create(doc.semanticdb.getUri())); + String relativePath = + StreamSupport.stream(options.sourceroot.relativize(absolutePath).spliterator(), false) + .map(p -> 
p.getFileName().toString()) + .collect(Collectors.joining("/")); + LsifTyped.Document.Builder tdoc = + LsifTyped.Document.newBuilder().setRelativePath(relativePath); + for (SymbolOccurrence occ : doc.sortedSymbolOccurrences()) { + int role = 0; + if (occ.getRole() == Role.DEFINITION) { + role |= LsifTyped.SymbolRole.Definition_VALUE; + } + boolean isSingleLineRange = occ.getRange().getStartLine() == occ.getRange().getEndLine(); + Iterable range = + isSingleLineRange + ? Arrays.asList( + occ.getRange().getStartLine(), + occ.getRange().getStartCharacter(), + occ.getRange().getEndCharacter()) + : Arrays.asList( + occ.getRange().getStartLine(), + occ.getRange().getStartCharacter(), + occ.getRange().getEndLine(), + occ.getRange().getEndCharacter()); + Package pkg = packages.packageForSymbol(occ.getSymbol()).orElse(Package.EMPTY); + tdoc.addOccurrences( + LsifTyped.Occurrence.newBuilder() + .addAllRange(range) + .setSymbol(typedSymbol(occ.getSymbol(), pkg)) + .setSymbolRoles(role)); + } + Symtab symtab = new Symtab(doc.semanticdb); + for (SymbolInformation info : doc.semanticdb.getSymbolsList()) { + Package pkg = packages.packageForSymbol(info.getSymbol()).orElse(Package.EMPTY); + LsifTyped.SymbolInformation.Builder tinfo = + LsifTyped.SymbolInformation.newBuilder().setSymbol(typedSymbol(info.getSymbol(), pkg)); + + for (String overriddenSymbol : info.getOverriddenSymbolsList()) { + if (isIgnoredOverriddenSymbol(overriddenSymbol)) { + continue; + } + Package overriddenSymbolPkg = + packages.packageForSymbol(overriddenSymbol).orElse(Package.EMPTY); + tinfo.addRelationships( + LsifTyped.Relationship.newBuilder() + .setSymbol(typedSymbol(overriddenSymbol, overriddenSymbolPkg)) + .setIsImplementation(true) + .setIsReference(SemanticdbSymbols.isMethod(info.getSymbol()))); + } + if (info.hasSignature()) { + String language = + doc.semanticdb.getLanguage().toString().toLowerCase(Locale.ROOT).intern(); + String signature = new SignatureFormatter(info, symtab).formatSymbol(); + 
tinfo.addDocumentation("```" + language + "\n" + signature + "\n```"); + } + String documentation = info.getDocumentation().getMessage(); + if (!documentation.isEmpty()) { + tinfo.addDocumentation(documentation); + } + tdoc.addSymbols(tinfo); + } + writer.emitTyped(LsifTyped.Index.newBuilder().addDocuments(tdoc).build()); + } + } + + private LsifTyped.Index typedMetadata() { + return LsifTyped.Index.newBuilder() + .setMetadata( + LsifTyped.Metadata.newBuilder() + .setVersion(LsifTyped.ProtocolVersion.UnspecifiedProtocolVersion) + .setProjectRoot(options.sourceroot.toUri().toString()) + .setTextDocumentEncoding(LsifTyped.TextEncoding.UTF8) + .setToolInfo( + LsifTyped.ToolInfo.newBuilder() + .setName(options.toolInfo.getName()) + .setVersion(options.toolInfo.getVersion()) + .addAllArguments(options.toolInfo.getArgsList()))) + .build(); + } + + private void runGraph(List files, PackageTable packages) { writer.emitMetaData(); int projectId = writer.emitProject(options.language); @@ -57,11 +169,7 @@ private void run() throws IOException { filesStream(files) .flatMap(d -> processPath(d, isExportedSymbol, packages)) .collect(Collectors.toList()); - writer.emitContains(projectId, documentIds); - - writer.build(); - options.reporter.endProcessing(); } private Stream filesStream(List files) { @@ -170,16 +278,22 @@ private Integer processDocumentUnsafe( // Overrides if (symbolInformation.getOverriddenSymbolsCount() > 0) { - int[] overriddenReferenceResultIds = new int[symbolInformation.getOverriddenSymbolsCount()]; + List overriddenReferenceResultIds = + new ArrayList<>(symbolInformation.getOverriddenSymbolsCount()); for (int i = 0; i < symbolInformation.getOverriddenSymbolsCount(); i++) { String overriddenSymbol = symbolInformation.getOverriddenSymbols(i); + if (isIgnoredOverriddenSymbol(overriddenSymbol)) { + continue; + } ResultIds overriddenIds = results.getOrInsertResultSet(overriddenSymbol); - overriddenReferenceResultIds[i] = overriddenIds.referenceResult; + 
overriddenReferenceResultIds.add(overriddenIds.referenceResult); writer.emitReferenceResultsItemEdge( - overriddenIds.referenceResult, new int[] {rangeId}, doc.id); + overriddenIds.referenceResult, Collections.singletonList(rangeId), doc.id); + } + if (overriddenReferenceResultIds.size() > 0) { + writer.emitReferenceResultsItemEdge( + ids.referenceResult, overriddenReferenceResultIds, doc.id); } - writer.emitReferenceResultsItemEdge( - ids.referenceResult, overriddenReferenceResultIds, doc.id); } } writer.emitContains(doc.id, new ArrayList<>(rangeIds)); @@ -214,4 +328,10 @@ private Semanticdb.TextDocuments textDocumentsParseFrom(Path semanticdbPath) thr return Semanticdb.TextDocuments.parseFrom(bytes); } } + + private boolean isIgnoredOverriddenSymbol(String symbol) { + // Skip java/lang/Object# since it's the parent of all classes + // making it noisy for "find implementations" results. + return symbol.equals("java/lang/Object#"); + } } diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java index fbed9f47..c1d0222b 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifWriter.java @@ -6,6 +6,8 @@ import com.sourcegraph.lsif_protocol.LsifPosition; import com.sourcegraph.semanticdb_javac.Semanticdb; import com.sourcegraph.semanticdb_javac.SemanticdbSymbols; +import lib.codeintel.lsif_typed.LsifTyped; + import java.io.BufferedOutputStream; import java.io.IOException; import java.nio.file.Files; @@ -15,6 +17,7 @@ import java.util.Arrays; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Pattern; import java.util.stream.Collectors; /** High-level utility methods to write LSIF vertex/edge objects into the LSIF output stream. 
*/ @@ -36,6 +39,10 @@ public LsifWriter(LsifSemanticdbOptions options) throws IOException { this.options = options; } + public void emitTyped(LsifTyped.Index index) { + this.output.write(index.toByteArray()); + } + public void emitMetaData() { emitObject( lsifVertex("metaData") @@ -139,12 +146,11 @@ public void emitItem(int outV, int inV, int document) { emitObject(lsifEdge("item").setOutV(outV).addInVs(inV).setDocument(document)); } - public void emitReferenceResultsItemEdge(int outV, int[] inVs, int document) { - List ints = Arrays.stream(inVs).boxed().collect(Collectors.toList()); + public void emitReferenceResultsItemEdge(int outV, Iterable inVs, int document) { emitObject( lsifEdge("item") .setOutV(outV) - .addAllInVs(ints) + .addAllInVs(inVs) .setDocument(document) .setProperty("referenceResults")); } diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java index 0327bf02..8dd7994a 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/Package.java @@ -1,8 +1,31 @@ package com.sourcegraph.lsif_semanticdb; public abstract class Package { + public static final Package EMPTY = + new Package() { + @Override + public String repoName() { + return "."; + } + + @Override + public String version() { + return "."; + } + }; public abstract String repoName(); public abstract String version(); + + public final String lsifTypedEncoding() { + return "maven " + encode(repoName()) + " " + encode(version()); + } + + private String encode(String value) { + if (value.contains(" ")) { + return "`" + value + "`"; + } + return value; + } } diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java index 77cdf151..55d2f594 100644 --- 
a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java @@ -19,6 +19,7 @@ import java.util.function.Function; import java.util.jar.JarEntry; import java.util.jar.JarFile; +import java.util.regex.Pattern; public class PackageTable implements Function { @@ -30,8 +31,6 @@ public class PackageTable implements Function { private static final PathMatcher JAR_PATTERN = FileSystems.getDefault().getPathMatcher("glob:**.jar"); - private static final PathMatcher CLASS_PATTERN = - FileSystems.getDefault().getPathMatcher("glob:**.class"); public PackageTable(LsifSemanticdbOptions options, LsifWriter writer) throws IOException { this.writer = writer; @@ -51,10 +50,6 @@ public void writeMonikerPackage(int monikerId, Package pkg) { writer.emitPackageInformationEdge(monikerId, pkgId); } - public void writeImportedSymbol(String symbol, int monikerId) { - packageForSymbol(symbol).ifPresent(pkg -> writeMonikerPackage(monikerId, pkg)); - } - public Optional packageForSymbol(String symbol) { return SymbolDescriptor.toplevel(symbol) .flatMap( @@ -65,6 +60,7 @@ public Optional packageForSymbol(String symbol) { } private Optional packageForClassfile(String classfile) { + Package result = byClassfile.get(classfile); if (result != null) return Optional.of(result); if (!javaVersion.isJava8 && isJrtClassfile(classfile)) return Optional.of(javaVersion.pkg); diff --git a/lsif-semanticdb/src/main/protobuf/lsif-typed.proto b/lsif-semanticdb/src/main/protobuf/lsif-typed.proto new file mode 100644 index 00000000..9469a81b --- /dev/null +++ b/lsif-semanticdb/src/main/protobuf/lsif-typed.proto @@ -0,0 +1,366 @@ +// An index contains one or more pieces of information about a given piece of +// source code or software artifact. Complementary information can be merged +// together from multiple sources to provide a unified code intelligence +// experience. 
+// +// A program producing a file of this format is an "indexer" and may operate +// somewhere on the spectrum between precision, such as indexes produced by +// compiler-backed indexers, and heuristics, such as indexes produced by local +// syntax-directed analysis for scope rules. + +syntax = "proto3"; + +package lib.codeintel.lsif_typed; + +option go_package = "github.com/sourcegraph/sourcegraph/lib/codeintel/lsif_typed/"; + +// Index represents a complete LSIF index for a workspace that is rooted at a +// single directory. An Index message payload can have a large memory footprint +// and it's therefore recommended to emit and consume an Index payload one field +// value at a time. To permit streaming consumption of an Index payload, the +// `metadata` field must appear at the start of the stream and must only appear +// once in the stream. Other field values may appear in any order. +message Index { + // Metadata about this index. + Metadata metadata = 1; + // Documents that belong to this index. + repeated Document documents = 2; + // (optional) Symbols that are referenced from this index but are defined in + // an external package (a separate `Index` message). Leave this field empty + // if you assume the external package will get indexed separately. If the + // external package won't get indexed for some reason then you can use this + // field to provide hover documentation for those external symbols. + repeated SymbolInformation external_symbols = 3; +} + +message Metadata { + // Which version of this protocol was used to generate this index? + ProtocolVersion version = 1; + // Information about the tool that produced this index. + ToolInfo tool_info = 2; + // URI-encoded absolute path to the root directory of this index. All + // documents in this index must appear in a subdirectory of this root + // directory. + string project_root = 3; + // Text encoding of the source files on disk that are referenced from + // `Document.relative_path`.
+ TextEncoding text_document_encoding = 4; +} + +enum ProtocolVersion { + UnspecifiedProtocolVersion = 0; +} + +enum TextEncoding { + UnspecifiedTextEncoding = 0; + UTF8 = 1; + UTF16 = 2; +} + +message ToolInfo { + // Name of the indexer that produced this index. + string name = 1; + // Version of the indexer that produced this index. + string version = 2; + // Command-line arguments that were used to invoke this indexer. + repeated string arguments = 3; +} + +// Document defines the metadata about a source file on disk. +message Document { + // (Required) Path to the text document relative to the directory supplied in + // the associated `Metadata.project_root`. Not URI-encoded. This value should + // not begin with a directory separator. + string relative_path = 1; + // Occurrences that appear in this file. + repeated Occurrence occurrences = 2; + // Symbols that are defined within this document. + repeated SymbolInformation symbols = 3; +} + +// Symbol is similar to a URI, it identifies a class, method, or a local +// variable. `SymbolInformation` contains rich metadata about symbols such as +// the docstring. +// +// Symbol has a standardized string representation, which can be used +// interchangeably with `Symbol`. The syntax for Symbol is the following: +// ``` +// <symbol> ::= <scheme> ' ' <package> ' ' { <descriptor> } | 'local ' <local-id> +// <package> ::= <manager> ' ' <package-name> ' ' <version> +// <scheme> ::= any UTF-8, escape spaces with double space. +// <manager> ::= same as above +// <package-name> ::= same as above +// <version> ::= same as above +// <descriptor> ::= <package> | <type> | <term> | <method> | <type-parameter> | <parameter> | <meta> +// <package> ::= <name> '/' +// <type> ::= <name> '#' +// <term> ::= <name> '.' +// <meta> ::= <name> ':' +// <method> ::= <name> '(' <method-disambiguator> ').' +// <type-parameter> ::= '[' <name> ']' +// <parameter> ::= '(' <name> ')' +// <name> ::= <identifier> +// <method-disambiguator> ::= <simple-identifier> +// <identifier> ::= <simple-identifier> | <escaped-identifier> +// <simple-identifier> ::= { <identifier-character> } +// <identifier-character> ::= '_' | '+' | '-' | '$' | ASCII letter or digit +// <escaped-identifier> ::= '`' { <escaped-character> } '`' +// <escaped-character> ::= any UTF-8 character, escape backticks with double backtick.
+// ``` +message Symbol { + string scheme = 1; + Package package = 2; + repeated Descriptor descriptors = 3; +} + +message Package { + string manager = 1; + string name = 2; + string version = 3; +} + +message Descriptor { + enum Suffix { + UnspecifiedSuffix = 0; + Package = 1; + Type = 2; + Term = 3; + Method = 4; + TypeParameter = 5; + Parameter = 6; + // Can be used for any purpose. + Meta = 7; + Local = 8; + } + string name = 1; + string disambiguator = 2; + Suffix suffix = 3; +} + +// SymbolInformation defines metadata about a symbol, such as the symbol's +// docstring or what package it's defined in. +message SymbolInformation { + // Identifier of this symbol, which can be referenced from `Occurrence.symbol`. + // The string must be formatted according to the grammar in `Symbol`. + string symbol = 1; + // (optional, but strongly recommended) The markdown-formatted documentation + // for this symbol. This field is repeated to allow different kinds of + // documentation. For example, it's nice to include both the signature of a + // method (parameters and return type) along with the accompanying docstring. + repeated string documentation = 3; + // (optional) Relationships to other symbols (e.g., implements, type definition). + repeated Relationship relationships = 4; +} + +message Relationship { + string symbol = 1; + // When resolving "Find references", this field documents what other symbols + // should be included together with this symbol.
For example, consider the + // following TypeScript code that defines two symbols `Animal#sound()` and + // `Dog#sound()`: + // ```ts + // interface Animal { + // ^^^^^^ definition Animal# + // sound(): string + // ^^^^^ definition Animal#sound() + // } + // class Dog implements Animal { + // ^^^ definition Dog#, implementation_symbols = Animal# + // public sound(): string { return "woof" } + // ^^^^^ definition Dog#sound(), references_symbols = Animal#sound(), implementation_symbols = Animal#sound() + // } + // const animal: Animal = new Dog() + // ^^^^^^ reference Animal# + // console.log(animal.sound()) + // ^^^^^ reference Animal#sound() + // ``` + // Doing "Find references" on the symbol `Animal#sound()` should return + // references to the `Dog#sound()` method as well. Vice-versa, doing "Find + // references" on the `Dog#sound()` method should include references to the + // `Animal#sound()` method as well. + bool is_reference = 2; + // Similar to `references_symbols` but for "Go to implementation". + // It's common for the `implementation_symbols` and `references_symbols` fields + // to have the same values but that's not always the case. + // In the TypeScript example above, observe that `implementation_symbols` has + // the value `"Animal#"` for the "Dog#" symbol while `references_symbols` is + // empty. When requesting "Find references" on the "Animal#" symbol we don't + // want to include references to "Dog#" even if "Go to implementation" on the + // "Animal#" symbol should navigate to the "Dog#" symbol. + bool is_implementation = 3; + // Similar to `references_symbols` but for "Go to type definition". + bool is_type_definition = 4; +} + +// SymbolRole declares what "role" a symbol has in an occurrence. A role is +// encoded as a bitmask where each bit represents a different role. For example, +// to determine if the `Import` role is set, test whether the second bit of the +// enum value is defined.
In pseudo-code, this can be implemented with the +// logic: `const isImportRole = (role.value & SymbolRole.Import.value) > 0`. +enum SymbolRole { + UnspecifiedSymbolRole = 0; + // Is the symbol defined here? If not, then this is a symbol reference. + Definition = 0x1; + // Is the symbol imported here? + Import = 0x2; + // Is the symbol written here? + WriteAccess = 0x4; + // Is the symbol read here? + ReadAccess = 0x8; + // Is the symbol in generated code? + Generated = 0x10; + // Is the symbol in test code? + Test = 0x20; +} + +enum SyntaxKind { + UnspecifiedSyntaxKind = 0; + + // `+`, `*`, etc. + Operator = 1; + + // Comment, including comment markers and text + Comment = 2; + + // `;` `.` `,` + PunctuationDelimiter = 3; + // (), {}, [] when used syntactically + PunctuationBracket = 4; + // `{}` within a string. + PunctuationSpecial = 5; + + // `if`, `else`, `return`, `class`, etc. + Keyword = 6; + + // non-specific variables, function calls, etc. + // In general, prefer more specific identifier kinds if possible. + Identifier = 7; + // Identifiers builtin to the language: `min`, `print` in Python. + BuiltinIdentifier = 8; + // Identifiers representing null-like values: `None` in Python, `nil` in Go. + NullIdentifier = 9; + // `xyz` in `const xyz = "hello"` + ConstantIdentifier = 10; + // `var X = "hello"` in Go + MutableGlobalIdentifier = 11; + // both parameter definition and references + ParameterIdentifier = 12; + // identifiers for variable definitions and references within a local scope + LocalIdentifier = 13; + // Used when identifier shadows some other identifier within the scope + ShadowedIdentifier = 14; + // `package main` + ModuleIdentifier = 15; + // Macro references only, not definitions + MacroIdentifier = 16; + + // Literal strings: "Hello, world!"
+ StringLiteral = 17; + // ".*" in a string regex + StringLiteralRegex = 18; + // "\t", "\n" + StringLiteralEscape = 19; + // datetimes within strings, special words within a string + StringLiteralSpecial = 20; + // "key" in { "key": "value" } + StringLiteralKey = 21; + // 'c' or similar, in languages that differentiate strings and characters + CharacterLiteral = 22; + // Literal numbers, both floats and integers + NumericLiteral = 23; + // `true`, `false` + BooleanLiteral = 24; + + // Function definition only. + // Included because many editors highlight a function definition differently + // from function calls. + FunctionDefinition = 25; + // Macro definition only. + // Included because many editors highlight a macro definition differently + // from macro usages. + MacroDefinition = 26; + + // non-builtin types, including namespaces + TypeIdentifier = 27; + // builtin types only, such as `str` for Python or `int` in Go + BuiltinTypeIdentifier = 28; + + // Python decorators, c-like __attribute__ + AttributeIdentifier = 29; + + // Used for XML-like tags + Tag = 30; + // Attribute name in XML-like tags + TagAttribute = 31; + // Delimiters for XML-like tags + TagDelimiter = 32; +} + +// Occurrence associates a source position with a symbol and/or highlighting +// information. +message Occurrence { + // Source position of this occurrence. Must be exactly three or four + // elements: + // + // - Four elements: `[startLine, startCharacter, endLine, endCharacter]` + // - Three elements: `[startLine, startCharacter, endCharacter]`. The end line + // is inferred to have the same value as the start line. + // + // Line numbers and characters are always 0-based. Make sure to increment the + // line/character values before displaying them in an editor-like UI because + // editors conventionally use 1-based numbers. + // + // Historical note: the original draft of this schema had a `Range` message + // type with `start` and `end` fields of type `Position`, mirroring LSP. 
+ // Benchmarks revealed that this encoding was inefficient and that we could + // reduce the total payload size of an index by 50% by using `repeated int32` + // instead. The `repeated int32` encoding is admittedly more embarrassing to + // work with in some programming languages but we hope the performance + // improvements make up for it. + repeated int32 range = 1; + // (optional) The symbol that appears at this position. See + // `SymbolInformation.symbol` for how to format symbols as strings. + string symbol = 2; + // (optional) Bitmask for which `SymbolRole` values apply to this occurrence. See + // `SymbolRole` for how to read and write this field. + int32 symbol_roles = 3; + // (optional) Markdown-formatted documentation for this specific range. If + // empty, the `SymbolInformation.documentation` field is used instead. One example + // where this field might be useful is when the symbol represents a generic + // function (with abstract type parameters such as `List<T>`) and at this + // occurrence we know the exact values (such as `List<String>`). + repeated string override_documentation = 4; + // (optional) What syntax highlighting class should be used for this range? + SyntaxKind syntax_kind = 5; + // Diagnostics that have been reported for this specific range. + repeated Diagnostic diagnostics = 6; +} + +// Represents a diagnostic, such as a compiler error or warning, which should be +// reported for a document. +message Diagnostic { + // Should this diagnostic be reported as an error, warning, info, or hint? + Severity severity = 1; + // Code of this diagnostic, which might appear in the user interface. + string code = 2; + // Message of this diagnostic. + string message = 3; + // Human-readable string describing the source of this diagnostic, e.g. + // 'typescript' or 'super lint'.
+ string source = 4; + repeated DiagnosticTag tags = 5; +} + +enum Severity { + UnspecifiedSeverity = 0; + Error = 1; + Warning = 2; + Information = 3; + Hint = 4; +} + +enum DiagnosticTag { + UnspecifiedDiagnosticTag = 0; + Unnecessary = 1; + Deprecated = 2; +} diff --git a/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java b/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java index f4e68205..d2677cb0 100644 --- a/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java +++ b/semanticdb-java/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbSymbols.java @@ -35,6 +35,10 @@ public static boolean isGlobal(String symbol) { return !isLocal(symbol); } + public static boolean isMethod(String symbol) { + return symbol.endsWith(")."); + } + /** * A SemanticDB symbol is composed from a list of "descriptors". * diff --git a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java index a50102cb..f88287c7 100644 --- a/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java +++ b/semanticdb-javac/src/main/java/com/sourcegraph/semanticdb_javac/SemanticdbVisitor.java @@ -108,10 +108,12 @@ private void emitSymbolInformation(Symbol sym, JCTree tree) { case ENUM: case CLASS: builder.setKind(Kind.CLASS); + builder.addAllOverriddenSymbols(semanticdbParentSymbols(sym, new ArrayList<>())); break; case INTERFACE: case ANNOTATION_TYPE: builder.setKind(Kind.INTERFACE); + builder.addAllOverriddenSymbols(semanticdbParentSymbols(sym, new ArrayList<>())); break; case FIELD: builder.setKind(Kind.FIELD); @@ -381,6 +383,31 @@ private int semanticdbSymbolInfoProperties(Symbol sym) { return properties; } + private List semanticdbParentSymbols(Symbol sym, List result) { + if (!(sym instanceof Symbol.ClassSymbol)) { + return result; + } + 
Symbol.ClassSymbol csym = (Symbol.ClassSymbol) sym; + if (csym.getSuperclass() != Type.noType) { + semanticdbParentSymbol(csym.getSuperclass().tsym, result); + } + for (Type iType : csym.getInterfaces()) { + semanticdbParentSymbol(iType.tsym, result); + } + return result; + } + + private void semanticdbParentSymbol(Symbol sym, List result) { + if (sym == null) { + return; + } + String ssym = semanticdbSymbol(sym); + if (!Objects.equals(ssym, SemanticdbSymbols.NONE)) { + result.add(ssym); + semanticdbParentSymbols(sym, result); + } + } + private List semanticdbOverrides(Symbol sym) { ArrayList overriddenSymbols = new ArrayList<>(); Set overriddenMethods = javacTypes.getOverriddenMethods(sym); diff --git a/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala b/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala index 47e7d249..88cf240d 100644 --- a/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala +++ b/tests/snapshots/src/main/scala/tests/LsifGraphSnapshotGenerator.scala @@ -8,6 +8,7 @@ import com.sourcegraph.io.DeleteVisitor import com.sourcegraph.lsif_java.LsifJava import com.sourcegraph.lsif_java.commands.SnapshotLsifCommand import com.sourcegraph.lsif_java.commands.SnapshotLsifCommand.IndexedLsif +import moped.reporters.ConsoleReporter import moped.testkit.FileLayout class LsifGraphSnapshotGenerator extends SnapshotGenerator { @@ -140,7 +141,8 @@ class LsifGraphSnapshotGenerator extends SnapshotGenerator { sourceroot ) val objects = SnapshotLsifCommand.parseInput(outputLsif) - val lsif = new IndexedLsif(outputLsif, objects, sourceroot) + val reporter = ConsoleReporter(System.out) + val lsif = new IndexedLsif(outputLsif, objects, sourceroot, reporter) runSuccessfully( List( "snapshot-lsif",