Skip to content

Commit

Permalink
[Code] always read file content from the original bare repo in indexer (
Browse files Browse the repository at this point in the history
elastic#40176) (elastic#40648)

* [Code] reenable code mocha tests

* skip multi node test

* [Code] read file content from the original bare repo if fails to open workspace in indexr

* More refactoring

* Add more unit tests

* Add unit tests

* always read file content from the original bare repo

* remove catching the workspace prepartion errors in indexer
  • Loading branch information
mw-ding committed Jul 9, 2019
1 parent 46ee4cb commit 527524b
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 103 deletions.
1 change: 0 additions & 1 deletion x-pack/legacy/plugins/code/model/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ export interface IndexRequest {

// The request for LspIndexer
export interface LspIndexRequest extends IndexRequest {
localRepoPath: string; // The repository local file path
filePath: string; // The file path within the repository
revision: string; // The revision of the current repository
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ const esClient = {

function prepareProject(url: string, p: string) {
const opts: CloneOptions = {
bare: 1,
fetchOpts: {
callbacks: {
certificateCheck: () => 0,
Expand Down Expand Up @@ -179,7 +180,7 @@ describe('lsp_incremental_indexer unit tests', () => {
const indexer = new LspIncrementalIndexer(
'github.com/elastic/TypeScript-Node-Starter',
'HEAD',
'6206f643',
'67002808',
lspservice,
serverOptions,
gitOps,
Expand All @@ -193,19 +194,20 @@ describe('lsp_incremental_indexer unit tests', () => {
assert.strictEqual(createSpy.callCount, 0);
assert.strictEqual(putAliasSpy.callCount, 0);

// DeletebyQuery is called 8 times (1 file + 1 symbol reuqests per diff item)
// for 4 MODIFIED items
assert.strictEqual(deleteByQuerySpy.callCount, 8);
// DeletebyQuery is called 10 times (1 file + 1 symbol reuqests per diff item)
// for 5 MODIFIED items
assert.strictEqual(deleteByQuerySpy.callCount, 10);

// There are 4 MODIFIED items and 1 ADDED item. 1 file + 1 symbol + 1 reference
// = 3 objects to index for each item. Total doc indexed should be 5 * 3 = 15,
// There are 5 MODIFIED items and 1 ADDED item. Only 1 file is in supported
// language. Each file with supported language has 1 file + 1 symbol + 1 reference.
// Total doc indexed should be 8 * 3 = 15,
// which can be fitted into a single batch index.
assert.strictEqual(bulkSpy.callCount, 2);
let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 15 * 2);
assert.strictEqual(total, 8 * 2);

// @ts-ignore
}).timeout(20000);
Expand Down Expand Up @@ -235,7 +237,7 @@ describe('lsp_incremental_indexer unit tests', () => {
const indexer = new LspIncrementalIndexer(
'github.com/elastic/TypeScript-Node-Starter',
'HEAD',
'6206f643',
'67002808',
lspservice,
serverOptions,
gitOps,
Expand Down Expand Up @@ -282,7 +284,7 @@ describe('lsp_incremental_indexer unit tests', () => {
const indexer = new LspIncrementalIndexer(
'github.com/elastic/TypeScript-Node-Starter',
'HEAD',
'6206f643',
'67002808',
lspservice,
serverOptions,
gitOps,
Expand All @@ -295,8 +297,7 @@ describe('lsp_incremental_indexer unit tests', () => {
repoUri: '',
filePath: 'package.json',
revision: 'HEAD',
originRevision: '6206f643',
localRepoPath: '',
originRevision: '67002808',
kind: DiffKind.MODIFIED,
});

Expand All @@ -305,16 +306,15 @@ describe('lsp_incremental_indexer unit tests', () => {
assert.strictEqual(createSpy.callCount, 0);
assert.strictEqual(putAliasSpy.callCount, 0);

// There are 2 items after the checkpoint. 1 file
// + 1 symbol + 1 ref = 3 objects to be indexed for each item. Total doc
// indexed should be 3 * 2 = 6, which can be fitted into a single batch index.
// There are 2 items after the checkpoint. No items is in supported language.
// Total doc indexed should be 3 * 1 = 3, which can be fitted into a single batch index.
assert.strictEqual(bulkSpy.callCount, 2);
let total = 0;
for (let i = 0; i < bulkSpy.callCount; i++) {
total += bulkSpy.getCall(i).args[0].body.length;
}
assert.strictEqual(total, 6 * 2);
assert.strictEqual(deleteByQuerySpy.callCount, 2);
assert.strictEqual(total, 5 * 2);
assert.strictEqual(deleteByQuerySpy.callCount, 4);
// @ts-ignore
}).timeout(20000);
// @ts-ignore
Expand Down
2 changes: 1 addition & 1 deletion x-pack/legacy/plugins/code/server/__tests__/lsp_indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const esClient = {

function prepareProject(url: string, p: string) {
const opts: CloneOptions = {
bare: 1,
fetchOpts: {
callbacks: {
certificateCheck: () => 0,
Expand Down Expand Up @@ -314,7 +315,6 @@ describe('lsp_indexer unit tests', function(this: any) {
repoUri: '',
filePath: 'src/public/js/main.ts',
revision: 'HEAD',
localRepoPath: '',
});

// Expect EsClient deleteByQuery called 0 times for repository cleaning while
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
* you may not use this file except in compliance with the Elastic License.
*/

import fs from 'fs';
import util from 'util';
import path from 'path';

import { ProgressReporter } from '.';
import { Diff, DiffKind } from '../../common/git_diff';
import { toCanonicalUrl } from '../../common/uri_util';
Expand All @@ -25,7 +21,7 @@ import { LspService } from '../lsp/lsp_service';
import { ServerOptions } from '../server_options';
import { detectLanguage } from '../utils/detect_language';
import { LspIndexer } from './lsp_indexer';
import { DocumentIndexName, ReferenceIndexName, SymbolIndexName } from './schema';
import { DocumentIndexName, SymbolIndexName } from './schema';

export class LspIncrementalIndexer extends LspIndexer {
protected type: string = 'lsp_inc';
Expand Down Expand Up @@ -125,15 +121,10 @@ export class LspIncrementalIndexer extends LspIndexer {

protected async *getIndexRequestIterator(): AsyncIterableIterator<LspIncIndexRequest> {
try {
const { workspaceDir } = await this.lspService.workspaceHandler.openWorkspace(
this.repoUri,
HEAD
);
if (this.diff) {
for (const f of this.diff.files) {
yield {
repoUri: this.repoUri,
localRepoPath: workspaceDir,
filePath: f.path,
originPath: f.originPath,
// Always use HEAD for now until we have multi revision.
Expand Down Expand Up @@ -175,48 +166,25 @@ export class LspIncrementalIndexer extends LspIndexer {
}

private async handleAddedRequest(request: LspIncIndexRequest, stats: IndexStats) {
const { repoUri, revision, filePath, localRepoPath } = request;

const lspDocUri = toCanonicalUrl({ repoUri, revision, file: filePath, schema: 'git:' });
const symbolNames = new Set<string>();
const { repoUri, filePath } = request;

let content = '';
try {
const response = await this.lspService.sendRequest('textDocument/full', {
textDocument: {
uri: lspDocUri,
},
reference: this.options.enableGlobalReference,
});

if (response && response.result.length > 0) {
const { symbols, references } = response.result[0];
for (const symbol of symbols) {
await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
symbolNames.add(symbol.symbolInformation.name);
}
stats.set(IndexStatsKey.Symbol, symbols.length);

for (const ref of references) {
await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
}
stats.set(IndexStatsKey.Reference, references.length);
content = await this.getFileSource(request);
} catch (error) {
if ((error as Error).message === this.FILE_OVERSIZE_ERROR_MSG) {
// Skip this index request if the file is oversized
this.log.debug(this.FILE_OVERSIZE_ERROR_MSG);
return stats;
} else {
this.log.debug(`Empty response from lsp server. Skip symbols and references indexing.`);
// Rethrow the issue if for other reasons
throw error;
}
} catch (error) {
this.log.error(`Index symbols or references error. Skip to file indexing.`);
this.log.error(error);
}

const localFilePath = path.join(localRepoPath, filePath);
const lstat = util.promisify(fs.lstat);
const stat = await lstat(localFilePath);

const readLink = util.promisify(fs.readlink);
const readFile = util.promisify(fs.readFile);
const content = stat.isSymbolicLink()
? await readLink(localFilePath, 'utf8')
: await readFile(localFilePath, 'utf8');
const { symbolNames, symbolsLength, referencesLength } = await this.execLspIndexing(request);
stats.set(IndexStatsKey.Symbol, symbolsLength);
stats.set(IndexStatsKey.Reference, referencesLength);

const language = await detectLanguage(filePath, Buffer.from(content));
const body: Document = {
Expand Down Expand Up @@ -269,17 +237,14 @@ export class LspIncrementalIndexer extends LspIndexer {
}

private async handleModifiedRequest(request: LspIncIndexRequest, stats: IndexStats) {
const { kind, originRevision, originPath, repoUri, localRepoPath } = request;
const { originRevision, originPath } = request;

// 1. first delete all related indexed data
await this.handleDeletedRequest(
{
repoUri,
localRepoPath,
...request,
revision: originRevision,
filePath: originPath ? originPath : '',
kind,
originRevision,
},
stats
);
Expand Down
85 changes: 51 additions & 34 deletions x-pack/legacy/plugins/code/server/indexer/lsp_indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
* you may not use this file except in compliance with the Elastic License.
*/

import fs from 'fs';
import util from 'util';
import path from 'path';

import { ResponseError } from 'vscode-jsonrpc';

import { ProgressReporter } from '.';
Expand Down Expand Up @@ -103,16 +99,11 @@ export class LspIndexer extends AbstractIndexer {

protected async *getIndexRequestIterator(): AsyncIterableIterator<LspIndexRequest> {
try {
const { workspaceDir } = await this.lspService.workspaceHandler.openWorkspace(
this.repoUri,
HEAD
);
const fileIterator = await this.gitOps.iterateRepo(this.repoUri, HEAD);
for await (const file of fileIterator) {
const filePath = file.path!;
const req: LspIndexRequest = {
repoUri: this.repoUri,
localRepoPath: workspaceDir,
filePath,
// Always use HEAD for now until we have multi revision.
// Also, since the workspace might get updated during the index, we always
Expand Down Expand Up @@ -200,32 +191,29 @@ export class LspIndexer extends AbstractIndexer {
}
}

protected async processRequest(request: LspIndexRequest): Promise<IndexStats> {
const stats: IndexStats = new Map<IndexStatsKey, number>()
.set(IndexStatsKey.Symbol, 0)
.set(IndexStatsKey.Reference, 0)
.set(IndexStatsKey.File, 0);
const { repoUri, revision, filePath, localRepoPath } = request;
protected FILE_OVERSIZE_ERROR_MSG = 'File size exceeds limit. Skip index.';
protected async getFileSource(request: LspIndexRequest): Promise<string> {
const { revision, filePath } = request;
// Always read file content from the original bare repo
const blob = await this.gitOps.fileContent(this.repoUri, filePath, revision);
if (blob.rawsize() > TEXT_FILE_LIMIT) {
throw new Error(this.FILE_OVERSIZE_ERROR_MSG);
}
return blob.content().toString();
}

this.log.debug(`Indexing ${filePath} at revision ${revision} for ${repoUri}`);
protected async execLspIndexing(
request: LspIndexRequest
): Promise<{
symbolNames: Set<string>;
symbolsLength: number;
referencesLength: number;
}> {
const { repoUri, revision, filePath } = request;
const lspDocUri = toCanonicalUrl({ repoUri, revision, file: filePath, schema: 'git:' });
const symbolNames = new Set<string>();

const localFilePath = path.join(localRepoPath, filePath);
const lstat = util.promisify(fs.lstat);
const stat = await lstat(localFilePath);

if (stat.size > TEXT_FILE_LIMIT) {
this.log.debug(`File size exceeds limit. Skip index.`);
return stats;
}

const readLink = util.promisify(fs.readlink);
const readFile = util.promisify(fs.readFile);
const content = stat.isSymbolicLink()
? await readLink(localFilePath, 'utf8')
: await readFile(localFilePath, 'utf8');

let symbolsLength = 0;
let referencesLength = 0;
try {
const lang = detectLanguageByFilename(filePath);
// filter file by language
Expand All @@ -243,12 +231,12 @@ export class LspIndexer extends AbstractIndexer {
await this.lspBatchIndexHelper.index(SymbolIndexName(repoUri), symbol);
symbolNames.add(symbol.symbolInformation.name);
}
stats.set(IndexStatsKey.Symbol, symbols.length);
symbolsLength = symbols.length;

for (const ref of references) {
await this.lspBatchIndexHelper.index(ReferenceIndexName(repoUri), ref);
}
stats.set(IndexStatsKey.Reference, references.length);
referencesLength = references.length;
} else {
this.log.debug(`Empty response from lsp server. Skip symbols and references indexing.`);
}
Expand All @@ -269,6 +257,35 @@ export class LspIndexer extends AbstractIndexer {
}
}

return { symbolNames, symbolsLength, referencesLength };
}

protected async processRequest(request: LspIndexRequest): Promise<IndexStats> {
const stats: IndexStats = new Map<IndexStatsKey, number>()
.set(IndexStatsKey.Symbol, 0)
.set(IndexStatsKey.Reference, 0)
.set(IndexStatsKey.File, 0);
const { repoUri, revision, filePath } = request;
this.log.debug(`Indexing ${filePath} at revision ${revision} for ${repoUri}`);

let content = '';
try {
content = await this.getFileSource(request);
} catch (error) {
if ((error as Error).message === this.FILE_OVERSIZE_ERROR_MSG) {
// Skip this index request if the file is oversized
this.log.debug(this.FILE_OVERSIZE_ERROR_MSG);
return stats;
} else {
// Rethrow the issue if for other reasons
throw error;
}
}

const { symbolNames, symbolsLength, referencesLength } = await this.execLspIndexing(request);
stats.set(IndexStatsKey.Symbol, symbolsLength);
stats.set(IndexStatsKey.Reference, referencesLength);

const language = await detectLanguage(filePath, Buffer.from(content));
const body: Document = {
repoUri,
Expand Down

0 comments on commit 527524b

Please sign in to comment.