Merge reduction of memory use in JS plugin. Close #553.
erikrose committed Jun 1, 2016
2 parents 3cb7441 + 3e5f0e4 commit 97b3f38
Showing 4 changed files with 76 additions and 73 deletions.
5 changes: 0 additions & 5 deletions dxr/indexers.py
@@ -487,8 +487,6 @@ def span_to_lines((kv, span)):
        warn('Bad Extent: end.row < start.row: %s < %s' %
             (span.end.row, span.start.row))
    else:
-        num_rows = span.end.row - span.start.row
-
        # TODO: There are a lot of Nones used as slice bounds below. Do we
        # ever translate them back into char offsets? If not, does the
        # highlighter or anything else choke on them?
@@ -501,7 +499,6 @@ def span_to_lines((kv, span)):
        yield (kv, 0, span.end.col), span.end.row


-
def split_into_lines(triples):
"""Split a bunch of (key, mapping, extent) triples into more triples
than those, with each one contained in a line.
@@ -519,8 +516,6 @@ def _split_one((key, mapping, extent)):
            warn('Bad extent: end.row < start.row: %s < %s' %
                 (extent.end.row, extent.start.row))
        else:
-            num_rows = extent.end.row - extent.start.row
-
            # TODO: There are a lot of Nones used as slice bounds below. Do we
            # ever translate them back into char offsets? If not, does the
            # highlighter or anything else choke on them?
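For context, span_to_lines and _split_one each expand one multi-line extent into a span per line, so the num_rows value computed above was never used and could simply be dropped. Here is a minimal sketch of that first/middle/last split, using plain tuples in place of DXR's span objects (a None bound means "to the end of the line", per the TODO above):

    def split_span(kv, start_row, start_col, end_row, end_col):
        # First line: from the span's start column to the end of the line.
        yield (kv, start_col, None), start_row
        # Middle lines: the entire line.
        for row in xrange(start_row + 1, end_row):
            yield (kv, 0, None), row
        # Last line: from column 0 up to the span's end column.
        yield (kv, 0, end_col), end_row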
40 changes: 23 additions & 17 deletions dxr/plugins/js/analyze_js/analyze_file.js
@@ -329,7 +329,7 @@ const Analyzer = {
        break;

      default:
-        console.log(`In ${fileIndex}, Unexpected statement: ${stmt.type} ${JSON.stringify(stmt)}`);
+        console.warn(`In ${fileIndex}, Unexpected statement: ${stmt.type} ${JSON.stringify(stmt)}`);
        break;
    }
  },
@@ -388,7 +388,7 @@ const Analyzer = {

  // Handle an expression by dispatching based on its type.
  expression(expr) {
-    if (!expr) console.log(Error().stack);
+    if (!expr) console.warn(Error().stack);

    switch (expr.type) {
      case "Identifier":
@@ -613,8 +613,8 @@ const Analyzer = {
        break;

      default:
-        console.log(Error().stack);
-        console.log(`In ${fileIndex}, Unexpected expression ${expr.type}: ${JSON.stringify(expr)}`);
+        console.warn(Error().stack);
+        console.warn(`In ${fileIndex}, Unexpected expression ${expr.type}: ${JSON.stringify(expr)}`);
        break;
    }
  },
@@ -635,7 +635,7 @@ const Analyzer = {
  // Handle a pattern-matching assignment by dispatching on type.
  pattern(pat) {
    if (!pat) {
-      console.log(Error().stack);
+      console.warn(Error().stack);
    }

    switch (pat.type) {
@@ -671,17 +671,16 @@ const Analyzer = {
        break;

      default:
-        console.log(`In ${fileIndex}, Unexpected pattern: ${pat.type} ${JSON.stringify(pat)}`);
+        console.warn(`In ${fileIndex}, Unexpected pattern: ${pat.type} ${JSON.stringify(pat)}`);
        break;
    }
  }
};

// Attempt to comment out some mozilla-specific preprocessor headers.
-function preprocess(text, comment)
+function preprocess(lines, comment)
{
  let substitution = false;
-  const lines = text.split("\n");
  const preprocessedLines = [];
  const branches = [true];
  for (let i = 0; i < lines.length; i++) {
@@ -729,27 +728,34 @@ function preprocess(text, comment)
    }
  }

-  return preprocessedLines.join("\n");
+  return preprocessedLines.join('\n');
}

function analyzeJS(filepath, relpath, tempFilepath)
{
  fileIndex = relpath;
  nextSymId = 0;
  outLines = [];
-  const text = preprocess(String(fs.readFileSync(filepath)), line => "//" + line);
+  const text = String(fs.readFileSync(filepath));
+  const lines = text.split('\n');
+  // With files this large we currently risk running out of memory in the
+  // indexer, so we skip them. TODO: fix the issue and disable this check.
+  if (lines.length >= 100000) {
+    console.warn(`Skipping ${filepath} because length of ${lines.length} exceeds limit.`);
+    return;
+  }
  try {
-    const ast = esprima.parse(text,
-                              {loc: true,
-                               source: path.basename(filepath),
-                               line: 1,
-                               tolerant: true,
-                               sourceType: "script"});
+    const ast = esprima.parse(preprocess(lines, line => "//" + line),
+                              {loc: true,
+                               source: path.basename(filepath),
+                               line: 1,
+                               tolerant: true,
+                               sourceType: "script"});
    if (ast) {
      Analyzer.program(ast);
    }
  } catch (e) {
-    console.log(fileIndex, e.name, e.message);
+    console.error(fileIndex, e.name, e.message);
  }
  fs.writeFileSync(tempFilepath, outLines.join('\n'));
}
4 changes: 2 additions & 2 deletions dxr/plugins/js/analyze_js/analyze_tree.js
@@ -61,8 +61,8 @@ function main() {
      const tempPath = path.join(tempRoot, pathSegment);
      ensurePath(tempPath);
      analyzeFile(fullPath,
-                path.join(pathSegment, stat.name),
-                path.join(tempPath, stat.name + '.data'));
+                  path.join(pathSegment, stat.name),
+                  path.join(tempPath, stat.name + '.data'));
    }
    next();
  });
100 changes: 51 additions & 49 deletions dxr/plugins/js/indexers.py
@@ -1,3 +1,4 @@
+from collections import namedtuple
from itertools import imap
import json
import subprocess
@@ -10,39 +11,19 @@
from dxr.utils import cumulative_sum


-class ReadAnalysis(object):
-    def __init__(self, tree, lines, contents):
-        self.needles = []
-        self.refs = []
-        # Build map of line number -> byte offset to use for emitting refs.
-        self.offsets = list(cumulative_sum(imap(len, contents.splitlines(True))))
-        for line in lines:
-            row, (start, end) = line['loc']
-            qref = QualifiedRef(tree, (line['sym'], line['name'], line['type']), qualname=line['sym'])
-            typ = line['type']
-            if line['kind'] == 'use':
-                typ += '_ref'
-            self.yield_needle(typ, row, start, end, line['name'], line['sym'])
-            self.yield_ref(row, start, end, qref)
-
-    def yield_ref(self, row, start, end, ref):
-        offset = self.row_to_offset(row)
-        self.refs.append((offset + start, offset + end, ref))
-
-    def row_to_offset(self, line):
-        """Return the byte offset in the file of given line number.
-        """
-        return self.offsets[line - 1]
+AnalysisSchema = namedtuple('AnalysisSchema', ['loc', 'kind', 'type', 'name', 'sym'])

-    def yield_needle(self, filter_name, line, start, end, name, qualname=None):
-        """Add needle for qualified filter_name from line:start
-        to line:end with given name and qualname.
-        """
-        # If qualname is not provided, then use name.
-        mapping = {'name': name, 'qualname': qualname or name}
-        self.needles.append((PLUGIN_NAME + '_' + filter_name,
-                             mapping,
-                             Extent(Position(row=line, col=start), Position(row=line, col=end))))

+def to_analysis(line):
+    """Convert a json-parsed line into an AnalysisSchema.
+    """
+    row, col = line['loc'].split(':', 1)
+    if '-' in col:
+        col = tuple(map(int, col.split('-', 1)))
+    else:
+        col = int(col), int(col)
+    line['loc'] = int(row), col
+    return AnalysisSchema(**line)


class TreeToIndex(dxr.indexers.TreeToIndex):
@@ -72,31 +53,52 @@ def __init__(self, path, contents, plugin_name, tree):
        self.analysis_path = join(join(join(tree.temp_folder, 'plugins/js'),
                                       relpath(dirname(self.absolute_path()), tree.source_folder)),
                                  basename(path) + '.data')
-        lines = []
+        # All lines from the analysis output file.
+        self.lines = []
+        # Map of line number -> byte offset to use for emitting refs.
+        self.offsets = []
        if self.is_interesting():
            with open(self.analysis_path) as analysis:
-                lines = self.parse_analysis(analysis.readlines())
-            lines = sorted(lines, key=lambda x: x['loc'])
-            self.analyzer = ReadAnalysis(tree, lines, contents)
+                self.lines = sorted((self.parse_analysis(line) for line in analysis), key=lambda x: x.loc)
+            self.offsets = list(cumulative_sum(imap(len, contents.splitlines(True))))

    def is_interesting(self):
        return exists(self.analysis_path)

-    def parse_analysis(self, lines):
-        def parse_loc(line):
-            if 'loc' in line:
-                row, col = line['loc'].split(':', 1)
-                if '-' in col:
-                    col = tuple(map(int, col.split('-', 1)))
-                else:
-                    col = int(col), int(col)
-                line['loc'] = int(row), col
-            return line
+    def parse_analysis(self, line):
+        """Convert a JSON line string into an AnalysisSchema object.
+        """
+        return json.loads(line, object_hook=to_analysis)

-        return (parse_loc(json.loads(line)) for line in lines)
+    def build_ref(self, row, start, end, ref):
+        """Create a 3-tuple from given line, start and end columns, and ref.
+        """
+        # Offset table is 0-indexed, line numbers are 1-indexed.
+        offset = self.offsets[row - 1]
+        return offset + start, offset + end, ref

+    def build_needle(self, filter_name, line, start, end, name, qualname=None):
+        """Create a needle mapping for the given filter, line, start and end
+        columns, and name.
+        """
+        # If qualname is not provided, then use name.
+        mapping = {'name': name, 'qualname': qualname or name}
+        return (PLUGIN_NAME + '_' + filter_name, mapping,
+                Extent(Position(row=line, col=start), Position(row=line, col=end)))

    def needles_by_line(self):
-        return iterable_per_line(with_start_and_end(split_into_lines(self.analyzer.needles)))
+        def all_needles():
+            for line in self.lines:
+                row, (start, end) = line.loc
+                typ = line.type
+                if line.kind == 'use':
+                    typ += '_ref'
+                yield self.build_needle(typ, row, start, end, line.name, line.sym)
+
+        return iterable_per_line(with_start_and_end(all_needles()))

    def refs(self):
-        return self.analyzer.refs
+        for line in self.lines:
+            row, (start, end) = line.loc
+            qref = QualifiedRef(self.tree, (line.sym, line.name, line.type), qualname=line.sym)
+            yield self.build_ref(row, start, end, qref)

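To make the new data flow concrete, here is a small, self-contained sketch of how one line of analyzer output becomes an AnalysisSchema and then an absolute-offset ref span. The to_analysis helper is the one added above; the sample JSON line and file contents are invented for illustration, and a local running sum stands in for dxr.utils.cumulative_sum, which the code above assumes yields each line's starting byte offset (beginning at 0):

    import json
    from collections import namedtuple

    AnalysisSchema = namedtuple('AnalysisSchema', ['loc', 'kind', 'type', 'name', 'sym'])

    def to_analysis(line):
        # Parse a "row:start-end" (or bare "row:col") loc into (row, (start, end)).
        row, col = line['loc'].split(':', 1)
        if '-' in col:
            col = tuple(map(int, col.split('-', 1)))
        else:
            col = int(col), int(col)
        line['loc'] = int(row), col
        return AnalysisSchema(**line)

    # A hypothetical analyzer line: a use of `foo` on row 2, columns 4-7.
    raw = '{"loc": "2:4-7", "kind": "use", "type": "prop", "name": "foo", "sym": "#1"}'
    record = json.loads(raw, object_hook=to_analysis)

    # Byte offset at which each line starts, as build_ref expects.
    contents = 'var a;\nuse foo here;\n'
    offsets, total = [], 0
    for text in contents.splitlines(True):
        offsets.append(total)
        total += len(text)

    row, (start, end) = record.loc
    offset = offsets[row - 1]  # offset table is 0-indexed, rows are 1-indexed
    print offset + start, offset + end  # absolute span of the `foo` ref: 11 14

Sorting the parsed records by loc, as the new __init__ does, then works naturally, because the (row, (start, end)) tuples compare lexicographically.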