Skip to content

Commit

Permalink
Fuzzy indexing private repos
Browse files Browse the repository at this point in the history
Finishes #2
  • Loading branch information
lackac committed Jun 13, 2011
1 parent 1b9dfbe commit 8fa5777
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 16 deletions.
95 changes: 82 additions & 13 deletions ext/js/github.js
Expand Up @@ -66,53 +66,122 @@ window.GitHub = {
// Rebuilds the repository search indexes and stores them in localStorage, then
// reports back through callback(err, timestamp). When the stored timestamp is
// less than 86400000 ms (24 hours) old, nothing is rebuilt and the callback
// receives "index is recent" instead.
// NOTE(review): this text is a rendered diff without +/- markers, so each
// changed statement appears twice: the removed form directly followed by its
// replacement.
updateRepoIndex: function(callback) {
// a missing callback becomes a no-op so the calls below are always safe
if (callback === undefined) callback = function() {}
// do not update if index is recent
if (localStorage.by_prefix_timestamp && (new Date().getTime()) - localStorage.by_prefix_timestamp < 86400000) {
if (localStorage.index_timestamp && (new Date().getTime()) - localStorage.index_timestamp < 86400000) {
return callback("index is recent");
}

// fetchRepos is defined outside this view; it is used here as yielding (err, repos)
this.fetchRepos(function(err, repos) {
if (err) return callback(err);
var index = {}, repoLookup = {};
var index = { by_prefix: {}, by_fuzzy: {} }, repoLookup = {};

// Appends the repo id ("owner/name") to the bucket for `key`, creating the
// bucket on first use, and records a display line for that id in repoLookup
// the first time the id is seen.
function addToIndex(key, repo) {
if (index[key] === undefined) {
index[key] = [];
function addToIndex(index_type, key, repo) {
if (index[index_type][key] === undefined) {
index[index_type][key] = [];
}
var id = repo.owner + '/' + repo.name;
if (!repoLookup[id]) repoLookup[id] = id + ' ('+repo.watchers+') ' + repo.description;
index[key].push(id);
index[index_type][key].push(id);
}

// larger pushed_at sorts first, so every bucket is filled in that order
repos.sort(function(a, b) {
return a.pushed_at < b.pushed_at ? 1 : a.pushed_at > b.pushed_at ? -1 : 0;
}).forEach(function(repo) {
var i, j, l1, l2;

// by_prefix index
// all prefixes of name
for (i = 1, l1 = repo.name.length; i <= l1; i++) {
addToIndex(repo.name.substr(0, i).toLowerCase(), repo);
addToIndex("by_prefix", repo.name.substr(0, i).toLowerCase(), repo);
}
// all prefixes of owner
for (i = 1, l1 = repo.owner.length; i <= l1; i++) {
var prefix = repo.owner.substr(0, i).toLowerCase();
addToIndex(prefix, repo);
addToIndex("by_prefix", prefix, repo);
// combine with all prefixes of name
// NOTE(review): the name part is not lower-cased here, unlike the owner prefix — confirm intended
for (j = 1, l2 = repo.name.length; j <= l2; j++) {
addToIndex(prefix + '/' + repo.name.substr(0, j), repo);
addToIndex("by_prefix", prefix + '/' + repo.name.substr(0, j), repo);
}
}

// by_fuzzy index
var id = repo.owner + ':' + repo.name;
// ids without '_' or '-' get a '-' inserted before every capital letter that
// is followed by a non-capital, so camelCase ids split into words below
if (id.indexOf('_') === -1 && id.indexOf('-') === -1) id = id.replace(/([A-Z][^A-Z])/g, '-$1');
// words: the lower-cased alphanumeric runs of the id; cmbs: every ordered
// selection of up to 4 of those words
var words = id.toLowerCase().split(/[^a-z0-9]+/).filter(function(s) { return s.length > 0; }),
cmbs = combinations(words, 4);
for (i = 0, l1 = cmbs.length; i < l1; i++) {
// selections of fewer than 4 words use uncapped prefixes; 4-word selections
// cap each word's prefix at 4 characters
var combination = cmbs[i], max = combination.length < 4 ? undefined : 4,
prevars = prefixVariations(combination, max);
for (j = 0, l2 = prevars.length; j < l2; j++) {
addToIndex("by_fuzzy", prevars[j], repo);
}
}
// also index the whole id with every non-alphanumeric character removed
// NOTE(review): `combination` is function-scoped, so here it is whatever the
// last loop iteration assigned; it would be undefined if cmbs were empty —
// confirm that words can never be empty
if (words.length > 4 || combination.length > 3 && words.some(function(w) { return w.length > 4; })) {
addToIndex("by_fuzzy", id.toLowerCase().replace(/[^a-z0-9]/g, ''), repo);
}
});
// persist both indexes, the shared timestamp and the id -> display line lookup
localStorage.by_prefix = JSON.stringify(index);
localStorage.by_prefix_timestamp = new Date().getTime();
localStorage.by_prefix = JSON.stringify(index.by_prefix);
localStorage.by_fuzzy = JSON.stringify(index.by_fuzzy);
localStorage.index_timestamp = new Date().getTime();
localStorage.repos = JSON.stringify(repoLookup);
callback(null, localStorage.by_prefix_timestamp);
callback(null, localStorage.index_timestamp);
});
},

// Removes the cached indexes, their timestamp and the repo lookup table from
// localStorage.
// NOTE(review): rendered diff without +/- markers — the by_prefix_timestamp
// line is the removed form of the index_timestamp line below it.
clearRepoIndex: function() {
delete localStorage.by_prefix;
delete localStorage.by_prefix_timestamp;
delete localStorage.by_fuzzy;
delete localStorage.index_timestamp;
delete localStorage.repos;
}
}


// compiled functions from the by_fuzzy couchdb view
// TODO: find a way to keep these db and local indexing functions in sync
/**
 * Lists every order-preserving selection of items from `arr`, grouped by
 * size: all selections of one item first, then of two, and so on.
 *
 * @param {Array} arr - items to choose from
 * @param {number} [max] - largest selection size; when missing, zero, or not
 *   smaller than arr.length, selections go up to the full array
 * @returns {Array<Array>} the selections, each a new array
 */
function combinations(arr, max) {
  var found = [];
  var limit = (max && max < arr.length) ? max : arr.length;
  // Sizes run from 1 to limit inclusive; when limit is below 1 the range
  // counts downwards instead, as the generated range loop always did.
  var step = 1 <= limit ? 1 : -1;

  // Collects every way to add `remaining` more items from `pool` to `chosen`.
  function pick(remaining, pool, chosen) {
    if (remaining === 0) {
      if (chosen.length > 0) {
        found.push(chosen);
      }
      return;
    }
    for (var k = 0, count = pool.length; k < count; k++) {
      pick(remaining - 1, pool.slice(k + 1), chosen.concat([pool[k]]));
    }
  }

  for (var size = 1; step > 0 ? size <= limit : size >= limit; size += step) {
    pick(size, arr, []);
  }
  return found;
}
/**
 * Builds every string obtained by taking a non-empty prefix of each word, in
 * order, and gluing the prefixes together.
 *
 * @param {string[]} words - words to take prefixes from; must not be empty
 * @param {number} [max] - longest prefix taken from any single word; when
 *   missing or not smaller than a word's length, the whole word is the limit
 * @param {string[]} [got] - prefixes already chosen for earlier words (used
 *   by the recursion; callers normally omit it)
 * @returns {string[]} the concatenated variations, shortest prefixes of the
 *   first word first
 */
function prefixVariations(words, max, got) {
  var chosen = got == null ? [] : got;
  var head = words[0];
  var longest = (max && max < head.length) ? max : head.length;
  // Prefix lengths run from 1 to longest inclusive; when longest is below 1
  // the range counts downwards instead, as the generated range loop always did.
  var step = 1 <= longest ? 1 : -1;
  var hasMore = words.length > 1;
  var variations = [];

  for (var len = 1; step > 0 ? len <= longest : len >= longest; len += step) {
    var extended = chosen.concat([head.substr(0, len)]);
    if (hasMore) {
      // expand the remaining words on top of this prefix
      variations = variations.concat(prefixVariations(words.slice(1), max, extended));
    } else {
      variations.push(extended.join(''));
    }
  }
  return variations;
}

})();
6 changes: 3 additions & 3 deletions ext/js/options.js
Expand Up @@ -23,8 +23,8 @@ $(function() {
});

function updateCacheTimestamp() {
if (localStorage.by_prefix_timestamp) {
var t = new Date(localStorage.by_prefix_timestamp - 0);
if (localStorage.index_timestamp) {
var t = new Date(localStorage.index_timestamp - 0);
$('#cache-status span').html('generated at <em>'+t.toString().replace(/ GMT.*/, '')+'</em>');
} else {
$('#cache-status span').text('has not been generated');
Expand All @@ -33,7 +33,7 @@ $(function() {

$('#cache-status button').bind('click', function() {
$('#cache-status span').html('generated at <img src="images/loading.gif"/>');
delete localStorage.by_prefix_timestamp;
delete localStorage.index_timestamp;
GitHub.updateRepoIndex(function(err) {
if (err) {
$('#cache-status span').html('generation failed');
Expand Down

0 comments on commit 8fa5777

Please sign in to comment.