From ca2150da02f5e2b23ee02d8493293aae53d170cf Mon Sep 17 00:00:00 2001
From: Joshua Cranmer
Date: Sat, 17 Aug 2013 15:40:58 -0500
Subject: [PATCH] Add the omniglot plugin, which can provide links to blame for both hg and git.

This changeset is a partial fix for bug 842641.

--HG--
extra : rebase_source : d99e59def249fe23ff99bab49c783e6b791e3752
---
 dxr/plugins/omniglot/htmlifier.py | 299 ++++++++++++++++++++++++++++++
 dxr/plugins/omniglot/indexer.py   |  13 ++
 dxr/plugins/omniglot/makefile     |   3 +
 3 files changed, 315 insertions(+)
 create mode 100644 dxr/plugins/omniglot/htmlifier.py
 create mode 100644 dxr/plugins/omniglot/indexer.py
 create mode 100644 dxr/plugins/omniglot/makefile

diff --git a/dxr/plugins/omniglot/htmlifier.py b/dxr/plugins/omniglot/htmlifier.py
new file mode 100644
index 000000000..b92f22b28
--- /dev/null
+++ b/dxr/plugins/omniglot/htmlifier.py
@@ -0,0 +1,299 @@
+import os
+import subprocess
+import urlparse
+
+import dxr.plugins
+
+"""Omniglot - Speaking all commonly-used version control systems.
+At present, this plugin is still under development, so not all features are
+fully implemented.
+
+Omniglot first scans the project directory looking for the hallmarks of a VCS
+(such as the .hg or .git directory). It also looks for these in parent
+directories in case DXR is only parsing a fraction of the repository. Once this
+information is found, it attempts to extract upstream information about the
+repository. From this information, it builds the necessary information to
+reproduce the links.
+
+Currently supported VCSes and upstream views:
+- git (github)
+- mercurial (hgweb)
+
+Todos:
+- add gitweb support for git
+- add cvs, svn, bzr support
+- produce in-DXR blame information using VCSs
+- check if the mercurial paths are specific to Mozilla's customization or not.
+"""
+
+# Global variables
+tree = None
+source_repositories = {}
+# Repository root directories, longest path first; filled in by load().
+lookup_order = []
+
+class VCS(object):
+    """A class representing an abstract notion of a version-control system.
+    In general, all path arguments to query methods should be normalized to be
+    relative to the root directory of the VCS.
+    """
+
+    def __init__(self, root):
+        self.root = root
+        self.untracked_files = set()
+
+    def get_root_dir(self):
+        """Return the directory that is at the root of the VCS."""
+        return self.root
+
+    def get_vcs_name(self):
+        """Return a recognizable name for the VCS."""
+        return type(self).__name__
+
+    def invoke_vcs(self, args):
+        """Return the result of invoking said command on the repository, with
+        the current working directory set to the root directory.
+        """
+        return subprocess.check_output(args, cwd=self.get_root_dir())
+
+    def is_tracked(self, path):
+        """Does the repository track this file?"""
+        return path not in self.untracked_files
+
+    def get_rev(self, path):
+        """Return a human-readable revision identifier for the repository."""
+        raise NotImplementedError
+
+    def generate_log(self, path):
+        """Return a URL for a page that lists revisions for this file."""
+        raise NotImplementedError
+
+    def generate_blame(self, path):
+        """Return a URL for a page that lists source annotations for lines in
+        this file.
+        """
+        raise NotImplementedError
+
+    def generate_diff(self, path):
+        """Return a URL for a page that shows the last change made to this file.
+        """
+        raise NotImplementedError
+
+    def generate_raw(self, path):
+        """Return a URL for a page that returns a raw copy of this file."""
+        raise NotImplementedError
+
+
+class Mercurial(VCS):
+    """A Mercurial checkout; links are built in the hgweb URL layout."""
+
+    def __init__(self, root):
+        super(Mercurial, self).__init__(root)
+        # Find the revision
+        self.revision = self.invoke_vcs(['hg', 'id', '-i']).strip()
+        # Sometimes hg id returns + at the end.
+        if self.revision.endswith("+"):
+            self.revision = self.revision[:-1]
+
+        # Make and normalize the upstream URL
+        upstream = urlparse.urlparse(self.invoke_vcs(['hg', 'paths', 'default']).strip())
+        recomb = list(upstream)
+        if upstream.scheme == 'ssh':
+            # Link to the web view over http. This must be an assignment; a
+            # comparison here would silently leave the ssh scheme in place.
+            recomb[0] = 'http'
+        recomb[1] = upstream.hostname # Eliminate any username stuff
+        if not upstream.path.endswith('/'):
+            recomb[2] += '/' # Make sure we have a '/' on the end
+        recomb[3] = recomb[4] = recomb[5] = '' # Just those three
+        self.upstream = urlparse.urlunparse(recomb)
+
+        # Find all untracked files. Each status line is a status code followed
+        # by the path; split only once so paths containing spaces stay whole.
+        status = self.invoke_vcs(['hg', 'status', '-u', '-i'])
+        self.untracked_files = set(line.split(None, 1)[1]
+                                   for line in status.splitlines()
+                                   if line.strip())
+
+    @staticmethod
+    def claim_vcs_source(path, dirs):
+        """Return a Mercurial for path if dirs lists '.hg', else None.
+        The '.hg' entry is removed from dirs when it is claimed.
+        """
+        if '.hg' in dirs:
+            dirs.remove('.hg')
+            return Mercurial(path)
+        return None
+
+    def get_rev(self, path):
+        return self.revision
+
+    def generate_log(self, path):
+        return self.upstream + 'filelog/' + self.revision + '/' + path
+
+    def generate_blame(self, path):
+        return self.upstream + 'annotate/' + self.revision + '/' + path
+
+    def generate_diff(self, path):
+        return self.upstream + 'diff/' + self.revision + '/' + path
+
+    def generate_raw(self, path):
+        return self.upstream + 'raw-file/' + self.revision + '/' + path
+
+
+class Git(VCS):
+    """A git checkout whose 'origin' remote is hosted on GitHub."""
+
+    def __init__(self, root):
+        super(Git, self).__init__(root)
+        self.untracked_files = set(
+            self.invoke_vcs(['git', 'ls-files', '-o']).splitlines())
+        # Strip the trailing newline so the hash can be embedded in URLs.
+        self.revision = self.invoke_vcs(['git', 'rev-parse', 'HEAD']).strip()
+        self.upstream = None
+        for src_url in self.invoke_vcs(['git', 'remote', '-v']).splitlines():
+            # Lines look like "<name> <url> (fetch)"; skip blank or short ones
+            # instead of failing to unpack them.
+            fields = src_url.split()
+            if len(fields) >= 2 and fields[0] == 'origin':
+                self.upstream = self.synth_web_url(fields[1])
+                break
+        if self.upstream is None:
+            # Fail here with a clear message rather than later with an
+            # AttributeError when a link is generated.
+            raise ValueError("git repository %s has no 'origin' remote" % root)
+
+    @staticmethod
+    def claim_vcs_source(path, dirs):
+        """Return a Git for path if dirs lists '.git', else None.
+        The '.git' entry is removed from dirs when it is claimed.
+        """
+        if '.git' in dirs:
+            dirs.remove('.git')
+            return Git(path)
+        return None
+
+    def get_rev(self, path):
+        return self.revision[:10]
+
+    def generate_log(self, path):
+        return self.upstream + "/commits/" + self.revision + "/" + path
+
+    def generate_blame(self, path):
+        return self.upstream + "/blame/" + self.revision + "/" + path
+
+    def generate_diff(self, path):
+        # I really want to make this anchor on the file in question, but github
+        # doesn't seem to do that nicely
+        return self.upstream + "/commit/" + self.revision
+
+    def generate_raw(self, path):
+        return self.upstream + "/raw/" + self.revision + "/" + path
+
+    def synth_web_url(self, repo):
+        """Return the https://github.com/... page for a GitHub remote URL.
+        Raises Exception if repo is not a recognized GitHub URL.
+        """
+        prefixes = ("git@github.com:", "git://github.com/",
+                    "https://github.com/", "http://github.com/")
+        for prefix in prefixes:
+            if repo.startswith(prefix):
+                self._is_github = True
+                repo = repo[len(prefix):]
+                # The web UI lives at the repository name without ".git".
+                if repo.endswith(".git"):
+                    repo = repo[:-len(".git")]
+                return "https://github.com/" + repo
+        raise Exception("I don't know what's going on")
+
+
+every_vcs = [Mercurial, Git]
+
+
+# Load global variables
+def load(tree_, conn):
+    """Record tree_ and find every repository in or above its source folder.
+    conn is not used.
+    """
+    global tree, lookup_order
+    tree = tree_
+    # Find all of the VCS's in the source directory
+    for cwd, dirs, files in os.walk(tree.source_folder):
+        for vcs in every_vcs:
+            attempt = vcs.claim_vcs_source(cwd, dirs)
+            if attempt is not None:
+                source_repositories[attempt.root] = attempt
+
+    # It's possible that the root of the tree is not a VCS by itself, so walk up
+    # the hierarchy until we find a parent folder that is a VCS. If we can't
+    # find any, then no VCSs exist for the top-level of this repository.
+    directory = tree.source_folder
+    while directory not in source_repositories:
+        parent = os.path.dirname(directory)
+        if not parent or parent == directory:
+            # Nothing left above us: stop instead of looping on the root.
+            break
+        directory = parent
+        for vcs in every_vcs:
+            attempt = vcs.claim_vcs_source(directory, os.listdir(directory))
+            if attempt is not None:
+                source_repositories[directory] = attempt
+    # Note: we want to make sure that we look up source repositories by deepest
+    # directory first, so the longest root paths have to come first.
+    lookup_order = sorted(source_repositories, key=len, reverse=True)
+
+
+def find_vcs_for_file(path):
+    """Given an absolute path, find a source repository we know about that
+    claims to track that file.
+    """
+    outside = os.pardir + os.sep
+    for directory in lookup_order:
+        # This seems to be the easiest way to find "is path in the subtree
+        # rooted at directory?" Compare whole path components so that a name
+        # like '..foo' inside directory is not mistaken for its parent.
+        relative = os.path.relpath(path, directory)
+        if relative == os.pardir or relative.startswith(outside):
+            continue
+        vcs = source_repositories[directory]
+        if vcs.is_tracked(os.path.relpath(path, vcs.get_root_dir())):
+            return vcs
+    return None
+
+
+class LinksHtmlifier(object):
+    """Htmlifier which adds blame and external links to VCS web utilities."""
+    def __init__(self, path):
+        if not os.path.isabs(path):
+            path = os.path.join(tree.source_folder, path)
+        self.vcs = find_vcs_for_file(path)
+        if self.vcs is not None:
+            self.path = os.path.relpath(path, self.vcs.get_root_dir())
+            self.name = self.vcs.get_vcs_name()
+
+    def refs(self):
+        return []
+
+    def regions(self):
+        return []
+
+    def annotations(self):
+        return []
+
+    def links(self):
+        if self.vcs is None:
+            yield 5, 'Untracked file', []
+            return
+        def items():
+            yield 'log', "Log", self.vcs.generate_log(self.path)
+            yield 'blame', "Blame", self.vcs.generate_blame(self.path)
+            yield 'diff', "Diff", self.vcs.generate_diff(self.path)
+            yield 'raw', "Raw", self.vcs.generate_raw(self.path)
+        yield 5, '%s (%s)' % (self.name, self.vcs.get_rev(self.path)), items()
+
+
+def htmlify(path, text):
+    return LinksHtmlifier(path)
+
+
+__all__ = dxr.plugins.htmlifier_exports()
diff --git a/dxr/plugins/omniglot/indexer.py b/dxr/plugins/omniglot/indexer.py
new file mode 100644
index 000000000..7e41d60e1
--- /dev/null
+++ b/dxr/plugins/omniglot/indexer.py
@@ -0,0 +1,13 @@
+import dxr.plugins
+
+# Nothing to do here, but we must implement indexer.py to explicitly declare
+# that these functions are no-ops. Otherwise DXR shall assume the file or the
+# implementation is missing, and thus, something is badly wrong.
+
+def pre_process(tree, environ):
+    pass
+
+def post_process(tree, conn):
+    pass
+
+__all__ = dxr.plugins.indexer_exports()
diff --git a/dxr/plugins/omniglot/makefile b/dxr/plugins/omniglot/makefile
new file mode 100644
index 000000000..0f9684344
--- /dev/null
+++ b/dxr/plugins/omniglot/makefile
@@ -0,0 +1,3 @@
+build:
+check:
+clean:
\ No newline at end of file