Skip to content
This repository has been archived by the owner on Oct 13, 2021. It is now read-only.

Commit

Permalink
When pulling a file out of a VCS revision, don't assume its containin…
Browse files Browse the repository at this point in the history
…g folder still exists. Fixes bug 1324026.

It may well have moved in the checkout's revision. So stop cd-ing to its containing folder. Instead, go into the deepest existing folder on its containing path, and run the VCS commands against the remainder of the relative path from there. Going into the deepest existing folder is a way to get the correct VCS invoked, even if there are nested repositories in the tree.

Refactor to create GenerativeTestCase, which should let us write these VCS test cases without tarring up an .hg or .git dir, which is hard to modify or examine and tempts us to couple too many tests together.
  • Loading branch information
erikrose committed Jan 4, 2017
1 parent a664b62 commit f3cc71a
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 60 deletions.
6 changes: 2 additions & 4 deletions dxr/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,7 @@ def raw_rev(tree, revision, path):

config = current_app.dxr_config
tree_config = config.trees[tree]
abs_path = join(tree_config.source_folder, path)
data = file_contents_at_rev(abs_path, revision)
data = file_contents_at_rev(tree_config.source_folder, path, revision)
if data is None:
raise NotFound
data_file = StringIO(data)
Expand Down Expand Up @@ -589,8 +588,7 @@ def rev(tree, revision, path):
"""
config = current_app.dxr_config
tree_config = config.trees[tree]
abs_path = join(tree_config.source_folder, path)
contents = file_contents_at_rev(abs_path, revision)
contents = file_contents_at_rev(tree_config.source_folder, path, revision)
if contents is not None:
image_rev = None
if is_binary_image(path):
Expand Down
85 changes: 53 additions & 32 deletions dxr/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from os.path import dirname, join
import re
from shutil import rmtree
import subprocess
from subprocess import check_call
import sys
from tempfile import mkdtemp
import unittest
Expand Down Expand Up @@ -42,6 +42,12 @@ def setup_class(cls):
def teardown_class(cls):
rmtree(cls._config_dir_path)

@classmethod
def code_dir(cls):
"""Return the path to the folder which typically contains the indexed
source code in tests with only one source tree."""
return join(cls._config_dir_path, 'code')

def app(self):
if not hasattr(self, '_app'):
self._app = make_app(self.config())
Expand Down Expand Up @@ -245,13 +251,13 @@ class DxrInstanceTestCase(TestCase):
@classmethod
def this_dir(cls):
"""Return the path to the dir containing the testcase class."""
# nose does some amazing magic that makes this work even if there are
# multiple test modules with the same name:
return dirname(sys.modules[cls.__module__].__file__)

@classmethod
def setup_class(cls):
"""Build the instance."""
# nose does some amazing magic that makes this work even if there are
# multiple test modules with the same name:
cls._config_dir_path = cls.this_dir()
chdir(cls._config_dir_path)
run('dxr index')
Expand All @@ -268,6 +274,43 @@ def config_input(cls, config_dir_path):
return file_text(join(cls._config_dir_path, 'dxr.config'))


class GenerativeTestCase(TestCase):
"""Container for tests whose source-code folders are generated rather than
living permanently in DXR's source tree.
You get one tree's source-code folder for free, and you can make more
yourself if you like.
"""
# Set this to True in a subclass to keep the generated instance around and
# host it on port 8000 so you can examine it:
stop_for_interaction = False

@classmethod
def setup_class(cls):
super(GenerativeTestCase, cls).setup_class()
code_path = cls.code_dir()
mkdir(code_path)
cls.generate_source()
for tree in cls.config().trees.itervalues():
index_and_deploy_tree(tree)
cls._es().refresh()

@classmethod
def teardown_class(cls):
if cls.stop_for_interaction:
print "Pausing for interaction at 0.0.0.0:8000..."
make_app(cls.config()).run(host='0.0.0.0', port=8000)
print "Cleaning up indices..."
cls._delete_es_indices()
super(GenerativeTestCase, cls).setup_class()

@classmethod
def generate_source(cls):
"""Generate any source code a subclass wishes."""


# TODO: Make into a GenerativeTestCase
class DxrInstanceTestCaseMakeFirst(DxrInstanceTestCase):
"""Test case which runs ``make`` before ``dxr index`` and ``make clean``
before ``dxr clean`` within a code directory and otherwise delegates to
Expand All @@ -279,18 +322,16 @@ class DxrInstanceTestCaseMakeFirst(DxrInstanceTestCase):
"""
@classmethod
def setup_class(cls):
build_dir = join(cls.this_dir(), 'code')
subprocess.check_call(['make'], cwd=build_dir)
check_call(['make'], cwd=join(cls.this_dir(), 'code'))
super(DxrInstanceTestCaseMakeFirst, cls).setup_class()

@classmethod
def teardown_class(cls):
build_dir = join(cls.this_dir(), 'code')
subprocess.check_call(['make', 'clean'], cwd=build_dir)
check_call(['make', 'clean'], cwd=join(cls.this_dir(), 'code'))
super(DxrInstanceTestCaseMakeFirst, cls).teardown_class()


class SingleFileTestCase(TestCase):
class SingleFileTestCase(GenerativeTestCase):
"""Container for tests that need only a single source file
You can express the source as a string rather than creating a whole bunch
Expand All @@ -300,32 +341,12 @@ class SingleFileTestCase(TestCase):
:cvar source_filename: The filename used for the source file
"""
# Set this to True in a subclass to keep the generated instance around and
# host it on port 8000 so you can examine it:
stop_for_interaction = False

source_filename = 'main'

@classmethod
def setup_class(cls):
"""Create a temporary DXR instance on the FS, and build it."""
super(SingleFileTestCase, cls).setup_class()
code_path = join(cls._config_dir_path, 'code')
mkdir(code_path)
_make_file(code_path, cls.source_filename, cls.source)

for tree in cls.config().trees.itervalues():
index_and_deploy_tree(tree)
cls._es().refresh()

@classmethod
def teardown_class(cls):
if cls.stop_for_interaction:
print "Pausing for interaction at 0.0.0.0:8000..."
make_app(cls.config()).run(host='0.0.0.0', port=8000)
print "Cleaning up indices..."
cls._delete_es_indices()
super(SingleFileTestCase, cls).setup_class()
def generate_source(cls):
"""Create a single file of source code on the FS."""
make_file(cls.code_dir(), cls.source_filename, cls.source)

def _source_for_query(self, s):
return (s.replace('<b>', '')
Expand Down Expand Up @@ -395,7 +416,7 @@ def single_result_eq(self, query, line_number):
'single')


def _make_file(path, filename, contents):
def make_file(path, filename, contents):
"""Make file ``filename`` within ``path``, full of unicode ``contents``."""
with open(join(path, filename), 'w') as file:
file.write(contents.encode('utf-8'))
Expand Down
94 changes: 72 additions & 22 deletions dxr/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from datetime import datetime
import marshal
import os
from os.path import relpath, join, split
from os.path import exists, join, realpath, relpath, split
from pkg_resources import resource_filename
import subprocess
import urlparse
Expand Down Expand Up @@ -90,9 +90,19 @@ def last_modified_date(self, path):
raise NotImplementedError

@classmethod
def get_contents(cls, path, revision, stderr=None):
"""Return contents of file at specified path at given revision, where path is an
absolute path."""
def get_contents(cls, working_dir, rel_path, revision, stderr=None):
"""Return contents of a file at a certain revision.
:arg working_dir: The working directory from which to run the VCS
command. Beware that the dirs which existed at the rev in question
may not exist in the checked-out rev. Also, you cannot blithely use
the root of the source folder, as there may be, for instance,
nested git repos in the tree.
:arg rel_path: The relative path to the file, from ``working_dir``
:arg revision: The revision at which to pull the file, in a
VCS-dependent format
"""
raise NotImplementedError

def display_rev(self, path):
Expand Down Expand Up @@ -186,9 +196,8 @@ def generate_log(self, path):
return "{}filelog/{}/{}".format(self.upstream, self.revision, path)

@classmethod
def get_contents(cls, path, revision, stderr=None):
head, tail = split(path)
return cls.invoke_vcs(['cat', '-r', revision, tail], head, stderr=stderr)
def get_contents(cls, working_dir, rel_path, revision, stderr=None):
return cls.invoke_vcs(['cat', '-r', revision, rel_path], working_dir, stderr=stderr)


class Git(Vcs):
Expand Down Expand Up @@ -285,9 +294,8 @@ def generate_log(self, path):
return "{}/commits/{}/{}".format(self.upstream, self.revision, path)

@classmethod
def get_contents(cls, path, revision, stderr=None):
head, tail = split(path)
return cls.invoke_vcs(['show', revision + ':./' + tail], head, stderr=stderr)
def get_contents(cls, working_dir, rel_path, revision, stderr=None):
return cls.invoke_vcs(['show', revision + ':./' + rel_path], working_dir, stderr=stderr)


class Perforce(Vcs):
Expand Down Expand Up @@ -354,15 +362,14 @@ def display_rev(self, path):
return '#' + info['haveRev']

@classmethod
def get_contents(cls, path, revision, stderr=None):
directory, filename = split(path)
env = os.environ
env["PWD"] = directory
def get_contents(cls, working_dir, rel_path, revision, stderr=None):
env = os.environ.copy()
env['PWD'] = working_dir
return subprocess.check_output([cls.command,
'print',
'-q',
filename + '@' + revision],
cwd=directory, env=env, stderr=stderr)
rel_path + '@' + revision],
cwd=working_dir, env=env, stderr=stderr)


every_vcs = [Mercurial, Git, Perforce]
Expand Down Expand Up @@ -405,21 +412,63 @@ def tree_to_repos(tree):
return ordered_sources


def file_contents_at_rev(abspath, revision):
"""Attempt to return the contents of a file at a specific revision."""
def _split_existent(abs_folder):
"""Split a path to a dir in two, with the first half consisting of the
longest segment that exists on the FS; the second, the remainder."""
existent = abs_folder
nonexistent = ''
while existent:
if exists(existent):
break
existent, non = split(existent)
nonexistent = join(non, nonexistent)
return existent, nonexistent


def _is_within(inner, outer):
"""Return whether path ``inner`` is contained by or identical with folder
``outer``."""
# The added slashes are meant to prevent wrong answers if outer='z/a' and
# inner='z/abc'.
return (realpath(inner) + '/').startswith(realpath(outer) + '/')


def file_contents_at_rev(source_folder, rel_file, revision):
"""Attempt to return the contents of a file at a specific revision.
If such a file is not found, return None.
:arg source_folder: The absolute path to the root of the source folder for
the tree we're talking about
:arg rel_file: The source-folder-relative path to a file
:arg revision: The VCS revision identifier, in a format defined by the VCS
"""
# Rather than keeping a memory-intensive VcsCache around in the web process
# (which we haven't measured; it might be okay, but I'm afraid), just keep
# stepping rootward in the FS hierarchy until we find an actually existing
# dir. Regardless of the method, the point is to work even on files whose
# containing dirs have been moved or renamed.
rel_folder, file = split(rel_file)
abs_folder = join(source_folder, rel_folder)
existent, nonexistent = _split_existent(abs_folder)

# Security check: don't serve files outside the source folder:
if not _is_within(existent, source_folder):
return None

with open(os.devnull, 'w') as devnull:
for cls in every_vcs:
try:
return cls.get_contents(abspath, revision, stderr=devnull)
return cls.get_contents(existent, join(nonexistent, file), revision, stderr=devnull)
except subprocess.CalledProcessError:
continue
return None


class VcsCache(object):
"""This class offers a way to obtain Vcs objects for any file within a
given tree."""

def __init__(self, tree):
"""Construct a VcsCache for the given tree.
Expand All @@ -435,13 +484,14 @@ def vcs_for_path(self, path):
know about that claims to track that file.
:arg string path: a path to a file (not a folder)
"""
if path in self._path_cache:
return self._path_cache[path]
abs_path = join(self.tree.source_folder, path)
for directory, vcs in self.repos.iteritems():
# This seems to be the easiest way to find "is abs_path in the subtree
# rooted at directory?"
# This seems to be the easiest way to find "is abs_path in the
# subtree rooted at directory?"
if relpath(abs_path, directory).startswith('..'):
continue
if vcs.is_tracked(relpath(abs_path, vcs.get_root_dir())):
Expand Down
33 changes: 31 additions & 2 deletions tests/test_vcs_hg/test_vcs_hg.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
from dxr.testing import DxrInstanceTestCaseMakeFirst
from os import mkdir
from os.path import join
from subprocess import check_call

from nose.tools import ok_, eq_

from dxr.testing import (DxrInstanceTestCaseMakeFirst, GenerativeTestCase,
make_file)
from dxr.utils import cd


class MercurialTests(DxrInstanceTestCaseMakeFirst):
"""Test our Mercurial integration, both core and omniglot."""
"""Tests for Mercurial integration, both core and omniglot."""

def test_diff_file1(self):
"""Make sure the diff link goes to the first after-initial commit."""
Expand Down Expand Up @@ -52,3 +58,26 @@ def test_mdates(self):
response = self.client().get('/code/source/').data
ok_('<time>2014 Nov 06 19:11</time>' in response)
ok_('<time>2014 Oct 30 20:10</time>' in response)


class MercurialMoveTests(GenerativeTestCase):
"""Tests for Mercurial integration that involves hg moves."""

@classmethod
def generate_source(cls):
code_dir = cls.code_dir()
folder = join(code_dir, 'folder')
mkdir(folder)
make_file(folder, 'file', 'some contents!')
with cd(code_dir):
check_call(['hg', 'init'])
check_call(['hg', 'add', 'folder'])
check_call(['hg', 'commit', '-m', 'Add a folder.', '-u', 'me'])
check_call(['hg', 'mv', 'folder', 'moved_folder'])
check_call(['hg', 'commit', '-m', 'Move the containing folder.', '-u', 'me'])

def test_cat_moved_file(self):
"""Make sure we can get the contents of a former-rev file whose parent
dir no longer exists."""
response = self.client().get('/code/rev/0/folder/file')
ok_('some contents!' in response.data)

0 comments on commit f3cc71a

Please sign in to comment.