Skip to content
This repository has been archived by the owner on Oct 13, 2021. It is now read-only.

Commit

Permalink
Fix bug 1185687, stop showing symlinks in search results.
Browse files Browse the repository at this point in the history
New behaviors:
We do not call the by lines processing for symlinks at index time.
Symlinks do not appear in search results (lines and files).
A symlink in folder browsing will have its href set to the real target.
A direct link to symlink will redirect to its real target.
  • Loading branch information
pelmers committed Jul 31, 2015
1 parent a4baee8 commit fce834c
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 13 deletions.
9 changes: 6 additions & 3 deletions dxr/app.py
Expand Up @@ -209,15 +209,18 @@ def browse(tree, path=''):
return _browse_folder(tree, path.rstrip('/'), config)
except NotFound:
frozen = frozen_config(tree)
# Grab the FILE doc, just for the sidebar nav links:
# Grab the FILE doc, just for the sidebar nav links and the symlink target:
files = filtered_query(
frozen['es_alias'],
FILE,
filter={'path': path},
size=1,
include=['links'])
include=['link', 'links'])
if not files:
raise NotFound
if 'link' in files[0]:
# Then this path is a symlink, so redirect to the real thing.
return redirect(url_for('.browse', tree=tree, path=files[0]['link'][0]))

lines = filtered_query(
frozen['es_alias'],
Expand Down Expand Up @@ -279,7 +282,7 @@ def _browse_folder(tree, path, config):
f['name'],
decode_es_datetime(f['modified']) if 'modified' in f else None,
f.get('size'),
url_for('.browse', tree=tree, path=f['path'][0]),
url_for('.browse', tree=tree, path=f.get('link', f['path'])[0]),
f.get('is_binary', [False])[0])
for f in files_and_folders])

Expand Down
12 changes: 6 additions & 6 deletions dxr/build.py
Expand Up @@ -459,8 +459,9 @@ def index_file(tree, tree_indexers, path, es, index):
raise

rel_path = relpath(path, tree.source_folder)
is_text = isinstance(contents, unicode)
if is_text:
# Index by line if the contents are text and the path is not a symlink.
index_by_line = isinstance(contents, unicode) and not islink(path)
if index_by_line:
lines = contents.splitlines(True)
num_lines = len(lines)
needles_by_line = [{} for _ in xrange(num_lines)]
Expand All @@ -477,7 +478,7 @@ def index_file(tree, tree_indexers, path, es, index):
linkses.append(file_to_index.links())

# Per-line stuff:
if is_text:
if index_by_line:
refses.append(file_to_index.refs())
regionses.append(file_to_index.regions())
append_update_by_line(needles_by_line,
Expand Down Expand Up @@ -517,9 +518,8 @@ def docs():
doc['links'] = links
yield es.index_op(doc, doc_type=FILE)

# Index all the lines. If it's an empty file (no lines), don't bother
# ES. It hates empty dicts.
if is_text and needles_by_line:
# Index all the lines.
if index_by_line:
for total, annotations_for_this_line, tags in izip(
needles_by_line,
annotations_by_line,
Expand Down
6 changes: 3 additions & 3 deletions dxr/indexers.py
Expand Up @@ -4,7 +4,7 @@
from collections import namedtuple
import json
from operator import itemgetter
from os.path import join
from os.path import join, islink
from warnings import warn

from funcy import group_by, decorator, imapcat
Expand Down Expand Up @@ -209,10 +209,10 @@ def is_interesting(self):
:meth:`~dxr.indexers.FileToSkim.links()`,
:meth:`~dxr.indexers.FileToSkim.refs()`, etc.
The default implementation selects only text files.
The default implementation selects only text files that are not symlinks.
"""
return self.contains_text()
return self.contains_text() and not islink(self.absolute_path())

def links(self):
"""Return an iterable of links for the navigation pane::
Expand Down
10 changes: 9 additions & 1 deletion dxr/plugins/core.py
Expand Up @@ -2,7 +2,7 @@

from base64 import b64encode
from itertools import chain
from os.path import relpath, splitext
from os.path import relpath, splitext, islink, realpath
import re

from flask import url_for
Expand Down Expand Up @@ -60,6 +60,11 @@

'ext': EXT_MAPPING,

'link': { # the target path if this FILE is a symlink
'type': 'string',
'index': 'not_analyzed'
},

# Folder listings query by folder and then display filename, size,
# and mod date.
'folder': { # path/to/a/folder
Expand Down Expand Up @@ -433,6 +438,9 @@ def __init__(self, path, contents, plugin_name, tree, vcs):

def needles(self):
"""Fill out path (and path.trigrams)."""
if islink(self.absolute_path()):
# realpath will keep following symlinks until it gets to the 'real' thing.
yield 'link', relpath(realpath(self.absolute_path()), self.tree.source_folder)
yield 'path', self.path
extension = splitext(self.path)[1]
if extension:
Expand Down
2 changes: 2 additions & 0 deletions dxr/query.py
Expand Up @@ -140,6 +140,8 @@ def group_filters_by_name(predicate):
# Don't show folders yet in search results. I don't think the JS
# is able to handle them.
ors.append({'term': {'is_folder': False}})
# Filter out all FILE docs who are links.
ors.append({'not': {'exists': {'field': 'link'}}})

if ors:
query = {
Expand Down
1 change: 1 addition & 0 deletions tests/test_symlink/code/README.mkd
@@ -0,0 +1 @@
This file should be indexed happily.
1 change: 1 addition & 0 deletions tests/test_symlink/code/link.mkd
13 changes: 13 additions & 0 deletions tests/test_symlink/dxr.config
@@ -0,0 +1,13 @@
[DXR]
enabled_plugins = pygmentize clang python
es_index = dxr_test_{format}_{tree}_{unique}
es_alias = dxr_test_{format}_{tree}
es_catalog_index = dxr_test_catalog

[code]
source_folder = code
build_command =
clean_command =

[[python]]
python_path = ./
36 changes: 36 additions & 0 deletions tests/test_symlink/test_symlink.py
@@ -0,0 +1,36 @@
from nose.tools import eq_, ok_

from dxr.testing import DxrInstanceTestCase


class SymlinkTests(DxrInstanceTestCase):
def test_follows_link(self):
"""Test that a symlink listed in a browsing view will actually point to the real target.
"""
response = self.client().get('/code/source/')
# Make sure the browse view actually shows the symlink...
ok_('link.mkd' in response.data)
# ...but links to the real file instead.
ok_('<a href="/code/source/link.mkd"' not in response.data)

def test_file_search(self):
"""Make sure that searching for path:<symlink name> does not return the symlink.
"""
self.found_files_eq('path:mkd', ['README.mkd'])

def test_line_search(self):
"""Make sure that searching for contents within the real file does not return duplicates
in the symlink.
"""
self.found_files_eq('happily', ['README.mkd'])

def test_redirect(self):
"""Make sure that a direct link to a symlink redirects to the real file.
"""
response = self.client().get('/code/source/link.mkd')
eq_(response.status_code, 302)
ok_(response.headers['Location'].endswith('/code/source/README.mkd'))

0 comments on commit fce834c

Please sign in to comment.