Skip to content
This repository has been archived by the owner on Oct 13, 2021. It is now read-only.

Commit

Permalink
Stop showing symlinks in search results, closes #458
Browse files Browse the repository at this point in the history
  • Loading branch information
pelmers committed Aug 5, 2015
2 parents e683e7b + 0b32af7 commit c63a854
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 15 deletions.
9 changes: 6 additions & 3 deletions dxr/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,15 +209,18 @@ def browse(tree, path=''):
return _browse_folder(tree, path.rstrip('/'), config)
except NotFound:
frozen = frozen_config(tree)
# Grab the FILE doc, just for the sidebar nav links:
# Grab the FILE doc, just for the sidebar nav links and the symlink target:
files = filtered_query(
frozen['es_alias'],
FILE,
filter={'path': path},
size=1,
include=['links'])
include=['link', 'links'])
if not files:
raise NotFound
if 'link' in files[0]:
# Then this path is a symlink, so redirect to the real thing.
return redirect(url_for('.browse', tree=tree, path=files[0]['link'][0]))

lines = filtered_query(
frozen['es_alias'],
Expand Down Expand Up @@ -279,7 +282,7 @@ def _browse_folder(tree, path, config):
f['name'],
decode_es_datetime(f['modified']) if 'modified' in f else None,
f.get('size'),
url_for('.browse', tree=tree, path=f['path'][0]),
url_for('.browse', tree=tree, path=f.get('link', f['path'])[0]),
f.get('is_binary', [False])[0])
for f in files_and_folders])

Expand Down
15 changes: 9 additions & 6 deletions dxr/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,10 @@ def index_file(tree, tree_indexers, path, es, index):

rel_path = relpath(path, tree.source_folder)
is_text = isinstance(contents, unicode)
if is_text:
is_link = islink(path)
# Index by line if the contents are text and the path is not a symlink.
index_by_line = is_text and not is_link
if index_by_line:
lines = contents.splitlines(True)
num_lines = len(lines)
needles_by_line = [{} for _ in xrange(num_lines)]
Expand All @@ -471,10 +474,11 @@ def index_file(tree, tree_indexers, path, es, index):
if file_to_index.is_interesting():
# Per-file stuff:
append_update(needles, file_to_index.needles())
linkses.append(file_to_index.links())
if not is_link:
linkses.append(file_to_index.links())

# Per-line stuff:
if is_text:
if index_by_line:
refses.append(file_to_index.refs())
regionses.append(file_to_index.regions())
append_update_by_line(needles_by_line,
Expand Down Expand Up @@ -514,9 +518,8 @@ def docs():
doc['links'] = links
yield es.index_op(doc, doc_type=FILE)

# Index all the lines. If it's an empty file (no lines), don't bother
# ES. It hates empty dicts.
if is_text and needles_by_line:
# Index all the lines.
if index_by_line:
for total, annotations_for_this_line, tags in izip(
needles_by_line,
annotations_by_line,
Expand Down
22 changes: 19 additions & 3 deletions dxr/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import namedtuple
import json
from operator import itemgetter
from os.path import join
from os.path import join, islink
from warnings import warn

from funcy import group_by, decorator, imapcat
Expand Down Expand Up @@ -209,10 +209,13 @@ def is_interesting(self):
:meth:`~dxr.indexers.FileToSkim.links()`,
:meth:`~dxr.indexers.FileToSkim.refs()`, etc.
The default implementation selects only text files.
The default implementation selects only text files that are not symlinks.
Note: even if a plugin decides that symlinks are interesting, it should
remember that links, refs, regions and by-line annotations will not be
called because views of symlinks redirect to the original file.
"""
return self.contains_text()
return self.contains_text() and not self.is_link()

def links(self):
"""Return an iterable of links for the navigation pane::
Expand Down Expand Up @@ -306,6 +309,15 @@ def absolute_path(self):
"""
return join(self.tree.source_folder, self.path)

def is_link(self):
    """Tell whether this file's path in the source tree is a symbolic link.

    The check is made against the file's absolute path. A request to
    browse a symlink is redirected to the link's target rather than
    rendered itself.

    """
    full_path = self.absolute_path()
    return islink(full_path)

# Private methods:

def _line_offsets(self):
Expand Down Expand Up @@ -393,6 +405,10 @@ def needles_by_line(self):
may be dicts, in which case common keys get merged by
:func:`~dxr.utils.append_update()`.
This method is not called on symlink files, to maintain the illusion
that they do not have contents, seeing as they cannot be viewed in
file browsing.
"""
return []

Expand Down
10 changes: 9 additions & 1 deletion dxr/plugins/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from base64 import b64encode
from itertools import chain
from os.path import relpath, splitext
from os.path import relpath, splitext, islink, realpath
import re

from flask import url_for
Expand Down Expand Up @@ -60,6 +60,11 @@

'ext': EXT_MAPPING,

'link': { # the target path if this FILE is a symlink
'type': 'string',
'index': 'not_analyzed'
},

# Folder listings query by folder and then display filename, size,
# and mod date.
'folder': { # path/to/a/folder
Expand Down Expand Up @@ -433,6 +438,9 @@ def __init__(self, path, contents, plugin_name, tree, vcs):

def needles(self):
"""Fill out path (and path.trigrams)."""
if self.is_link():
# realpath will keep following symlinks until it gets to the 'real' thing.
yield 'link', relpath(realpath(self.absolute_path()), self.tree.source_folder)
yield 'path', self.path
extension = splitext(self.path)[1]
if extension:
Expand Down
5 changes: 3 additions & 2 deletions dxr/plugins/python/indexers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import ast
import token
import tokenize
from StringIO import StringIO
from os.path import islink
from cStringIO import StringIO

from dxr.build import unignored
from dxr.filters import FILE, LINE
Expand Down Expand Up @@ -301,4 +302,4 @@ def is_interesting(path):
analyze.
"""
return path.endswith('.py')
return path.endswith('.py') and not islink(path)
2 changes: 2 additions & 0 deletions dxr/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def group_filters_by_name(predicate):
# Don't show folders yet in search results. I don't think the JS
# is able to handle them.
ors.append({'term': {'is_folder': False}})
# Filter out all FILE docs that are symlinks.
ors.append({'not': {'exists': {'field': 'link'}}})

if ors:
query = {
Expand Down
1 change: 1 addition & 0 deletions tests/test_symlink/code/README.mkd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This file should be indexed happily.
1 change: 1 addition & 0 deletions tests/test_symlink/code/link.mkd
13 changes: 13 additions & 0 deletions tests/test_symlink/dxr.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[DXR]
enabled_plugins = pygmentize clang python
es_index = dxr_test_{format}_{tree}_{unique}
es_alias = dxr_test_{format}_{tree}
es_catalog_index = dxr_test_catalog

[code]
source_folder = code
build_command =
clean_command =

[[python]]
python_path = ./
36 changes: 36 additions & 0 deletions tests/test_symlink/test_symlink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from nose.tools import eq_, ok_

from dxr.testing import DxrInstanceTestCase


class SymlinkTests(DxrInstanceTestCase):
    """Check how a symlink in the ``code`` tree is browsed and searched."""

    def test_follows_link(self):
        """A folder listing names the symlink without linking to it."""
        listing = self.client().get('/code/source/').data
        # The symlink's name has to show up in the folder listing...
        ok_('link.mkd' in listing)
        # ...while no anchor may point at the symlink's own URL.
        ok_('<a href="/code/source/link.mkd"' not in listing)

    def test_file_search(self):
        """A path search yields the real file and leaves out the symlink."""
        expected_files = ['README.mkd']
        self.found_files_eq('path:mkd', expected_files)

    def test_line_search(self):
        """A text search for the real file's contents yields only that
        file, with no duplicate hit attributed to the symlink.

        """
        expected_files = ['README.mkd']
        self.found_files_eq('happily', expected_files)

    def test_redirect(self):
        """Requesting the symlink's URL answers with a 302 to the real file."""
        reply = self.client().get('/code/source/link.mkd')
        eq_(reply.status_code, 302)
        destination = reply.headers['Location']
        ok_(destination.endswith('/code/source/README.mkd'))

0 comments on commit c63a854

Please sign in to comment.