Skip to content
This repository has been archived by the owner on Oct 13, 2021. It is now read-only.

Commit

Permalink
Merge pull request #528 from kleintom/file_operator
Browse files Browse the repository at this point in the history
File operator
  • Loading branch information
kleintom committed Apr 23, 2016
2 parents dc4d454 + fbba444 commit 066907f
Show file tree
Hide file tree
Showing 12 changed files with 125 additions and 49 deletions.
4 changes: 3 additions & 1 deletion dxr/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,9 @@ def _browse_folder(tree, path, config):
filter={'folder': path},
sort=[{'is_folder': 'desc'}, 'name'],
size=10000,
exclude=['raw_data'])
include=['name', 'modified', 'size', 'link', 'path', 'is_binary',
'is_folder'])

if not files_and_folders:
raise NotFound

Expand Down
12 changes: 3 additions & 9 deletions dxr/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import dxr
from dxr.app import make_app
from dxr.config import FORMAT
from dxr.es import UNINDEXED_STRING, TREE, create_index_and_wait
from dxr.es import UNINDEXED_STRING, UNANALYZED_STRING, TREE, create_index_and_wait
from dxr.exceptions import BuildError
from dxr.filters import LINE, FILE
from dxr.lines import es_lines, finished_tags
Expand Down Expand Up @@ -98,14 +98,8 @@ def deploy_tree(tree, es, index_name):
'enabled': False
},
'properties': {
'name': {
'type': 'string',
'index': 'not_analyzed'
},
'format': {
'type': 'string',
'index': 'not_analyzed'
},
'name': UNANALYZED_STRING,
'format': UNANALYZED_STRING,
# In case es_alias changes in the conf file:
'es_alias': UNINDEXED_STRING,
# Needed so new trees or edited descriptions can show
Expand Down
6 changes: 6 additions & 0 deletions dxr/es.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
}


# Elasticsearch mapping fragment for a string field whose 'index' setting is
# 'not_analyzed'. Shared so individual mappings don't repeat the literal.
UNANALYZED_STRING = dict(type='string', index='not_analyzed')


UNINDEXED_INT = {
'type': 'integer',
'index': 'no',
Expand Down
2 changes: 1 addition & 1 deletion dxr/format
Original file line number Diff line number Diff line change
@@ -1 +1 @@
17
18
96 changes: 58 additions & 38 deletions dxr/plugins/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

from base64 import b64encode
from itertools import chain
from os.path import relpath, splitext, islink, realpath
from os.path import relpath, splitext, realpath, basename
import re

from flask import url_for
from funcy import identity
from jinja2 import Markup
from parsimonious import ParseError

from dxr.es import UNINDEXED_STRING, UNINDEXED_INT, UNINDEXED_LONG
from dxr.es import (UNINDEXED_STRING, UNANALYZED_STRING, UNINDEXED_INT,
UNINDEXED_LONG)
from dxr.exceptions import BadTerm
from dxr.filters import Filter, negatable, FILE, LINE
import dxr.indexers
Expand All @@ -21,11 +22,11 @@
NoTrigrams, PythonRegexVisitor)
from dxr.utils import glob_to_regex

__all__ = ['mappings', 'analyzers', 'TextFilter', 'PathFilter', 'ExtFilter',
'RegexpFilter', 'IdFilter', 'RefFilter']
__all__ = ['mappings', 'analyzers', 'TextFilter', 'PathFilter', 'FilenameFilter',
'ExtFilter', 'RegexpFilter', 'IdFilter', 'RefFilter']


PATH_MAPPING = { # path/to/a/folder/filename.cpp
PATH_SEGMENT_MAPPING = { # some portion of a path/to/a/folder/filename.cpp string
'type': 'string',
'index': 'not_analyzed', # support JS source fetching & sorting & browse() lookups
'fields': {
Expand All @@ -41,12 +42,6 @@
}


EXT_MAPPING = {
'type': 'string',
'index': 'not_analyzed'
}


mappings = {
# We also insert entries here for folders. This gives us folders in dir
# listings and the ability to find matches in folder pathnames.
Expand All @@ -56,26 +51,23 @@
},
'properties': {
# FILE filters query this. It supports globbing via JS regex script.
'path': PATH_MAPPING,
'path': PATH_SEGMENT_MAPPING, # path/to/a/folder/filename.cpp

'ext': EXT_MAPPING,
# Basename of path for fast lookup.
# FILE filters query this. It supports globbing via JS regex script.
'file_name': PATH_SEGMENT_MAPPING, # filename.cpp

'link': { # the target path if this FILE is a symlink
'type': 'string',
'index': 'not_analyzed'
},
'ext': UNANALYZED_STRING,

# the target path if this FILE is a symlink
'link': UNANALYZED_STRING,

# Folder listings query by folder and then display filename, size,
# and mod date.
'folder': { # path/to/a/folder
'type': 'string',
'index': 'not_analyzed'
},
'folder': UNANALYZED_STRING, # path/to/a/folder

'name': { # filename.cpp or leaf_folder (for sorting and display)
'type': 'string',
'index': 'not_analyzed'
},
# filename.cpp or leaf_folder (for sorting and display)
'name': UNANALYZED_STRING,
'size': UNINDEXED_INT, # bytes. not present for folders.
'modified': { # not present for folders
'type': 'date',
Expand Down Expand Up @@ -119,8 +111,9 @@
'enabled': False
},
'properties': {
'path': PATH_MAPPING,
'ext': EXT_MAPPING,
'path': PATH_SEGMENT_MAPPING,
'file_name': PATH_SEGMENT_MAPPING,
'ext': UNANALYZED_STRING,
# TODO: After the query language refresh, use match_phrase_prefix
# queries on non-globbed paths, analyzing them with the path
# analyzer, for max perf. Perfect! Otherwise, fall back to trigram-
Expand Down Expand Up @@ -283,30 +276,56 @@ def highlight_content(self, result):
maybe_lower(self._term['arg'])))


class PathFilter(Filter):
class _PathSegmentFilterBase(Filter):
    """Shared base for FILE-domain filters that match a glob against one
    path-segment property of a document.
    """
    domain = FILE

    def _regex_filter(self, path_seg_property_name, no_trigrams_error_text):
        """Build the ES regex filter for this term's glob.

        :arg path_seg_property_name: Name of the property the glob is matched
            against
        :arg no_trigrams_error_text: Message of the BadTerm raised when
            NoTrigrams is raised while building the filter

        The term's own 'case_sensitive' setting is passed through to the
        regex filter.
        """
        term = self._term
        pattern = term['arg']
        try:
            parsed = regex_grammar.parse(glob_to_regex(pattern))
            return es_regex_filter(parsed,
                                   path_seg_property_name,
                                   is_case_sensitive=term['case_sensitive'])
        except NoTrigrams:
            # Surface the problem as a user-facing bad-term error.
            raise BadTerm(no_trigrams_error_text)


class PathFilter(_PathSegmentFilterBase):
    """Substring filter for paths

    Pre-ES parity dictates that this simply searches for paths that have the
    argument as a substring. We may allow anchoring and such later.

    The FILE domain and the glob-matching machinery are inherited from
    _PathSegmentFilterBase, so neither is repeated here.
    """
    name = 'path'
    description = Markup('File or directory sub-path to search within. <code>*'
                         '</code>, <code>?</code>, and <code>[...]</code> act '
                         'as shell wildcards.')

    @negatable
    def filter(self):
        """Return an ES regex filter matching this term's glob against the
        'path' property.

        Delegates to the shared helper, which raises BadTerm with the message
        given here when NoTrigrams is raised while building the filter.
        """
        return self._regex_filter('path',
                                  'Path globs need at least 3 literal '
                                  'characters in a row for speed.')


class FilenameFilter(_PathSegmentFilterBase):
    """Substring filter for file names"""

    name = 'file'
    description = Markup(
        'File to search within. <code>*</code>, <code>?</code>, and '
        '<code>[...]</code> act as shell wildcards.')

    @negatable
    def filter(self):
        """Return an ES regex filter matching this term's glob against the
        'file_name' property.
        """
        too_few_literals = ('File globs need at least 3 literal characters '
                            'in a row for speed.')
        return self._regex_filter('file_name', too_few_literals)


class ExtFilter(Filter):
Expand Down Expand Up @@ -436,6 +455,7 @@ def needles(self):
# realpath will keep following symlinks until it gets to the 'real' thing.
yield 'link', relpath(realpath(self.absolute_path()), self.tree.source_folder)
yield 'path', self.path
yield 'file_name', basename(self.path)
extension = splitext(self.path)[1]
if extension:
yield 'ext', extension[1:] # skip the period
Expand Down
Empty file.
Empty file.
Empty file.
1 change: 1 addition & 0 deletions tests/test_path_file_filters/code/folder/fish3
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fins
Empty file.
10 changes: 10 additions & 0 deletions tests/test_path_file_filters/dxr.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[DXR]
enabled_plugins = pygmentize
es_index = dxr_test_{format}_{tree}_{unique}
es_alias = dxr_test_{format}_{tree}
es_catalog_index = dxr_test_catalog

[code]
source_folder = code
build_command =
clean_command =
43 changes: 43 additions & 0 deletions tests/test_path_file_filters/test_path_file_filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from nose.tools import raises

from dxr.testing import DxrInstanceTestCase


class PathAndFileFilterTests(DxrInstanceTestCase):
    """Basic tests for functionality of the 'path:' and 'file:' filters"""

    def test_basic_path_results(self):
        """Check that a 'path:' result includes both file and folder matches."""
        expected = [
            'fish1',
            'fishy_folder/fish2',
            'fishy_folder/gill',
            'folder/fish3',
            'folder/fish4',
        ]
        self.found_files_eq('path:fish', expected)

    def test_basic_file_results(self):
        """Check that a 'file:' result includes only file matches."""
        expected = [
            'fish1',
            'fishy_folder/fish2',
            'folder/fish3',
            'folder/fish4',
        ]
        self.found_files_eq('file:fish', expected)

    def test_path_and_file_line_promotion(self):
        """Make sure promotion of a 'path:' or 'file:' filter to a LINE query
        works.
        """
        # Both filters, combined with a text term, should find the same file.
        for query in ('path:fish fins', 'file:fish fins'):
            self.found_files_eq(query, ['folder/fish3'])

    # Expected failure: folder paths are currently intentionally excluded
    # from FILE query results. Drop the @raises decorator once that changes
    # (other tests in this class will then need updating too).
    @raises(AssertionError)
    def test_empty_folder_path_results(self):
        """Check that 'path:' results include empty folders."""
        self.found_files_eq('path:empty_folder', ['empty_folder'])

    def test_basic_wildcard(self):
        """Test basic wildcard functionality."""
        # 'path:' and 'file:' currently share the same underlying wildcard
        # support, so the basic wildcard checks are split across the two.
        in_fishy_folder = ['fishy_folder/fish2', 'fishy_folder/gill']
        self.found_files_eq('path:fish?_fo*er', in_fishy_folder)

        bracket_matches = ['fish1', 'folder/fish4']
        self.found_files_eq('file:fish[14]', bracket_matches)

0 comments on commit 066907f

Please sign in to comment.