Skip to content
This repository
Browse code

color-words: make regex configurable via attributes

Make the --color-words splitting regular expression configurable via
the diff driver's 'wordregex' attribute.  The user can then set the
driver on a file in .gitattributes.  If a regex is given on the
command line, it overrides the driver's setting.

We also provide built-in regexes for the languages that already had
funcname patterns, and add an appropriate diff driver entry for C/C++.
(The patterns are designed to run UTF-8 sequences into a single chunk
to make sure they remain readable.)

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information...
commit 80c49c3de2d5a3aa12b0980a65f1163c8aef0c16 1 parent c4b252c
trast trast authored gitster committed
4 Documentation/diff-options.txt
@@ -102,6 +102,10 @@ differences. You may want to append `|[^[:space:]]` to your regular
102 102 expression to make sure that it matches all non-whitespace characters.
103 103 A match that contains a newline is silently truncated(!) at the
104 104 newline.
  105 ++
  106 +The regex can also be set via a diff driver, see
  107 +linkgit:gitattributes[1]; giving it explicitly overrides any diff
  108 +driver setting.
105 109
106 110 --no-renames::
107 111 Turn off rename detection, even when the configuration
21 Documentation/gitattributes.txt
@@ -317,6 +317,8 @@ patterns are available:
317 317
318 318 - `bibtex` suitable for files with BibTeX coded references.
319 319
  320 +- `cpp` suitable for source code in the C and C++ languages.
  321 +
320 322 - `html` suitable for HTML/XHTML documents.
321 323
322 324 - `java` suitable for source code in the Java language.
@@ -334,6 +336,25 @@ patterns are available:
334 336 - `tex` suitable for source code for LaTeX documents.
335 337
336 338
  339 +Customizing word diff
  340 +^^^^^^^^^^^^^^^^^^^^^
  341 +
  342 +You can customize the rules that `git diff --color-words` uses to
  343 +split words in a line, by specifying an appropriate regular expression
  344 +in the "diff.*.wordregex" configuration variable. For example, in TeX
  345 +a backslash followed by a sequence of letters forms a command, but
  346 +several such commands can be run together without intervening
  347 +whitespace. To separate them, use a regular expression such as
  348 +
  349 +------------------------
  350 +[diff "tex"]
  351 + wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+"
  352 +------------------------
  353 +
  354 +A built-in pattern is provided for all languages listed in the
  355 +previous section.
  356 +
  357 +
337 358 Performing text diffs of binary files
338 359 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
339 360
10 diff.c
@@ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe
1380 1380 return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
1381 1381 }
1382 1382
  1383 +static const char *userdiff_word_regex(struct diff_filespec *one)
  1384 +{
  1385 + diff_filespec_load_driver(one);
  1386 + return one->driver->word_regex;
  1387 +}
  1388 +
1383 1389 void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
1384 1390 {
1385 1391 if (!options->a_prefix)
@@ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a,
1540 1546 ecbdata.diff_words =
1541 1547 xcalloc(1, sizeof(struct diff_words_data));
1542 1548 ecbdata.diff_words->file = o->file;
  1549 + if (!o->word_regex)
  1550 + o->word_regex = userdiff_word_regex(one);
  1551 + if (!o->word_regex)
  1552 + o->word_regex = userdiff_word_regex(two);
1543 1553 if (o->word_regex) {
1544 1554 ecbdata.diff_words->word_regex = (regex_t *)
1545 1555 xmalloc(sizeof(regex_t));
36 t/t4034-diff-words.sh
@@ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' '
84 84
85 85 '
86 86
  87 +test_expect_success 'set a diff driver' '
  88 + git config diff.testdriver.wordregex "[^[:space:]]" &&
  89 + cat <<EOF > .gitattributes
  90 +pre diff=testdriver
  91 +post diff=testdriver
  92 +EOF
  93 +'
  94 +
  95 +test_expect_success 'option overrides default' '
  96 +
  97 + word_diff --color-words="[a-z]+"
  98 +
  99 +'
  100 +
  101 +cat > expect <<\EOF
  102 +<WHITE>diff --git a/pre b/post<RESET>
  103 +<WHITE>index 330b04f..5ed8eff 100644<RESET>
  104 +<WHITE>--- a/pre<RESET>
  105 +<WHITE>+++ b/post<RESET>
  106 +<BROWN>@@ -1,3 +1,7 @@<RESET>
  107 +h(4)<GREEN>,hh[44]<RESET>
  108 +<RESET>
  109 +a = b + c<RESET>
  110 +
  111 +<GREEN>aa = a<RESET>
  112 +
  113 +<GREEN>aeff = aeff * ( aaa )<RESET>
  114 +EOF
  115 +
  116 +test_expect_success 'use default supplied by driver' '
  117 +
  118 + word_diff --color-words
  119 +
  120 +'
  121 +
87 122 echo 'aaa (aaa)' > pre
88 123 echo 'aaa (aaa) aaa' > post
89 124
@@ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' '
100 135
101 136 word_diff --color-words="a+"
102 137
  138 +
103 139 '
104 140
105 141 echo '(:' > pre
78 userdiff.c
@@ -6,14 +6,20 @@ static struct userdiff_driver *drivers;
6 6 static int ndrivers;
7 7 static int drivers_alloc;
8 8
9   -#define FUNCNAME(name, pattern) \
10   - { name, NULL, -1, { pattern, REG_EXTENDED } }
  9 +#define PATTERNS(name, pattern, wordregex) \
  10 + { name, NULL, -1, { pattern, REG_EXTENDED }, wordregex }
11 11 static struct userdiff_driver builtin_drivers[] = {
12   -FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
13   -FUNCNAME("java",
  12 +PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
  13 + "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"),
  14 +PATTERNS("java",
14 15 "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
15   - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
16   -FUNCNAME("objc",
  16 + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
  17 + "[a-zA-Z_][a-zA-Z0-9_]*"
  18 + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
  19 + "|[-+*/<>%&^|=!]="
  20 + "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"
  21 + "|[^[:space:]]|[\x80-\xff]+"),
  22 +PATTERNS("objc",
17 23 /* Negate C statements that can look like functions */
18 24 "!^[ \t]*(do|for|if|else|return|switch|while)\n"
19 25 /* Objective-C methods */
@@ -21,20 +27,60 @@ FUNCNAME("objc",
21 27 /* C functions */
22 28 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n"
23 29 /* Objective-C class/protocol definitions */
24   - "^(@(implementation|interface|protocol)[ \t].*)$"),
25   -FUNCNAME("pascal",
  30 + "^(@(implementation|interface|protocol)[ \t].*)$",
  31 + /* -- */
  32 + "[a-zA-Z_][a-zA-Z0-9_]*"
  33 + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
  34 + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
  35 + "|[^[:space:]]|[\x80-\xff]+"),
  36 +PATTERNS("pascal",
26 37 "^((procedure|function|constructor|destructor|interface|"
27 38 "implementation|initialization|finalization)[ \t]*.*)$"
28 39 "\n"
29   - "^(.*=[ \t]*(class|record).*)$"),
30   -FUNCNAME("php", "^[\t ]*((function|class).*)"),
31   -FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
32   -FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
33   -FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
34   -FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
  40 + "^(.*=[ \t]*(class|record).*)$",
  41 + /* -- */
  42 + "[a-zA-Z_][a-zA-Z0-9_]*"
  43 + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
  44 + "|<>|<=|>=|:=|\\.\\."
  45 + "|[^[:space:]]|[\x80-\xff]+"),
  46 +PATTERNS("php", "^[\t ]*((function|class).*)",
  47 + /* -- */
  48 + "[a-zA-Z_][a-zA-Z0-9_]*"
  49 + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
  50 + "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"
  51 + "|[^[:space:]]|[\x80-\xff]+"),
  52 +PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
  53 + /* -- */
  54 + "[a-zA-Z_][a-zA-Z0-9_]*"
  55 + "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?"
  56 + "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"
  57 + "|[^[:space:]|[\x80-\xff]+"),
  58 + /* -- */
  59 +PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
  60 + /* -- */
  61 + "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
  62 + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
  63 + "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"
  64 + "|[^[:space:]|[\x80-\xff]+"),
  65 +PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
  66 + "[={}\"]|[^={}\" \t]+"),
  67 +PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
  68 + "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"),
  69 +PATTERNS("cpp",
  70 + /* Jump targets or access declarations */
  71 + "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n"
  72 + /* C/++ functions/methods at top level */
  73 + "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n"
  74 + /* compound type at top level */
  75 + "^((struct|class|enum)[^;]*)$",
  76 + /* -- */
  77 + "[a-zA-Z_][a-zA-Z0-9_]*"
  78 + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
  79 + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
  80 + "|[^[:space:]]|[\x80-\xff]+"),
35 81 { "default", NULL, -1, { NULL, 0 } },
36 82 };
37   -#undef FUNCNAME
  83 +#undef PATTERNS
38 84
39 85 static struct userdiff_driver driver_true = {
40 86 "diff=true",
@@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v)
134 180 return parse_string(&drv->external, k, v);
135 181 if ((drv = parse_driver(k, v, "textconv")))
136 182 return parse_string(&drv->textconv, k, v);
  183 + if ((drv = parse_driver(k, v, "wordregex")))
  184 + return parse_string(&drv->word_regex, k, v);
137 185
138 186 return 0;
139 187 }
1  userdiff.h
@@ -11,6 +11,7 @@ struct userdiff_driver {
11 11 const char *external;
12 12 int binary;
13 13 struct userdiff_funcname funcname;
  14 + const char *word_regex;
14 15 const char *textconv;
15 16 };
16 17

0 comments on commit 80c49c3

Please sign in to comment.
Something went wrong with that request. Please try again.