From 45e7ca0f0e1042c26d56b578165365c3f70c0121 Mon Sep 17 00:00:00 2001
From: Brandon Casey <casey@nrlssc.navy.mil>
Date: Thu, 18 Sep 2008 17:40:48 -0500
Subject: [PATCH 01/16] diff.c: return pattern entry pointer rather than just
 the hunk header pattern

This is in preparation for associating a flag with each pattern which will
control how the pattern is interpreted. For example, as a basic or extended
regular expression.

Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 55 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/diff.c b/diff.c
index 5e01b2bb27..406a76a203 100644
--- a/diff.c
+++ b/diff.c
@@ -94,32 +94,35 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val
  * to define a customized regexp to find the beginning of a function to
  * be used for hunk header lines of "diff -p" style output.
  */
-static struct funcname_pattern {
+struct funcname_pattern_entry {
 	char *name;
 	char *pattern;
-	struct funcname_pattern *next;
+};
+static struct funcname_pattern_list {
+	struct funcname_pattern_list *next;
+	struct funcname_pattern_entry e;
 } *funcname_pattern_list;
 
 static int parse_funcname_pattern(const char *var, const char *ep, const char *value)
 {
 	const char *name;
 	int namelen;
-	struct funcname_pattern *pp;
+	struct funcname_pattern_list *pp;
 
 	name = var + 5; /* "diff." */
 	namelen = ep - name;
 
 	for (pp = funcname_pattern_list; pp; pp = pp->next)
-		if (!strncmp(pp->name, name, namelen) && !pp->name[namelen])
+		if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen])
 			break;
 	if (!pp) {
 		pp = xcalloc(1, sizeof(*pp));
-		pp->name = xmemdupz(name, namelen);
+		pp->e.name = xmemdupz(name, namelen);
 		pp->next = funcname_pattern_list;
 		funcname_pattern_list = pp;
 	}
-	free(pp->pattern);
-	pp->pattern = xstrdup(value);
+	free(pp->e.pattern);
+	pp->e.pattern = xstrdup(value);
 	return 0;
 }
 
@@ -1377,20 +1380,17 @@ int diff_filespec_is_binary(struct diff_filespec *one)
 	return one->is_binary;
 }
 
-static const char *funcname_pattern(const char *ident)
+static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
 {
-	struct funcname_pattern *pp;
+	struct funcname_pattern_list *pp;
 
 	for (pp = funcname_pattern_list; pp; pp = pp->next)
-		if (!strcmp(ident, pp->name))
-			return pp->pattern;
+		if (!strcmp(ident, pp->e.name))
+			return &pp->e;
 	return NULL;
 }
 
-static struct builtin_funcname_pattern {
-	const char *name;
-	const char *pattern;
-} builtin_funcname_pattern[] = {
+static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
 	{ "java", "!^[ 	]*\\(catch\\|do\\|for\\|if\\|instanceof\\|"
 			"new\\|return\\|switch\\|throw\\|while\\)\n"
 			"^[ 	]*\\(\\([ 	]*"
@@ -1407,9 +1407,10 @@ static struct builtin_funcname_pattern {
 	{ "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$" },
 };
 
-static const char *diff_funcname_pattern(struct diff_filespec *one)
+static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
 {
-	const char *ident, *pattern;
+	const char *ident;
+	const struct funcname_pattern_entry *pe;
 	int i;
 
 	diff_filespec_check_attr(one);
@@ -1424,9 +1425,9 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
 		return funcname_pattern("default");
 
 	/* Look up custom "funcname.$ident" regexp from config. */
-	pattern = funcname_pattern(ident);
-	if (pattern)
-		return pattern;
+	pe = funcname_pattern(ident);
+	if (pe)
+		return pe;
 
 	/*
 	 * And define built-in fallback patterns here.  Note that
@@ -1434,7 +1435,7 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
 	 */
 	for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++)
 		if (!strcmp(ident, builtin_funcname_pattern[i].name))
-			return builtin_funcname_pattern[i].pattern;
+			return &builtin_funcname_pattern[i];
 
 	return NULL;
 }
@@ -1512,11 +1513,11 @@ static void builtin_diff(const char *name_a,
 		xdemitconf_t xecfg;
 		xdemitcb_t ecb;
 		struct emit_callback ecbdata;
-		const char *funcname_pattern;
+		const struct funcname_pattern_entry *pe;
 
-		funcname_pattern = diff_funcname_pattern(one);
-		if (!funcname_pattern)
-			funcname_pattern = diff_funcname_pattern(two);
+		pe = diff_funcname_pattern(one);
+		if (!pe)
+			pe = diff_funcname_pattern(two);
 
 		memset(&xecfg, 0, sizeof(xecfg));
 		memset(&ecbdata, 0, sizeof(ecbdata));
@@ -1528,8 +1529,8 @@ static void builtin_diff(const char *name_a,
 		xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts;
 		xecfg.ctxlen = o->context;
 		xecfg.flags = XDL_EMIT_FUNCNAMES;
-		if (funcname_pattern)
-			xdiff_set_find_func(&xecfg, funcname_pattern);
+		if (pe)
+			xdiff_set_find_func(&xecfg, pe->pattern);
 		if (!diffopts)
 			;
 		else if (!prefixcmp(diffopts, "--unified="))

From a013585b20ac757b0e75a72181ffa44674f35235 Mon Sep 17 00:00:00 2001
From: Brandon Casey <casey@nrlssc.navy.mil>
Date: Thu, 18 Sep 2008 17:42:48 -0500
Subject: [PATCH 02/16] diff.c: associate a flag with each pattern and use it
 for compiling regex

This is in preparation for allowing extended regular expression patterns.

Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c            | 21 ++++++++++++---------
 xdiff-interface.c |  4 ++--
 xdiff-interface.h |  2 +-
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/diff.c b/diff.c
index 406a76a203..6881cf4efa 100644
--- a/diff.c
+++ b/diff.c
@@ -97,13 +97,14 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val
 struct funcname_pattern_entry {
 	char *name;
 	char *pattern;
+	int cflags;
 };
 static struct funcname_pattern_list {
 	struct funcname_pattern_list *next;
 	struct funcname_pattern_entry e;
 } *funcname_pattern_list;
 
-static int parse_funcname_pattern(const char *var, const char *ep, const char *value)
+static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags)
 {
 	const char *name;
 	int namelen;
@@ -123,6 +124,7 @@ static int parse_funcname_pattern(const char *var, const char *ep, const char *v
 	}
 	free(pp->e.pattern);
 	pp->e.pattern = xstrdup(value);
+	pp->e.cflags = cflags;
 	return 0;
 }
 
@@ -185,7 +187,8 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 			if (!strcmp(ep, ".funcname")) {
 				if (!value)
 					return config_error_nonbool(var);
-				return parse_funcname_pattern(var, ep, value);
+				return parse_funcname_pattern(var, ep, value,
+					0);
 			}
 		}
 	}
@@ -1395,16 +1398,16 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
 			"new\\|return\\|switch\\|throw\\|while\\)\n"
 			"^[ 	]*\\(\\([ 	]*"
 			"[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}"
-			"[ 	]*([^;]*\\)$" },
+			"[ 	]*([^;]*\\)$", 0 },
 	{ "pascal", "^\\(\\(procedure\\|function\\|constructor\\|"
 			"destructor\\|interface\\|implementation\\|"
 			"initialization\\|finalization\\)[ \t]*.*\\)$"
 			"\\|"
-			"^\\(.*=[ \t]*\\(class\\|record\\).*\\)$"
-			},
-	{ "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$" },
-	{ "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$" },
-	{ "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$" },
+			"^\\(.*=[ \t]*\\(class\\|record\\).*\\)$",
+			0 },
+	{ "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$", 0 },
+	{ "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$", 0 },
+	{ "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$", 0 },
 };
 
 static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
@@ -1530,7 +1533,7 @@ static void builtin_diff(const char *name_a,
 		xecfg.ctxlen = o->context;
 		xecfg.flags = XDL_EMIT_FUNCNAMES;
 		if (pe)
-			xdiff_set_find_func(&xecfg, pe->pattern);
+			xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
 		if (!diffopts)
 			;
 		else if (!prefixcmp(diffopts, "--unified="))
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 61dc5c5470..2c81f40cb6 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -206,7 +206,7 @@ static long ff_regexp(const char *line, long len,
 	return result;
 }
 
-void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value)
+void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value, int cflags)
 {
 	int i;
 	struct ff_regs *regs;
@@ -231,7 +231,7 @@ void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value)
 			expression = buffer = xstrndup(value, ep - value);
 		else
 			expression = value;
-		if (regcomp(&reg->re, expression, 0))
+		if (regcomp(&reg->re, expression, cflags))
 			die("Invalid regexp to look for hunk header: %s", expression);
 		free(buffer);
 		value = ep + 1;
diff --git a/xdiff-interface.h b/xdiff-interface.h
index f7f791d96b..33cab9dd59 100644
--- a/xdiff-interface.h
+++ b/xdiff-interface.h
@@ -21,6 +21,6 @@ int parse_hunk_header(char *line, int len,
 int read_mmfile(mmfile_t *ptr, const char *filename);
 int buffer_is_binary(const char *ptr, unsigned long size);
 
-extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line);
+extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line, int cflags);
 
 #endif

From 45d9414fa5599b41578625961b53e18a9b9148c7 Mon Sep 17 00:00:00 2001
From: Brandon Casey <casey@nrlssc.navy.mil>
Date: Thu, 18 Sep 2008 17:44:33 -0500
Subject: [PATCH 03/16] diff.*.xfuncname which uses "extended" regex's for hunk
 header selection

Currently, the hunk headers produced by 'diff -p' are customizable by
setting the diff.*.funcname option in the config file. The 'funcname' option
takes a basic regular expression. This functionality was designed using the
GNU regex library which, by default, allows using backslashed versions of
some extended regular expression operators, even in Basic Regular Expression
mode. For example, the following characters, when backslashed, are
interpreted according to the extended regular expression rules: ?, +, and |.
As such, the builtin funcname patterns were created using some extended
regular expression operators.

Other platforms which adhere more strictly to the POSIX spec do not
interpret the backslashed extended RE operators in Basic Regular Expression
mode. This causes the pattern matching for the builtin funcname patterns to
fail on those platforms.

Introduce a new option 'xfuncname' which uses extended regular expressions,
and advertise it _instead_ of funcname. Since most users are on GNU
platforms, the majority of funcname patterns are created and tested there.
Advertising only xfuncname should help to avoid the creation of non-portable
patterns which work with GNU regex but not elsewhere.

Additionally, the extended regular expressions may be less ugly and
complicated compared to the basic RE since many common special operators do
not need to be backslashed.

For example, the GNU Basic RE:

    ^[ 	]*\\(\\(public\\|static\\).*\\)$

becomes the following Extended RE:

    ^[ 	]*((public|static).*)$

Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/gitattributes.txt | 4 ++--
 diff.c                          | 5 +++++
 t/t4018-diff-funcname.sh        | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 94e6752aa2..9259637609 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -288,13 +288,13 @@ for paths.
 *.tex	diff=tex
 ------------------------
 
-Then, you would define "diff.tex.funcname" configuration to
+Then, you would define "diff.tex.xfuncname" configuration to
 specify a regular expression that matches a line that you would
 want to appear as the hunk header, like this:
 
 ------------------------
 [diff "tex"]
-	funcname = "^\\(\\\\\\(sub\\)*section{.*\\)$"
+	xfuncname = "^(\\\\(sub)*section\\{.*)$"
 ------------------------
 
 Note.  A single level of backslashes are eaten by the
diff --git a/diff.c b/diff.c
index 6881cf4efa..dabb4b4a02 100644
--- a/diff.c
+++ b/diff.c
@@ -189,6 +189,11 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 					return config_error_nonbool(var);
 				return parse_funcname_pattern(var, ep, value,
 					0);
+			} else if (!strcmp(ep, ".xfuncname")) {
+				if (!value)
+					return config_error_nonbool(var);
+				return parse_funcname_pattern(var, ep, value,
+					REG_EXTENDED);
 			}
 		}
 	}
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index 18bcd9713d..602d68f092 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -58,7 +58,7 @@ test_expect_success 'last regexp must not be negated' '
 '
 
 test_expect_success 'alternation in pattern' '
-	git config diff.java.funcname "^[ 	]*\\(\\(public\\|static\\).*\\)$"
+	git config diff.java.xfuncname "^[ 	]*((public|static).*)$" &&
 	git diff --no-index Beer.java Beer-correct.java |
 	grep "^@@.*@@ public static void main("
 '

From 6a6baf9b4e819a0bbfd70627f966cd7144dd8301 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Fri, 19 Sep 2008 23:45:04 -0700
Subject: [PATCH 04/16] diff: use extended regexp to find hunk headers

Using ERE elements such as "|" (alternation) by backslash-quoting them in
BRE is a GNU extension and should not be done in portable programs.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/diff.c b/diff.c
index dabb4b4a02..175a044a34 100644
--- a/diff.c
+++ b/diff.c
@@ -1399,20 +1399,23 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
 }
 
 static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
-	{ "java", "!^[ 	]*\\(catch\\|do\\|for\\|if\\|instanceof\\|"
-			"new\\|return\\|switch\\|throw\\|while\\)\n"
-			"^[ 	]*\\(\\([ 	]*"
-			"[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}"
-			"[ 	]*([^;]*\\)$", 0 },
-	{ "pascal", "^\\(\\(procedure\\|function\\|constructor\\|"
-			"destructor\\|interface\\|implementation\\|"
-			"initialization\\|finalization\\)[ \t]*.*\\)$"
-			"\\|"
-			"^\\(.*=[ \t]*\\(class\\|record\\).*\\)$",
-			0 },
-	{ "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$", 0 },
-	{ "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$", 0 },
-	{ "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$", 0 },
+	{ "java",
+	  "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
+	  "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+	  REG_EXTENDED },
+	{ "pascal",
+	  "^((procedure|function|constructor|destructor|interface|"
+		"implementation|initialization|finalization)[ \t]*.*)$"
+	  "|"
+	  "^(.*=[ \t]*(class|record).*)$",
+	  REG_EXTENDED },
+	{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+	  REG_EXTENDED },
+	{ "tex",
+	  "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
+	  REG_EXTENDED },
+	{ "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
+	  REG_EXTENDED },
 };
 
 static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)

From 1883a0d3b7ad7c9de1ac790bda6f1a6181237439 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Fri, 19 Sep 2008 23:52:49 -0700
Subject: [PATCH 05/16] diff: use extended regexp to find hunk headers

Using ERE elements such as "|" (alternation) by backslash-quoting them in
BRE is a GNU extension and should not be done in portable programs.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/diff.c b/diff.c
index 5b9b074856..a733010170 100644
--- a/diff.c
+++ b/diff.c
@@ -1406,7 +1406,7 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
 static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
 	{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
 	  REG_EXTENDED },
-	{ "html", "^\\s*\\(<[Hh][1-6]\\s.*>.*\\)$", 0 },
+	{ "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
 	{ "java",
 	  "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
 	  "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
@@ -1417,8 +1417,8 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
 	  "|"
 	  "^(.*=[ \t]*(class|record).*)$",
 	  REG_EXTENDED },
-	{ "php", "^[\t ]*\\(\\(function\\|class\\).*\\)", 0 },
-	{ "python", "^\\s*\\(\\(class\\|def\\)\\s.*\\)$", 0 },
+	{ "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
+	{ "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED },
 	{ "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
 	  REG_EXTENDED },
 	{ "tex",

From 3d8dccd74aa29a9019c4e8b52e75a40189e6f5cb Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Sat, 20 Sep 2008 00:52:11 -0700
Subject: [PATCH 06/16] diff: fix "multiple regexp" semantics to find hunk
 header comment

When multiple regular expressions are concatenated with "\n", they were
traditionally AND'ed together, and only a line that matches _all_ of them
is taken as a match.  This however is unwieldy when the multiple regexp
feature is used to specify alternatives.

This fixes the semantics to take the first match.  A negative pattern, if it
matches, makes the line fail as before.  A match with a positive
pattern will be the final match, and what it captures in $1 is used as the
hunk header comment.

We could write alternatives using "|" in ERE, but the machinery can only
use captured $1 as the hunk header comment (or $0 if there is no match in
$1), so you cannot write:

    "junk ( A | B ) | garbage ( C | D )"

and expect both "junk" and "garbage" to get stripped with the existing
code.  With this fix, you can write it as:

    "junk ( A | B ) \n garbage ( C | D )"

and the way capture works would match the user expectation more
naturally.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c            |  2 +-
 xdiff-interface.c | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/diff.c b/diff.c
index a733010170..1bcbbd5bb1 100644
--- a/diff.c
+++ b/diff.c
@@ -1414,7 +1414,7 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
 	{ "pascal",
 	  "^((procedure|function|constructor|destructor|interface|"
 		"implementation|initialization|finalization)[ \t]*.*)$"
-	  "|"
+	  "\n"
 	  "^(.*=[ \t]*(class|record).*)$",
 	  REG_EXTENDED },
 	{ "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 7f1a7d3ffc..6c6bb19973 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -194,26 +194,29 @@ static long ff_regexp(const char *line, long len,
 	char *line_buffer = xstrndup(line, len); /* make NUL terminated */
 	struct ff_regs *regs = priv;
 	regmatch_t pmatch[2];
-	int result = 0, i;
+	int i;
+	int result = -1;
 
 	for (i = 0; i < regs->nr; i++) {
 		struct ff_reg *reg = regs->array + i;
-		if (reg->negate ^ !!regexec(&reg->re,
-					line_buffer, 2, pmatch, 0)) {
-			free(line_buffer);
-			return -1;
+		if (!regexec(&reg->re, line_buffer, 2, pmatch, 0)) {
+			if (reg->negate)
+				goto fail;
+			break;
 		}
 	}
+	if (regs->nr <= i)
+		goto fail;
 	i = pmatch[1].rm_so >= 0 ? 1 : 0;
 	line += pmatch[i].rm_so;
 	result = pmatch[i].rm_eo - pmatch[i].rm_so;
 	if (result > buffer_size)
 		result = buffer_size;
 	else
-		while (result > 0 && (isspace(line[result - 1]) ||
-					line[result - 1] == '\n'))
+		while (result > 0 && (isspace(line[result - 1])))
 			result--;
 	memcpy(buffer, line, result);
+ fail:
 	free(line_buffer);
 	return result;
 }

From 96d1a8e9d44fd635fad8466dbe0aab6d73495c9f Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Sat, 20 Sep 2008 15:30:12 -0700
Subject: [PATCH 07/16] diff hunk pattern: fix misconverted "\{" tex macro
 introducers

Pointed out by Brandon Casey.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 175a044a34..a283738616 100644
--- a/diff.c
+++ b/diff.c
@@ -1409,10 +1409,10 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
 	  "|"
 	  "^(.*=[ \t]*(class|record).*)$",
 	  REG_EXTENDED },
-	{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+	{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
 	  REG_EXTENDED },
 	{ "tex",
-	  "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
+	  "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
 	  REG_EXTENDED },
 	{ "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
 	  REG_EXTENDED },

From 7d20e2189e19f51662353ea853891e00882d29cc Mon Sep 17 00:00:00 2001
From: Michael J Gruber <git@drmicha.warpmail.net>
Date: Mon, 22 Sep 2008 10:57:51 +0200
Subject: [PATCH 08/16] make "git remote" report multiple URLs

This patch makes "git remote -v" and "git remote show" report multiple URLs
rather than warn about them. Multiple URLs are OK for pushing into
multiple repos simultaneously. Without "-v" each repo is shown once only.

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin-remote.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/builtin-remote.c b/builtin-remote.c
index 01945a8651..1e2edc2050 100644
--- a/builtin-remote.c
+++ b/builtin-remote.c
@@ -652,10 +652,13 @@ static int get_one_entry(struct remote *remote, void *priv)
 {
 	struct string_list *list = priv;
 
-	string_list_append(remote->name, list)->util = remote->url_nr ?
-		(void *)remote->url[0] : NULL;
-	if (remote->url_nr > 1)
-		warning("Remote %s has more than one URL", remote->name);
+	if (remote->url_nr > 0) {
+		int i;
+
+		for (i = 0; i < remote->url_nr; i++)
+			string_list_append(remote->name, list)->util = (void *)remote->url[i];
+	} else
+		string_list_append(remote->name, list)->util = NULL;
 
 	return 0;
 }
@@ -671,10 +674,14 @@ static int show_all(void)
 		sort_string_list(&list);
 		for (i = 0; i < list.nr; i++) {
 			struct string_list_item *item = list.items + i;
-			printf("%s%s%s\n", item->string,
-				verbose ? "\t" : "",
-				verbose && item->util ?
-					(const char *)item->util : "");
+			if (verbose)
+				printf("%s\t%s\n", item->string,
+					item->util ? (const char *)item->util : "");
+			else {
+				if (i && !strcmp((item - 1)->string, item->string))
+					continue;
+				printf("%s\n", item->string);
+			}
 		}
 	}
 	return result;

From e3bf5e43fd9db9391ebc876ef118dbb431853d69 Mon Sep 17 00:00:00 2001
From: Brandon Casey <casey@nrlssc.navy.mil>
Date: Mon, 22 Sep 2008 18:19:05 -0500
Subject: [PATCH 09/16] t4018-diff-funcname: test syntax of builtin xfuncname
 patterns

Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t4018-diff-funcname.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index 602d68f092..99fff973eb 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -32,7 +32,18 @@ EOF
 
 sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java
 
+builtin_patterns="bibtex java pascal ruby tex"
+for p in $builtin_patterns
+do
+	test_expect_success "builtin $p pattern compiles" '
+		echo "*.java diff=$p" > .gitattributes &&
+		! ( git diff --no-index Beer.java Beer-correct.java 2>&1 |
+			grep "fatal" > /dev/null )
+	'
+done
+
 test_expect_success 'default behaviour' '
+	rm -f .gitattributes &&
 	git diff --no-index Beer.java Beer-correct.java |
 	grep "^@@.*@@ public class Beer"
 '

From fdac6692a0b0eda293f9f1bf4bc49b05b29f3c45 Mon Sep 17 00:00:00 2001
From: Brandon Casey <casey@nrlssc.navy.mil>
Date: Mon, 22 Sep 2008 18:26:20 -0500
Subject: [PATCH 10/16] t4018-diff-funcname: test syntax of builtin xfuncname
 patterns

[jc: fixes bibtex pattern breakage exposed by this test]

Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c                   | 2 +-
 t/t4018-diff-funcname.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 0f98bff46b..05dd8f0b56 100644
--- a/diff.c
+++ b/diff.c
@@ -1404,7 +1404,7 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
 }
 
 static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
-	{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+	{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
 	  REG_EXTENDED },
 	{ "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
 	{ "java",
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index 99fff973eb..520e095c59 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -32,7 +32,7 @@ EOF
 
 sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java
 
-builtin_patterns="bibtex java pascal ruby tex"
+builtin_patterns="bibtex html java pascal php python ruby tex"
 for p in $builtin_patterns
 do
 	test_expect_success "builtin $p pattern compiles" '

From 2a79d2f662a5a82a6cfcfd2a0d980e7b1875f320 Mon Sep 17 00:00:00 2001
From: Stephen Haberman <stephen@exigencecorp.com>
Date: Mon, 29 Sep 2008 04:12:04 -0500
Subject: [PATCH 11/16] Clarify how the user can satisfy stash's 'dirty state'
 check.

Signed-off-by: Stephen Haberman <stephen@exigencecorp.com>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 git-stash.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/git-stash.sh b/git-stash.sh
index 6bd2572f77..42f626f9d5 100755
--- a/git-stash.sh
+++ b/git-stash.sh
@@ -161,7 +161,7 @@ show_stash () {
 apply_stash () {
 	git update-index -q --refresh &&
 	git diff-files --quiet --ignore-submodules ||
-		die 'Cannot restore on top of a dirty state'
+		die 'Cannot apply to a dirty working tree, please stage your changes'
 
 	unstash_index=
 	case "$1" in

From b9b378a001d35a64a30a652a45f8084ee2be6cdf Mon Sep 17 00:00:00 2001
From: Ping Yin <pkufranky@gmail.com>
Date: Fri, 26 Sep 2008 23:33:23 +0800
Subject: [PATCH 12/16] git-submodule: Fix "Unable to checkout" for the initial
 'update'

Since commit 55218 ("checkout: do not lose staged removal"), in
cmd_add/cmd_update, "git checkout <commit>" following
"git clone -n" may fail if <commit> is different from HEAD.

So use "git checkout -f <commit>" to fix this.

Signed-off-by: Ping Yin <pkufranky@gmail.com>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 git-submodule.sh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/git-submodule.sh b/git-submodule.sh
index b40f876a2c..5888735e4f 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -185,7 +185,7 @@ cmd_add()
 	else
 
 		module_clone "$path" "$realrepo" || exit
-		(unset GIT_DIR; cd "$path" && git checkout -q ${branch:+-b "$branch" "origin/$branch"}) ||
+		(unset GIT_DIR; cd "$path" && git checkout -f -q ${branch:+-b "$branch" "origin/$branch"}) ||
 		die "Unable to checkout submodule '$path'"
 	fi
 
@@ -311,8 +311,13 @@ cmd_update()
 
 		if test "$subsha1" != "$sha1"
 		then
+			force=
+			if test -z "$subsha1"
+			then
+				force="-f"
+			fi
 			(unset GIT_DIR; cd "$path" && git-fetch &&
-				git-checkout -q "$sha1") ||
+				git-checkout $force -q "$sha1") ||
 			die "Unable to checkout '$sha1' in submodule path '$path'"
 
 			say "Submodule path '$path': checked out '$sha1'"

From 4a92d1bfb784b09641180d164e7d719080165dc4 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Sat, 27 Sep 2008 00:56:46 +0200
Subject: [PATCH 13/16] Add remove_path: a function to remove as much as
 possible of a path

The function has two potential users, both of which managed to get their
implementations wrong (the one in builtin-rm.c has a memleak, and the one
in builtin-merge-recursive.c scribbles over its const argument).

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 dir.c | 20 ++++++++++++++++++++
 dir.h |  3 +++
 2 files changed, 23 insertions(+)

diff --git a/dir.c b/dir.c
index 109e05b013..cfaa28ff23 100644
--- a/dir.c
+++ b/dir.c
@@ -837,3 +837,23 @@ void setup_standard_excludes(struct dir_struct *dir)
 	if (excludes_file && !access(excludes_file, R_OK))
 		add_excludes_from_file(dir, excludes_file);
 }
+
+int remove_path(const char *name)
+{
+	char *slash;
+
+	if (unlink(name) && errno != ENOENT)
+		return -1;
+
+	slash = strrchr(name, '/');
+	if (slash) {
+		char *dirs = xstrdup(name);
+		slash = dirs + (slash - name);
+		do {
+			*slash = '\0';
+		} while (rmdir(dirs) && (slash = strrchr(dirs, '/')));
+		free(dirs);
+	}
+	return 0;
+}
+
diff --git a/dir.h b/dir.h
index 2df15defb6..278ee42295 100644
--- a/dir.h
+++ b/dir.h
@@ -81,4 +81,7 @@ extern int is_inside_dir(const char *dir);
 extern void setup_standard_excludes(struct dir_struct *dir);
 extern int remove_dir_recursively(struct strbuf *path, int only_empty);
 
+/* tries to remove the path with empty directories along it, ignores ENOENT */
+extern int remove_path(const char *path);
+
 #endif

From 175a494823c1eacd4da9c4a5b90ec0668f7051c0 Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Sat, 27 Sep 2008 00:59:14 +0200
Subject: [PATCH 14/16] Use remove_path from dir.c instead of own
 implementation

Besides, it fixes a memleak (builtin-rm.c) and an accidental change of
the input const argument (builtin-merge-recursive.c).

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 builtin-apply.c           | 11 ++---------
 builtin-merge-recursive.c | 21 ++-------------------
 builtin-rm.c              | 22 +---------------------
 3 files changed, 5 insertions(+), 49 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index 20bef1f21d..70c9f93554 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -13,6 +13,7 @@
 #include "delta.h"
 #include "builtin.h"
 #include "string-list.h"
+#include "dir.h"
 
 /*
  *  --check turns on checking that the working tree matches the
@@ -2735,15 +2736,7 @@ static void remove_file(struct patch *patch, int rmdir_empty)
 				warning("unable to remove submodule %s",
 					patch->old_name);
 		} else if (!unlink(patch->old_name) && rmdir_empty) {
-			char *name = xstrdup(patch->old_name);
-			char *end = strrchr(name, '/');
-			while (end) {
-				*end = 0;
-				if (rmdir(name))
-					break;
-				end = strrchr(name, '/');
-			}
-			free(name);
+			remove_path(patch->old_name);
 		}
 	}
 }
diff --git a/builtin-merge-recursive.c b/builtin-merge-recursive.c
index f628a62751..b9738655ad 100644
--- a/builtin-merge-recursive.c
+++ b/builtin-merge-recursive.c
@@ -18,6 +18,7 @@
 #include "ll-merge.h"
 #include "interpolate.h"
 #include "attr.h"
+#include "dir.h"
 #include "merge-recursive.h"
 
 static int subtree_merge;
@@ -416,24 +417,6 @@ static int update_stages(const char *path, struct diff_filespec *o,
 	return 0;
 }
 
-static int remove_path(const char *name)
-{
-	int ret;
-	char *slash, *dirs;
-
-	ret = unlink(name);
-	if (ret)
-		return ret;
-	dirs = xstrdup(name);
-	while ((slash = strrchr(name, '/'))) {
-		*slash = '\0';
-		if (rmdir(name) != 0)
-			break;
-	}
-	free(dirs);
-	return ret;
-}
-
 static int remove_file(int clean, const char *path, int no_wd)
 {
 	int update_cache = index_only || clean;
@@ -444,7 +427,7 @@ static int remove_file(int clean, const char *path, int no_wd)
 			return -1;
 	}
 	if (update_working_directory) {
-		if (remove_path(path) && errno != ENOENT)
+		if (remove_path(path))
 			return -1;
 	}
 	return 0;
diff --git a/builtin-rm.c b/builtin-rm.c
index fdac34f242..50ae6d5401 100644
--- a/builtin-rm.c
+++ b/builtin-rm.c
@@ -29,26 +29,6 @@ static void add_list(const char *name)
 	list.name[list.nr++] = name;
 }
 
-static int remove_file(const char *name)
-{
-	int ret;
-	char *slash;
-
-	ret = unlink(name);
-	if (ret && errno == ENOENT)
-		/* The user has removed it from the filesystem by hand */
-		ret = errno = 0;
-
-	if (!ret && (slash = strrchr(name, '/'))) {
-		char *n = xstrdup(name);
-		do {
-			n[slash - name] = 0;
-			name = n;
-		} while (!rmdir(name) && (slash = strrchr(name, '/')));
-	}
-	return ret;
-}
-
 static int check_local_mod(unsigned char *head, int index_only)
 {
 	/* items in list are already sorted in the cache order,
@@ -239,7 +219,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
 		int removed = 0;
 		for (i = 0; i < list.nr; i++) {
 			const char *path = list.name[i];
-			if (!remove_file(path)) {
+			if (!remove_path(path)) {
 				removed = 1;
 				continue;
 			}

From 94e02e7f3b0504243b364ad04b58332bb1502934 Mon Sep 17 00:00:00 2001
From: Johan Herland <johan@herland.net>
Date: Sun, 28 Sep 2008 00:24:36 +0200
Subject: [PATCH 15/16] Use strchrnul() instead of strchr() plus manual
 workaround

Also gets rid of a C++ comment.

Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 builtin-for-each-ref.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/builtin-for-each-ref.c b/builtin-for-each-ref.c
index be9dc9e3f0..72c087840c 100644
--- a/builtin-for-each-ref.c
+++ b/builtin-for-each-ref.c
@@ -320,9 +320,7 @@ static const char *find_wholine(const char *who, int wholen, const char *buf, un
 
 static const char *copy_line(const char *buf)
 {
-	const char *eol = strchr(buf, '\n');
-	if (!eol) // simulate strchrnul()
-		eol = buf + strlen(buf);
+	const char *eol = strchrnul(buf, '\n');
 	return xmemdupz(buf, eol - buf);
 }
 

From cbce6c0be30cbaf8c39051f85006d679338a8ddc Mon Sep 17 00:00:00 2001
From: Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
Date: Mon, 29 Sep 2008 18:04:41 +0200
Subject: [PATCH 16/16] Clarify commit error message for unmerged files

Currently, trying to use git-commit with unmerged files in the index
will show the message "Error building trees", which can be a bit
obscure to the end user. This patch makes the error message clearer, and
consistent with what git-write-tree reports in a similar situation.

Signed-off-by: Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 builtin-commit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin-commit.c b/builtin-commit.c
index c870037b07..e2a7e48b1c 100644
--- a/builtin-commit.c
+++ b/builtin-commit.c
@@ -639,7 +639,7 @@ static int prepare_to_commit(const char *index_file, const char *prefix)
 		active_cache_tree = cache_tree();
 	if (cache_tree_update(active_cache_tree,
 			      active_cache, active_nr, 0, 0) < 0) {
-		error("Error building trees");
+		error("Error building trees; the index is unmerged?");
 		return 0;
 	}