Skip to content

Commit

Permalink
Merge pull request libgit2#1595 from arrbee/even-more-rename-fixes
Browse files Browse the repository at this point in the history
Even more rename detection fixes
  • Loading branch information
Vicent Martí committed May 24, 2013
2 parents 87a56fe + 49f70f2 commit 30caf0c
Show file tree
Hide file tree
Showing 16 changed files with 1,109 additions and 284 deletions.
50 changes: 44 additions & 6 deletions examples/diff.c
Expand Up @@ -84,6 +84,10 @@ static int check_uint16_param(const char *arg, const char *pattern, uint16_t *va
char *endptr = NULL;
if (strncmp(arg, pattern, len))
return 0;
if (arg[len] == '\0' && pattern[len - 1] != '=')
return 1;
if (arg[len] == '=')
len++;
strval = strtoul(arg + len, &endptr, 0);
if (endptr == arg)
return 0;
Expand All @@ -110,13 +114,20 @@ static void usage(const char *message, const char *arg)
exit(1);
}

enum {
FORMAT_PATCH = 0,
FORMAT_COMPACT = 1,
FORMAT_RAW = 2
};

int main(int argc, char *argv[])
{
git_repository *repo = NULL;
git_tree *t1 = NULL, *t2 = NULL;
git_diff_options opts = GIT_DIFF_OPTIONS_INIT;
git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT;
git_diff_list *diff;
int i, color = -1, compact = 0, cached = 0;
int i, color = -1, format = FORMAT_PATCH, cached = 0;
char *a, *treeish1 = NULL, *treeish2 = NULL;
const char *dir = ".";

Expand All @@ -137,11 +148,13 @@ int main(int argc, char *argv[])
}
else if (!strcmp(a, "-p") || !strcmp(a, "-u") ||
!strcmp(a, "--patch"))
compact = 0;
format = FORMAT_PATCH;
else if (!strcmp(a, "--cached"))
cached = 1;
else if (!strcmp(a, "--name-status"))
compact = 1;
format = FORMAT_COMPACT;
else if (!strcmp(a, "--raw"))
format = FORMAT_RAW;
else if (!strcmp(a, "--color"))
color = 0;
else if (!strcmp(a, "--no-color"))
Expand All @@ -160,6 +173,20 @@ int main(int argc, char *argv[])
opts.flags |= GIT_DIFF_INCLUDE_IGNORED;
else if (!strcmp(a, "--untracked"))
opts.flags |= GIT_DIFF_INCLUDE_UNTRACKED;
else if (check_uint16_param(a, "-M", &findopts.rename_threshold) ||
check_uint16_param(a, "--find-renames",
&findopts.rename_threshold))
findopts.flags |= GIT_DIFF_FIND_RENAMES;
else if (check_uint16_param(a, "-C", &findopts.copy_threshold) ||
check_uint16_param(a, "--find-copies",
&findopts.copy_threshold))
findopts.flags |= GIT_DIFF_FIND_COPIES;
else if (!strcmp(a, "--find-copies-harder"))
findopts.flags |= GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED;
else if (!strncmp(a, "-B", 2) || !strncmp(a, "--break-rewrites", 16)) {
/* TODO: parse thresholds */
findopts.flags |= GIT_DIFF_FIND_REWRITES;
}
else if (!check_uint16_param(a, "-U", &opts.context_lines) &&
!check_uint16_param(a, "--unified=", &opts.context_lines) &&
!check_uint16_param(a, "--inter-hunk-context=",
Expand Down Expand Up @@ -204,13 +231,24 @@ int main(int argc, char *argv[])
else
check(git_diff_index_to_workdir(&diff, repo, NULL, &opts), "Diff");

if ((findopts.flags & GIT_DIFF_FIND_ALL) != 0)
check(git_diff_find_similar(diff, &findopts),
"finding renames and copies ");

if (color >= 0)
fputs(colors[0], stdout);

if (compact)
check(git_diff_print_compact(diff, printer, &color), "Displaying diff");
else
switch (format) {
case FORMAT_PATCH:
check(git_diff_print_patch(diff, printer, &color), "Displaying diff");
break;
case FORMAT_COMPACT:
check(git_diff_print_compact(diff, printer, &color), "Displaying diff");
break;
case FORMAT_RAW:
check(git_diff_print_raw(diff, printer, &color), "Displaying diff");
break;
}

if (color >= 0)
fputs(colors[0], stdout);
Expand Down
70 changes: 56 additions & 14 deletions include/git2/diff.h
Expand Up @@ -243,6 +243,19 @@ typedef struct {
* `NOT_BINARY` flag set to avoid examining file contents if you do not pass
* in hunk and/or line callbacks to the diff foreach iteration function. It
* will just use the git attributes for those files.
*
* The similarity score is zero unless you call `git_diff_find_similar()`
* which does a similarity analysis of files in the diff. Use that
* function to do rename and copy detection, and to split heavily modified
* files in add/delete pairs. After that call, deltas with a status of
* GIT_DELTA_RENAMED or GIT_DELTA_COPIED will have a similarity score
* between 0 and 100 indicating how similar the old and new sides are.
*
* If you ask `git_diff_find_similar` to find heavily modified files to
* break, but to not *actually* break the records, then GIT_DELTA_MODIFIED
* records may have a non-zero similarity score if the self-similarity is
* below the split threshold. To display this value like core Git, invert
* the score (a la `printf("M%03d", 100 - delta->similarity)`).
*/
typedef struct {
git_diff_file old_file;
Expand Down Expand Up @@ -408,18 +421,28 @@ typedef enum {
/** consider unmodified as copy sources? (`--find-copies-harder`) */
GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1 << 3),

/** split large rewrites into delete/add pairs (`--break-rewrites=/M`) */
GIT_DIFF_FIND_AND_BREAK_REWRITES = (1 << 4),
/** mark large rewrites for split (`--break-rewrites=/M`) */
GIT_DIFF_FIND_REWRITES = (1 << 4),
/** actually split large rewrites into delete/add pairs */
GIT_DIFF_BREAK_REWRITES = (1 << 5),
/** mark rewrites for split and break into delete/add pairs */
GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),

/** find renames/copies for untracked items in working directory */
GIT_DIFF_FIND_FOR_UNTRACKED = (1 << 6),

/** turn on all finding features */
GIT_DIFF_FIND_ALL = (0x1f),
GIT_DIFF_FIND_ALL = (0x0ff),

/** measure similarity ignoring leading whitespace (default) */
GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
/** measure similarity ignoring all whitespace */
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 6),
GIT_DIFF_FIND_IGNORE_WHITESPACE = (1 << 12),
/** measure similarity including all data */
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 7),
GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1 << 13),
/** measure similarity only by comparing SHAs (fast and cheap) */
GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1 << 14),
} git_diff_find_t;

/**
Expand All @@ -446,7 +469,10 @@ typedef struct {
* - `copy_threshold` is the same as the -C option with a value
* - `rename_from_rewrite_threshold` matches the top of the -B option
* - `break_rewrite_threshold` matches the bottom of the -B option
* - `target_limit` matches the -l option
* - `rename_limit` is the maximum number of matches to consider for
* a particular file. This is a little different from the `-l` option
* to regular Git because we will still process up to this many matches
* before abandoning the search.
*
* The `metric` option allows you to plug in a custom similarity metric.
* Set it to NULL for the default internal metric which is based on sampling
Expand All @@ -458,21 +484,21 @@ typedef struct {
unsigned int version;

/** Combination of git_diff_find_t values (default FIND_RENAMES) */
unsigned int flags;
uint32_t flags;

/** Similarity to consider a file renamed (default 50) */
unsigned int rename_threshold;
uint16_t rename_threshold;
/** Similarity of modified to be eligible rename source (default 50) */
unsigned int rename_from_rewrite_threshold;
uint16_t rename_from_rewrite_threshold;
/** Similarity to consider a file a copy (default 50) */
unsigned int copy_threshold;
uint16_t copy_threshold;
/** Similarity to split modify into delete/add pair (default 60) */
unsigned int break_rewrite_threshold;
uint16_t break_rewrite_threshold;

/** Maximum similarity sources to examine (a la diff's `-l` option or
* the `diff.renameLimit` config) (default 200)
/** Maximum similarity sources to examine for a file (somewhat like
* git-diff's `-l` option or `diff.renameLimit` config) (default 200)
*/
unsigned int target_limit;
size_t rename_limit;

/** Pluggable similarity metric; pass NULL to use internal metric */
git_diff_similarity_metric *metric;
Expand Down Expand Up @@ -688,6 +714,22 @@ GIT_EXTERN(int) git_diff_print_compact(
git_diff_data_cb print_cb,
void *payload);

/**
* Iterate over a diff generating text output like "git diff --raw".
*
* Returning a non-zero value from the callbacks will terminate the
* iteration and cause this return `GIT_EUSER`.
*
* @param diff A git_diff_list generated by one of the above functions.
* @param print_cb Callback to make per line of diff text.
* @param payload Reference pointer that will be passed to your callback.
* @return 0 on success, GIT_EUSER on non-zero callback, or error code
*/
GIT_EXTERN(int) git_diff_print_raw(
git_diff_list *diff,
git_diff_data_cb print_cb,
void *payload);

/**
* Look up the single character abbreviation for a delta status code.
*
Expand Down
21 changes: 17 additions & 4 deletions include/git2/oid.h
Expand Up @@ -89,6 +89,17 @@ GIT_EXTERN(void) git_oid_fromraw(git_oid *out, const unsigned char *raw);
*/
GIT_EXTERN(void) git_oid_fmt(char *out, const git_oid *id);

/**
* Format a git_oid into a partial hex string.
*
* @param out output hex string; you say how many bytes to write.
* If the number of bytes is > GIT_OID_HEXSZ, extra bytes
* will be zeroed; if not, a '\0' terminator is NOT added.
* @param n number of characters to write into out string
* @param oid oid structure to format.
*/
GIT_EXTERN(void) git_oid_nfmt(char *out, size_t n, const git_oid *id);

/**
* Format a git_oid into a loose-object path string.
*
Expand Down Expand Up @@ -117,10 +128,12 @@ GIT_EXTERN(char *) git_oid_allocfmt(const git_oid *id);
* Format a git_oid into a buffer as a hex format c-string.
*
* If the buffer is smaller than GIT_OID_HEXSZ+1, then the resulting
* oid c-string will be truncated to n-1 characters. If there are
* any input parameter errors (out == NULL, n == 0, oid == NULL),
* then a pointer to an empty string is returned, so that the return
* value can always be printed.
* oid c-string will be truncated to n-1 characters (but will still be
* NUL-byte terminated).
*
* If there are any input parameter errors (out == NULL, n == 0, oid ==
* NULL), then a pointer to an empty string is returned, so that the
* return value can always be printed.
*
* @param out the buffer into which the oid string is output.
* @param n the size of the out buffer.
Expand Down
29 changes: 11 additions & 18 deletions src/checkout.c
Expand Up @@ -676,33 +676,26 @@ static int buffer_to_file(
int file_open_flags,
mode_t file_mode)
{
int fd, error;
int error;

if ((error = git_futils_mkpath2file(path, dir_mode)) < 0)
return error;

if ((fd = p_open(path, file_open_flags, file_mode)) < 0) {
giterr_set(GITERR_OS, "Could not open '%s' for writing", path);
return fd;
}

if ((error = p_write(fd, git_buf_cstr(buffer), git_buf_len(buffer))) < 0) {
giterr_set(GITERR_OS, "Could not write to '%s'", path);
(void)p_close(fd);
} else {
if ((error = p_close(fd)) < 0)
giterr_set(GITERR_OS, "Error while closing '%s'", path);
if ((error = git_futils_writebuffer(
buffer, path, file_open_flags, file_mode)) < 0)
return error;

if ((error = p_stat(path, st)) < 0)
giterr_set(GITERR_OS, "Error while statting '%s'", path);
if (st != NULL && (error = p_stat(path, st)) < 0) {
giterr_set(GITERR_OS, "Error while statting '%s'", path);
return error;
}

if (!error &&
(file_mode & 0100) != 0 &&
(error = p_chmod(path, file_mode)) < 0)
if ((file_mode & 0100) != 0 && (error = p_chmod(path, file_mode)) < 0) {
giterr_set(GITERR_OS, "Failed to set permissions on '%s'", path);
return error;
}

return error;
return 0;
}

static int blob_content_to_file(
Expand Down
15 changes: 14 additions & 1 deletion src/diff.c
Expand Up @@ -231,10 +231,23 @@ static char *diff_strdup_prefix(git_pool *pool, const char *prefix)
return git_pool_strndup(pool, prefix, len + 1);
}

GIT_INLINE(const char *) diff_delta__path(const git_diff_delta *delta)
{
const char *str = delta->old_file.path;

if (!str ||
delta->status == GIT_DELTA_ADDED ||
delta->status == GIT_DELTA_RENAMED ||
delta->status == GIT_DELTA_COPIED)
str = delta->new_file.path;

return str;
}

int git_diff_delta__cmp(const void *a, const void *b)
{
const git_diff_delta *da = a, *db = b;
int val = strcmp(da->old_file.path, db->old_file.path);
int val = strcmp(diff_delta__path(da), diff_delta__path(db));
return val ? val : ((int)da->status - (int)db->status);
}

Expand Down
12 changes: 10 additions & 2 deletions src/diff.h
Expand Up @@ -34,10 +34,18 @@ enum {
GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */
GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */
GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */
GIT_DIFF_FLAG__TO_DELETE = (1 << 11), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 12), /* split entry during rename det. */

GIT_DIFF_FLAG__TO_DELETE = (1 << 16), /* delete entry during rename det. */
GIT_DIFF_FLAG__TO_SPLIT = (1 << 17), /* split entry during rename det. */
GIT_DIFF_FLAG__IS_RENAME_TARGET = (1 << 18),
GIT_DIFF_FLAG__IS_RENAME_SOURCE = (1 << 19),
GIT_DIFF_FLAG__HAS_SELF_SIMILARITY = (1 << 20),
};

#define GIT_DIFF_FLAG__CLEAR_INTERNAL(F) (F) = ((F) & 0x00FFFF)

#define GIT_DIFF__VERBOSE (1 << 30)

struct git_diff_list {
git_refcount rc;
git_repository *repo;
Expand Down

0 comments on commit 30caf0c

Please sign in to comment.