Skip to content

Commit

Permalink
实现了fullChecksum补全
Browse files Browse the repository at this point in the history
  • Loading branch information
jqk committed Sep 21, 2023
1 parent 796b8ea commit f181009
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
2 changes: 1 addition & 1 deletion file-diff-grouper.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"program": "${workspaceFolder}/file-diff-cli",
"env": {},
//"args": [],
"args": ["e:/temp/fdg/config-d-2020-整理.yaml"],
"args": ["e:/temp/fdg/config-m-data.yaml"],
//"args": [ "config.yaml" ],
"showLog": true
}
Expand Down
14 changes: 11 additions & 3 deletions file-diff/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func ScanDir(config *DirConfig, handler FileScanedFunc) (*ScanResult, error) {
return nil, err
}

sortAndFindDupFiles(result)
sortAndFindDupFiles(result, buffer, provider)

// 保存文件的耗时不计入工作耗时。
stopwatch.Stop()
Expand All @@ -125,15 +125,23 @@ func ScanDir(config *DirConfig, handler FileScanedFunc) (*ScanResult, error) {
return result, nil
}

func sortAndFindDupFiles(r *ScanResult) {
func sortAndFindDupFiles(r *ScanResult, buffer []byte, provider *fileutils.CommonFileChecksumProvider) {
for _, identities := range r.Files {
if len(identities) > 1 { // 多个文件具有相同的 headerChecksum 才需排序并查重。
sortFileIdentities(identities) // 1. 排序。

m := FileIdentities{} // 2. 准备查重。

for _, id := range identities {
// 使用文件长度加整体校验和作为 key。这是判断文件是否重复的标准。即使 fullChecksum 无值,也要加上。
if !id.HasFullChecksum && r.CompareFullChecksum {
// 如果没有 FullChecksum,并且要求计算之,则补全。
temp, _ := getFileIdentity(id.Filename, r.HeaderSize, buffer, true, provider)
id.HasFullChecksum = true
id.FullChecksum = temp.FullChecksum
r.FullChecksumCount++
}

// 使用文件长度加整体校验和作为 key。这是判断文件是否重复的标准。
key := strconv.FormatInt(id.FileSize, 10) + "_" + checksumToString(id.FullChecksum)
m[key] = append(m[key], id)
}
Expand Down
3 changes: 2 additions & 1 deletion file-diff/test-data/scan.test.config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ compareBase:
loadScanResult: false
# 不要求计算文件摘要。
needFullChecksum: false
compareFullChecksum: true

compareTarget:
dir: "test-data/origin/compare_target"
Expand All @@ -26,8 +27,8 @@ compareTarget:
loadScanResult: false
# 要求计算文件摘要。
needFullChecksum: true

compareFullChecksum: true

backupDir: "test-data/output"
moveMore: false
moveSame: false
Expand Down

0 comments on commit f181009

Please sign in to comment.