-
Notifications
You must be signed in to change notification settings - Fork 496
/
weights.go
154 lines (132 loc) · 3.7 KB
/
weights.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
package weights
import (
"os"
"path/filepath"
"sort"
"strings"
)
// Patterns used by isNonModelFiles to recognise files that are not model
// weights.
var (
	// prefixesToIgnore lists the leading strings of paths that are never
	// treated as weights.
	prefixesToIgnore = []string{
		".cog",
		".git",
		"__pycache__",
	}

	// suffixesToIgnore lists the trailing strings (file extensions) of paths
	// that are never treated as weights.
	suffixesToIgnore = []string{
		// Python projects
		".py", ".ipynb", ".whl",
		// images
		".jpg", ".jpeg", ".png", ".webp", ".svg", ".gif", ".avif", ".heic",
		// videos
		".mp4", ".mov", ".avi", ".wmv", ".mkv", ".webm",
		// audio files
		".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a",
		// logs
		".log",
	}
)
// FileWalker is a function that walks the file tree rooted at root, calling
// walkFn for each file or directory in the tree, including root.
//
// Its signature is identical to that of filepath.Walk, so filepath.Walk itself
// (or any other function of the same shape) can be passed wherever a
// FileWalker is expected, e.g. to FindWeights.
type FileWalker func(root string, walkFn filepath.WalkFunc) error
// FindWeights walks the tree rooted at "." using fw and reports where large,
// non-code files are located.
//
// While walking, every non-directory path is classified:
//   - code files (see isCodeFile) are remembered separately;
//   - files smaller than sizeThreshold are skipped;
//   - files matched by isNonModelFiles are skipped;
//   - everything else is a weights candidate.
//
// It returns, in order:
//   - the directories holding candidates, with nested directories collapsed
//     into their already-listed ancestor and then passed through
//     filterDirsContainingCode;
//   - the candidates whose directory is "." or "/";
//   - the first error reported by fw or by the walk callback, in which case
//     both slices are nil.
func FindWeights(fw FileWalker) ([]string, []string, error) {
	var (
		candidates []string
		codeFiles  []string
	)

	visit := func(path string, info os.FileInfo, err error) error {
		switch {
		case err != nil:
			return err
		case info.IsDir():
			return nil
		case isCodeFile(path):
			codeFiles = append(codeFiles, path)
		case info.Size() >= sizeThreshold && !isNonModelFiles(path):
			candidates = append(candidates, path)
		}
		return nil
	}

	if err := fw(".", visit); err != nil {
		return nil, nil, err
	}

	// Shallow paths must come first: getDirsAndRootfiles only drops a
	// directory when one of its ancestors has already been collected,
	// e.g. /a/b/ has to be seen before /a/b/c/ for /a/b/c/ to be dropped.
	sortFilesByLevels(candidates)

	dirs, rootFiles := getDirsAndRootfiles(candidates)
	return filterDirsContainingCode(dirs, codeFiles), rootFiles, nil
}
// isNonModelFiles reports whether path begins with any entry of
// prefixesToIgnore or ends with any entry of suffixesToIgnore. Both checks are
// plain, case-sensitive string comparisons on the path as given.
func isNonModelFiles(path string) bool {
	// matchesAny applies match(path, affix) to each affix and stops at the
	// first hit.
	matchesAny := func(affixes []string, match func(s, affix string) bool) bool {
		for _, affix := range affixes {
			if match(path, affix) {
				return true
			}
		}
		return false
	}

	return matchesAny(prefixesToIgnore, strings.HasPrefix) ||
		matchesAny(suffixesToIgnore, strings.HasSuffix)
}
// sizeThreshold is the minimum size, in bytes, a file must have to be
// considered a weights candidate by FindWeights; smaller files are skipped.
// The value is 10 * 1024 * 1024 bytes (10 MiB).
const sizeThreshold = 10 * 1024 * 1024 // 10MB
// sortFilesByLevels sorts files in place so that shallower paths come first.
//
// Depth is the number of "/"-separated components. Paths of equal depth are
// ordered by comparing their components one by one from left to right, which
// is not the same as comparing the whole strings (e.g. "a/y" sorts before
// "a-b/x" because "a" < "a-b").
func sortFilesByLevels(files []string) {
	less := func(a, b int) bool {
		left := strings.Split(files[a], "/")
		right := strings.Split(files[b], "/")

		if depthL, depthR := len(left), len(right); depthL != depthR {
			return depthL < depthR
		}
		for idx, part := range left {
			if other := right[idx]; part != other {
				return part < other
			}
		}
		// Every component is equal, so neither path sorts before the other.
		return false
	}

	sort.Slice(files, less)
}
// isCodeFile reports whether path names a code file, i.e. whether its
// extension (as returned by filepath.Ext) is exactly ".py" or ".ipynb".
func isCodeFile(path string) bool {
	switch filepath.Ext(path) {
	case ".py", ".ipynb":
		return true
	default:
		return false
	}
}
// filterDirsContainingCode returns the entries of dirs that do not contain any
// of the given code files.
//
// A directory is dropped when at least one path in codeFiles lies beneath it.
// Containment is decided on whole path components: "models" contains
// "models/train.py", but not "models_util/train.py" or "models.py".
//
// The result preserves the order of dirs and is never nil.
func filterDirsContainingCode(dirs []string, codeFiles []string) []string {
	sep := string(filepath.Separator)
	filteredDirs := make([]string, 0, len(dirs))

	for _, dir := range dirs {
		// Terminate the directory with a separator so that only real
		// descendants match; a bare string prefix would also match siblings
		// that merely share leading characters with the directory name.
		prefix := dir
		if !strings.HasSuffix(prefix, sep) {
			prefix += sep
		}

		containsCode := false
		for _, codeFile := range codeFiles {
			if strings.HasPrefix(codeFile, prefix) {
				containsCode = true
				break
			}
		}
		if !containsCode {
			filteredDirs = append(filteredDirs, dir)
		}
	}
	return filteredDirs
}
// getDirsAndRootfiles splits files into the directories that hold them and
// the files that sit directly in the root.
//
// The first result lists each file's directory once, skipping a directory
// when hasParent reports that it is covered by one collected earlier; for
// sub-directories to be skipped, files must therefore list parents before
// children. The second result lists the files whose directory is "." or "/";
// those directories are never added to the first result.
func getDirsAndRootfiles(files []string) ([]string, []string) {
	var (
		weightDirs []string
		topLevel   []string
	)

	for _, file := range files {
		switch parent := filepath.Dir(file); {
		case parent == "." || parent == "/":
			// Large files in the root are reported individually rather than
			// by adding the root directory itself.
			topLevel = append(topLevel, file)
		case !hasParent(parent, weightDirs):
			weightDirs = append(weightDirs, parent)
		}
	}
	return weightDirs, topLevel
}
// hasParent reports whether dir is equal to, or nested beneath, any entry of
// dirs. Both sides are terminated with the OS path separator before the prefix
// comparison, so "a/bc" is not considered a child of "a/b".
func hasParent(dir string, dirs []string) bool {
	sep := string(filepath.Separator)
	child := dir + sep

	for _, candidate := range dirs {
		if strings.HasPrefix(child, candidate+sep) {
			return true
		}
	}
	return false
}