Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 44 additions & 6 deletions pkg/huggingface-api/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,15 @@ func (c *Client) SetBaseURL(url string) {
c.baseURL = url
}

// ListFiles lists all files in a HuggingFace repository
func (c *Client) ListFiles(repoID string) ([]FileInfo, error) {
// listFilesInPath lists all files in a specific path of a HuggingFace repository (recursive helper)
func (c *Client) listFilesInPath(repoID, path string) ([]FileInfo, error) {
baseURL := strings.TrimSuffix(c.baseURL, "/api/models")
url := fmt.Sprintf("%s/api/models/%s/tree/main", baseURL, repoID)
var url string
if path == "" {
url = fmt.Sprintf("%s/api/models/%s/tree/main", baseURL, repoID)
} else {
url = fmt.Sprintf("%s/api/models/%s/tree/main/%s", baseURL, repoID, path)
}

req, err := http.NewRequest("GET", url, nil)
if err != nil {
Expand All @@ -173,12 +178,45 @@ func (c *Client) ListFiles(repoID string) ([]FileInfo, error) {
return nil, fmt.Errorf("failed to read response body: %w", err)
}

var files []FileInfo
if err := json.Unmarshal(body, &files); err != nil {
var items []FileInfo
if err := json.Unmarshal(body, &items); err != nil {
return nil, fmt.Errorf("failed to parse JSON response: %w", err)
}

return files, nil
var allFiles []FileInfo
for _, item := range items {
switch item.Type {
// If it's a directory/folder, recursively list its contents
case "directory", "folder":
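// Build the subfolder path.
// NOTE(review): this assumes a nested directory's item.Path is relative to its parent;
// if the API reports directory paths relative to the repository root (as the file paths
// in the test mocks are), this would double the prefix — confirm against the real API.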
subPath := item.Path
if path != "" {
subPath = fmt.Sprintf("%s/%s", path, item.Path)
}

// Recursively get files from subfolder
// The files come back with the paths reported by the API; no prefix is added here
subFiles, err := c.listFilesInPath(repoID, subPath)
if err != nil {
return nil, fmt.Errorf("failed to list files in subfolder %s: %w", subPath, err)
}

allFiles = append(allFiles, subFiles...)
case "file":
// It's a file: keep its path exactly as the API returned it (the prefixing below is disabled)
// if path != "" {
// item.Path = fmt.Sprintf("%s/%s", path, item.Path)
// }
allFiles = append(allFiles, item)
}
}

return allFiles, nil
}

// ListFiles returns every file in a HuggingFace repository, descending into
// subfolders. It starts the recursive listing at the repository root.
func (c *Client) ListFiles(repoID string) ([]FileInfo, error) {
	const repoRoot = "" // an empty path selects the top-level tree
	files, err := c.listFilesInPath(repoID, repoRoot)
	return files, err
}

// GetFileSHA gets the SHA256 checksum for a specific file by searching through the file list
Expand Down
213 changes: 213 additions & 0 deletions pkg/huggingface-api/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,137 @@ var _ = Describe("HuggingFace API Client", func() {
})
})

Context("when listing files with subfolders", func() {
BeforeEach(func() {
// Mock response for root directory with files and a subfolder
mockRootResponse := `[
{
"type": "file",
"path": "README.md",
"size": 5000,
"oid": "readme123"
},
{
"type": "directory",
"path": "subfolder",
"size": 0,
"oid": "dir123"
},
{
"type": "file",
"path": "config.json",
"size": 1000,
"oid": "config123"
}
]`

// Mock response for subfolder directory
mockSubfolderResponse := `[
{
"type": "file",
"path": "subfolder/file.bin",
"size": 2000000,
"oid": "filebin123",
"lfs": {
"oid": "filebin456",
"size": 2000000,
"pointerSize": 135
}
},
{
"type": "directory",
"path": "nested",
"size": 0,
"oid": "nesteddir123"
}
]`

// Mock response for nested subfolder
mockNestedResponse := `[
{
"type": "file",
"path": "subfolder/nested/nested_file.gguf",
"size": 5000000,
"oid": "nested123",
"lfs": {
"oid": "nested456",
"size": 5000000,
"pointerSize": 135
}
}
]`

server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
urlPath := r.URL.Path
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)

if strings.Contains(urlPath, "/tree/main/subfolder/nested") {
w.Write([]byte(mockNestedResponse))
} else if strings.Contains(urlPath, "/tree/main/subfolder") {
w.Write([]byte(mockSubfolderResponse))
} else if strings.Contains(urlPath, "/tree/main") {
w.Write([]byte(mockRootResponse))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))

client.SetBaseURL(server.URL)
})

It("should recursively list all files including those in subfolders", func() {
files, err := client.ListFiles("test/model")

Expect(err).ToNot(HaveOccurred())
Expect(files).To(HaveLen(4))

// Verify root level files
readmeFile := findFileByPath(files, "README.md")
Expect(readmeFile).ToNot(BeNil())
Expect(readmeFile.Size).To(Equal(int64(5000)))
Expect(readmeFile.Oid).To(Equal("readme123"))

configFile := findFileByPath(files, "config.json")
Expect(configFile).ToNot(BeNil())
Expect(configFile.Size).To(Equal(int64(1000)))
Expect(configFile.Oid).To(Equal("config123"))

// Verify subfolder file with relative path
subfolderFile := findFileByPath(files, "subfolder/file.bin")
Expect(subfolderFile).ToNot(BeNil())
Expect(subfolderFile.Size).To(Equal(int64(2000000)))
Expect(subfolderFile.LFS).ToNot(BeNil())
Expect(subfolderFile.LFS.Oid).To(Equal("filebin456"))

// Verify nested subfolder file
nestedFile := findFileByPath(files, "subfolder/nested/nested_file.gguf")
Expect(nestedFile).ToNot(BeNil())
Expect(nestedFile.Size).To(Equal(int64(5000000)))
Expect(nestedFile.LFS).ToNot(BeNil())
Expect(nestedFile.LFS.Oid).To(Equal("nested456"))
})

It("should handle files with correct relative paths", func() {
files, err := client.ListFiles("test/model")

Expect(err).ToNot(HaveOccurred())

// Check that all paths are relative and correct
paths := make([]string, len(files))
for i, file := range files {
paths[i] = file.Path
}

Expect(paths).To(ContainElements(
"README.md",
"config.json",
"subfolder/file.bin",
"subfolder/nested/nested_file.gguf",
))
})
})

Context("when getting file SHA", func() {
BeforeEach(func() {
mockFilesResponse := `[
Expand Down Expand Up @@ -405,6 +536,7 @@ var _ = Describe("HuggingFace API Client", func() {
BeforeEach(func() {
mockFilesResponse := `[
{
"type": "file",
"path": "model-Q4_K_M.gguf",
"size": 1000000,
"oid": "abc123",
Expand All @@ -416,6 +548,7 @@ var _ = Describe("HuggingFace API Client", func() {
}
},
{
"type": "file",
"path": "README.md",
"size": 5000,
"oid": "readme123"
Expand Down Expand Up @@ -538,4 +671,84 @@ var _ = Describe("HuggingFace API Client", func() {
Expect(preferred).To(BeNil())
})
})

// Live check of ListFiles against a real repository: asserts that both root-level
// and subfolder files are returned, that paths are relative, and that only
// "file" entries (no directories) appear in the result.
Context("integration test with real HuggingFace API", func() {
It("should recursively list all files including subfolders from real repository", func() {
// This test makes actual API calls to HuggingFace.
// NOTE(review): nothing in this block skips the test in CI or when the network
// is unavailable; as written it always runs and needs network access.
realClient := hfapi.NewClient()
repoID := "bartowski/Qwen_Qwen3-Next-80B-A3B-Instruct-GGUF"

files, err := realClient.ListFiles(repoID)

Expect(err).ToNot(HaveOccurred())
Expect(files).ToNot(BeEmpty(), "should return at least some files")

// Verify that we get files from subfolders
// Based on the repository structure, there should be files in subfolders like:
// - Qwen_Qwen3-Next-80B-A3B-Instruct-Q4_1/...
// - Qwen_Qwen3-Next-80B-A3B-Instruct-Q5_K_L/...
// etc.
// NOTE(review): these expectations depend on the current layout of the remote
// repository and will fail if it is reorganised.
hasSubfolderFiles := false
rootLevelFiles := 0
subfolderFiles := 0

// A "/" in the path is used as the marker for "lives in a subfolder".
for _, file := range files {
if strings.Contains(file.Path, "/") {
hasSubfolderFiles = true
subfolderFiles++
// Verify the path format is correct (subfolder/file.gguf)
Expect(file.Path).ToNot(HavePrefix("/"), "paths should be relative, not absolute")
Expect(file.Path).ToNot(HaveSuffix("/"), "file paths should not end with /")
} else {
rootLevelFiles++
}
}

Expect(hasSubfolderFiles).To(BeTrue(), "should find files in subfolders")
Expect(rootLevelFiles).To(BeNumerically(">", 0), "should find files at root level")
Expect(subfolderFiles).To(BeNumerically(">", 0), "should find files in subfolders")
// Verify specific expected files exist
// Root level files
readmeFile := findFileByPath(files, "README.md")
Expect(readmeFile).ToNot(BeNil(), "README.md should exist at root level")

// Verify we can find files in subfolders
// Look for any file in a subfolder (the exact structure may vary, can be nested)
foundSubfolderFile := false
for _, file := range files {
if strings.Contains(file.Path, "/") && strings.HasSuffix(file.Path, ".gguf") {
foundSubfolderFile = true
// Verify the path structure: can be nested like subfolder/subfolder/file.gguf
parts := strings.Split(file.Path, "/")
Expect(len(parts)).To(BeNumerically(">=", 2), "subfolder files should have at least subfolder/file.gguf format")
// The last part should be the filename
Expect(parts[len(parts)-1]).To(HaveSuffix(".gguf"), "file in subfolder should be a .gguf file")
Expect(parts[len(parts)-1]).ToNot(BeEmpty(), "filename should not be empty")
// Only the first matching file is inspected.
break
}
}
Expect(foundSubfolderFile).To(BeTrue(), "should find at least one .gguf file in a subfolder")

// Verify file properties are populated
for _, file := range files {
Expect(file.Path).ToNot(BeEmpty(), "file path should not be empty")
Expect(file.Type).To(Equal("file"), "all returned items should be files, not directories")
// Size might be 0 for some files, but OID should be present
// (the OID is only asserted for entries that carry no LFS metadata).
if file.LFS == nil {
Expect(file.Oid).ToNot(BeEmpty(), "file should have an OID if no LFS")
}
}
})
})
})

// findFileByPath scans files for the first entry whose Path equals path and
// returns a pointer to that element of the slice (not a copy), or nil when no
// entry matches.
func findFileByPath(files []hfapi.FileInfo, path string) *hfapi.FileInfo {
	for idx := 0; idx < len(files); idx++ {
		candidate := &files[idx]
		if candidate.Path != path {
			continue
		}
		return candidate
	}
	return nil
}
Loading