diff --git a/core/gallery/importers/importers_test.go b/core/gallery/importers/importers_test.go index 48fb6ac3d46a..34814fe66c0d 100644 --- a/core/gallery/importers/importers_test.go +++ b/core/gallery/importers/importers_test.go @@ -27,6 +27,48 @@ var _ = Describe("DiscoverModelConfig", func() { Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig)) }) + + It("should discover and import using LlamaCPPImporter", func() { + uri := "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err)) + Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: mmproj/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].Filename).To(Equal("Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + 
Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[1].Filename).To(Equal("mmproj/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig)) + }) + + It("should discover and import using LlamaCPPImporter", func() { + uri := "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF" + preferences := json.RawMessage(`{ "quantizations": "Q8_0", "mmproj_quantizations": "f16" }`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err)) + Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: mmproj/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].Filename).To(Equal("Qwen3VL-2B-Instruct-Q8_0.gguf"), 
fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[1].Filename).To(Equal("mmproj/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig)) + }) }) Context("with .gguf URI", func() { diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go index 65bebf7d1966..f0e3b9e596f6 100644 --- a/core/gallery/importers/llama-cpp.go +++ b/core/gallery/importers/llama-cpp.go @@ -69,7 +69,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) } preferedQuantizations, _ := preferencesMap["quantizations"].(string) - quants := []string{"q4_k_m", "q4_0", "q8_0", "f16"} + quants := []string{"q4_k_m"} if preferedQuantizations != "" { quants = strings.Split(preferedQuantizations, ",") } @@ -100,7 +100,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) Description: description, } - if strings.Contains(details.URI, ".gguf") { + if strings.HasSuffix(details.URI, ".gguf") { cfg.Files = append(cfg.Files, gallery.File{ URI: details.URI, Filename: filepath.Base(details.URI), @@ -111,46 +111,62 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) }, } } else if details.HuggingFace != nil { - lastMMProjFile := gallery.File{} + // We want to: + // First pick the files whose names match the chosen quants, + // OR fall back to the last mmproj/gguf file found + var 
lastMMProjFile *gallery.File + var lastGGUFFile *gallery.File foundPreferedQuant := false + foundPreferedMMprojQuant := false for _, file := range details.HuggingFace.Files { - // get the files of the prefered quants - if slices.ContainsFunc(quants, func(quant string) bool { - return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant)) - }) { - cfg.Files = append(cfg.Files, gallery.File{ - URI: file.URL, - Filename: filepath.Base(file.Path), - SHA256: file.SHA256, - }) - } // Get the mmproj prefered quants if strings.Contains(strings.ToLower(file.Path), "mmproj") { - lastMMProjFile = gallery.File{ + lastMMProjFile = &gallery.File{ URI: file.URL, - Filename: filepath.Base(file.Path), + Filename: filepath.Join("mmproj", filepath.Base(file.Path)), SHA256: file.SHA256, } if slices.ContainsFunc(mmprojQuantsList, func(quant string) bool { return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant)) + }) { + cfg.Files = append(cfg.Files, *lastMMProjFile) + foundPreferedMMprojQuant = true + } + } else if strings.HasSuffix(strings.ToLower(file.Path), "gguf") { + lastGGUFFile = &gallery.File{ + URI: file.URL, + Filename: filepath.Base(file.Path), + SHA256: file.SHA256, + } + // get the files of the prefered quants + if slices.ContainsFunc(quants, func(quant string) bool { + return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant)) }) { foundPreferedQuant = true - cfg.Files = append(cfg.Files, lastMMProjFile) + cfg.Files = append(cfg.Files, *lastGGUFFile) } } } - if !foundPreferedQuant && lastMMProjFile.URI != "" { - cfg.Files = append(cfg.Files, lastMMProjFile) - modelConfig.PredictionOptions = schema.PredictionOptions{ - BasicModelRequest: schema.BasicModelRequest{ - Model: lastMMProjFile.Filename, - }, + // Make sure to add at least one file if not already present (which is the latest one) + if lastMMProjFile != nil && !foundPreferedMMprojQuant { + if !slices.ContainsFunc(cfg.Files, func(f gallery.File) bool { + return 
f.Filename == lastMMProjFile.Filename + }) { + cfg.Files = append(cfg.Files, *lastMMProjFile) + } + } + + if lastGGUFFile != nil && !foundPreferedQuant { + if !slices.ContainsFunc(cfg.Files, func(f gallery.File) bool { + return f.Filename == lastGGUFFile.Filename + }) { + cfg.Files = append(cfg.Files, *lastGGUFFile) } } - // Find first mmproj file + // Find first mmproj file and configure it in the config file for _, file := range cfg.Files { if !strings.Contains(strings.ToLower(file.Filename), "mmproj") { continue @@ -159,7 +175,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) break } - // Find first non-mmproj file + // Find first non-mmproj file and configure it in the config file for _, file := range cfg.Files { if strings.Contains(strings.ToLower(file.Filename), "mmproj") { continue