From d77a12aee8786f3b59658556c8997aec2a103ae7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto <mudler@users.noreply.github.com> Date: Fri, 3 Oct 2025 09:23:34 +0200 Subject: [PATCH] chore(model gallery): add ibm-granite_granite-4.0-h-small Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com> --- gallery/granite4.yaml | 48 +++++++++++++++++++++++++++++++++++++++++++ gallery/index.yaml | 22 ++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 gallery/granite4.yaml diff --git a/gallery/granite4.yaml b/gallery/granite4.yaml new file mode 100644 index 000000000000..65a870cf3323 --- /dev/null +++ b/gallery/granite4.yaml @@ -0,0 +1,48 @@ +--- +name: "granite-4.0" + +config_file: | + backend: "llama-cpp" + mmap: true + template: + chat_message: | + <|start_of_role|>{{ .RoleName }}<|end_of_role|> + {{ if .FunctionCall -}} + <tool_call> + {{ else if eq .RoleName "tool" -}} + <tool_response> + {{ end -}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + {{ if eq .RoleName "tool" -}} + </tool_response> + {{ end -}} + {{ if .FunctionCall -}} + {{toJson .FunctionCall}} + </tool_call> + {{ end -}} + <|end_of_text|> + function: | + <|start_of_role|>system<|end_of_role|> + You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request. + + Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.
+ {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + For each function call return a json object with function name and arguments + {{.Input -}} + <|start_of_role|>assistant<|end_of_role|> + chat: | + {{.Input -}} + <|start_of_role|>assistant<|end_of_role|> + completion: | + {{.Input}} + context_size: 8192 + f16: true + stopwords: + - '<|im_end|>' + - '<dummy32000>' + - '</s>' + - '<|end_of_text|>' diff --git a/gallery/index.yaml b/gallery/index.yaml index 24d506f62153..8bd9dca96dc9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,26 @@ --- +- &granite4 + url: "github:mudler/LocalAI/gallery/granite4.yaml@master" + name: "ibm-granite_granite-4.0-h-small" + license: apache-2.0 + icon: https://cdn-avatars.huggingface.co/v1/production/uploads/639bcaa2445b133a4e942436/CEW-OjXkRkDNmTxSu8Egh.png + tags: + - gguf + - GPU + - CPU + - text-to-text + urls: + - https://huggingface.co/ibm-granite/granite-4.0-h-small + - https://huggingface.co/bartowski/ibm-granite_granite-4.0-h-small-GGUF + description: | + Granite-4.0-H-Small is a 32B parameter long-context instruct model finetuned from Granite-4.0-H-Small-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. Granite 4.0 instruct models feature improved instruction following (IF) and tool-calling capabilities, making them more effective in enterprise applications.
+ overrides: + parameters: + model: ibm-granite_granite-4.0-h-small-Q4_K_M.gguf + files: + - filename: ibm-granite_granite-4.0-h-small-Q4_K_M.gguf + sha256: c59ce76239bd5794acdbdf88616dfc296247f4e78792a9678d4b3e24966ead69 + uri: huggingface://bartowski/ibm-granite_granite-4.0-h-small-GGUF/ibm-granite_granite-4.0-h-small-Q4_K_M.gguf - &ernie url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "baidu_ernie-4.5-21b-a3b-thinking"