From bfea99ef8f35b489b91a9a8b9b1c80c8b689d98c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 19 Nov 2025 19:19:37 +0100 Subject: [PATCH 1/5] docs Signed-off-by: Ettore Di Giacinto --- .gitmodules | 3 - docs/config.toml | 208 ------------------ docs/content/_index.md | 5 + docs/content/{docs => }/advanced/_index.en.md | 1 + .../{docs => }/advanced/advanced-usage.md | 19 +- .../{docs => }/advanced/fine-tuning.md | 15 +- docs/content/{docs => }/advanced/installer.md | 8 +- .../advanced/model-configuration.md | 6 +- .../{docs => }/advanced/vram-management.md | 22 +- docs/content/{docs => }/faq.md | 4 +- .../{docs => }/features/GPU-acceleration.md | 14 +- docs/content/{docs => }/features/_index.en.md | 4 +- .../{docs => }/features/audio-to-text.md | 0 docs/content/{docs => }/features/backends.md | 2 - .../features/constrained_grammars.md | 4 +- .../features/distributed_inferencing.md | 20 +- .../content/{docs => }/features/embeddings.md | 6 +- .../content/{docs => }/features/gpt-vision.md | 0 .../{docs => }/features/image-generation.md | 8 +- docs/content/{docs => }/features/mcp.md | 3 - .../{docs => }/features/model-gallery.md | 21 +- .../{docs => }/features/object-detection.md | 12 +- .../{docs => }/features/openai-functions.md | 2 - docs/content/{docs => }/features/reranker.md | 4 - docs/content/{docs => }/features/stores.md | 0 .../{docs => }/features/text-generation.md | 36 +-- .../{docs => }/features/text-to-audio.md | 0 .../{docs => }/getting-started/_index.en.md | 3 +- .../{docs => }/getting-started/build.md | 15 -- .../getting-started/container-images.md | 26 +-- .../getting-started/customize-model.md | 12 +- .../{docs => }/getting-started/kubernetes.md | 6 - .../{docs => }/getting-started/models.md | 69 ++---- .../{docs => }/getting-started/quickstart.md | 69 ++---- .../{docs => }/getting-started/try-it-out.md | 14 +- docs/content/installation/_index.en.md | 24 ++ docs/content/installation/build.md | 185 ++++++++++++++++ 
docs/content/installation/kubernetes.md | 31 +++ docs/content/installation/linux.md | 65 ++++++ docs/content/installation/macos.md | 40 ++++ docs/content/{docs => }/integrations.md | 0 docs/content/{docs => }/overview.md | 1 - .../content/{docs => }/reference/_index.en.md | 1 + .../{docs => }/reference/architecture.md | 2 +- docs/content/{docs => }/reference/binaries.md | 4 +- .../{docs => }/reference/cli-reference.md | 29 +-- .../reference/compatibility-table.md | 18 +- .../{docs => }/reference/nvidia-l4t.md | 0 docs/content/{docs => }/whats-new.md | 52 ++--- docs/go.mod | 4 +- docs/go.sum | 2 + docs/hugo.toml | 104 +++++++++ docs/themes/hugo-theme-relearn | 2 +- docs/themes/lotusdocs | 1 - 54 files changed, 619 insertions(+), 587 deletions(-) delete mode 100644 docs/config.toml create mode 100644 docs/content/_index.md rename docs/content/{docs => }/advanced/_index.en.md (92%) rename docs/content/{docs => }/advanced/advanced-usage.md (96%) rename docs/content/{docs => }/advanced/fine-tuning.md (90%) rename docs/content/{docs => }/advanced/installer.md (88%) rename docs/content/{docs => }/advanced/model-configuration.md (98%) rename docs/content/{docs => }/advanced/vram-management.md (84%) rename docs/content/{docs => }/faq.md (95%) rename docs/content/{docs => }/features/GPU-acceleration.md (97%) rename docs/content/{docs => }/features/_index.en.md (67%) rename docs/content/{docs => }/features/audio-to-text.md (100%) rename docs/content/{docs => }/features/backends.md (99%) rename docs/content/{docs => }/features/constrained_grammars.md (98%) rename docs/content/{docs => }/features/distributed_inferencing.md (78%) rename docs/content/{docs => }/features/embeddings.md (97%) rename docs/content/{docs => }/features/gpt-vision.md (100%) rename docs/content/{docs => }/features/image-generation.md (98%) rename docs/content/{docs => }/features/mcp.md (99%) rename docs/content/{docs => }/features/model-gallery.md (97%) rename docs/content/{docs => 
}/features/object-detection.md (90%) rename docs/content/{docs => }/features/openai-functions.md (99%) rename docs/content/{docs => }/features/reranker.md (95%) rename docs/content/{docs => }/features/stores.md (100%) rename docs/content/{docs => }/features/text-generation.md (90%) rename docs/content/{docs => }/features/text-to-audio.md (100%) rename docs/content/{docs => }/getting-started/_index.en.md (73%) rename docs/content/{docs => }/getting-started/build.md (91%) rename docs/content/{docs => }/getting-started/container-images.md (94%) rename docs/content/{docs => }/getting-started/customize-model.md (84%) rename docs/content/{docs => }/getting-started/kubernetes.md (84%) rename docs/content/{docs => }/getting-started/models.md (70%) rename docs/content/{docs => }/getting-started/quickstart.md (74%) rename docs/content/{docs => }/getting-started/try-it-out.md (94%) create mode 100644 docs/content/installation/_index.en.md create mode 100644 docs/content/installation/build.md create mode 100644 docs/content/installation/kubernetes.md create mode 100644 docs/content/installation/linux.md create mode 100644 docs/content/installation/macos.md rename docs/content/{docs => }/integrations.md (100%) rename docs/content/{docs => }/overview.md (99%) rename docs/content/{docs => }/reference/_index.en.md (92%) rename docs/content/{docs => }/reference/architecture.md (96%) rename docs/content/{docs => }/reference/binaries.md (96%) rename docs/content/{docs => }/reference/cli-reference.md (91%) rename docs/content/{docs => }/reference/compatibility-table.md (92%) rename docs/content/{docs => }/reference/nvidia-l4t.md (100%) rename docs/content/{docs => }/whats-new.md (88%) create mode 100644 docs/hugo.toml mode change 100644 => 160000 docs/themes/hugo-theme-relearn delete mode 160000 docs/themes/lotusdocs diff --git a/.gitmodules b/.gitmodules index 2478b3aee084..c263dbe06f80 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule 
"docs/themes/hugo-theme-relearn"] path = docs/themes/hugo-theme-relearn url = https://github.com/McShelby/hugo-theme-relearn.git -[submodule "docs/themes/lotusdocs"] - path = docs/themes/lotusdocs - url = https://github.com/colinwilson/lotusdocs diff --git a/docs/config.toml b/docs/config.toml deleted file mode 100644 index 660f069235ba..000000000000 --- a/docs/config.toml +++ /dev/null @@ -1,208 +0,0 @@ -baseURL = "https://localai.io/" -languageCode = "en-GB" -contentDir = "content" -enableEmoji = true -enableGitInfo = true # N.B. .GitInfo does not currently function with git submodule content directories - -defaultContentLanguage = 'en' - - -[markup] - defaultMarkdownHandler = "goldmark" - [markup.tableOfContents] - endLevel = 3 - startLevel = 1 - [markup.goldmark] - [markup.goldmark.renderer] - unsafe = true # https://jdhao.github.io/2019/12/29/hugo_html_not_shown/ - # [markup.highlight] - # codeFences = false # disables Hugo's default syntax highlighting - # [markup.goldmark.parser] - # [markup.goldmark.parser.attribute] - # block = true - # title = true - - - -[params] - - google_fonts = [ - ["Inter", "300, 400, 600, 700"], - ["Fira Code", "500, 700"] - ] - - sans_serif_font = "Inter" # Default is System font - secondary_font = "Inter" # Default is System font - mono_font = "Fira Code" # Default is System font - - [params.footer] - copyright = "Β© 2023-2025 Ettore Di Giacinto" - version = true # includes git commit info - - [params.social] - github = "mudler/LocalAI" # YOUR_GITHUB_ID or YOUR_GITHUB_URL - twitter = "LocalAI_API" # YOUR_TWITTER_ID - dicord = "uJAeKSAGDy" - # instagram = "colinwilson" # YOUR_INSTAGRAM_ID - rss = true # show rss icon with link - - [params.docs] # Parameters for the /docs 'template' - - logo = "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/core/http/static/logo.png" - logo_text = "" - title = "LocalAI" # default html title for documentation pages/sections - - pathName = "docs" # path name for documentation site 
| default "docs" - - # themeColor = "cyan" # (optional) - Set theme accent colour. Options include: blue (default), green, red, yellow, emerald, cardinal, magenta, cyan - - darkMode = true # enable dark mode option? default false - - prism = true # enable syntax highlighting via Prism - - prismTheme = "solarized-light" # (optional) - Set theme for PrismJS. Options include: lotusdocs (default), solarized-light, twilight, lucario - - # gitinfo - repoURL = "https://github.com/mudler/LocalAI" # Git repository URL for your site [support for GitHub, GitLab, and BitBucket] - repoBranch = "master" - editPage = true # enable 'Edit this page' feature - default false - lastMod = true # enable 'Last modified' date on pages - default false - lastModRelative = true # format 'Last modified' time as relative - default true - - sidebarIcons = true # enable sidebar icons? default false - breadcrumbs = true # default is true - backToTop = true # enable back-to-top button? default true - - # ToC - toc = true # enable table of contents? default is true - tocMobile = true # enable table of contents in mobile view? default is true - scrollSpy = true # enable scrollspy on ToC? default is true - - # front matter - descriptions = true # enable front matter descriptions under content title? - titleIcon = true # enable front matter icon title prefix? default is false - - # content navigation - navDesc = true # include front matter descriptions in Prev/Next navigation cards - navDescTrunc = 30 # Number of characters by which to truncate the Prev/Next descriptions - - listDescTrunc = 100 # Number of characters by which to truncate the list card description - - # Link behaviour - intLinkTooltip = true # Enable a tooltip for internal links that displays info about the destination? default false - # extLinkNewTab = false # Open external links in a new Tab? default true - # logoLinkURL = "" # Set a custom URL destination for the top header logo link. 
- - [params.flexsearch] # Parameters for FlexSearch - enabled = true - # tokenize = "full" - # optimize = true - # cache = 100 - # minQueryChar = 3 # default is 0 (disabled) - # maxResult = 5 # default is 5 - # searchSectionsIndex = [] - - [params.docsearch] # Parameters for DocSearch - # appID = "" # Algolia Application ID - # apiKey = "" # Algolia Search-Only API (Public) Key - # indexName = "" # Index Name to perform search on (or set env variable HUGO_PARAM_DOCSEARCH_indexName) - - [params.analytics] # Parameters for Analytics (Google, Plausible) - # google = "G-XXXXXXXXXX" # Replace with your Google Analytics ID - # plausibleURL = "/docs/s" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleURL) - # plausibleAPI = "/docs/s" # optional - (or set via env variable HUGO_PARAM_ANALYTICS_plausibleAPI) - # plausibleDomain = "" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleDomain) - - # [params.feedback] - # enabled = true - # emoticonTpl = true - # eventDest = ["plausible","google"] - # emoticonEventName = "Feedback" - # positiveEventName = "Positive Feedback" - # negativeEventName = "Negative Feedback" - # positiveFormTitle = "What did you like?" - # negativeFormTitle = "What went wrong?" - # successMsg = "Thank you for helping to improve Lotus Docs' documentation!" - # errorMsg = "Sorry! There was an error while attempting to submit your feedback!" 
- # positiveForm = [ - # ["Accurate", "Accurately describes the feature or option."], - # ["Solved my problem", "Helped me resolve an issue."], - # ["Easy to understand", "Easy to follow and comprehend."], - # ["Something else"] - # ] - # negativeForm = [ - # ["Inaccurate", "Doesn't accurately describe the feature or option."], - # ["Couldn't find what I was looking for", "Missing important information."], - # ["Hard to understand", "Too complicated or unclear."], - # ["Code sample errors", "One or more code samples are incorrect."], - # ["Something else"] - # ] - -[menu] - [[menu.primary]] - name = "Docs" - url = "docs/" - identifier = "docs" - weight = 10 -[[menu.primary]] - name = "Discord" - url = "https://discord.gg/uJAeKSAGDy" - identifier = "discord" - weight = 20 - -[languages] - [languages.en] - title = "LocalAI" - languageName = "English" - weight = 10 -# [languages.fr] -# title = "LocalAI documentation" -# languageName = "FranΓ§ais" -# contentDir = "content/fr" -# weight = 20 -# [languages.de] -# title = "LocalAI documentation" -# languageName = "Deutsch" -# contentDir = "content/de" -# weight = 30 - - - - - -# mounts are only needed in this showcase to access the publicly available screenshots; -# remove this section if you don't need further mounts -[module] - replacements = "github.com/colinwilson/lotusdocs -> lotusdocs" - [[module.mounts]] - source = 'archetypes' - target = 'archetypes' - [[module.mounts]] - source = 'assets' - target = 'assets' - [[module.mounts]] - source = 'content' - target = 'content' - [[module.mounts]] - source = 'data' - target = 'data' - [[module.mounts]] - source = 'i18n' - target = 'i18n' - [[module.mounts]] - source = '../images' - target = 'static/images' - [[module.mounts]] - source = 'layouts' - target = 'layouts' - [[module.mounts]] - source = 'static' - target = 'static' - # uncomment line below for temporary local development of module - # or when using a 'theme' as a git submodule - [[module.imports]] - path = 
"github.com/colinwilson/lotusdocs" - disable = false - [[module.imports]] - path = "github.com/gohugoio/hugo-mod-bootstrap-scss/v5" - disable = false diff --git a/docs/content/_index.md b/docs/content/_index.md new file mode 100644 index 000000000000..2260afe8f043 --- /dev/null +++ b/docs/content/_index.md @@ -0,0 +1,5 @@ ++++ +linktitle = 'Purple Pulpo' +title = 'The Purple Pulpo' +type = 'home' ++++ diff --git a/docs/content/docs/advanced/_index.en.md b/docs/content/advanced/_index.en.md similarity index 92% rename from docs/content/docs/advanced/_index.en.md rename to docs/content/advanced/_index.en.md index bee814b4d947..4804650c4f72 100644 --- a/docs/content/docs/advanced/_index.en.md +++ b/docs/content/advanced/_index.en.md @@ -2,6 +2,7 @@ weight: 20 title: "Advanced" description: "Advanced usage" +type: chapter icon: settings lead: "" date: 2020-10-06T08:49:15+00:00 diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/advanced/advanced-usage.md similarity index 96% rename from docs/content/docs/advanced/advanced-usage.md rename to docs/content/advanced/advanced-usage.md index faefebba0605..7742eb29a874 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/advanced/advanced-usage.md @@ -27,7 +27,7 @@ template: chat: chat ``` -For a complete reference of all available configuration options, see the [Model Configuration]({{%relref "docs/advanced/model-configuration" %}}) page. +For a complete reference of all available configuration options, see the [Model Configuration]({{%relref "advanced/model-configuration" %}}) page. **Configuration File Locations:** @@ -108,7 +108,6 @@ Similarly it can be specified a path to a YAML configuration file containing a l ```yaml - url: https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml name: gpt4all-j -# ... 
``` ### Automatic prompt caching @@ -119,7 +118,6 @@ To enable prompt caching, you can control the settings in the model config YAML ```yaml -# Enable prompt caching prompt_cache_path: "cache" prompt_cache_all: true @@ -131,20 +129,18 @@ prompt_cache_all: true By default LocalAI will try to autoload the model by trying all the backends. This might work for most of models, but some of the backends are NOT configured to autoload. -The available backends are listed in the [model compatibility table]({{%relref "docs/reference/compatibility-table" %}}). +The available backends are listed in the [model compatibility table]({{%relref "reference/compatibility-table" %}}). In order to specify a backend for your models, create a model config file in your `models` directory specifying the backend: ```yaml name: gpt-3.5-turbo -# Default model parameters parameters: # Relative to the models path model: ... backend: llama-stable -# ... ``` ### Connect external backends @@ -183,7 +179,6 @@ make -C backend/python/vllm When LocalAI runs in a container, there are additional environment variables available that modify the behavior of LocalAI on startup: -{{< table "table-responsive" >}} | Environment variable | Default | Description | |----------------------------|---------|------------------------------------------------------------------------------------------------------------| | `REBUILD` | `false` | Rebuild LocalAI on startup | @@ -193,20 +188,17 @@ there are additional environment variables available that modify the behavior of | `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the python environment on start | | `DISABLE_AUTODETECT` | `false` | Disable autodetect of CPU flagset on start | | `LLAMACPP_GRPC_SERVERS` | | A list of llama.cpp workers to distribute the workload. 
For example `LLAMACPP_GRPC_SERVERS="address1:port,address2:port"` | -{{< /table >}} Here is how to configure these variables: ```bash -# Option 1: command line docker run --env REBUILD=true localai -# Option 2: set within an env file docker run --env-file .env localai ``` ### CLI Parameters -For a complete reference of all CLI parameters, environment variables, and command-line options, see the [CLI Reference]({{%relref "docs/reference/cli-reference" %}}) page. +For a complete reference of all CLI parameters, environment variables, and command-line options, see the [CLI Reference]({{%relref "reference/cli-reference" %}}) page. You can control LocalAI with command line arguments to specify a binding address, number of threads, model paths, and many other options. Any command line parameter can be specified via an environment variable. @@ -282,20 +274,17 @@ A list of the environment variable that tweaks parallelism is the following: ### Python backends GRPC max workers ### Default number of workers for GRPC Python backends. ### This actually controls wether a backend can process multiple requests or not. -# PYTHON_GRPC_MAX_WORKERS=1 ### Define the number of parallel LLAMA.cpp workers (Defaults to 1) -# LLAMACPP_PARALLEL=1 ### Enable to run parallel requests -# LOCALAI_PARALLEL_REQUESTS=true ``` Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests. ### VRAM and Memory Management -For detailed information on managing VRAM when running multiple models, see the dedicated [VRAM and Memory Management]({{%relref "docs/advanced/vram-management" %}}) page. +For detailed information on managing VRAM when running multiple models, see the dedicated [VRAM and Memory Management]({{%relref "advanced/vram-management" %}}) page. 
### Disable CPU flagset auto detection in llama.cpp diff --git a/docs/content/docs/advanced/fine-tuning.md b/docs/content/advanced/fine-tuning.md similarity index 90% rename from docs/content/docs/advanced/fine-tuning.md rename to docs/content/advanced/fine-tuning.md index 0310d42ba23f..f6c529bf3dc8 100644 --- a/docs/content/docs/advanced/fine-tuning.md +++ b/docs/content/advanced/fine-tuning.md @@ -5,9 +5,9 @@ title = "Fine-tuning LLMs for text generation" weight = 22 +++ -{{% alert note %}} +{{% notice note %}} Section under construction -{{% /alert %}} + {{% /notice %}} This section covers how to fine-tune a language model for text generation and consume it in LocalAI. @@ -74,12 +74,10 @@ Prepare a dataset, and upload it to your Google Drive in case you are using the ### Install dependencies ```bash -# Install axolotl and dependencies git clone https://github.com/OpenAccess-AI-Collective/axolotl && pushd axolotl && git checkout 797f3dd1de8fd8c0eafbd1c9fdb172abd9ff840a && popd #0.3.0 pip install packaging pushd axolotl && pip install -e '.[flash-attn,deepspeed]' && popd -# https://github.com/oobabooga/text-generation-webui/issues/4238 pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ``` @@ -96,19 +94,16 @@ We will need to configure axolotl. 
In this example is provided a file to use `ax If you have a big dataset, you can pre-tokenize it to speedup the fine-tuning process: ```bash -# Optional pre-tokenize (run only if big dataset) python -m axolotl.cli.preprocess axolotl.yaml ``` Now we are ready to start the fine-tuning process: ```bash -# Fine-tune accelerate launch -m axolotl.cli.train axolotl.yaml ``` After we have finished the fine-tuning, we merge the Lora base with the model: ```bash -# Merge lora python3 -m axolotl.cli.merge_lora axolotl.yaml --lora_model_dir="./qlora-out" --load_in_8bit=False --load_in_4bit=False ``` @@ -116,17 +111,11 @@ And we convert it to the gguf format that LocalAI can consume: ```bash -# Convert to gguf git clone https://github.com/ggerganov/llama.cpp.git pushd llama.cpp && cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release && popd -# We need to convert the pytorch model into ggml for quantization -# It crates 'ggml-model-f16.bin' in the 'merged' directory. pushd llama.cpp && python3 convert_hf_to_gguf.py ../qlora-out/merged && popd -# Start off by making a basic q4_0 4-bit quantization. -# It's important to have 'ggml' in the name of the quant for some -# software to recognize it's file format. pushd llama.cpp/build/bin && ./llama-quantize ../../../qlora-out/merged/Merged-33B-F16.gguf \ ../../../custom-model-q4_0.gguf q4_0 diff --git a/docs/content/docs/advanced/installer.md b/docs/content/advanced/installer.md similarity index 88% rename from docs/content/docs/advanced/installer.md rename to docs/content/advanced/installer.md index cd84c9f7daa8..9ac6f2d039f3 100644 --- a/docs/content/docs/advanced/installer.md +++ b/docs/content/advanced/installer.md @@ -29,10 +29,10 @@ List of the Environment Variables: | **THREADS** | Number of processor threads the application should use. Defaults to the number of logical cores minus one. | | **VERSION** | Specifies the version of LocalAI to install. Defaults to the latest available version. 
| | **MODELS_PATH** | Directory path where LocalAI models are stored (default is /usr/share/local-ai/models). | -| **P2P_TOKEN** | Token to use for the federation or for starting workers see [documentation]({{%relref "docs/features/distributed_inferencing" %}}) | -| **WORKER** | Set to "true" to make the instance a worker (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) | -| **FEDERATED** | Set to "true" to share the instance with the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) | -| **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (p2p token is required see [documentation]({{%relref "docs/features/distributed_inferencing" %}})) | +| **P2P_TOKEN** | Token to use for the federation or for starting workers see [documentation]({{%relref "features/distributed_inferencing" %}}) | +| **WORKER** | Set to "true" to make the instance a worker (p2p token is required see [documentation]({{%relref "features/distributed_inferencing" %}})) | +| **FEDERATED** | Set to "true" to share the instance with the federation (p2p token is required see [documentation]({{%relref "features/distributed_inferencing" %}})) | +| **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (p2p token is required see [documentation]({{%relref "features/distributed_inferencing" %}})) | ## Image Selection diff --git a/docs/content/docs/advanced/model-configuration.md b/docs/content/advanced/model-configuration.md similarity index 98% rename from docs/content/docs/advanced/model-configuration.md rename to docs/content/advanced/model-configuration.md index 975c6fb1d091..b4d3b3e9ee6c 100644 --- a/docs/content/docs/advanced/model-configuration.md +++ b/docs/content/advanced/model-configuration.md @@ -498,7 +498,7 @@ feature_flags: ## Related Documentation -- 
See [Advanced Usage]({{%relref "docs/advanced/advanced-usage" %}}) for other configuration options -- See [Prompt Templates]({{%relref "docs/advanced/advanced-usage#prompt-templates" %}}) for template examples -- See [CLI Reference]({{%relref "docs/reference/cli-reference" %}}) for command-line options +- See [Advanced Usage]({{%relref "advanced/advanced-usage" %}}) for other configuration options +- See [Prompt Templates]({{%relref "advanced/advanced-usage#prompt-templates" %}}) for template examples +- See [CLI Reference]({{%relref "reference/cli-reference" %}}) for command-line options diff --git a/docs/content/docs/advanced/vram-management.md b/docs/content/advanced/vram-management.md similarity index 84% rename from docs/content/docs/advanced/vram-management.md rename to docs/content/advanced/vram-management.md index 986b80c100c4..a9f846ef9f16 100644 --- a/docs/content/docs/advanced/vram-management.md +++ b/docs/content/advanced/vram-management.md @@ -23,10 +23,8 @@ The simplest approach is to ensure only one model is loaded at a time. 
When a ne ### Configuration ```bash -# Via command line ./local-ai --single-active-backend -# Via environment variable LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai ``` @@ -39,13 +37,10 @@ LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai ### Example ```bash -# Start LocalAI with single active backend LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai -# First request loads model A curl http://localhost:8080/v1/chat/completions -d '{"model": "model-a", ...}' -# Second request automatically unloads model A and loads model B curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}' ``` @@ -60,13 +55,10 @@ The idle watchdog monitors models that haven't been used for a specified period #### Configuration ```bash -# Enable idle watchdog with default timeout (15 minutes) LOCALAI_WATCHDOG_IDLE=true ./local-ai -# Customize the idle timeout (e.g., 10 minutes) LOCALAI_WATCHDOG_IDLE=true LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m ./local-ai -# Via command line ./local-ai --enable-watchdog-idle --watchdog-idle-timeout=10m ``` @@ -77,13 +69,10 @@ The busy watchdog monitors models that have been processing requests for an unus #### Configuration ```bash -# Enable busy watchdog with default timeout (5 minutes) LOCALAI_WATCHDOG_BUSY=true ./local-ai -# Customize the busy timeout (e.g., 10 minutes) LOCALAI_WATCHDOG_BUSY=true LOCALAI_WATCHDOG_BUSY_TIMEOUT=10m ./local-ai -# Via command line ./local-ai --enable-watchdog-busy --watchdog-busy-timeout=10m ``` @@ -117,19 +106,15 @@ Or using command line flags: ### Example ```bash -# Start LocalAI with both watchdogs enabled LOCALAI_WATCHDOG_IDLE=true \ LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m \ LOCALAI_WATCHDOG_BUSY=true \ LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m \ ./local-ai -# Load multiple models curl http://localhost:8080/v1/chat/completions -d '{"model": "model-a", ...}' curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}' -# After 10 minutes of inactivity, model-a will be automatically unloaded -# If a model gets stuck 
processing for more than 5 minutes, it will be terminated ``` ### Timeout Format @@ -154,7 +139,6 @@ LocalAI cannot reliably estimate VRAM usage of new models to load across differe If automatic management doesn't meet your needs, you can manually stop models using the LocalAI management API: ```bash -# Stop a specific model curl -X POST http://localhost:8080/backend/shutdown \ -H "Content-Type: application/json" \ -d '{"model": "model-name"}' @@ -172,7 +156,7 @@ To stop all models, you'll need to call the endpoint for each loaded model indiv ## Related Documentation -- See [Advanced Usage]({{%relref "docs/advanced/advanced-usage" %}}) for other configuration options -- See [GPU Acceleration]({{%relref "docs/features/GPU-acceleration" %}}) for GPU setup and configuration -- See [Backend Flags]({{%relref "docs/advanced/advanced-usage#backend-flags" %}}) for all available backend configuration options +- See [Advanced Usage]({{%relref "advanced/advanced-usage" %}}) for other configuration options +- See [GPU Acceleration]({{%relref "features/GPU-acceleration" %}}) for GPU setup and configuration +- See [Backend Flags]({{%relref "advanced/advanced-usage#backend-flags" %}}) for all available backend configuration options diff --git a/docs/content/docs/faq.md b/docs/content/faq.md similarity index 95% rename from docs/content/docs/faq.md rename to docs/content/faq.md index 475b0efeb6a3..afb6459e3b00 100644 --- a/docs/content/docs/faq.md +++ b/docs/content/faq.md @@ -46,7 +46,7 @@ Model sizes vary significantly depending on the model and quantization level: ### Benchmarking LocalAI and llama.cpp shows different results! -LocalAI applies a set of defaults when loading models with the llama.cpp backend, one of these is mirostat sampling - while it achieves better results, it slows down the inference. You can disable this by setting `mirostat: 0` in the model config file. 
See also the advanced section ({{%relref "docs/advanced/advanced-usage" %}}) for more information and [this issue](https://github.com/mudler/LocalAI/issues/2780). +LocalAI applies a set of defaults when loading models with the llama.cpp backend, one of these is mirostat sampling - while it achieves better results, it slows down the inference. You can disable this by setting `mirostat: 0` in the model config file. See also the advanced section ({{%relref "advanced/advanced-usage" %}}) for more information and [this issue](https://github.com/mudler/LocalAI/issues/2780). ### What's the difference with Serge, or XXX? @@ -66,7 +66,7 @@ Yes! If the client uses OpenAI and supports setting a different base URL to send ### Can this leverage GPUs? -There is GPU support, see {{%relref "docs/features/GPU-acceleration" %}}. +There is GPU support, see {{%relref "features/GPU-acceleration" %}}. ### Where is the webUI? diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/features/GPU-acceleration.md similarity index 97% rename from docs/content/docs/features/GPU-acceleration.md rename to docs/content/features/GPU-acceleration.md index 00bdfbdae9ee..d8bfd7d1897c 100644 --- a/docs/content/docs/features/GPU-acceleration.md +++ b/docs/content/features/GPU-acceleration.md @@ -5,15 +5,15 @@ weight = 9 url = "/features/gpu-acceleration/" +++ -{{% alert context="warning" %}} +{{% notice context="warning" %}} Section under construction -{{% /alert %}} + {{% /notice %}} This section contains instruction on how to use LocalAI with GPU acceleration. 
-{{% alert icon="⚑" context="warning" %}} -For acceleration for AMD or Metal HW is still in development, for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) -{{% /alert %}} +{{% notice icon="⚑" context="warning" %}} +For acceleration for AMD or Metal HW is still in development, for additional details see the [build]({{%relref "getting-started/build#Acceleration" %}}) + {{% /notice %}} ## Automatic Backend Detection @@ -32,7 +32,6 @@ Depending on the model architecture and backend used, there might be different w ```yaml name: my-model-name -# Default model parameters parameters: # Relative to the models path model: llama.cpp-model.ggmlv3.q5_K_M.bin @@ -124,7 +123,7 @@ llama_init_from_file: kv self size = 512.00 MB There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation. -Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation. +Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "getting-started/build#Acceleration" %}}) documentation. ### Requirements @@ -181,7 +180,6 @@ The devices in the following list have been tested with `hipblas` images running The following are examples of the ROCm specific configuration elements required. 
```yaml -# docker-compose.yaml # For full functionality select a non-'core' image, version locking the image is recommended for debug purposes. image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas environment: diff --git a/docs/content/docs/features/_index.en.md b/docs/content/features/_index.en.md similarity index 67% rename from docs/content/docs/features/_index.en.md rename to docs/content/features/_index.en.md index ddd106f5a858..3999d8df1953 100644 --- a/docs/content/docs/features/_index.en.md +++ b/docs/content/features/_index.en.md @@ -1,8 +1,8 @@ - +++ disableToc = false title = "Features" weight = 8 -icon = "feature_search" +icon = "lightbulb" +type = "chapter" url = "/features/" +++ diff --git a/docs/content/docs/features/audio-to-text.md b/docs/content/features/audio-to-text.md similarity index 100% rename from docs/content/docs/features/audio-to-text.md rename to docs/content/features/audio-to-text.md diff --git a/docs/content/docs/features/backends.md b/docs/content/features/backends.md similarity index 99% rename from docs/content/docs/features/backends.md rename to docs/content/features/backends.md index 2c80b91a1080..9da6409935bc 100644 --- a/docs/content/docs/features/backends.md +++ b/docs/content/features/backends.md @@ -5,7 +5,6 @@ weight: 4 url: "/backends/" --- -# Backends LocalAI supports a variety of backends that can be used to run different types of AI models. There are core Backends which are included, and there are containerized applications that provide the runtime environment for specific model types, such as LLMs, diffusion models, or text-to-speech models. @@ -53,7 +52,6 @@ Where URI is the path to an OCI container image. A backend gallery is a collection of YAML files, each defining a backend. 
Here's an example structure: ```yaml -# backends/llm-backend.yaml name: "llm-backend" description: "A backend for running LLM models" uri: "quay.io/username/llm-backend:latest" diff --git a/docs/content/docs/features/constrained_grammars.md b/docs/content/features/constrained_grammars.md similarity index 98% rename from docs/content/docs/features/constrained_grammars.md rename to docs/content/features/constrained_grammars.md index 5ffa3a231cb2..d91a22fead54 100644 --- a/docs/content/docs/features/constrained_grammars.md +++ b/docs/content/features/constrained_grammars.md @@ -9,9 +9,9 @@ url = "/features/constrained_grammars/" The `chat` endpoint supports the `grammar` parameter, which allows users to specify a grammar in Backus-Naur Form (BNF). This feature enables the Large Language Model (LLM) to generate outputs adhering to a user-defined schema, such as `JSON`, `YAML`, or any other format that can be defined using BNF. For more details about BNF, see [Backus-Naur Form on Wikipedia](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form). -{{% alert note %}} +{{% notice note %}} **Compatibility Notice:** This feature is only supported by models that use the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend. For a complete list of compatible models, refer to the [Model Compatibility](docs/reference/compatibility-table) page. For technical details, see the related pull requests: [PR #1773](https://github.com/ggerganov/llama.cpp/pull/1773) and [PR #1887](https://github.com/ggerganov/llama.cpp/pull/1887). 
-{{% /alert %}} + {{% /notice %}} ## Setup diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/features/distributed_inferencing.md similarity index 78% rename from docs/content/docs/features/distributed_inferencing.md rename to docs/content/features/distributed_inferencing.md index a3837633154c..48f99477f53a 100644 --- a/docs/content/docs/features/distributed_inferencing.md +++ b/docs/content/features/distributed_inferencing.md @@ -49,11 +49,11 @@ The instructions are displayed in the "Swarm" section of the WebUI, guiding you ### Workers mode -{{% alert note %}} +{{% notice note %}} This feature is available exclusively with llama-cpp compatible models. This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829). -{{% /alert %}} + {{% /notice %}} To connect multiple workers to a single LocalAI instance, start first a server in p2p mode: @@ -90,7 +90,6 @@ Use the WebUI to guide you in the process of starting new workers. This example ```bash ./local-ai run --p2p -# Get the token in the Swarm section of the WebUI ``` Copy the token from the WebUI or via API call (e.g., `curl http://localhost:8000/p2p/token`) and save it for later use. 
@@ -101,19 +100,6 @@ To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKE ```bash TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc --llama-cpp-args="-m " -# 1:06AM INF loading environment variables from file envFile=.env -# 1:06AM INF Setting logging to info -# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"} -# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:295","message":" go-libp2p resource manager protection enabled"} -# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:409","message":"max connections: 100\n"} -# 1:06AM INF Starting llama-cpp-rpc-server on '127.0.0.1:34371' -# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"node/node.go:118","message":" Starting EdgeVPN network"} -# create_backend: using CPU backend -# Starting RPC server on 127.0.0.1:34371, backend memory: 31913 MB -# 2024/05/19 01:06:01 failed to sufficiently increase receive buffer size (was: 208 kiB, wanted: 2048 kiB, got: 416 kiB). # See https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes for details. 
-# {"level":"INFO","time":"2024-05-19T01:06:01.805+0200","caller":"node/node.go:172","message":" Node ID: 12D3KooWJ7WQAbCWKfJgjw2oMMGGss9diw3Sov5hVWi8t4DMgx92"} -# {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"node/node.go:173","message":" Node Addresses: [/ip4/127.0.0.1/tcp/44931 /ip4/127.0.0.1/udp/33251/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip4/127.0.0.1/udp/35660/quic-v1 /ip4/192.168.68.110/tcp/44931 /ip4/192.168.68.110/udp/33251/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip4/192.168.68.110/udp/35660/quic-v1 /ip6/::1/tcp/41289 /ip6/::1/udp/33160/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip6/::1/udp/35701/quic-v1]"} -# {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"discovery/dht.go:104","message":" Bootstrapping DHT"} ``` (Note: You can also supply the token via command-line arguments) @@ -129,7 +115,6 @@ The server logs should indicate that new workers are being discovered. 
There are options that can be tweaked or parameters that can be set using environment variables -{{< table "table-responsive" >}} | Environment Variable | Description | |----------------------|-------------| | **LOCALAI_P2P** | Set to "true" to enable p2p | @@ -143,7 +128,6 @@ There are options that can be tweaked or parameters that can be set using enviro | **LOCALAI_P2P_TOKEN** | Set the token for the p2p network | | **LOCALAI_P2P_LOGLEVEL** | Set the loglevel for the LocalAI p2p stack (default: info) | | **LOCALAI_P2P_LIB_LOGLEVEL** | Set the loglevel for the underlying libp2p stack (default: fatal) | -{{< /table >}} ## Architecture diff --git a/docs/content/docs/features/embeddings.md b/docs/content/features/embeddings.md similarity index 97% rename from docs/content/docs/features/embeddings.md rename to docs/content/features/embeddings.md index e6464634fd67..b00a45897eb9 100644 --- a/docs/content/docs/features/embeddings.md +++ b/docs/content/features/embeddings.md @@ -24,7 +24,6 @@ parameters: model: backend: "" embeddings: true -# .. other parameters ``` ## Huggingface embeddings @@ -41,7 +40,7 @@ parameters: The `sentencetransformers` backend uses Python [sentence-transformers](https://github.com/UKPLab/sentence-transformers). For a list of all pre-trained models available see here: https://github.com/UKPLab/sentence-transformers#pre-trained-models -{{% alert note %}} +{{% notice note %}} - The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers you are good to go and should be already configured for use. - For local execution, you also have to specify the extra backend in the `EXTERNAL_GRPC_BACKENDS` environment variable. @@ -49,7 +48,7 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g - The `sentencetransformers` backend does support only embeddings of text, and not of tokens. 
If you need to embed tokens you can use the `bert` backend or `llama.cpp`. - No models are required to be downloaded before using the `sentencetransformers` backend. The models will be downloaded automatically the first time the API is used. -{{% /alert %}} + {{% /notice %}} ## Llama.cpp embeddings @@ -61,7 +60,6 @@ backend: llama-cpp embeddings: true parameters: model: ggml-file.bin -# ... ``` Then you can use the API to generate embeddings: diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/features/gpt-vision.md similarity index 100% rename from docs/content/docs/features/gpt-vision.md rename to docs/content/features/gpt-vision.md diff --git a/docs/content/docs/features/image-generation.md b/docs/content/features/image-generation.md similarity index 98% rename from docs/content/docs/features/image-generation.md rename to docs/content/features/image-generation.md index 71f1153fe1ff..c0340b61b1bc 100644 --- a/docs/content/docs/features/image-generation.md +++ b/docs/content/features/image-generation.md @@ -18,7 +18,6 @@ OpenAI docs: https://platform.openai.com/docs/api-reference/images/create To generate an image you can send a POST request to the `/v1/images/generations` endpoint with the instruction as the request body: ```bash -# 512x512 is supported too curl http://localhost:8080/v1/images/generations -H "Content-Type: application/json" -d '{ "prompt": "A cute baby sea otter", "size": "256x256" @@ -92,7 +91,6 @@ parameters: model: Linaqruf/animagine-xl backend: diffusers -# Force CPU usage - set to true for GPU f16: false diffusers: cuda: false # Enable for GPU usage (CUDA) @@ -101,7 +99,7 @@ diffusers: #### Dependencies -This is an extra backend - in the container is already available and there is nothing to do for the setup. Do not use *core* images (ending with `-core`). If you are building manually, see the [build instructions]({{%relref "docs/getting-started/build" %}}). 
+This is an extra backend - in the container is already available and there is nothing to do for the setup. Do not use *core* images (ending with `-core`). If you are building manually, see the [build instructions]({{%relref "getting-started/build" %}}). #### Model setup @@ -205,7 +203,6 @@ Additional arbitrarly parameters can be specified in the option field in key/val ```yaml name: animagine-xl -# ... options: - "cfg_scale:6" ``` @@ -293,7 +290,6 @@ parameters: model: stabilityai/stable-diffusion-2-depth backend: diffusers step: 50 -# Force CPU usage f16: true cuda: true diffusers: @@ -317,7 +313,6 @@ parameters: model: stabilityai/stable-video-diffusion-img2vid backend: diffusers step: 25 -# Force CPU usage f16: true cuda: true diffusers: @@ -337,7 +332,6 @@ parameters: model: damo-vilab/text-to-video-ms-1.7b backend: diffusers step: 25 -# Force CPU usage f16: true cuda: true diffusers: diff --git a/docs/content/docs/features/mcp.md b/docs/content/features/mcp.md similarity index 99% rename from docs/content/docs/features/mcp.md rename to docs/content/features/mcp.md index 2d0fe74f39dc..c8f869f72ef6 100644 --- a/docs/content/docs/features/mcp.md +++ b/docs/content/features/mcp.md @@ -7,7 +7,6 @@ tags = ["MCP", "Agents", "Tools", "Advanced"] categories = ["Features"] +++ -# Model Context Protocol (MCP) Support LocalAI now supports the **Model Context Protocol (MCP)**, enabling powerful agentic capabilities by connecting AI models to external tools and services. This feature allows your LocalAI models to interact with various MCP servers, providing access to real-time data, APIs, and specialized tools. 
@@ -43,7 +42,6 @@ backend: llama-cpp parameters: model: qwen3-4b.gguf -# MCP Configuration mcp: remote: | { @@ -79,7 +77,6 @@ mcp: } } -# Agent Configuration agent: max_attempts: 3 # Maximum number of tool execution attempts max_iterations: 3 # Maximum number of reasoning iterations diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/features/model-gallery.md similarity index 97% rename from docs/content/docs/features/model-gallery.md rename to docs/content/features/model-gallery.md index 6943866a7fa7..6847f4451387 100644 --- a/docs/content/docs/features/model-gallery.md +++ b/docs/content/features/model-gallery.md @@ -14,13 +14,13 @@ A list of the models available can also be browsed at [the Public LocalAI Galler LocalAI to ease out installations of models provide a way to preload models on start and downloading and installing them in runtime. You can install models manually by copying them over the `models` directory, or use the API or the Web interface to configure, download and verify the model assets for you. -{{% alert note %}} +{{% notice note %}} The models in this gallery are not directly maintained by LocalAI. If you find a model that is not working, please open an issue on the model gallery repository. -{{% /alert %}} + {{% /notice %}} -{{% alert note %}} +{{% notice note %}} GPT and text generation models might have a license which is not permissive for commercial use or might be questionable or without any license at all. Please check the model license before using it. The official gallery contains only open licensed models. -{{% /alert %}} + {{% /notice %}} ![output](https://github.com/mudler/LocalAI/assets/2420543/7b16676e-d5b1-4c97-89bd-9fa5065c21ad) @@ -68,10 +68,10 @@ where `github:mudler/localai/gallery/index.yaml` will be expanded automatically Note: the url are expanded automatically for `github` and `huggingface`, however `https://` and `http://` prefix works as well. 
-{{% alert note %}} +{{% notice note %}} If you want to build your own gallery, there is no documentation yet. However you can find the source of the default gallery in the [LocalAI repository](https://github.com/mudler/LocalAI/tree/master/gallery). -{{% /alert %}} + {{% /notice %}} ### List Models @@ -85,13 +85,10 @@ curl http://localhost:8080/models/available To search for a model, you can use `jq`: ```bash -# Get all information about models with a name that contains "replit" curl http://localhost:8080/models/available | jq '.[] | select(.name | contains("replit"))' -# Get the binary name of all local models (not hosted on Hugging Face) curl http://localhost:8080/models/available | jq '.[] | .name | select(contains("localmodels"))' -# Get all of the model URLs that contains "orca" curl http://localhost:8080/models/available | jq '.[] | .urls | select(. != null) | add | select(contains("orca"))' ``` @@ -124,11 +121,9 @@ LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ "config_url": "" }' -# or if from a repository curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ "id": "@" }' -# or from a gallery config curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ "url": "" }' @@ -199,7 +194,7 @@ YAML: -{{% alert note %}} +{{% notice note %}} You can find already some open licensed models in the [LocalAI gallery](https://github.com/mudler/LocalAI/tree/master/gallery). 
@@ -223,7 +218,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ -{{% /alert %}} + {{% /notice %}} ### Override a model name diff --git a/docs/content/docs/features/object-detection.md b/docs/content/features/object-detection.md similarity index 90% rename from docs/content/docs/features/object-detection.md rename to docs/content/features/object-detection.md index 5126e3001d04..560310d9828e 100644 --- a/docs/content/docs/features/object-detection.md +++ b/docs/content/features/object-detection.md @@ -117,7 +117,7 @@ The RF-DETR backend is implemented as a Python-based gRPC service that integrate #### Available Models -Currently, the following model is available in the [Model Gallery]({{%relref "docs/features/model-gallery" %}}): +Currently, the following model is available in the [Model Gallery]({{%relref "features/model-gallery" %}}): - **rfdetr-base**: Base model with balanced performance and accuracy @@ -128,7 +128,6 @@ You can browse and install this model through the LocalAI web interface or using ### Basic Object Detection ```bash -# Detect objects in an image from URL curl -X POST http://localhost:8080/v1/detection \ -H "Content-Type: application/json" \ -d '{ @@ -140,7 +139,6 @@ curl -X POST http://localhost:8080/v1/detection \ ### Base64 Image Detection ```bash -# Convert image to base64 and send base64_image=$(base64 -w 0 image.jpg) curl -X POST http://localhost:8080/v1/detection \ -H "Content-Type: application/json" \ @@ -187,7 +185,7 @@ Additional object detection models and backends will be added to this category i ## Related Features -- [🎨 Image generation]({{%relref "docs/features/image-generation" %}}): Generate images with AI -- [πŸ“– Text generation]({{%relref "docs/features/text-generation" %}}): Generate text with language models -- [πŸ” GPT Vision]({{%relref "docs/features/gpt-vision" %}}): Analyze images with language models -- [πŸš€ GPU acceleration]({{%relref "docs/features/GPU-acceleration" %}}): Optimize 
performance with GPU acceleration \ No newline at end of file +- [🎨 Image generation]({{%relref "features/image-generation" %}}): Generate images with AI +- [πŸ“– Text generation]({{%relref "features/text-generation" %}}): Generate text with language models +- [πŸ” GPT Vision]({{%relref "features/gpt-vision" %}}): Analyze images with language models +- [πŸš€ GPU acceleration]({{%relref "features/GPU-acceleration" %}}): Optimize performance with GPU acceleration \ No newline at end of file diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/features/openai-functions.md similarity index 99% rename from docs/content/docs/features/openai-functions.md rename to docs/content/features/openai-functions.md index 2f0c8c419f88..f4a20ab6ca42 100644 --- a/docs/content/docs/features/openai-functions.md +++ b/docs/content/features/openai-functions.md @@ -42,8 +42,6 @@ To use the functions with the OpenAI client in python: ```python from openai import OpenAI -# ... -# Send the conversation and available functions to GPT messages = [{"role": "user", "content": "What is the weather like in Beijing now?"}] tools = [ { diff --git a/docs/content/docs/features/reranker.md b/docs/content/features/reranker.md similarity index 95% rename from docs/content/docs/features/reranker.md rename to docs/content/features/reranker.md index 4bc01a7f2ddd..8cdb18367e5f 100644 --- a/docs/content/docs/features/reranker.md +++ b/docs/content/features/reranker.md @@ -25,10 +25,6 @@ backend: rerankers parameters: model: cross-encoder -# optionally: -# type: flashrank -# diffusers: -# pipeline_type: en # to specify the english language ``` and test it with: diff --git a/docs/content/docs/features/stores.md b/docs/content/features/stores.md similarity index 100% rename from docs/content/docs/features/stores.md rename to docs/content/features/stores.md diff --git a/docs/content/docs/features/text-generation.md b/docs/content/features/text-generation.md similarity index 90% rename from 
docs/content/docs/features/text-generation.md rename to docs/content/features/text-generation.md index 70c2c7524b2e..2c339182f8db 100644 --- a/docs/content/docs/features/text-generation.md +++ b/docs/content/features/text-generation.md @@ -6,7 +6,7 @@ weight = 10 url = "/features/text-generation/" +++ -LocalAI supports generating text with GPT with `llama.cpp` and other backends (such as `rwkv.cpp` as ) see also the [Model compatibility]({{%relref "docs/reference/compatibility-table" %}}) for an up-to-date list of the supported model families. +LocalAI supports generating text with GPT with `llama.cpp` and other backends (such as `rwkv.cpp` as ) see also the [Model compatibility]({{%relref "reference/compatibility-table" %}}) for an up-to-date list of the supported model families. Note: @@ -82,19 +82,19 @@ RWKV support is available through llama.cpp (see below) [llama.cpp](https://github.com/ggerganov/llama.cpp) is a popular port of Facebook's LLaMA model in C/C++. -{{% alert note %}} +{{% notice note %}} The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify, use a LocalAI version older than v2.25.0. For `gguf` models, use the `llama` backend. The go backend is deprecated as well but still available as `go-llama`. 
-{{% /alert %}} + {{% /notice %}} #### Features The `llama.cpp` model supports the following features: -- [πŸ“– Text generation (GPT)]({{%relref "docs/features/text-generation" %}}) -- [🧠 Embeddings]({{%relref "docs/features/embeddings" %}}) -- [πŸ”₯ OpenAI functions]({{%relref "docs/features/openai-functions" %}}) -- [✍️ Constrained grammars]({{%relref "docs/features/constrained_grammars" %}}) +- [πŸ“– Text generation (GPT)]({{%relref "features/text-generation" %}}) +- [🧠 Embeddings]({{%relref "features/embeddings" %}}) +- [πŸ”₯ OpenAI functions]({{%relref "features/openai-functions" %}}) +- [✍️ Constrained grammars]({{%relref "features/constrained_grammars" %}}) #### Setup @@ -104,7 +104,7 @@ LocalAI supports `llama.cpp` models out of the box. You can use the `llama.cpp` It is sufficient to copy the `ggml` or `gguf` model files in the `models` folder. You can refer to the model in the `model` parameter in the API calls. -[You can optionally create an associated YAML]({{%relref "docs/advanced" %}}) model config file to tune the model's parameters or apply a template to the prompt. +[You can optionally create an associated YAML]({{%relref "advanced" %}}) model config file to tune the model's parameters or apply a template to the prompt. Prompt templates are useful for models that are fine-tuned towards a specific prompt. @@ -124,7 +124,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso LocalAI will automatically download and configure the model in the `model` directory. -Models can be also preloaded or downloaded on demand. To learn about model galleries, check out the [model gallery documentation]({{%relref "docs/features/model-gallery" %}}). +Models can be also preloaded or downloaded on demand. To learn about model galleries, check out the [model gallery documentation]({{%relref "features/model-gallery" %}}). 
#### YAML configuration @@ -189,8 +189,6 @@ name: exllama parameters: model: WizardLM-7B-uncensored-GPTQ backend: exllama -# Note: you can also specify "exllama2" if it's an exllama2 model here -# ... ``` Test with: @@ -220,22 +218,6 @@ backend: vllm parameters: model: "facebook/opt-125m" -# Uncomment to specify a quantization method (optional) -# quantization: "awq" -# Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%) -# gpu_memory_utilization: 0.5 -# Uncomment to trust remote code from huggingface -# trust_remote_code: true -# Uncomment to enable eager execution -# enforce_eager: true -# Uncomment to specify the size of the CPU swap space per GPU (in GiB) -# swap_space: 2 -# Uncomment to specify the maximum length of a sequence (including prompt and output) -# max_model_len: 32768 -# Uncomment and specify the number of Tensor divisions. -# Allows you to partition and run large models. Performance gains are limited. -# https://github.com/vllm-project/vllm/issues/1435 -# tensor_parallel_size: 2 ``` The backend will automatically download the required files in order to run the model. 
diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/features/text-to-audio.md similarity index 100% rename from docs/content/docs/features/text-to-audio.md rename to docs/content/features/text-to-audio.md diff --git a/docs/content/docs/getting-started/_index.en.md b/docs/content/getting-started/_index.en.md similarity index 73% rename from docs/content/docs/getting-started/_index.en.md rename to docs/content/getting-started/_index.en.md index c115734c7691..fe96a3ff2475 100644 --- a/docs/content/docs/getting-started/_index.en.md +++ b/docs/content/getting-started/_index.en.md @@ -2,6 +2,7 @@ +++ disableToc = false title = "Getting started" -weight = 2 +weight = 3 icon = "rocket_launch" +type = "chapter" +++ diff --git a/docs/content/docs/getting-started/build.md b/docs/content/getting-started/build.md similarity index 91% rename from docs/content/docs/getting-started/build.md rename to docs/content/getting-started/build.md index feef50826fb2..b2ce0b21282f 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/getting-started/build.md @@ -81,7 +81,6 @@ Requirements: In order to build the `LocalAI` container image locally you can use `docker`, for example: ``` -# build the image docker build -t localai . docker run localai ``` @@ -95,30 +94,22 @@ The below has been tested by one mac user and found to work. 
Note that this does Install `xcode` from the Apps Store (needed for metalkit) ``` -# install build dependencies brew install abseil cmake go grpc protobuf wget protoc-gen-go protoc-gen-go-grpc -# clone the repo git clone https://github.com/go-skynet/LocalAI.git cd LocalAI -# build the binary make build -# Download phi-2 to models/ wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O models/phi-2.Q2_K -# Use a template from the examples cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl -# Install the llama-cpp backend ./local-ai backends install llama-cpp -# Run LocalAI ./local-ai --models-path=./models/ --debug=true -# Now API is accessible at localhost:8080 curl http://localhost:8080/v1/models curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ @@ -135,10 +126,8 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso - After the installation of Xcode, if you receive a xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`. You might have installed the Xcode command line tools before installing Xcode, the former one is pointing to an incomplete SDK. ``` -# print /Library/Developer/CommandLineTools, if command line tools were installed in advance xcode-select --print-path -# point to a complete SDK sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer ``` @@ -147,7 +136,6 @@ sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer - If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again. 
``` -# reinstall build dependencies brew reinstall go grpc protobuf wget make clean @@ -168,10 +156,8 @@ In the LocalAI repository, for instance you can build `bark-cpp` by doing: ``` git clone https://github.com/go-skynet/LocalAI.git -# Build the bark-cpp backend (requires cmake) make -C LocalAI/backend/go/bark-cpp build package -# Build vllm backend (requires python) make -C LocalAI/backend/python/vllm ``` @@ -184,7 +170,6 @@ In the LocalAI repository, you can build `bark-cpp` by doing: ``` git clone https://github.com/go-skynet/LocalAI.git -# Build the bark-cpp backend (requires docker) make docker-build-bark-cpp ``` diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/getting-started/container-images.md similarity index 94% rename from docs/content/docs/getting-started/container-images.md rename to docs/content/getting-started/container-images.md index 936909c9458d..134cdea8fe2f 100644 --- a/docs/content/docs/getting-started/container-images.md +++ b/docs/content/getting-started/container-images.md @@ -10,16 +10,16 @@ LocalAI provides a variety of images to support different environments. These im All-in-One images comes with a pre-configured set of models and backends, standard images instead do not have any model pre-configured and installed. -For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}). +For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "getting-started/build" %}}). -{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} **Available Images Types**: - Images ending with `-core` are smaller images without predownload python dependencies. 
Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. - Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration. -{{% /alert %}} + {{% /notice %}} #### Prerequisites @@ -29,11 +29,11 @@ Before you begin, ensure you have a container engine installed if you are not us - [Install Podman (Linux)](https://podman.io/getting-started/installation) - [Install Docker engine (Servers)](https://docs.docker.com/engine/install/#get-started) -{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} **Hardware Requirements:** The hardware requirements for LocalAI vary based on the model size and quantization method used. For performance benchmarks with different backends, such as `llama.cpp`, visit [this link](https://github.com/ggerganov/llama.cpp#memorydisk-requirements). The `rwkv` backend is noted for its lower resource consumption. -{{% /alert %}} + {{% /notice %}} ## All-in-one images @@ -41,7 +41,6 @@ All-In-One images are images that come pre-configured with a set of models and b In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models. 
You can find the table below -{{< table "table-responsive" >}} | Category | Model name | Real model (CPU) | Real model (GPU) | | ---- | ---- | ---- | ---- | | Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` | @@ -50,18 +49,13 @@ In the AIO images there are models configured with the names of OpenAI models, h | Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same | | Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same | | Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` | -{{< /table >}} ### Usage Select the image (CPU or GPU) and start the container with Docker: ```bash -# CPU example docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu -# For Nvidia GPUs: -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-11 -# docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models). @@ -103,7 +97,7 @@ services: # capabilities: [gpu] ``` -{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} **Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/models` inside the container. The AIO models will be automatically updated with new versions of AIO images. @@ -122,7 +116,7 @@ docker volume create localai-models docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu ``` -{{% /alert %}} + {{% /notice %}} ### Available AIO images @@ -142,7 +136,7 @@ The AIO Images are inheriting the same environment variables as the base images | Variable | Default | Description | | ---------------------| ------- | ----------- | | `PROFILE` | Auto-detected | The size of the model to use. 
Available: `cpu`, `gpu-8g` | -| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/models" %}})) | +| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "getting-started/models" %}})) | ## Standard container images @@ -210,7 +204,7 @@ Standard container images do not have pre-installed models. {{% tab tabName="Nvidia Linux for tegra" %}} -These images are compatible with Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. For more information, see the [Nvidia L4T guide]({{%relref "docs/reference/nvidia-l4t" %}}). +These images are compatible with Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. For more information, see the [Nvidia L4T guide]({{%relref "reference/nvidia-l4t" %}}). | Description | Quay | Docker Hub | | --- | --- |-------------------------------------------------------------| @@ -224,4 +218,4 @@ These images are compatible with Nvidia ARM64 devices, such as the Jetson Nano, ## See Also -- [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}) +- [GPU acceleration]({{%relref "features/gpu-acceleration" %}}) diff --git a/docs/content/docs/getting-started/customize-model.md b/docs/content/getting-started/customize-model.md similarity index 84% rename from docs/content/docs/getting-started/customize-model.md rename to docs/content/getting-started/customize-model.md index eff83ebd2e5c..c5829469bc72 100644 --- a/docs/content/docs/getting-started/customize-model.md +++ b/docs/content/getting-started/customize-model.md @@ -6,17 +6,15 @@ icon = "rocket_launch" +++ -To customize the prompt template or the default settings of the model, a configuration file is utilized. This file must adhere to the LocalAI YAML configuration standards. 
For comprehensive syntax details, refer to the [advanced documentation]({{%relref "docs/advanced" %}}). The configuration file can be located either remotely (such as in a Github Gist) or within the local filesystem or a remote URL. +To customize the prompt template or the default settings of the model, a configuration file is utilized. This file must adhere to the LocalAI YAML configuration standards. For comprehensive syntax details, refer to the [advanced documentation]({{%relref "advanced" %}}). The configuration file can be located either remotely (such as in a Github Gist) or within the local filesystem or a remote URL. LocalAI can be initiated using either its container image or binary, with a command that includes URLs of model config files or utilizes a shorthand format (like `huggingface://` or `github://`), which is then expanded into complete URLs. The configuration can also be set via an environment variable. For instance: ``` -# Command-Line Arguments local-ai github://owner/repo/file.yaml@branch -# Environment Variable MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branch" local-ai ``` @@ -28,11 +26,11 @@ docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubuserc You can also check all the embedded models configurations [here](https://github.com/mudler/LocalAI/tree/master/embedded/models). -{{% alert icon="" %}} +{{% notice tip %}} The model configurations used in the quickstart are accessible here: [https://github.com/mudler/LocalAI/tree/master/embedded/models](https://github.com/mudler/LocalAI/tree/master/embedded/models). Contributions are welcome; please feel free to submit a Pull Request. The `phi-2` model configuration from the quickstart is expanded from [https://github.com/mudler/LocalAI/blob/master/examples/configurations/phi-2.yaml](https://github.com/mudler/LocalAI/blob/master/examples/configurations/phi-2.yaml). 
-{{% /alert %}} + {{% /notice %}} ## Example: Customizing the Prompt Template @@ -69,5 +67,5 @@ docker run -p 8080:8080 localai/localai:{{< version >}} https://gist.githubuserc ## Next Steps -- Visit the [advanced section]({{%relref "docs/advanced" %}}) for more insights on prompt templates and configuration files. -- To learn about fine-tuning an LLM model, check out the [fine-tuning section]({{%relref "docs/advanced/fine-tuning" %}}). \ No newline at end of file +- Visit the [advanced section]({{%relref "advanced" %}}) for more insights on prompt templates and configuration files. +- To learn about fine-tuning an LLM model, check out the [fine-tuning section]({{%relref "advanced/fine-tuning" %}}). \ No newline at end of file diff --git a/docs/content/docs/getting-started/kubernetes.md b/docs/content/getting-started/kubernetes.md similarity index 84% rename from docs/content/docs/getting-started/kubernetes.md rename to docs/content/getting-started/kubernetes.md index bc3902c53f4f..8adfa8277dee 100644 --- a/docs/content/docs/getting-started/kubernetes.md +++ b/docs/content/getting-started/kubernetes.md @@ -22,16 +22,10 @@ kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/ Alternatively, the [helm chart](https://github.com/go-skynet/helm-charts) can be used as well: ```bash -# Install the helm repository helm repo add go-skynet https://go-skynet.github.io/helm-charts/ -# Update the repositories helm repo update -# Get the values helm show values go-skynet/local-ai > values.yaml -# Edit the values if needed -# vim values.yaml ... 
-# Install the helm chart helm install local-ai go-skynet/local-ai -f values.yaml ``` diff --git a/docs/content/docs/getting-started/models.md b/docs/content/getting-started/models.md similarity index 70% rename from docs/content/docs/getting-started/models.md rename to docs/content/getting-started/models.md index 08a25e982dbb..c3aef9607f17 100644 --- a/docs/content/docs/getting-started/models.md +++ b/docs/content/getting-started/models.md @@ -7,7 +7,7 @@ icon = "rocket_launch" To install models with LocalAI, you can: -- Browse the Model Gallery from the Web Interface and install models with a couple of clicks. For more details, refer to the [Gallery Documentation]({{% relref "docs/features/model-gallery" %}}). +- Browse the Model Gallery from the Web Interface and install models with a couple of clicks. For more details, refer to the [Gallery Documentation]({{% relref "features/model-gallery" %}}). - Specify a model from the LocalAI gallery during startup, e.g., `local-ai run `. - Use a URI to specify a model file (e.g., `huggingface://...`, `oci://`, or `ollama://`) when starting LocalAI, e.g., `local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`. - Specify a URL to a model configuration file when starting LocalAI, e.g., `local-ai run https://gist.githubusercontent.com/.../phi-2.yaml`. @@ -29,7 +29,7 @@ To install only the model, use: local-ai models install hermes-2-theta-llama-3-8b ``` -Note: The galleries available in LocalAI can be customized to point to a different URL or a local directory. For more information on how to setup your own gallery, see the [Gallery Documentation]({{% relref "docs/features/model-gallery" %}}). +Note: The galleries available in LocalAI can be customized to point to a different URL or a local directory. For more information on how to setup your own gallery, see the [Gallery Documentation]({{% relref "features/model-gallery" %}}). 
## Run Models via URI @@ -40,18 +40,14 @@ To run models via URI, specify a URI to a model file or a configuration file whe - From OCIs: `oci://container_image:tag`, `ollama://model_id:tag` - From configuration files: `https://gist.githubusercontent.com/.../phi-2.yaml` -Configuration files can be used to customize the model defaults and settings. For advanced configurations, refer to the [Customize Models section]({{% relref "docs/getting-started/customize-model" %}}). +Configuration files can be used to customize the model defaults and settings. For advanced configurations, refer to the [Customize Models section]({{% relref "getting-started/customize-model" %}}). ### Examples ```bash -# Start LocalAI with the phi-2 model local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf -# Install and run a model from the Ollama OCI registry local-ai run ollama://gemma:2b -# Run a model from a configuration file local-ai run https://gist.githubusercontent.com/.../phi-2.yaml -# Install and run a model from a standard OCI registry (e.g., Docker Hub) local-ai run oci://localai/phi-2:latest ``` @@ -60,10 +56,10 @@ local-ai run oci://localai/phi-2:latest Follow these steps to manually run models using LocalAI: 1. **Prepare Your Model and Configuration Files**: - Ensure you have a model file and, if necessary, a configuration YAML file. Customize model defaults and settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "docs/advanced" %}}). + Ensure you have a model file and, if necessary, a configuration YAML file. Customize model defaults and settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "advanced" %}}). 2. **GPU Acceleration**: - For instructions on GPU acceleration, visit the [GPU Acceleration]({{% relref "docs/features/gpu-acceleration" %}}) page. 
+ For instructions on GPU acceleration, visit the [GPU Acceleration]({{% relref "features/gpu-acceleration" %}}) page. 3. **Run LocalAI**: Choose one of the following methods to run LocalAI: @@ -72,26 +68,13 @@ Follow these steps to manually run models using LocalAI: {{% tab tabName="Docker" %}} ```bash -# Prepare the models into the `models` directory mkdir models -# Copy your models to the directory cp your-model.gguf models/ -# Run the LocalAI container docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4 -# Expected output: -# β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -# β”‚ Fiber v2.42.0 β”‚ -# β”‚ http://127.0.0.1:8080 β”‚ -# β”‚ (bound on host 0.0.0.0 and port 8080) β”‚ -# β”‚ β”‚ -# β”‚ Handlers ............. 1 Processes ........... 1 β”‚ -# β”‚ Prefork ....... Disabled PID ................. 1 β”‚ -# β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -# Test the endpoint with curl + curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ "model": "your-model.gguf", "prompt": "A long time ago in a galaxy far, far away", @@ -99,68 +82,52 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d }' ``` -{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} **Other Docker Images**: -For other Docker images, please refer to the table in [the container images section]({{% relref "docs/getting-started/container-images" %}}). -{{% /alert %}} +For other Docker images, please refer to the table in [the container images section]({{% relref "getting-started/container-images" %}}). 
+ {{% /notice %}} ### Example: ```bash mkdir models -# Download luna-ai-llama2 to models/ wget https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_0.gguf -O models/luna-ai-llama2 -# Use a template from the examples, if needed cp -rf prompt-templates/getting_started.tmpl models/luna-ai-llama2.tmpl docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4 -# Now the API is accessible at localhost:8080 curl http://localhost:8080/v1/models -# {"object":"list","data":[{"id":"luna-ai-llama2","object":"model"}]} curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ "model": "luna-ai-llama2", "messages": [{"role": "user", "content": "How are you?"}], "temperature": 0.9 }' -# {"model":"luna-ai-llama2","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]} ``` -{{% alert note %}} -- If running on Apple Silicon (ARM), it is **not** recommended to run on Docker due to emulation. Follow the [build instructions]({{% relref "docs/getting-started/build" %}}) to use Metal acceleration for full GPU support. +{{% notice note %}} +- If running on Apple Silicon (ARM), it is **not** recommended to run on Docker due to emulation. Follow the [build instructions]({{% relref "getting-started/build" %}}) to use Metal acceleration for full GPU support. - If you are running on Apple x86_64, you can use Docker without additional gain from building it from source. 
-{{% /alert %}} + {{% /notice %}} {{% /tab %}} {{% tab tabName="Docker Compose" %}} ```bash -# Clone LocalAI git clone https://github.com/go-skynet/LocalAI cd LocalAI -# (Optional) Checkout a specific LocalAI tag -# git checkout -b build -# Copy your models to the models directory cp your-model.gguf models/ -# (Optional) Edit the .env file to set parameters like context size and threads -# vim .env -# Start with Docker Compose docker compose up -d --pull always -# Or build the images with: -# docker compose up -d --build -# Now the API is accessible at localhost:8080 curl http://localhost:8080/v1/models -# {"object":"list","data":[{"id":"your-model.gguf","object":"model"}]} curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ "model": "your-model.gguf", @@ -169,25 +136,25 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d }' ``` -{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} **Other Docker Images**: For other Docker images, please refer to the table in [Getting Started](https://localai.io/basics/getting_started/#container-images). -{{% /alert %}} + {{% /notice %}} Note: If you are on Windows, ensure the project is on the Linux filesystem to avoid slow model loading. For more information, see the [Microsoft Docs](https://learn.microsoft.com/en-us/windows/wsl/filesystems). {{% /tab %}} {{% tab tabName="Kubernetes" %}} -For Kubernetes deployment, see the [Kubernetes section]({{% relref "docs/getting-started/kubernetes" %}}). +For Kubernetes deployment, see the [Kubernetes section]({{% relref "getting-started/kubernetes" %}}). {{% /tab %}} {{% tab tabName="From Binary" %}} LocalAI binary releases are available on [GitHub](https://github.com/go-skynet/LocalAI/releases). -{{% alert icon="⚠️" %}} +{{% notice tip %}} If installing on macOS, you might encounter a message saying: > "local-ai-git-Darwin-arm64" (or the name you gave the binary) can't be opened because Apple cannot check it for malicious software. 
@@ -197,12 +164,12 @@ Hit OK, then go to Settings > Privacy & Security > Security and look for the mes > "local-ai-git-Darwin-arm64" was blocked from use because it is not from an identified developer. Press "Allow Anyway." -{{% /alert %}} + {{% /notice %}} {{% /tab %}} {{% tab tabName="From Source" %}} -For instructions on building LocalAI from source, see the [Build Section]({{% relref "docs/getting-started/build" %}}). +For instructions on building LocalAI from source, see the [Build Section]({{% relref "getting-started/build" %}}). {{% /tab %}} {{< /tabs >}} diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/getting-started/quickstart.md similarity index 74% rename from docs/content/docs/getting-started/quickstart.md rename to docs/content/getting-started/quickstart.md index eeeb409e91a6..49e86257fb0c 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/getting-started/quickstart.md @@ -6,28 +6,27 @@ url = '/basics/getting_started/' icon = "rocket_launch" +++ -**LocalAI** is a free, open-source alternative to OpenAI (Anthropic, etc.), functioning as a drop-in replacement REST API for local inferencing. It allows you to run [LLMs]({{% relref "docs/features/text-generation" %}}), generate images, and produce audio, all locally or on-premises with consumer-grade hardware, supporting multiple model families and architectures. +**LocalAI** is a free, open-source alternative to OpenAI (Anthropic, etc.), functioning as a drop-in replacement REST API for local inferencing. It allows you to run [LLMs]({{% relref "features/text-generation" %}}), generate images, and produce audio, all locally or on-premises with consumer-grade hardware, supporting multiple model families and architectures. 
-{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} **Security considerations** If you are exposing LocalAI remotely, make sure you protect the API endpoints adequately with a mechanism which allows to protect from the incoming traffic or alternatively, run LocalAI with `API_KEY` to gate the access with an API key. The API key guarantees a total access to the features (there is no role separation), and it is to be considered as likely as an admin role. -{{% /alert %}} + {{% /notice %}} ## Quickstart ### Using the Bash Installer ```bash -# Basic installation curl https://localai.io/install.sh | sh ``` The bash installer, if docker is not detected, will install automatically as a systemd service. -See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported options +See [Installer]({{% relref "advanced/installer" %}}) for all the supported options ### macOS Download @@ -41,14 +40,14 @@ For MacOS a DMG is available: ### Run with docker -{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} **Docker Run vs Docker Start** - `docker run` creates and starts a new container. If a container with the same name already exists, this command will fail. - `docker start` starts an existing container that was previously created with `docker run`. If you've already run LocalAI before and want to start it again, use: `docker start -i local-ai` -{{% /alert %}} + {{% /notice %}} The following commands will automatically start with a web interface and a Rest API on port `8080`. 
@@ -61,14 +60,10 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest #### NVIDIA GPU Images: ```bash -# CUDA 12.0 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 -# CUDA 11.7 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 -# NVIDIA Jetson (L4T) ARM64 -# First, you need to have installed the nvidia container toolkit: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-ap docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64 ``` @@ -93,19 +88,14 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan #### AIO Images (pre-downloaded models): ```bash -# CPU version docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu -# NVIDIA CUDA 12 version docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 -# NVIDIA CUDA 11 version docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 -# Intel GPU version docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel -# AMD GPU version docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas ``` @@ -114,31 +104,26 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri When starting LocalAI (either via Docker or via CLI) you can specify as argument a list of models to install automatically before starting the API, for example: ```bash -# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io) local-ai run llama-3.2-1b-instruct:q4_k_m -# Start LocalAI with the phi-2 model directly from huggingface local-ai run 
huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf -# Install and run a model from the Ollama OCI registry local-ai run ollama://gemma:2b -# Run a model from a configuration file local-ai run https://gist.githubusercontent.com/.../phi-2.yaml -# Install and run a model from a standard OCI registry (e.g., Docker Hub) local-ai run oci://localai/phi-2:latest ``` -{{% alert icon="⚑" %}} -**Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration]({{% relref "docs/features/gpu-acceleration#automatic-backend-detection" %}}). -{{% /alert %}} +{{% notice tip %}} +**Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration]({{% relref "features/gpu-acceleration#automatic-backend-detection" %}}). + {{% /notice %}} -For a full list of options, you can run LocalAI with `--help` or refer to the [Installer Options]({{% relref "docs/advanced/installer" %}}) documentation. +For a full list of options, you can run LocalAI with `--help` or refer to the [Installer Options]({{% relref "advanced/installer" %}}) documentation. -Binaries can also be [manually downloaded]({{% relref "docs/reference/binaries" %}}). +Binaries can also be [manually downloaded]({{% relref "reference/binaries" %}}). 
## Using Homebrew on MacOS -{{% alert icon="⚠️" %}} +{{% notice tip %}} The Homebrew formula currently doesn't have the same options than the bash script -{{% /alert %}} + {{% /notice %}} You can install Homebrew's [LocalAI](https://formulae.brew.sh/formula/localai) with the following command: @@ -151,17 +136,17 @@ brew install localai LocalAI is available as a container image compatible with various container engines such as Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Docker Hub](https://hub.docker.com/r/localai/localai). -For detailed instructions, see [Using container images]({{% relref "docs/getting-started/container-images" %}}). For Kubernetes deployment, see [Run with Kubernetes]({{% relref "docs/getting-started/kubernetes" %}}). +For detailed instructions, see [Using container images]({{% relref "getting-started/container-images" %}}). For Kubernetes deployment, see [Run with Kubernetes]({{% relref "getting-started/kubernetes" %}}). ## Running LocalAI with All-in-One (AIO) Images -> _Already have a model file? Skip to [Run models manually]({{% relref "docs/getting-started/models" %}})_. +> _Already have a model file? Skip to [Run models manually]({{% relref "getting-started/models" %}})_. -LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the features of LocalAI. If pre-configured models are not required, you can use the standard [images]({{% relref "docs/getting-started/container-images" %}}). +LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the features of LocalAI. If pre-configured models are not required, you can use the standard [images]({{% relref "getting-started/container-images" %}}). These images are available for both CPU and GPU environments. 
AIO images are designed for ease of use and require no additional configuration. -It is recommended to use AIO images if you prefer not to configure the models manually or via the web interface. For running specific models, refer to the [manual method]({{% relref "docs/getting-started/models" %}}). +It is recommended to use AIO images if you prefer not to configure the models manually or via the web interface. For running specific models, refer to the [manual method]({{% relref "getting-started/models" %}}). The AIO images come pre-configured with the following features: - Text to Speech (TTS) @@ -171,7 +156,7 @@ The AIO images come pre-configured with the following features: - Image generation - Embedding server -For instructions on using AIO images, see [Using container images]({{% relref "docs/getting-started/container-images#all-in-one-images" %}}). +For instructions on using AIO images, see [Using container images]({{% relref "getting-started/container-images#all-in-one-images" %}}). ## Using LocalAI and the full stack with LocalAGI @@ -182,23 +167,17 @@ LocalAI is part of the Local family stack, along with LocalAGI and LocalRecall. ### Quick Start ```bash -# Clone the repository git clone https://github.com/mudler/LocalAGI cd LocalAGI -# CPU setup (default) docker compose up -# NVIDIA GPU setup docker compose -f docker-compose.nvidia.yaml up -# Intel GPU setup (for Intel Arc and integrated GPUs) docker compose -f docker-compose.intel.yaml up -# Start with a specific model (see available models in models.localai.io, or localai.io to use any model in huggingface) MODEL_NAME=gemma-3-12b-it docker compose up -# NVIDIA GPU setup with custom multimodal and image models MODEL_NAME=gemma-3-12b-it \ MULTIMODAL_MODEL=minicpm-v-4_5 \ IMAGE_MODEL=flux.1-dev-ggml \ @@ -226,13 +205,13 @@ For more advanced configuration and API documentation, visit the [LocalAGI GitHu ## What's Next? -There is much more to explore with LocalAI! 
You can run any model from Hugging Face, perform video generation, and also voice cloning. For a comprehensive overview, check out the [features]({{% relref "docs/features" %}}) section. +There is much more to explore with LocalAI! You can run any model from Hugging Face, perform video generation, and also voice cloning. For a comprehensive overview, check out the [features]({{% relref "features" %}}) section. Explore additional resources and community contributions: -- [Installer Options]({{% relref "docs/advanced/installer" %}}) -- [Run from Container images]({{% relref "docs/getting-started/container-images" %}}) -- [Examples to try from the CLI]({{% relref "docs/getting-started/try-it-out" %}}) -- [Build LocalAI and the container image]({{% relref "docs/getting-started/build" %}}) -- [Run models manually]({{% relref "docs/getting-started/models" %}}) +- [Installer Options]({{% relref "advanced/installer" %}}) +- [Run from Container images]({{% relref "getting-started/container-images" %}}) +- [Examples to try from the CLI]({{% relref "getting-started/try-it-out" %}}) +- [Build LocalAI and the container image]({{% relref "getting-started/build" %}}) +- [Run models manually]({{% relref "getting-started/models" %}}) - [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples) diff --git a/docs/content/docs/getting-started/try-it-out.md b/docs/content/getting-started/try-it-out.md similarity index 94% rename from docs/content/docs/getting-started/try-it-out.md rename to docs/content/getting-started/try-it-out.md index 26fb6e45f92c..fdcf21e5765f 100644 --- a/docs/content/docs/getting-started/try-it-out.md +++ b/docs/content/getting-started/try-it-out.md @@ -9,16 +9,16 @@ icon = "rocket_launch" Once LocalAI is installed, you can start it (either by using docker, or the cli, or the systemd service). -By default the LocalAI WebUI should be accessible from http://localhost:8080. 
You can also use 3rd party projects to interact with LocalAI as you would use OpenAI (see also [Integrations]({{%relref "docs/integrations" %}}) ). +By default the LocalAI WebUI should be accessible from http://localhost:8080. You can also use 3rd party projects to interact with LocalAI as you would use OpenAI (see also [Integrations]({{%relref "integrations" %}}) ). After installation, install new models by navigating the model gallery, or by using the `local-ai` CLI. -{{% alert icon="πŸš€" %}} -To install models with the WebUI, see the [Models section]({{%relref "docs/features/model-gallery" %}}). +{{% notice tip %}} +To install models with the WebUI, see the [Models section]({{%relref "features/model-gallery" %}}). With the CLI you can list the models with `local-ai models list` and install them with `local-ai models install `. -You can also [run models manually]({{%relref "docs/getting-started/models" %}}) by copying files into the `models` directory. -{{% /alert %}} +You can also [run models manually]({{%relref "getting-started/models" %}}) by copying files into the `models` directory. + {{% /notice %}} You can test out the API endpoints using `curl`, few examples are listed below. The models we are referring here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images - you can also use any other model you have installed. @@ -187,10 +187,10 @@ curl http://localhost:8080/embeddings \ -{{% alert icon="πŸ’‘" %}} +{{% notice tip %}} Don't use the model file as `model` in the request unless you want to handle the prompt template for yourself. Use the model names like you would do with OpenAI like in the examples below. For instance `gpt-4-vision-preview`, or `gpt-4`. 
-{{% /alert %}} + {{% /notice %}} diff --git a/docs/content/installation/_index.en.md b/docs/content/installation/_index.en.md new file mode 100644 index 000000000000..c95df6287fc2 --- /dev/null +++ b/docs/content/installation/_index.en.md @@ -0,0 +1,24 @@ +--- +weight: 2 +title: "Installation" +description: "How to install LocalAI" +type: chapter +icon: download +--- + + +LocalAI can be installed in multiple ways depending on your platform and preferences. Choose the installation method that best suits your needs: + +- **[macOS](macos/)**: Download and install the DMG application for macOS +- **[Docker](docker/)**: Run LocalAI using Docker containers (recommended for most users) +- **[Linux](linux/)**: Install on Linux using the one-liner script or binaries +- **[Kubernetes](kubernetes/)**: Deploy LocalAI on Kubernetes clusters +- **[Build from Source](build/)**: Build LocalAI from source code + +## Quick Start + +The fastest way to get started depends on your platform: + +- **macOS**: Download the [DMG](macos/) +- **Linux**: Use the `curl https://localai.io/install.sh | sh` [one-liner](linux/) +- **Docker**: Run `docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu` diff --git a/docs/content/installation/build.md b/docs/content/installation/build.md new file mode 100644 index 000000000000..7e50d30f4b72 --- /dev/null +++ b/docs/content/installation/build.md @@ -0,0 +1,185 @@ ++++ +disableToc = false +title = "Build LocalAI" +icon = "model_training" +weight = 5 +url = '/basics/build/' ++++ + + +### Build + +LocalAI can be built as a container image or as a single, portable binary. Note that some model architectures might require Python libraries, which are not included in the binary. 
+ +LocalAI's extensible architecture allows you to add your own backends, which can be written in any language, and as such the container images contains also the Python dependencies to run all the available backends (for example, in order to run backends like __Diffusers__ that allows to generate images and videos from text). + +This section contains instructions on how to build LocalAI from source. + +#### Build LocalAI locally + +##### Requirements + +In order to build LocalAI locally, you need the following requirements: + +- Golang >= 1.21 +- GCC +- GRPC + +To install the dependencies follow the instructions below: + +{{< tabs tabTotal="3" >}} +{{% tab tabName="Apple" %}} + +Install `xcode` from the App Store + +```bash +brew install go protobuf protoc-gen-go protoc-gen-go-grpc wget +``` + +{{% /tab %}} +{{% tab tabName="Debian" %}} + +```bash +apt install golang make protobuf-compiler-grpc +``` + +After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands + +```bash +go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + +``` + +{{% /tab %}} +{{% tab tabName="From source" %}} + +```bash +make build +``` + +{{% /tab %}} +{{< /tabs >}} + +##### Build +To build LocalAI with `make`: + +``` +git clone https://github.com/go-skynet/LocalAI +cd LocalAI +make build +``` + +This should produce the binary `local-ai` + +#### Container image + +Requirements: + +- Docker or podman, or a container engine + +In order to build the `LocalAI` container image locally you can use `docker`, for example: + +``` +docker build -t localai . +docker run localai +``` + +### Example: Build on mac + +Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`. + +The below has been tested by one mac user and found to work. 
Note that this doesn't use Docker to run the server: + +Install `xcode` from the App Store (needed for metalkit) + +``` +brew install abseil cmake go grpc protobuf wget protoc-gen-go protoc-gen-go-grpc + +git clone https://github.com/go-skynet/LocalAI.git + +cd LocalAI + +make build + +wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O models/phi-2.Q2_K + +cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl + +./local-ai backends install llama-cpp + +./local-ai --models-path=./models/ --debug=true + +curl http://localhost:8080/v1/models + +curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2.Q2_K", + "messages": [{"role": "user", "content": "How are you?"}], + "temperature": 0.9 + }' +``` + +#### Troubleshooting mac + +- If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store. + +- After the installation of Xcode, if you receive an xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`, you might have installed the Xcode command line tools before installing Xcode; the former points to an incomplete SDK. + +``` +xcode-select --print-path + +sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer +``` + +- If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256). + +- If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again. + +``` +brew reinstall go grpc protobuf wget + +make clean + +make build +``` + +## Build backends + +LocalAI has several backends available for installation in the backend gallery. The backends can also be built from source.
As backends vary in the language and dependencies they require, the documentation provides generic guidance for a few of the backends, which can be applied with some slight modifications also to the others. + +### Manually + +Typically each backend includes a Makefile which allows packaging the backend. + +In the LocalAI repository, for instance you can build `bark-cpp` by doing: + +``` +git clone https://github.com/go-skynet/LocalAI.git + +make -C LocalAI/backend/go/bark-cpp build package + +make -C LocalAI/backend/python/vllm +``` + +### With Docker + +Building with Docker is simpler as it abstracts away all the requirements, and focuses on building the final OCI images that are available in the gallery. This also allows, for instance, building a backend locally and installing it with LocalAI. You can refer to [Backends](https://localai.io/backends/) for general guidance on how to install and develop backends. + +In the LocalAI repository, you can build `bark-cpp` by doing: + +``` +git clone https://github.com/go-skynet/LocalAI.git + +make docker-build-bark-cpp +``` + +Note that `make` is only for convenience; in reality it just runs a simple `docker` command as: + +```bash +docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f LocalAI/backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . +``` + +Note: + +- BUILD_TYPE can be either: `cublas`, `hipblas`, `sycl_f16`, `sycl_f32`, `metal`.
+- BASE_IMAGE is tested on `ubuntu:22.04` (and defaults to it) and `quay.io/go-skynet/intel-oneapi-base:latest` for intel/sycl diff --git a/docs/content/installation/kubernetes.md b/docs/content/installation/kubernetes.md new file mode 100644 index 000000000000..f3047851d81c --- /dev/null +++ b/docs/content/installation/kubernetes.md @@ -0,0 +1,31 @@ ++++ +disableToc = false +title = "Run with Kubernetes" +weight = 4 +url = '/basics/kubernetes/' +icon = "rocket_launch" ++++ + + +For installing LocalAI in Kubernetes, the deployment file from the `examples` can be used and customized as preferred: + +``` +kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment.yaml +``` + +For Nvidia GPUs: + +``` +kubectl apply -f https://raw.githubusercontent.com/mudler/LocalAI-examples/refs/heads/main/kubernetes/deployment-nvidia.yaml +``` + +Alternatively, the [helm chart](https://github.com/go-skynet/helm-charts) can be used as well: + +```bash +helm repo add go-skynet https://go-skynet.github.io/helm-charts/ +helm repo update +helm show values go-skynet/local-ai > values.yaml + + +helm install local-ai go-skynet/local-ai -f values.yaml +``` diff --git a/docs/content/installation/linux.md b/docs/content/installation/linux.md new file mode 100644 index 000000000000..2992c55ceb50 --- /dev/null +++ b/docs/content/installation/linux.md @@ -0,0 +1,65 @@ +--- +title: "Linux Installation" +description: "Install LocalAI on Linux using the installer script or binaries" +weight: 3 +url: '/installation/linux/' +--- + + +## One-Line Installer (Recommended) + +The fastest way to install LocalAI on Linux is with the installation script: + +```bash +curl https://localai.io/install.sh | sh +``` + +This script will: +- Detect your system architecture +- Download the appropriate LocalAI binary +- Set up the necessary configuration +- Start LocalAI automatically + +## Manual Installation + +### Download Binary + +You can manually download the 
appropriate binary for your system from the [releases page](https://github.com/mudler/LocalAI/releases): + +1. Go to [GitHub Releases](https://github.com/mudler/LocalAI/releases) +2. Download the binary for your architecture (amd64, arm64, etc.) +3. Make it executable: + +```bash +chmod +x local-ai-* +``` + +4. Run LocalAI: + +```bash +./local-ai-* +``` + +### System Requirements + +Hardware requirements vary based on: +- Model size +- Quantization method +- Backend used + +For performance benchmarks with different backends like `llama.cpp`, visit [this link](https://github.com/ggerganov/llama.cpp#memorydisk-requirements). + +## Configuration + +After installation, you can: + +- Access the WebUI at `http://localhost:8080` +- Configure models in the models directory +- Customize settings via environment variables or config files + +## Next Steps + +- [Try it out with examples](/basics/try/) +- [Learn about available models](/models/) +- [Configure GPU acceleration](/features/gpu-acceleration/) +- [Customize your configuration](/advanced/model-configuration/) diff --git a/docs/content/installation/macos.md b/docs/content/installation/macos.md new file mode 100644 index 000000000000..fc254cedb581 --- /dev/null +++ b/docs/content/installation/macos.md @@ -0,0 +1,40 @@ +--- +title: "macOS Installation" +description: "Install LocalAI on macOS using the DMG application" +weight: 1 +--- + + +The easiest way to install LocalAI on macOS is using the DMG application. + +## Download + +Download the latest DMG from GitHub releases: + + + Download LocalAI for macOS + + +## Installation Steps + +1. Download the `LocalAI.dmg` file from the link above +2. Open the downloaded DMG file +3. Drag the LocalAI application to your Applications folder +4. Launch LocalAI from your Applications folder + +## Known Issues + +> **Note**: The DMGs are not signed by Apple and may show as quarantined. 
+> +> **Workaround**: See [this issue](https://github.com/mudler/LocalAI/issues/6268) for details on how to bypass the quarantine. +> +> **Fix tracking**: The signing issue is being tracked in [this issue](https://github.com/mudler/LocalAI/issues/6244). + +## Next Steps + +After installing LocalAI, you can: + +- Access the WebUI at `http://localhost:8080` +- [Try it out with examples](/basics/try/) +- [Learn about available models](/models/) +- [Customize your configuration](/advanced/model-configuration/) diff --git a/docs/content/docs/integrations.md b/docs/content/integrations.md similarity index 100% rename from docs/content/docs/integrations.md rename to docs/content/integrations.md diff --git a/docs/content/docs/overview.md b/docs/content/overview.md similarity index 99% rename from docs/content/docs/overview.md rename to docs/content/overview.md index 5e3e6d742fb2..79b109597e08 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/overview.md @@ -9,7 +9,6 @@ author = "Ettore Di Giacinto" icon = "info" +++ -# Welcome to LocalAI LocalAI is your complete AI stack for running AI models locally. It's designed to be simple, efficient, and accessible, providing a drop-in replacement for OpenAI's API while keeping your data private and secure. 
diff --git a/docs/content/docs/reference/_index.en.md b/docs/content/reference/_index.en.md similarity index 92% rename from docs/content/docs/reference/_index.en.md rename to docs/content/reference/_index.en.md index d8a8f2a71a06..ea6a150bf1fd 100644 --- a/docs/content/docs/reference/_index.en.md +++ b/docs/content/reference/_index.en.md @@ -2,6 +2,7 @@ weight: 23 title: "References" description: "Reference" +type: chapter icon: menu_book lead: "" date: 2020-10-06T08:49:15+00:00 diff --git a/docs/content/docs/reference/architecture.md b/docs/content/reference/architecture.md similarity index 96% rename from docs/content/docs/reference/architecture.md rename to docs/content/reference/architecture.md index 23abe11171cb..9f701bc5d325 100644 --- a/docs/content/docs/reference/architecture.md +++ b/docs/content/reference/architecture.md @@ -7,7 +7,7 @@ weight = 25 LocalAI is an API written in Go that serves as an OpenAI shim, enabling software already developed with OpenAI SDKs to seamlessly integrate with LocalAI. It can be effortlessly implemented as a substitute, even on consumer-grade hardware. This capability is achieved by employing various C++ backends, including [ggml](https://github.com/ggerganov/ggml), to perform inference on LLMs using both CPU and, if desired, GPU. Internally LocalAI backends are just gRPC server, indeed you can specify and build your own gRPC server and extend LocalAI in runtime as well. It is possible to specify external gRPC server and/or binaries that LocalAI will manage internally. -LocalAI uses a mixture of backends written in various languages (C++, Golang, Python, ...). You can check [the model compatibility table]({{%relref "docs/reference/compatibility-table" %}}) to learn about all the components of LocalAI. +LocalAI uses a mixture of backends written in various languages (C++, Golang, Python, ...). 
You can check [the model compatibility table]({{%relref "reference/compatibility-table" %}}) to learn about all the components of LocalAI. ![localai](https://github.com/go-skynet/localai-website/assets/2420543/6492e685-8282-4217-9daa-e229a31548bc) diff --git a/docs/content/docs/reference/binaries.md b/docs/content/reference/binaries.md similarity index 96% rename from docs/content/docs/reference/binaries.md rename to docs/content/reference/binaries.md index aa818ec7468e..224c72685311 100644 --- a/docs/content/docs/reference/binaries.md +++ b/docs/content/reference/binaries.md @@ -32,10 +32,10 @@ Otherwise, here are the links to the binaries: | MacOS (arm64) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Darwin-arm64) | -{{% alert icon="⚑" context="warning" %}} +{{% notice icon="⚑" context="warning" %}} Binaries do have limited support compared to container images: - Python-based backends are not shipped with binaries (e.g. `bark`, `diffusers` or `transformers`) - MacOS binaries and Linux-arm64 do not ship TTS nor `stablediffusion-cpp` backends - Linux binaries do not ship `stablediffusion-cpp` backend -{{% /alert %}} + {{% /notice %}} diff --git a/docs/content/docs/reference/cli-reference.md b/docs/content/reference/cli-reference.md similarity index 91% rename from docs/content/docs/reference/cli-reference.md rename to docs/content/reference/cli-reference.md index 60569b66b746..356e7a14e045 100644 --- a/docs/content/docs/reference/cli-reference.md +++ b/docs/content/reference/cli-reference.md @@ -7,21 +7,18 @@ url = '/reference/cli-reference' Complete reference for all LocalAI command-line interface (CLI) parameters and environment variables. -> **Note:** All CLI flags can also be set via environment variables. Environment variables take precedence over CLI flags. See [.env files]({{%relref "docs/advanced/advanced-usage#env-files" %}}) for configuration file support. 
+> **Note:** All CLI flags can also be set via environment variables. Environment variables take precedence over CLI flags. See [.env files]({{%relref "advanced/advanced-usage#env-files" %}}) for configuration file support. ## Global Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `-h, --help` | | Show context-sensitive help | | | `--log-level` | `info` | Set the level of logs to output [error,warn,info,debug,trace] | `$LOCALAI_LOG_LEVEL` | | `--debug` | `false` | **DEPRECATED** - Use `--log-level=debug` instead. Enable debug logging | `$LOCALAI_DEBUG`, `$DEBUG` | -{{< /table >}} ## Storage Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--models-path` | `BASEPATH/models` | Path containing models used for inferencing | `$LOCALAI_MODELS_PATH`, `$MODELS_PATH` | @@ -30,11 +27,9 @@ Complete reference for all LocalAI command-line interface (CLI) parameters and e | `--localai-config-dir` | `BASEPATH/configuration` | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json) | `$LOCALAI_CONFIG_DIR` | | `--localai-config-dir-poll-interval` | | Time duration to poll the LocalAI Config Dir if your system has broken fsnotify events (example: `1m`) | `$LOCALAI_CONFIG_DIR_POLL_INTERVAL` | | `--models-config-file` | | YAML file containing a list of model backend configs (alias: `--config-file`) | `$LOCALAI_MODELS_CONFIG_FILE`, `$CONFIG_FILE` | -{{< /table >}} ## Backend Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--backends-path` | `BASEPATH/backends` | Path containing backends used for inferencing | `$LOCALAI_BACKENDS_PATH`, `$BACKENDS_PATH` | @@ -50,13 +45,11 @@ Complete 
reference for all LocalAI command-line interface (CLI) parameters and e | `--watchdog-idle-timeout` | `15m` | Threshold beyond which an idle backend should be stopped | `$LOCALAI_WATCHDOG_IDLE_TIMEOUT`, `$WATCHDOG_IDLE_TIMEOUT` | | `--enable-watchdog-busy` | `false` | Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout | `$LOCALAI_WATCHDOG_BUSY`, `$WATCHDOG_BUSY` | | `--watchdog-busy-timeout` | `5m` | Threshold beyond which a busy backend should be stopped | `$LOCALAI_WATCHDOG_BUSY_TIMEOUT`, `$WATCHDOG_BUSY_TIMEOUT` | -{{< /table >}} -For more information on VRAM management, see [VRAM and Memory Management]({{%relref "docs/advanced/vram-management" %}}). +For more information on VRAM management, see [VRAM and Memory Management]({{%relref "advanced/vram-management" %}}). ## Models Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--galleries` | | JSON list of galleries | `$LOCALAI_GALLERIES`, `$GALLERIES` | @@ -65,23 +58,19 @@ For more information on VRAM management, see [VRAM and Memory Management]({{%rel | `--models` | | A list of model configuration URLs to load | `$LOCALAI_MODELS`, `$MODELS` | | `--preload-models-config` | | A list of models to apply at startup. 
Path to a YAML config file | `$LOCALAI_PRELOAD_MODELS_CONFIG`, `$PRELOAD_MODELS_CONFIG` | | `--load-to-memory` | | A list of models to load into memory at startup | `$LOCALAI_LOAD_TO_MEMORY`, `$LOAD_TO_MEMORY` | -{{< /table >}} > **Note:** You can also pass model configuration URLs as positional arguments: `local-ai run MODEL_URL1 MODEL_URL2 ...` ## Performance Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--f16` | `false` | Enable GPU acceleration | `$LOCALAI_F16`, `$F16` | | `-t, --threads` | | Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested | `$LOCALAI_THREADS`, `$THREADS` | | `--context-size` | | Default context size for models | `$LOCALAI_CONTEXT_SIZE`, `$CONTEXT_SIZE` | -{{< /table >}} ## API Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--address` | `:8080` | Bind address for the API server | `$LOCALAI_ADDRESS`, `$ADDRESS` | @@ -94,11 +83,9 @@ For more information on VRAM management, see [VRAM and Memory Management]({{%rel | `--disable-gallery-endpoint` | `false` | Disable the gallery endpoints | `$LOCALAI_DISABLE_GALLERY_ENDPOINT`, `$DISABLE_GALLERY_ENDPOINT` | | `--disable-metrics-endpoint` | `false` | Disable the `/metrics` endpoint | `$LOCALAI_DISABLE_METRICS_ENDPOINT`, `$DISABLE_METRICS_ENDPOINT` | | `--machine-tag` | | If not empty, add that string to Machine-Tag header in each response. 
Useful to track response from different machines using multiple P2P federated nodes | `$LOCALAI_MACHINE_TAG`, `$MACHINE_TAG` | -{{< /table >}} ## Hardening Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--disable-predownload-scan` | `false` | If true, disables the best-effort security scanner before downloading any files | `$LOCALAI_DISABLE_PREDOWNLOAD_SCAN` | @@ -106,11 +93,9 @@ For more information on VRAM management, see [VRAM and Memory Management]({{%rel | `--use-subtle-key-comparison` | `false` | If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resilience against timing attacks | `$LOCALAI_SUBTLE_KEY_COMPARISON` | | `--disable-api-key-requirement-for-http-get` | `false` | If true, a valid API key is not required to issue GET requests to portions of the web UI. This should only be enabled in secure testing environments | `$LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET` | | `--http-get-exempted-endpoints` | `^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$` | If `--disable-api-key-requirement-for-http-get` is overridden to true, this is the list of endpoints to exempt. 
Only adjust this in case of a security incident or as a result of a personal security posture review | `$LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS` | -{{< /table >}} ## P2P Flags -{{< table "table-responsive" >}} | Parameter | Default | Description | Environment Variable | |-----------|---------|-------------|----------------------| | `--p2p` | `false` | Enable P2P mode | `$LOCALAI_P2P`, `$P2P` | @@ -119,7 +104,6 @@ For more information on VRAM management, see [VRAM and Memory Management]({{%rel | `--p2ptoken` | | Token for P2P mode (optional) | `$LOCALAI_P2P_TOKEN`, `$P2P_TOKEN`, `$TOKEN` | | `--p2p-network-id` | | Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances | `$LOCALAI_P2P_NETWORK_ID`, `$P2P_NETWORK_ID` | | `--federated` | `false` | Enable federated instance | `$LOCALAI_FEDERATED`, `$FEDERATED` | -{{< /table >}} ## Other Commands @@ -142,20 +126,16 @@ Use `local-ai --help` for more information on each command. ### Basic Usage ```bash -# Start LocalAI with default settings ./local-ai run -# Start with custom model path and address ./local-ai run --models-path /path/to/models --address :9090 -# Start with GPU acceleration ./local-ai run --f16 ``` ### Environment Variables ```bash -# Using environment variables export LOCALAI_MODELS_PATH=/path/to/models export LOCALAI_ADDRESS=:9090 export LOCALAI_F16=true @@ -165,7 +145,6 @@ export LOCALAI_F16=true ### Advanced Configuration ```bash -# Start with multiple models, watchdog, and P2P enabled ./local-ai run \ --models model1.yaml model2.yaml \ --enable-watchdog-idle \ @@ -176,6 +155,6 @@ export LOCALAI_F16=true ## Related Documentation -- See [Advanced Usage]({{%relref "docs/advanced/advanced-usage" %}}) for configuration examples -- See [VRAM and Memory Management]({{%relref "docs/advanced/vram-management" %}}) for memory management options +- See [Advanced Usage]({{%relref "advanced/advanced-usage" %}}) for configuration examples +- See [VRAM and Memory Management]({{%relref 
"advanced/vram-management" %}}) for memory management options diff --git a/docs/content/docs/reference/compatibility-table.md b/docs/content/reference/compatibility-table.md similarity index 92% rename from docs/content/docs/reference/compatibility-table.md rename to docs/content/reference/compatibility-table.md index 87c9dc5fe1bb..2511afcd5986 100644 --- a/docs/content/docs/reference/compatibility-table.md +++ b/docs/content/reference/compatibility-table.md @@ -8,29 +8,26 @@ url = "/model-compatibility/" Besides llama based models, LocalAI is compatible also with other architectures. The table below lists all the backends, compatible models families and the associated repository. -{{% alert note %}} +{{% notice note %}} -LocalAI will attempt to automatically load models which are not explicitly configured for a specific backend. You can specify the backend to use by configuring a model with a YAML file. See [the advanced section]({{%relref "docs/advanced" %}}) for more details. +LocalAI will attempt to automatically load models which are not explicitly configured for a specific backend. You can specify the backend to use by configuring a model with a YAML file. See [the advanced section]({{%relref "advanced" %}}) for more details. 
-{{% /alert %}} + {{% /notice %}} ## Text Generation & Language Models -{{< table "table-responsive" >}} | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| -| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 11/12, ROCm, Intel SYCL, Vulkan, Metal, CPU | +| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 11/12, ROCm, Intel SYCL, Vulkan, Metal, CPU | | [vLLM](https://github.com/vllm-project/vllm) | Various GPTs and quantization formats | yes | GPT | no | no | CUDA 12, ROCm, Intel | | [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 11/12, ROCm, Intel, CPU | | [exllama2](https://github.com/turboderp-org/exllamav2) | GPTQ | yes | GPT only | no | no | CUDA 12 | | [MLX](https://github.com/ml-explore/mlx-lm) | Various LLMs | yes | GPT | no | no | Metal (Apple Silicon) | | [MLX-VLM](https://github.com/Blaizzy/mlx-vlm) | Vision-Language Models | yes | Multimodal GPT | no | no | Metal (Apple Silicon) | | [langchain-huggingface](https://github.com/tmc/langchaingo) | Any text generators available on HuggingFace through API | yes | GPT | no | no | N/A | -{{< /table >}} ## Audio & Speech Processing -{{< table "table-responsive" >}} | Backend and 
Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| | [whisper.cpp](https://github.com/ggml-org/whisper.cpp) | whisper | no | Audio transcription | no | no | CUDA 12, ROCm, Intel SYCL, Vulkan, CPU | @@ -45,28 +42,23 @@ LocalAI will attempt to automatically load models which are not explicitly confi | [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU | | [neutts](https://github.com/neuphonic/neuttsair) | NeuTTSAir | no | Text-to-speech with voice cloning | no | no | CUDA 12, ROCm, CPU | | [mlx-audio](https://github.com/Blaizzy/mlx-audio) | MLX | no | Text-tospeech | no | no | Metal (Apple Silicon) | -{{< /table >}} ## Image & Video Generation -{{< table "table-responsive" >}} | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| | [stablediffusion.cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | CUDA 12, Intel SYCL, Vulkan, CPU | | [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... 
| no | Image/Video generation | no | no | CUDA 11/12, ROCm, Intel, Metal, CPU | | [transformers-musicgen](https://github.com/huggingface/transformers) | MusicGen | no | Audio generation | no | no | CUDA, CPU | -{{< /table >}} ## Specialized AI Tasks -{{< table "table-responsive" >}} | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| | [rfdetr](https://github.com/roboflow/rf-detr) | RF-DETR | no | Object Detection | no | no | CUDA 12, Intel, CPU | | [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 11/12, ROCm, Intel, CPU | | [local-store](https://github.com/mudler/LocalAI) | Vector database | no | Vector storage | yes | no | CPU | | [huggingface](https://huggingface.co/docs/hub/en/api) | HuggingFace API models | yes | Various AI tasks | yes | yes | API-based | -{{< /table >}} ## Acceleration Support Summary @@ -87,6 +79,6 @@ LocalAI will attempt to automatically load models which are not explicitly confi - **Quantization**: 4-bit, 5-bit, 8-bit integer quantization support - **Mixed Precision**: F16/F32 mixed precision support -Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})). +Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "advanced" %}})). - \* Only for CUDA and OpenVINO CPU/XPU acceleration. 
diff --git a/docs/content/docs/reference/nvidia-l4t.md b/docs/content/reference/nvidia-l4t.md similarity index 100% rename from docs/content/docs/reference/nvidia-l4t.md rename to docs/content/reference/nvidia-l4t.md diff --git a/docs/content/docs/whats-new.md b/docs/content/whats-new.md similarity index 88% rename from docs/content/docs/whats-new.md rename to docs/content/whats-new.md index 320d0dca198e..04b7dc91f8f7 100644 --- a/docs/content/docs/whats-new.md +++ b/docs/content/whats-new.md @@ -10,7 +10,6 @@ Release notes have been now moved completely over Github releases. You can see the release notes [here](https://github.com/mudler/LocalAI/releases). -# Older release notes ## 04-12-2023: __v2.0.0__ @@ -74,7 +73,7 @@ From this release the `llama` backend supports only `gguf` files (see {{< pr "94 ### Image generation enhancements -The [Diffusers]({{%relref "docs/features/image-generation" %}}) backend got now various enhancements, including support to generate images from images, longer prompts, and support for more kernels schedulers. See the [Diffusers]({{%relref "docs/features/image-generation" %}}) documentation for more information. +The [Diffusers]({{%relref "features/image-generation" %}}) backend got now various enhancements, including support to generate images from images, longer prompts, and support for more kernels schedulers. See the [Diffusers]({{%relref "features/image-generation" %}}) documentation for more information. ### Lora adapters @@ -137,7 +136,7 @@ The full changelog is available [here](https://github.com/go-skynet/LocalAI/rele ## πŸ”₯πŸ”₯πŸ”₯πŸ”₯ 12-08-2023: __v1.24.0__ πŸ”₯πŸ”₯πŸ”₯πŸ”₯ -This is release brings four(!) 
new additional backends to LocalAI: [🐢 Bark]({{%relref "docs/features/text-to-audio#bark" %}}), πŸ¦™ [AutoGPTQ]({{%relref "docs/features/text-generation#autogptq" %}}), [🧨 Diffusers]({{%relref "docs/features/image-generation" %}}), πŸ¦™ [exllama]({{%relref "docs/features/text-generation#exllama" %}}) and a lot of improvements! +This is release brings four(!) new additional backends to LocalAI: [🐢 Bark]({{%relref "features/text-to-audio#bark" %}}), πŸ¦™ [AutoGPTQ]({{%relref "features/text-generation#autogptq" %}}), [🧨 Diffusers]({{%relref "features/image-generation" %}}), πŸ¦™ [exllama]({{%relref "features/text-generation#exllama" %}}) and a lot of improvements! ### Major improvements: @@ -149,23 +148,23 @@ This is release brings four(!) new additional backends to LocalAI: [🐢 Bark]({ ### 🐢 Bark -[Bark]({{%relref "docs/features/text-to-audio#bark" %}}) is a text-prompted generative audio model - it combines GPT techniques to generate Audio from text. It is a great addition to LocalAI, and it's available in the container images by default. +[Bark]({{%relref "features/text-to-audio#bark" %}}) is a text-prompted generative audio model - it combines GPT techniques to generate Audio from text. It is a great addition to LocalAI, and it's available in the container images by default. It can also generate music, see the example: [lion.webm](https://user-images.githubusercontent.com/5068315/230684766-97f5ea23-ad99-473c-924b-66b6fab24289.webm) ### πŸ¦™ AutoGPTQ -[AutoGPTQ]({{%relref "docs/features/text-generation#autogptq" %}}) is an easy-to-use LLMs quantization package with user-friendly apis, based on GPTQ algorithm. +[AutoGPTQ]({{%relref "features/text-generation#autogptq" %}}) is an easy-to-use LLMs quantization package with user-friendly apis, based on GPTQ algorithm. -It is targeted mainly for GPU usage only. Check out the [ documentation]({{%relref "docs/features/text-generation" %}}) for usage. +It is targeted mainly for GPU usage only. 
Check out the [ documentation]({{%relref "features/text-generation" %}}) for usage. ### πŸ¦™ Exllama -[Exllama]({{%relref "docs/features/text-generation#exllama" %}}) is a "A more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights". It is a faster alternative to run LLaMA models on GPU.Check out the [Exllama documentation]({{%relref "docs/features/text-generation#exllama" %}}) for usage. +[Exllama]({{%relref "features/text-generation#exllama" %}}) is a "A more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights". It is a faster alternative to run LLaMA models on GPU.Check out the [Exllama documentation]({{%relref "features/text-generation#exllama" %}}) for usage. ### 🧨 Diffusers -[Diffusers]({{%relref "docs/features/image-generation#diffusers" %}}) is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Currently it is experimental, and supports generation only of images so you might encounter some issues on models which weren't tested yet. Check out the [Diffusers documentation]({{%relref "docs/features/image-generation" %}}) for usage. +[Diffusers]({{%relref "features/image-generation#diffusers" %}}) is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Currently it is experimental, and supports generation only of images so you might encounter some issues on models which weren't tested yet. Check out the [Diffusers documentation]({{%relref "features/image-generation" %}}) for usage. 
### πŸ”‘ API Keys @@ -201,11 +200,11 @@ Most notably, this release brings important fixes for CUDA (and not only): * fix: select function calls if 'name' is set in the request by {{< github "mudler" >}} in {{< pr "827" >}} * fix: symlink libphonemize in the container by {{< github "mudler" >}} in {{< pr "831" >}} -{{% alert note %}} +{{% notice note %}} -From this release [OpenAI functions]({{%relref "docs/features/openai-functions" %}}) are available in the `llama` backend. The `llama-grammar` has been deprecated. See also [OpenAI functions]({{%relref "docs/features/openai-functions" %}}). +From this release [OpenAI functions]({{%relref "features/openai-functions" %}}) are available in the `llama` backend. The `llama-grammar` has been deprecated. See also [OpenAI functions]({{%relref "features/openai-functions" %}}). -{{% /alert %}} + {{% /notice %}} The full [changelog is available here](https://github.com/go-skynet/LocalAI/releases/tag/v1.23.0) @@ -219,15 +218,15 @@ The full [changelog is available here](https://github.com/go-skynet/LocalAI/rele * feat: backends improvements by {{< github "mudler" >}} in {{< pr "778" >}} * feat(llama2): add template for chat messages by {{< github "dave-gray101" >}} in {{< pr "782" >}} -{{% alert note %}} +{{% notice note %}} -From this release to use the OpenAI functions you need to use the `llama-grammar` backend. It has been added a `llama` backend for tracking `llama.cpp` master and `llama-grammar` for the grammar functionalities that have not been merged yet upstream. See also [OpenAI functions]({{%relref "docs/features/openai-functions" %}}). Until the feature is merged we will have two llama backends. +From this release to use the OpenAI functions you need to use the `llama-grammar` backend. It has been added a `llama` backend for tracking `llama.cpp` master and `llama-grammar` for the grammar functionalities that have not been merged yet upstream. See also [OpenAI functions]({{%relref "features/openai-functions" %}}). 
Until the feature is merged we will have two llama backends. -{{% /alert %}} +{{% /notice %}} ## Huggingface embeddings -In this release is now possible to specify to LocalAI external `gRPC` backends that can be used for inferencing {{< pr "778" >}}. It is now possible to write internal backends in any language, and a `huggingface-embeddings` backend is now available in the container image to be used with https://github.com/UKPLab/sentence-transformers. See also [Embeddings]({{%relref "docs/features/embeddings" %}}). +In this release it is now possible to specify to LocalAI external `gRPC` backends that can be used for inferencing {{< pr "778" >}}. It is now possible to write internal backends in any language, and a `huggingface-embeddings` backend is now available in the container image to be used with https://github.com/UKPLab/sentence-transformers. See also [Embeddings]({{%relref "features/embeddings" %}}). ## LLaMa 2 has been released! @@ -272,7 +271,7 @@ The former, ggml-based backend has been renamed to `falcon-ggml`. ### Default pre-compiled binaries -From this release the default behavior of images has changed. Compilation is not triggered on start automatically, to recompile `local-ai` from scratch on start and switch back to the old behavior, you can set `REBUILD=true` in the environment variables. Rebuilding can be necessary if your CPU and/or architecture is old and the pre-compiled binaries are not compatible with your platform. See the [build section]({{%relref "docs/getting-started/build" %}}) for more information. +From this release the default behavior of images has changed. Compilation is not triggered on start automatically, to recompile `local-ai` from scratch on start and switch back to the old behavior, you can set `REBUILD=true` in the environment variables. Rebuilding can be necessary if your CPU and/or architecture is old and the pre-compiled binaries are not compatible with your platform.
See the [build section]({{%relref "getting-started/build" %}}) for more information. [Full release changelog](https://github.com/go-skynet/LocalAI/releases/tag/v1.21.0) @@ -282,8 +281,8 @@ From this release the default behavior of images has changed. Compilation is not ### Exciting New Features πŸŽ‰ -* Add Text-to-Audio generation with `go-piper` by {{< github "mudler" >}} in {{< pr "649" >}} See [API endpoints]({{%relref "docs/features/text-to-audio" %}}) in our documentation. -* Add gallery repository by {{< github "mudler" >}} in {{< pr "663" >}}. See [models]({{%relref "docs/features/model-gallery" %}}) for documentation. +* Add Text-to-Audio generation with `go-piper` by {{< github "mudler" >}} in {{< pr "649" >}} See [API endpoints]({{%relref "features/text-to-audio" %}}) in our documentation. +* Add gallery repository by {{< github "mudler" >}} in {{< pr "663" >}}. See [models]({{%relref "features/model-gallery" %}}) for documentation. ### Container images - Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.20.0` @@ -295,7 +294,7 @@ From this release the default behavior of images has changed. Compilation is not Updates to `llama.cpp`, `go-transformers`, `gpt4all.cpp` and `rwkv.cpp`. -The NUMA option was enabled by {{< github "mudler" >}} in {{< pr "684" >}}, along with many new parameters (`mmap`,`mmlock`, ..). See [advanced]({{%relref "docs/advanced" %}}) for the full list of parameters. +The NUMA option was enabled by {{< github "mudler" >}} in {{< pr "684" >}}, along with many new parameters (`mmap`,`mmlock`, ..). See [advanced]({{%relref "advanced" %}}) for the full list of parameters. ### Gallery repositories @@ -319,13 +318,13 @@ or a `tts` voice with: curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{ "id": "model-gallery@voice-en-us-kathleen-low" }' ``` -See also [models]({{%relref "docs/features/model-gallery" %}}) for a complete documentation. 
+See also [models]({{%relref "features/model-gallery" %}}) for a complete documentation. ### Text to Audio Now `LocalAI` uses [piper](https://github.com/rhasspy/piper) and [go-piper](https://github.com/mudler/go-piper) to generate audio from text. This is an experimental feature, and it requires `GO_TAGS=tts` to be set during build. It is enabled by default in the pre-built container images. -To setup audio models, you can use the new galleries, or setup the models manually as described in [the API section of the documentation]({{%relref "docs/features/text-to-audio" %}}). +To setup audio models, you can use the new galleries, or setup the models manually as described in [the API section of the documentation]({{%relref "features/text-to-audio" %}}). You can check the full changelog in [Github](https://github.com/go-skynet/LocalAI/releases/tag/v1.20.0) @@ -353,7 +352,7 @@ We now support a vast variety of models, while being backward compatible with pr ### New features - ✨ Added support for `falcon`-based model families (7b) ( [mudler](https://github.com/mudler) ) -- ✨ Experimental support for Metal Apple Silicon GPU - ( [mudler](https://github.com/mudler) and thanks to [Soleblaze](https://github.com/Soleblaze) for testing! ). See the [build section]({{%relref "docs/getting-started/build#Acceleration" %}}). +- ✨ Experimental support for Metal Apple Silicon GPU - ( [mudler](https://github.com/mudler) and thanks to [Soleblaze](https://github.com/Soleblaze) for testing! ). See the [build section]({{%relref "getting-started/build#Acceleration" %}}). - ✨ Support for token stream in the `/v1/completions` endpoint ( [samm81](https://github.com/samm81) ) - ✨ Added huggingface backend ( [Evilfreelancer](https://github.com/EvilFreelancer) ) - πŸ“· Stablediffusion now can output `2048x2048` images size with `esrgan`! ( [mudler](https://github.com/mudler) ) @@ -394,7 +393,7 @@ Two new projects offer now direct integration with LocalAI! 
Support for OpenCL has been added while building from sources. -You can now build LocalAI from source with `BUILD_TYPE=clblas` to have an OpenCL build. See also the [build section]({{%relref "docs/getting-started/build#Acceleration" %}}). +You can now build LocalAI from source with `BUILD_TYPE=clblas` to have an OpenCL build. See also the [build section]({{%relref "getting-started/build#Acceleration" %}}). For instructions on how to install OpenCL/CLBlast see [here](https://github.com/ggerganov/llama.cpp#blas-build). @@ -415,16 +414,13 @@ PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": `llama.cpp` models now can also automatically save the prompt cache state as well by specifying in the model YAML configuration file: ```yaml -# Enable prompt caching -# This is a file that will be used to save/load the cache. relative to the models directory. prompt_cache_path: "alpaca-cache" -# Always enable prompt cache prompt_cache_all: true ``` -See also the [advanced section]({{%relref "docs/advanced" %}}). +See also the [advanced section]({{%relref "advanced" %}}). ## Media, Blogs, Social @@ -437,7 +433,7 @@ See also the [advanced section]({{%relref "docs/advanced" %}}). - 23-05-2023: __v1.15.0__ released. `go-gpt2.cpp` backend got renamed to `go-ggml-transformers.cpp` updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. This impacts RedPajama, GptNeoX, MPT(not `gpt4all-mpt`), Dolly, GPT2 and Starcoder based models. [Binary releases available](https://github.com/go-skynet/LocalAI/releases), various fixes, including {{< pr "341" >}} . - 21-05-2023: __v1.14.0__ released. Minor updates to the `/models/apply` endpoint, `llama.cpp` backend updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. `gpt4all` is still compatible with the old format. -- 19-05-2023: __v1.13.0__ released! 
πŸ”₯πŸ”₯ updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( {{< pr "310" >}} thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API]({{%relref "docs/advanced#" %}}). +- 19-05-2023: __v1.13.0__ released! πŸ”₯πŸ”₯ updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( {{< pr "310" >}} thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API]({{%relref "advanced#" %}}). - 17-05-2023: __v1.12.0__ released! πŸ”₯πŸ”₯ Minor fixes, plus CUDA ({{< pr "258" >}}) support for `llama.cpp`-compatible models and image generation ({{< pr "272" >}}). - 16-05-2023: πŸ”₯πŸ”₯πŸ”₯ Experimental support for CUDA ({{< pr "258" >}}) in the `llama.cpp` backend and Stable diffusion CPU image generation ({{< pr "272" >}}) in `master`. diff --git a/docs/go.mod b/docs/go.mod index 35b89dd11e43..c66a63a300b2 100644 --- a/docs/go.mod +++ b/docs/go.mod @@ -1,3 +1,5 @@ -module github.com/McShelby/hugo-theme-relearn.git +module github.com/mudler/LocalAI/docs go 1.19 + +require github.com/McShelby/hugo-theme-relearn v0.0.0-20251117214752-f69a085322cc // indirect diff --git a/docs/go.sum b/docs/go.sum index e69de29bb2d1..582cbb5d1e39 100644 --- a/docs/go.sum +++ b/docs/go.sum @@ -0,0 +1,2 @@ +github.com/McShelby/hugo-theme-relearn v0.0.0-20251117214752-f69a085322cc h1:8BvuabGtqXqhT4H01SS7s0zXea0B2R5ZOFEcPugMbNg= +github.com/McShelby/hugo-theme-relearn v0.0.0-20251117214752-f69a085322cc/go.mod h1:mKQQdxZNIlLvAj8X3tMq+RzntIJSr9z7XdzuMomt0IM= diff --git a/docs/hugo.toml b/docs/hugo.toml new file mode 100644 index 000000000000..21ba751838c3 --- /dev/null +++ b/docs/hugo.toml @@ -0,0 +1,104 @@ +baseURL = 'https://localai.io/' +languageCode = 'en-GB' +defaultContentLanguage = 'en' + +title = 'LocalAI' + +# Theme configuration +theme = 'hugo-theme-relearn' + +# Enable Git info +enableGitInfo = true +enableEmoji = true + +[outputs] + home = ['html', 'rss', 'print', 'search'] + section = ['html', 'rss', 
'print'] + page = ['html', 'print'] + +[markup] + defaultMarkdownHandler = 'goldmark' + [markup.tableOfContents] + endLevel = 3 + startLevel = 1 + [markup.goldmark] + [markup.goldmark.renderer] + unsafe = true + [markup.goldmark.parser.attribute] + block = true + title = true + +[params] + # Relearn theme parameters + editURL = 'https://github.com/mudler/LocalAI/edit/master/docs/content/' + description = 'LocalAI documentation' + author = 'Ettore Di Giacinto' + showVisitedLinks = true + disableBreadcrumb = false + disableNextPrev = false + disableLandingPageButton = false + titleSeparator = '::' + disableSeoHiddenPages = true + + # Additional theme options + disableSearch = false + disableGenerator = false + disableLanguageSwitchingButton = true + + # Theme variant - dark/blue style + themeVariant = [ 'neon', 'auto' ] + + # ordersectionsby = 'weight' + +[languages] + [languages.en] + title = 'LocalAI' + languageName = 'English' + weight = 10 + contentDir = 'content' + [languages.en.params] + landingPageName = ' Home' + +# Menu shortcuts +[[languages.en.menu.shortcuts]] + name = ' GitHub' + identifier = 'github' + url = 'https://github.com/mudler/LocalAI' + weight = 10 + +[[languages.en.menu.shortcuts]] + name = ' Discord' + identifier = 'discord' + url = 'https://discord.gg/uJAeKSAGDy' + weight = 20 + +[[languages.en.menu.shortcuts]] + name = ' X/Twitter' + identifier = 'twitter' + url = 'https://twitter.com/LocalAI_API' + weight = 20 + + +# Module configuration for theme +[module] + [[module.mounts]] + source = 'content' + target = 'content' + [[module.mounts]] + source = 'static' + target = 'static' + [[module.mounts]] + source = 'layouts' + target = 'layouts' + [[module.mounts]] + source = 'data' + target = 'data' + [[module.mounts]] + source = 'assets' + target = 'assets' + [[module.mounts]] + source = '../images' + target = 'static/images' + [[module.mounts]] + source = 'i18n' + target = 'i18n' diff --git a/docs/themes/hugo-theme-relearn 
b/docs/themes/hugo-theme-relearn deleted file mode 100644 index 72f933727e11..000000000000 --- a/docs/themes/hugo-theme-relearn +++ /dev/null @@ -1 +0,0 @@ -9a020e7eadb7d8203f5b01b18756c72d94773ec9 \ No newline at end of file diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn new file mode 160000 index 000000000000..f69a085322cc --- /dev/null +++ b/docs/themes/hugo-theme-relearn @@ -0,0 +1 @@ +Subproject commit f69a085322cc70b3e14577c0823c5bc781ec6bb2 diff --git a/docs/themes/lotusdocs b/docs/themes/lotusdocs deleted file mode 160000 index 975da91e839c..000000000000 --- a/docs/themes/lotusdocs +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 975da91e839cfdb5c20fb66961468e77b8a9f8fd From cf1549776311bae97fd90219c5b90896835184c6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 19 Nov 2025 21:31:58 +0100 Subject: [PATCH 2/5] Small enhancements Signed-off-by: Ettore Di Giacinto --- docs/content/getting-started/_index.en.md | 13 + docs/content/getting-started/build.md | 177 +------------ .../getting-started/customize-model.md | 1 + docs/content/getting-started/quickstart.md | 126 +-------- docs/content/installation/_index.en.md | 33 ++- docs/content/installation/docker.md | 241 ++++++++++++++++++ docs/content/overview.md | 30 +-- 7 files changed, 298 insertions(+), 323 deletions(-) create mode 100644 docs/content/installation/docker.md diff --git a/docs/content/getting-started/_index.en.md b/docs/content/getting-started/_index.en.md index fe96a3ff2475..ba3c437cf1aa 100644 --- a/docs/content/getting-started/_index.en.md +++ b/docs/content/getting-started/_index.en.md @@ -6,3 +6,16 @@ weight = 3 icon = "rocket_launch" type = "chapter" +++ + +Welcome to LocalAI! This section will help you get up and running with LocalAI. + +## First Steps + +Before you can start using LocalAI, you'll need to install it. **Docker is the recommended installation method** for most users. 
+ +- **[Install LocalAI](/installation/)** - Choose your installation method (Docker recommended) +- **[Quickstart Guide](quickstart/)** - Get started quickly after installation +- **[Install and Run Models](models/)** - Learn how to work with AI models +- **[Try It Out](try-it-out/)** - Explore examples and use cases + +For detailed installation instructions, see the [Installation guide](/installation/). diff --git a/docs/content/getting-started/build.md b/docs/content/getting-started/build.md index b2ce0b21282f..1e885d272590 100644 --- a/docs/content/getting-started/build.md +++ b/docs/content/getting-started/build.md @@ -7,179 +7,6 @@ url = '/basics/build/' ico = "rocket_launch" +++ -### Build +Building LocalAI from source is an installation method that allows you to compile LocalAI yourself, which is useful for custom configurations, development, or when you need specific build options. -LocalAI can be built as a container image or as a single, portable binary. Note that some model architectures might require Python libraries, which are not included in the binary. - -LocalAI's extensible architecture allows you to add your own backends, which can be written in any language, and as such the container images contains also the Python dependencies to run all the available backends (for example, in order to run backends like __Diffusers__ that allows to generate images and videos from text). - -This section contains instructions on how to build LocalAI from source. 
- -#### Build LocalAI locally - -##### Requirements - -In order to build LocalAI locally, you need the following requirements: - -- Golang >= 1.21 -- GCC -- GRPC - -To install the dependencies follow the instructions below: - -{{< tabs tabTotal="3" >}} -{{% tab tabName="Apple" %}} - -Install `xcode` from the App Store - -```bash -brew install go protobuf protoc-gen-go protoc-gen-go-grpc wget -``` - -{{% /tab %}} -{{% tab tabName="Debian" %}} - -```bash -apt install golang make protobuf-compiler-grpc -``` - -After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands - -```bash -go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 -go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af - -``` - -{{% /tab %}} -{{% tab tabName="From source" %}} - -```bash -make build -``` - -{{% /tab %}} -{{< /tabs >}} - -##### Build -To build LocalAI with `make`: - -``` -git clone https://github.com/go-skynet/LocalAI -cd LocalAI -make build -``` - -This should produce the binary `local-ai` - -#### Container image - -Requirements: - -- Docker or podman, or a container engine - -In order to build the `LocalAI` container image locally you can use `docker`, for example: - -``` -docker build -t localai . -docker run localai -``` - -### Example: Build on mac - -Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`. - -The below has been tested by one mac user and found to work. 
Note that this doesn't use Docker to run the server: - -Install `xcode` from the Apps Store (needed for metalkit) - -``` -brew install abseil cmake go grpc protobuf wget protoc-gen-go protoc-gen-go-grpc - -git clone https://github.com/go-skynet/LocalAI.git - -cd LocalAI - -make build - -wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O models/phi-2.Q2_K - -cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl - -./local-ai backends install llama-cpp - -./local-ai --models-path=./models/ --debug=true - -curl http://localhost:8080/v1/models - -curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2.Q2_K", - "messages": [{"role": "user", "content": "How are you?"}], - "temperature": 0.9 - }' -``` - -#### Troubleshooting mac - -- If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store. - -- After the installation of Xcode, if you receive a xcrun error `'xcrun: error: unable to find utility "metal", not a developer tool or in PATH'`. You might have installed the Xcode command line tools before installing Xcode, the former one is pointing to an incomplete SDK. - -``` -xcode-select --print-path - -sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer -``` - -- If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256). - -- If you get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again. - -``` -brew reinstall go grpc protobuf wget - -make clean - -make build -``` - -## Build backends - -LocalAI have several backends available for installation in the backend gallery. The backends can be also built by source. 
As backends might vary from language and dependencies that they require, the documentation will provide generic guidance for few of the backends, which can be applied with some slight modifications also to the others. - -### Manually - -Typically each backend include a Makefile which allow to package the backend. - -In the LocalAI repository, for instance you can build `bark-cpp` by doing: - -``` -git clone https://github.com/go-skynet/LocalAI.git - -make -C LocalAI/backend/go/bark-cpp build package - -make -C LocalAI/backend/python/vllm -``` - -### With Docker - -Building with docker is simpler as abstracts away all the requirement, and focuses on building the final OCI images that are available in the gallery. This allows for instance also to build locally a backend and install it with LocalAI. You can refer to [Backends](https://localai.io/backends/) for general guidance on how to install and develop backends. - -In the LocalAI repository, you can build `bark-cpp` by doing: - -``` -git clone https://github.com/go-skynet/LocalAI.git - -make docker-build-bark-cpp -``` - -Note that `make` is only by convenience, in reality it just runs a simple `docker` command as: - -```bash -docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f LocalAI/backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . -``` - -Note: - -- BUILD_TYPE can be either: `cublas`, `hipblas`, `sycl_f16`, `sycl_f32`, `metal`. -- BASE_IMAGE is tested on `ubuntu:22.04` (and defaults to it) and `quay.io/go-skynet/intel-oneapi-base:latest` for intel/sycl +For complete build instructions, see the [Build from Source](/installation/build/) documentation in the Installation section. 
diff --git a/docs/content/getting-started/customize-model.md b/docs/content/getting-started/customize-model.md index c5829469bc72..cd5af2a0b898 100644 --- a/docs/content/getting-started/customize-model.md +++ b/docs/content/getting-started/customize-model.md @@ -2,6 +2,7 @@ disableToc = false title = "Customizing the Model" weight = 5 +url = "/docs/getting-started/customize-model" icon = "rocket_launch" +++ diff --git a/docs/content/getting-started/quickstart.md b/docs/content/getting-started/quickstart.md index 49e86257fb0c..c4f2872af741 100644 --- a/docs/content/getting-started/quickstart.md +++ b/docs/content/getting-started/quickstart.md @@ -18,86 +18,19 @@ If you are exposing LocalAI remotely, make sure you protect the API endpoints ad ## Quickstart -### Using the Bash Installer +Before you begin, you'll need to install LocalAI. **Docker is the recommended installation method** for most users. -```bash -curl https://localai.io/install.sh | sh -``` - -The bash installer, if docker is not detected, will install automatically as a systemd service. - -See [Installer]({{% relref "advanced/installer" %}}) for all the supported options - -### macOS Download - -For MacOS a DMG is available: - - - Download LocalAI for macOS - - -> Note: the DMGs are not signed by Apple and shows quarantined after install. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244 - -### Run with docker - -{{% notice tip %}} -**Docker Run vs Docker Start** - -- `docker run` creates and starts a new container. If a container with the same name already exists, this command will fail. -- `docker start` starts an existing container that was previously created with `docker run`. - -If you've already run LocalAI before and want to start it again, use: `docker start -i local-ai` - {{% /notice %}} - -The following commands will automatically start with a web interface and a Rest API on port `8080`. 
- -#### CPU only image: - -```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest -``` - -#### NVIDIA GPU Images: - -```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 - -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 - -docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64 -``` - -#### AMD GPU Images (ROCm): - -```bash -docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas -``` - -#### Intel GPU Images (oneAPI): - -```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel -``` - -#### Vulkan GPU Images: - -```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan -``` - -#### AIO Images (pre-downloaded models): - -```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu - -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 +### Installation -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 +Choose the installation method that best suits your needs: -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel +- **[Docker Installation](/installation/docker/)** (Recommended) - Works on all platforms, easiest setup +- **[macOS Installation](/installation/macos/)** - Download the DMG application +- **[Linux Installation](/installation/linux/)** - Use the one-liner script or binaries +- **[Kubernetes Installation](/installation/kubernetes/)** - Deploy on Kubernetes clusters +- **[Build from Source](/installation/build/)** - Build LocalAI from source code -docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas -``` +For detailed 
installation instructions, see the [Installation guide](/installation/). ### Downloading models on start @@ -117,47 +50,6 @@ local-ai run oci://localai/phi-2:latest For a full list of options, you can run LocalAI with `--help` or refer to the [Installer Options]({{% relref "advanced/installer" %}}) documentation. -Binaries can also be [manually downloaded]({{% relref "reference/binaries" %}}). - -## Using Homebrew on MacOS - -{{% notice tip %}} -The Homebrew formula currently doesn't have the same options than the bash script - {{% /notice %}} - -You can install Homebrew's [LocalAI](https://formulae.brew.sh/formula/localai) with the following command: - -``` -brew install localai -``` - - -## Using Container Images or Kubernetes - -LocalAI is available as a container image compatible with various container engines such as Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Docker Hub](https://hub.docker.com/r/localai/localai). - -For detailed instructions, see [Using container images]({{% relref "getting-started/container-images" %}}). For Kubernetes deployment, see [Run with Kubernetes]({{% relref "getting-started/kubernetes" %}}). - -## Running LocalAI with All-in-One (AIO) Images - -> _Already have a model file? Skip to [Run models manually]({{% relref "getting-started/models" %}})_. - -LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the features of LocalAI. If pre-configured models are not required, you can use the standard [images]({{% relref "getting-started/container-images" %}}). - -These images are available for both CPU and GPU environments. AIO images are designed for ease of use and require no additional configuration. - -It is recommended to use AIO images if you prefer not to configure the models manually or via the web interface. 
For running specific models, refer to the [manual method]({{% relref "getting-started/models" %}}). - -The AIO images come pre-configured with the following features: -- Text to Speech (TTS) -- Speech to Text -- Function calling -- Large Language Models (LLM) for text generation -- Image generation -- Embedding server - -For instructions on using AIO images, see [Using container images]({{% relref "getting-started/container-images#all-in-one-images" %}}). - ## Using LocalAI and the full stack with LocalAGI LocalAI is part of the Local family stack, along with LocalAGI and LocalRecall. diff --git a/docs/content/installation/_index.en.md b/docs/content/installation/_index.en.md index c95df6287fc2..a2fd7e475baa 100644 --- a/docs/content/installation/_index.en.md +++ b/docs/content/installation/_index.en.md @@ -6,19 +6,36 @@ type: chapter icon: download --- +LocalAI can be installed in multiple ways depending on your platform and preferences. -LocalAI can be installed in multiple ways depending on your platform and preferences. Choose the installation method that best suits your needs: +{{% notice tip %}} +**Recommended: Docker Installation** -- **[macOS](macos/)**: Download and install the DMG application for macOS -- **[Docker](docker/)**: Run LocalAI using Docker containers (recommended for most users) -- **[Linux](linux/)**: Install on Linux using the one-liner script or binaries -- **[Kubernetes](kubernetes/)**: Deploy LocalAI on Kubernetes clusters -- **[Build from Source](build/)**: Build LocalAI from source code +**Docker is the recommended installation method** for most users as it works across all platforms (Linux, macOS, Windows) and provides the easiest setup experience. It's the fastest way to get started with LocalAI. +{{% /notice %}} + +## Installation Methods + +Choose the installation method that best suits your needs: + +1. **[Docker](docker/)** ⭐ **Recommended** - Works on all platforms, easiest setup +2. 
**[macOS](macos/)** - Download and install the DMG application +3. **[Linux](linux/)** - Install on Linux using the one-liner script or binaries +4. **[Kubernetes](kubernetes/)** - Deploy LocalAI on Kubernetes clusters +5. **[Build from Source](build/)** - Build LocalAI from source code ## Quick Start -The fastest way to get started depends on your platform: +**Recommended: Docker (works on all platforms)** +```bash +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu +``` + +This will start LocalAI with pre-configured models. The API will be available at `http://localhost:8080`. + +For other platforms: - **macOS**: Download the [DMG](macos/) - **Linux**: Use the `curl https://localai.io/install.sh | sh` [one-liner](linux/) -- **Docker**: Run `docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu` + +For detailed instructions, see the [Docker installation guide](docker/). diff --git a/docs/content/installation/docker.md b/docs/content/installation/docker.md new file mode 100644 index 000000000000..ce32317e2e66 --- /dev/null +++ b/docs/content/installation/docker.md @@ -0,0 +1,241 @@ +--- +title: "Docker Installation" +description: "Install LocalAI using Docker containers - the recommended installation method" +weight: 1 +url: '/installation/docker/' +--- + +{{% notice tip %}} +**Recommended Installation Method** + +Docker is the recommended way to install LocalAI as it works across all platforms (Linux, macOS, Windows) and provides the easiest setup experience. +{{% /notice %}} + +LocalAI provides Docker images that work with Docker, Podman, and other container engines. These images are available on [Docker Hub](https://hub.docker.com/r/localai/localai) and [Quay.io](https://quay.io/repository/go-skynet/local-ai). 
+ +## Prerequisites + +Before you begin, ensure you have Docker or Podman installed: + +- [Install Docker Desktop](https://docs.docker.com/get-docker/) (Mac, Windows, Linux) +- [Install Podman](https://podman.io/getting-started/installation) (Linux alternative) +- [Install Docker Engine](https://docs.docker.com/engine/install/) (Linux servers) + +## Quick Start + +The fastest way to get started is with the CPU image: + +```bash +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest +``` + +This will: +- Start LocalAI with the standard CPU image (no models pre-configured; use an AIO image if you want models included) +- Make the API available at `http://localhost:8080` + +{{% notice tip %}} +**Docker Run vs Docker Start** + +- `docker run` creates and starts a new container. If a container with the same name already exists, this command will fail. +- `docker start` starts an existing container that was previously created with `docker run`. + +If you've already run LocalAI before and want to start it again, use: `docker start -i local-ai` +{{% /notice %}} + +## Image Types + +LocalAI provides several image types to suit different needs: + +### All-in-One (AIO) Images + +**Recommended for beginners** - These images come pre-configured with models and backends, ready to use immediately.
+ +#### CPU Image + +```bash +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu +``` + +#### GPU Images + +**NVIDIA CUDA 12:** +```bash +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 +``` + +**NVIDIA CUDA 11:** +```bash +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 +``` + +**AMD GPU (ROCm):** +```bash +docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas +``` + +**Intel GPU:** +```bash +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel +``` + +### Standard Images + +Standard images don't include pre-configured models. Use these if you want to configure models manually. + +#### CPU Image + +```bash +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest +``` + +#### GPU Images + +**NVIDIA CUDA 12:** +```bash +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 +``` + +**NVIDIA CUDA 11:** +```bash +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 +``` + +**AMD GPU (ROCm):** +```bash +docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas +``` + +**Intel GPU:** +```bash +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel +``` + +**Vulkan:** +```bash +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan +``` + +**NVIDIA Jetson (L4T ARM64):** +```bash +docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64 +``` + +## Using Docker Compose + +For a more manageable setup, especially with persistent volumes, use Docker Compose: + +```yaml +version: "3.9" +services: + api: + image: localai/localai:latest-aio-cpu + # For GPU support, use one 
of: + # image: localai/localai:latest-aio-gpu-nvidia-cuda-12 + # image: localai/localai:latest-aio-gpu-nvidia-cuda-11 + # image: localai/localai:latest-aio-gpu-hipblas + # image: localai/localai:latest-aio-gpu-intel + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] + interval: 1m + timeout: 20m + retries: 5 + ports: + - 8080:8080 + environment: + - DEBUG=true + volumes: + - ./models:/models:cached + # For NVIDIA GPUs, uncomment: + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] +``` + +Save this as `docker-compose.yml` and run: + +```bash +docker compose up -d +``` + +## Persistent Storage + +To persist models and configurations, mount a volume: + +```bash +docker run -ti --name local-ai -p 8080:8080 \ + -v $PWD/models:/models \ + localai/localai:latest-aio-cpu +``` + +Or use a named volume: + +```bash +docker volume create localai-models +docker run -ti --name local-ai -p 8080:8080 \ + -v localai-models:/models \ + localai/localai:latest-aio-cpu +``` + +## What's Included in AIO Images + +All-in-One images come pre-configured with: + +- **Text Generation**: LLM models for chat and completion +- **Image Generation**: Stable Diffusion models +- **Text to Speech**: TTS models +- **Speech to Text**: Whisper models +- **Embeddings**: Vector embedding models +- **Function Calling**: Support for OpenAI-compatible function calling + +The AIO images use OpenAI-compatible model names (like `gpt-4`, `gpt-4-vision-preview`) but are backed by open-source models. See the [container images documentation](/getting-started/container-images/#all-in-one-images) for the complete mapping. + +## Next Steps + +After installation: + +1. Access the WebUI at `http://localhost:8080` +2. Check available models: `curl http://localhost:8080/v1/models` +3. [Install additional models](/getting-started/models/) +4. 
[Try out examples](/getting-started/try-it-out/) + +## Advanced Configuration + +For detailed information about: +- All available image tags and versions +- Advanced Docker configuration options +- Custom image builds +- Backend management + +See the [Container Images documentation](/getting-started/container-images/). + +## Troubleshooting + +### Container won't start + +- Check Docker is running: `docker ps` +- Check port 8080 is available: `netstat -an | grep 8080` (Linux/Mac) +- View logs: `docker logs local-ai` + +### GPU not detected + +- Ensure Docker has GPU access: `docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi` +- For NVIDIA: Install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) +- For AMD: Ensure devices are accessible: `ls -la /dev/kfd /dev/dri` + +### Models not downloading + +- Check internet connection +- Verify disk space: `df -h` +- Check Docker logs for errors: `docker logs local-ai` + +## See Also + +- [Container Images Reference](/getting-started/container-images/) - Complete image reference +- [Install Models](/getting-started/models/) - Install and configure models +- [GPU Acceleration](/features/gpu-acceleration/) - GPU setup and optimization +- [Kubernetes Installation](/installation/kubernetes/) - Deploy on Kubernetes + diff --git a/docs/content/overview.md b/docs/content/overview.md index 79b109597e08..850991b98d91 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -5,6 +5,7 @@ toc = true description = "What is LocalAI?" tags = ["Beginners"] categories = [""] +url = "/docs/overview" author = "Ettore Di Giacinto" icon = "info" +++ @@ -50,34 +51,17 @@ LocalAI is more than just a single tool - it's a complete ecosystem: ## Getting Started +LocalAI can be installed in several ways. **Docker is the recommended installation method** for most users as it provides the easiest setup and works across all platforms. 
-### macOS Download +### Recommended: Docker Installation -You can use the DMG application for Mac: - - - Download LocalAI for macOS - - -> Note: the DMGs are not signed by Apple shows as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244 - -## Docker - -You can use Docker for a quick start: +The quickest way to get started with LocalAI is using Docker: ```bash -docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest ``` -For more detailed installation options and configurations, see our [Getting Started guide](/basics/getting_started/). - -## One-liner - -The fastest way to get started is with our one-line installer (Linux): - -```bash -curl https://localai.io/install.sh | sh -``` +For complete installation instructions including Docker, macOS, Linux, Kubernetes, and building from source, see the [Installation guide](/installation/). ## Key Features @@ -103,7 +87,7 @@ LocalAI is a community-driven project. You can: Ready to dive in? Here are some recommended next steps: -1. [Install LocalAI](/basics/getting_started/) +1. **[Install LocalAI](/installation/)** - Start with [Docker installation](/installation/docker/) (recommended) or choose another method 2. [Explore available models](https://models.localai.io) 3. [Model compatibility](/model-compatibility/) 4. 
[Try out examples](https://github.com/mudler/LocalAI-examples) From f0df5d3d758d3afe64bb46c530311c21ad30b16f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 19 Nov 2025 21:50:12 +0100 Subject: [PATCH 3/5] Enhancements --- docs/content/_index.md | 62 ++++++- docs/content/advanced/installer.md | 52 ------ docs/content/features/GPU-acceleration.md | 2 +- docs/content/features/image-generation.md | 2 +- docs/content/getting-started/_index.en.md | 19 ++- .../getting-started/container-images.md | 157 +++++++++--------- docs/content/getting-started/models.md | 18 +- docs/content/getting-started/quickstart.md | 22 ++- docs/content/installation/_index.en.md | 4 +- docs/content/installation/build.md | 8 +- docs/content/installation/docker.md | 50 +++--- docs/content/installation/linux.md | 40 +++++ docs/content/whats-new.md | 4 +- docs/data/landing.yaml | 2 +- docs/go.mod | 5 +- docs/go.sum | 4 + 16 files changed, 251 insertions(+), 200 deletions(-) delete mode 100644 docs/content/advanced/installer.md diff --git a/docs/content/_index.md b/docs/content/_index.md index 2260afe8f043..e4f8e7eb2e9b 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -1,5 +1,61 @@ +++ -linktitle = 'Purple Pulpo' -title = 'The Purple Pulpo' -type = 'home' +title = "LocalAI" +description = "The free, OpenAI, Anthropic alternative. Your All-in-One Complete AI Stack" +type = "home" +++ + +**The free, OpenAI, Anthropic alternative. Your All-in-One Complete AI Stack** - Run powerful language models, autonomous agents, and document intelligence **locally** on your hardware. + +**No cloud, no limits, no compromise.** + +{{% notice tip %}} +**⭐ 33.3k+ stars on GitHub!** + +**Drop-in replacement for OpenAI API** - modular suite of tools that work seamlessly together or independently. 
+ +Start with **[LocalAI](https://localai.io)**'s OpenAI-compatible API, extend with **[LocalAGI](https://github.com/mudler/LocalAGI)**'s autonomous agents, and enhance with **[LocalRecall](https://github.com/mudler/LocalRecall)**'s semantic search - all running locally on your hardware. + +**Open Source** MIT Licensed. +{{% /notice %}} + +## Why Choose LocalAI? + +**OpenAI API Compatible** - Run AI models locally with our modular ecosystem. From language models to autonomous agents and semantic search, build your complete AI stack without the cloud. + +### Key Features + +- **LLM Inferencing**: LocalAI is a free, **Open Source** OpenAI alternative. Run **LLMs**, generate **images**, **audio** and more **locally** with consumer grade hardware. +- **Agentic-first**: Extend LocalAI with LocalAGI, an autonomous AI agent platform that runs locally, no coding required. Build and deploy autonomous agents with ease. +- **Memory and Knowledge base**: Extend LocalAI with LocalRecall, a local REST API for semantic search and memory management. Perfect for AI applications. +- **OpenAI Compatible**: Drop-in replacement for OpenAI API. Compatible with existing applications and libraries. +- **No GPU Required**: Run on consumer grade hardware. No need for expensive GPUs or cloud services. +- **Multiple Models**: Support for various model families including LLMs, image generation, and audio models. Supports multiple backends for inferencing. +- **Privacy Focused**: Keep your data local. No data leaves your machine, ensuring complete privacy. +- **Easy Setup**: Simple installation and configuration. Get started in minutes with Binaries installation, Docker, Podman, Kubernetes or local installation. +- **Community Driven**: Active community support and regular updates. Contribute and help shape the future of LocalAI.
+ +## Quick Start + +**Docker is the recommended installation method** for most users: + +```bash +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest +``` + +For complete installation instructions, see the [Installation guide](/installation/). + +## Get Started + +1. **[Install LocalAI](/installation/)** - Choose your installation method (Docker recommended) +2. **[Quickstart Guide](/getting-started/quickstart/)** - Get started quickly after installation +3. **[Install and Run Models](/getting-started/models/)** - Learn how to work with AI models +4. **[Try It Out](/getting-started/try-it-out/)** - Explore examples and use cases + +## Learn More + +- [Explore available models](https://models.localai.io) +- [Model compatibility](/model-compatibility/) +- [Try out examples](https://github.com/mudler/LocalAI-examples) +- [Join the community](https://discord.gg/uJAeKSAGDy) +- [Check the LocalAI Github repository](https://github.com/mudler/LocalAI) +- [Check the LocalAGI Github repository](https://github.com/mudler/LocalAGI) diff --git a/docs/content/advanced/installer.md b/docs/content/advanced/installer.md deleted file mode 100644 index 9ac6f2d039f3..000000000000 --- a/docs/content/advanced/installer.md +++ /dev/null @@ -1,52 +0,0 @@ - -+++ -disableToc = false -title = "Installer options" -weight = 24 -+++ - -An installation script is available for quick and hassle-free installations, streamlining the setup process for new users. - -Can be used with the following command: -```bash -curl https://localai.io/install.sh | sh -``` - -Installation can be configured with Environment variables, for example: - -```bash -curl https://localai.io/install.sh | VAR=value sh -``` - -List of the Environment Variables: -| Environment Variable | Description | -|----------------------|--------------------------------------------------------------| -| **DOCKER_INSTALL** | Set to "true" to enable the installation of Docker images. 
| -| **USE_AIO** | Set to "true" to use the all-in-one LocalAI Docker image. | -| **USE_VULKAN** | Set to "true" to use Vulkan GPU support. | -| **API_KEY** | Specify an API key for accessing LocalAI, if required. | -| **PORT** | Specifies the port on which LocalAI will run (default is 8080). | -| **THREADS** | Number of processor threads the application should use. Defaults to the number of logical cores minus one. | -| **VERSION** | Specifies the version of LocalAI to install. Defaults to the latest available version. | -| **MODELS_PATH** | Directory path where LocalAI models are stored (default is /usr/share/local-ai/models). | -| **P2P_TOKEN** | Token to use for the federation or for starting workers see [documentation]({{%relref "features/distributed_inferencing" %}}) | -| **WORKER** | Set to "true" to make the instance a worker (p2p token is required see [documentation]({{%relref "features/distributed_inferencing" %}})) | -| **FEDERATED** | Set to "true" to share the instance with the federation (p2p token is required see [documentation]({{%relref "features/distributed_inferencing" %}})) | -| **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (p2p token is required see [documentation]({{%relref "features/distributed_inferencing" %}})) | - -## Image Selection - -The installer will automatically detect your GPU and select the appropriate image. By default, it uses the standard images without extra Python dependencies. You can customize the image selection using the following environment variables: - -- `USE_AIO=true`: Use all-in-one images that include all dependencies -- `USE_VULKAN=true`: Use Vulkan GPU support instead of vendor-specific GPU support - -## Uninstallation - -To uninstall, run: - -``` -curl https://localai.io/install.sh | sh -s -- --uninstall -``` - -We are looking into improving the installer, and as this is a first iteration any feedback is welcome! 
Open up an [issue](https://github.com/mudler/LocalAI/issues/new/choose) if something doesn't work for you! \ No newline at end of file diff --git a/docs/content/features/GPU-acceleration.md b/docs/content/features/GPU-acceleration.md index d8bfd7d1897c..4d3ba5f3a07f 100644 --- a/docs/content/features/GPU-acceleration.md +++ b/docs/content/features/GPU-acceleration.md @@ -12,7 +12,7 @@ Section under construction This section contains instruction on how to use LocalAI with GPU acceleration. {{% notice icon="⚑" context="warning" %}} -For acceleration for AMD or Metal HW is still in development, for additional details see the [build]({{%relref "getting-started/build#Acceleration" %}}) +Acceleration for AMD or Metal HW is still in development; for additional details see the [build]({{%relref "installation/build#Acceleration" %}}) {{% /notice %}} ## Automatic Backend Detection diff --git a/docs/content/features/image-generation.md b/docs/content/features/image-generation.md index c0340b61b1bc..a6d50cf9169c 100644 --- a/docs/content/features/image-generation.md +++ b/docs/content/features/image-generation.md @@ -99,7 +99,7 @@ diffusers: #### Dependencies -This is an extra backend - in the container is already available and there is nothing to do for the setup. Do not use *core* images (ending with `-core`). If you are building manually, see the [build instructions]({{%relref "getting-started/build" %}}). +This is an extra backend - in the container it is already available and there is nothing to do for the setup. Do not use *core* images (ending with `-core`). If you are building manually, see the [build instructions]({{%relref "installation/build" %}}).
#### Model setup diff --git a/docs/content/getting-started/_index.en.md b/docs/content/getting-started/_index.en.md index ba3c437cf1aa..3ee30ffa584b 100644 --- a/docs/content/getting-started/_index.en.md +++ b/docs/content/getting-started/_index.en.md @@ -7,15 +7,18 @@ icon = "rocket_launch" type = "chapter" +++ -Welcome to LocalAI! This section will help you get up and running with LocalAI. +Welcome to LocalAI! This section covers everything you need to know **after installation** to start using LocalAI effectively. -## First Steps +{{% notice tip %}} +**Haven't installed LocalAI yet?** -Before you can start using LocalAI, you'll need to install it. **Docker is the recommended installation method** for most users. +See the [Installation guide](/installation/) to install LocalAI first. **Docker is the recommended installation method** for most users. +{{% /notice %}} -- **[Install LocalAI](/installation/)** - Choose your installation method (Docker recommended) -- **[Quickstart Guide](quickstart/)** - Get started quickly after installation -- **[Install and Run Models](models/)** - Learn how to work with AI models -- **[Try It Out](try-it-out/)** - Explore examples and use cases +## What's in This Section -For detailed installation instructions, see the [Installation guide](/installation/). 
+- **[Quickstart Guide](quickstart/)** - Get started quickly with your first API calls and model downloads +- **[Install and Run Models](models/)** - Learn how to install, configure, and run AI models +- **[Customize Models](customize-model/)** - Customize model configurations and prompt templates +- **[Container Images Reference](container-images/)** - Complete reference for available Docker images +- **[Try It Out](try-it-out/)** - Explore examples and use cases diff --git a/docs/content/getting-started/container-images.md b/docs/content/getting-started/container-images.md index 134cdea8fe2f..a4e94d8b8f05 100644 --- a/docs/content/getting-started/container-images.md +++ b/docs/content/getting-started/container-images.md @@ -10,7 +10,7 @@ LocalAI provides a variety of images to support different environments. These im All-in-One images comes with a pre-configured set of models and backends, standard images instead do not have any model pre-configured and installed. -For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "getting-started/build" %}}). +For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "installation/build" %}}). {{% notice tip %}} @@ -35,6 +35,83 @@ Before you begin, ensure you have a container engine installed if you are not us {{% /notice %}} +## Standard container images + +Standard container images do not have pre-installed models. Use these if you want to configure models manually. 
+ +{{< tabs >}} +{{% tab title="Vanilla / CPU Images" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-----------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master` | +| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}` | + +{{% /tab %}} + +{{% tab title="GPU Images CUDA 11" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-11` | `localai/localai:master-gpu-nvidia-cuda-11` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-11` | + +{{% /tab %}} + +{{% tab title="GPU Images CUDA 12" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-12` | `localai/localai:master-gpu-nvidia-cuda-12` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-12` | + +{{% /tab %}} + +{{% tab title="Intel GPU" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel` | `localai/localai:master-gpu-intel` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel` | 
`localai/localai:latest-gpu-intel` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel` | `localai/localai:{{< version >}}-gpu-intel` | + +{{% /tab %}} + +{{% tab title="AMD GPU" %}} + +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-hipblas` | `localai/localai:master-gpu-hipblas` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-hipblas` | `localai/localai:latest-gpu-hipblas` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-hipblas` | `localai/localai:{{< version >}}-gpu-hipblas` | + +{{% /tab %}} + +{{% tab title="Vulkan Images" %}} +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-vulkan` | `localai/localai:master-vulkan` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-vulkan` | `localai/localai:latest-gpu-vulkan` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-vulkan` | `localai/localai:{{< version >}}-vulkan` | +{{% /tab %}} + +{{% tab title="Nvidia Linux for tegra" %}} + +These images are compatible with Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. For more information, see the [Nvidia L4T guide]({{%relref "reference/nvidia-l4t" %}}). 
+ +| Description | Quay | Docker Hub | +| --- | --- |-------------------------------------------------------------| +| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64` | `localai/localai:master-nvidia-l4t-arm64` | +| Latest tag | `quay.io/go-skynet/local-ai:latest-nvidia-l4t-arm64` | `localai/localai:latest-nvidia-l4t-arm64` | +| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-nvidia-l4t-arm64` | `localai/localai:{{< version >}}-nvidia-l4t-arm64` | + +{{% /tab %}} + +{{< /tabs >}} + ## All-in-one images All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size. @@ -138,84 +215,6 @@ The AIO Images are inheriting the same environment variables as the base images | `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` | | `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "getting-started/models" %}})) | - -## Standard container images - -Standard container images do not have pre-installed models. 
- -{{< tabs tabTotal="8" >}} -{{% tab tabName="Vanilla / CPU Images" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-----------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master` | -| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}` | - -{{% /tab %}} - -{{% tab tabName="GPU Images CUDA 11" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-11` | `localai/localai:master-gpu-nvidia-cuda-11` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-11` | - -{{% /tab %}} - -{{% tab tabName="GPU Images CUDA 12" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-12` | `localai/localai:master-gpu-nvidia-cuda-12` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-12` | - -{{% /tab %}} - -{{% tab tabName="Intel GPU" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel` | `localai/localai:master-gpu-intel` | -| Latest tag | 
`quay.io/go-skynet/local-ai:latest-gpu-intel` | `localai/localai:latest-gpu-intel` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel` | `localai/localai:{{< version >}}-gpu-intel` | - -{{% /tab %}} - -{{% tab tabName="AMD GPU" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-hipblas` | `localai/localai:master-gpu-hipblas` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-hipblas` | `localai/localai:latest-gpu-hipblas` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-hipblas` | `localai/localai:{{< version >}}-gpu-hipblas` | - -{{% /tab %}} - -{{% tab tabName="Vulkan Images" %}} -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-vulkan` | `localai/localai:master-vulkan` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-vulkan` | `localai/localai:latest-gpu-vulkan` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-vulkan` | `localai/localai:{{< version >}}-vulkan` | -{{% /tab %}} - -{{% tab tabName="Nvidia Linux for tegra" %}} - -These images are compatible with Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. For more information, see the [Nvidia L4T guide]({{%relref "reference/nvidia-l4t" %}}). 
- -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64` | `localai/localai:master-nvidia-l4t-arm64` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-nvidia-l4t-arm64` | `localai/localai:latest-nvidia-l4t-arm64` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-nvidia-l4t-arm64` | `localai/localai:{{< version >}}-nvidia-l4t-arm64` | - -{{% /tab %}} - -{{< /tabs >}} - ## See Also - [GPU acceleration]({{%relref "features/gpu-acceleration" %}}) diff --git a/docs/content/getting-started/models.md b/docs/content/getting-started/models.md index c3aef9607f17..e96f5b41ca08 100644 --- a/docs/content/getting-started/models.md +++ b/docs/content/getting-started/models.md @@ -64,8 +64,8 @@ Follow these steps to manually run models using LocalAI: 3. **Run LocalAI**: Choose one of the following methods to run LocalAI: -{{< tabs tabTotal="5" >}} -{{% tab tabName="Docker" %}} +{{< tabs >}} +{{% tab title="Docker" %}} ```bash mkdir models @@ -109,12 +109,12 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso ``` {{% notice note %}} -- If running on Apple Silicon (ARM), it is **not** recommended to run on Docker due to emulation. Follow the [build instructions]({{% relref "getting-started/build" %}}) to use Metal acceleration for full GPU support. +- If running on Apple Silicon (ARM), it is **not** recommended to run on Docker due to emulation. Follow the [build instructions]({{% relref "installation/build" %}}) to use Metal acceleration for full GPU support. - If you are running on Apple x86_64, you can use Docker without additional gain from building it from source. 
{{% /notice %}} {{% /tab %}} -{{% tab tabName="Docker Compose" %}} +{{% tab title="Docker Compose" %}} ```bash git clone https://github.com/go-skynet/LocalAI @@ -145,12 +145,12 @@ For other Docker images, please refer to the table in [Getting Started](https:// Note: If you are on Windows, ensure the project is on the Linux filesystem to avoid slow model loading. For more information, see the [Microsoft Docs](https://learn.microsoft.com/en-us/windows/wsl/filesystems). {{% /tab %}} -{{% tab tabName="Kubernetes" %}} +{{% tab title="Kubernetes" %}} -For Kubernetes deployment, see the [Kubernetes section]({{% relref "getting-started/kubernetes" %}}). +For Kubernetes deployment, see the [Kubernetes installation guide]({{% relref "installation/kubernetes" %}}). {{% /tab %}} -{{% tab tabName="From Binary" %}} +{{% tab title="From Binary" %}} LocalAI binary releases are available on [GitHub](https://github.com/go-skynet/LocalAI/releases). @@ -167,9 +167,9 @@ Press "Allow Anyway." {{% /notice %}} {{% /tab %}} -{{% tab tabName="From Source" %}} +{{% tab title="From Source" %}} -For instructions on building LocalAI from source, see the [Build Section]({{% relref "getting-started/build" %}}). +For instructions on building LocalAI from source, see the [Build from Source guide]({{% relref "installation/build" %}}). {{% /tab %}} {{< /tabs >}} diff --git a/docs/content/getting-started/quickstart.md b/docs/content/getting-started/quickstart.md index c4f2872af741..e3999ae0ae25 100644 --- a/docs/content/getting-started/quickstart.md +++ b/docs/content/getting-started/quickstart.md @@ -18,19 +18,17 @@ If you are exposing LocalAI remotely, make sure you protect the API endpoints ad ## Quickstart -Before you begin, you'll need to install LocalAI. **Docker is the recommended installation method** for most users. +This guide assumes you have already [installed LocalAI](/installation/). If you haven't installed it yet, see the [Installation guide](/installation/) first. 
-### Installation +### Starting LocalAI -Choose the installation method that best suits your needs: +Once installed, start LocalAI. For Docker installations: -- **[Docker Installation](/installation/docker/)** (Recommended) - Works on all platforms, easiest setup -- **[macOS Installation](/installation/macos/)** - Download the DMG application -- **[Linux Installation](/installation/linux/)** - Use the one-liner script or binaries -- **[Kubernetes Installation](/installation/kubernetes/)** - Deploy on Kubernetes clusters -- **[Build from Source](/installation/build/)** - Build LocalAI from source code +```bash +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest +``` -For detailed installation instructions, see the [Installation guide](/installation/). +The API will be available at `http://localhost:8080`. ### Downloading models on start @@ -48,7 +46,7 @@ local-ai run oci://localai/phi-2:latest **Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration]({{% relref "features/gpu-acceleration#automatic-backend-detection" %}}). {{% /notice %}} -For a full list of options, you can run LocalAI with `--help` or refer to the [Installer Options]({{% relref "advanced/installer" %}}) documentation. +For a full list of options, you can run LocalAI with `--help` or refer to the [Linux Installation guide]({{% relref "installation/linux" %}}) for installer configuration options. ## Using LocalAI and the full stack with LocalAGI @@ -101,9 +99,9 @@ There is much more to explore with LocalAI! 
You can run any model from Hugging F Explore additional resources and community contributions: -- [Installer Options]({{% relref "advanced/installer" %}}) +- [Linux Installation Options]({{% relref "installation/linux" %}}) - [Run from Container images]({{% relref "getting-started/container-images" %}}) - [Examples to try from the CLI]({{% relref "getting-started/try-it-out" %}}) -- [Build LocalAI and the container image]({{% relref "getting-started/build" %}}) +- [Build LocalAI from source]({{% relref "installation/build" %}}) - [Run models manually]({{% relref "getting-started/models" %}}) - [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples) diff --git a/docs/content/installation/_index.en.md b/docs/content/installation/_index.en.md index a2fd7e475baa..4f04ebd95d27 100644 --- a/docs/content/installation/_index.en.md +++ b/docs/content/installation/_index.en.md @@ -29,10 +29,10 @@ Choose the installation method that best suits your needs: **Recommended: Docker (works on all platforms)** ```bash -docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu +docker run -p 8080:8080 --name local-ai -ti localai/localai:latest ``` -This will start LocalAI with pre-configured models. The API will be available at `http://localhost:8080`. +This will start LocalAI. The API will be available at `http://localhost:8080`. For images with pre-configured models, see [All-in-One images](/getting-started/container-images/#all-in-one-images). 
For other platforms: - **macOS**: Download the [DMG](macos/) diff --git a/docs/content/installation/build.md b/docs/content/installation/build.md index 7e50d30f4b72..ec39416e0c76 100644 --- a/docs/content/installation/build.md +++ b/docs/content/installation/build.md @@ -27,8 +27,8 @@ In order to build LocalAI locally, you need the following requirements: To install the dependencies follow the instructions below: -{{< tabs tabTotal="3" >}} -{{% tab tabName="Apple" %}} +{{< tabs >}} +{{% tab title="Apple" %}} Install `xcode` from the App Store @@ -37,7 +37,7 @@ brew install go protobuf protoc-gen-go protoc-gen-go-grpc wget ``` {{% /tab %}} -{{% tab tabName="Debian" %}} +{{% tab title="Debian" %}} ```bash apt install golang make protobuf-compiler-grpc @@ -52,7 +52,7 @@ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f1 ``` {{% /tab %}} -{{% tab tabName="From source" %}} +{{% tab title="From source" %}} ```bash make build diff --git a/docs/content/installation/docker.md b/docs/content/installation/docker.md index ce32317e2e66..0024427c53c8 100644 --- a/docs/content/installation/docker.md +++ b/docs/content/installation/docker.md @@ -30,7 +30,7 @@ docker run -p 8080:8080 --name local-ai -ti localai/localai:latest ``` This will: -- Start LocalAI with pre-configured models +- Start LocalAI (you'll need to install models separately) - Make the API available at `http://localhost:8080` {{% notice tip %}} @@ -46,78 +46,78 @@ If you've already run LocalAI before and want to start it again, use: `docker st LocalAI provides several image types to suit different needs: -### All-in-One (AIO) Images +### Standard Images -**Recommended for beginners** - These images come pre-configured with models and backends, ready to use immediately. +Standard images don't include pre-configured models. Use these if you want to configure models manually. 
#### CPU Image ```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest ``` #### GPU Images **NVIDIA CUDA 12:** ```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 ``` **NVIDIA CUDA 11:** ```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 ``` **AMD GPU (ROCm):** ```bash -docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas +docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas ``` **Intel GPU:** ```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel ``` -### Standard Images +**Vulkan:** +```bash +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan +``` -Standard images don't include pre-configured models. Use these if you want to configure models manually. +**NVIDIA Jetson (L4T ARM64):** +```bash +docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64 +``` + +### All-in-One (AIO) Images + +**Recommended for beginners** - These images come pre-configured with models and backends, ready to use immediately. 
#### CPU Image ```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu ``` #### GPU Images **NVIDIA CUDA 12:** ```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` **NVIDIA CUDA 11:** ```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 +docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 ``` **AMD GPU (ROCm):** ```bash -docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas +docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas ``` **Intel GPU:** ```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel -``` - -**Vulkan:** -```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan -``` - -**NVIDIA Jetson (L4T ARM64):** -```bash -docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64 +docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel ``` ## Using Docker Compose diff --git a/docs/content/installation/linux.md b/docs/content/installation/linux.md index 2992c55ceb50..cdacac5a981c 100644 --- a/docs/content/installation/linux.md +++ b/docs/content/installation/linux.md @@ -20,6 +20,46 @@ This script will: - Set up the necessary configuration - Start LocalAI automatically +### Installer Configuration Options + +The installer can be configured using environment variables: + +```bash +curl https://localai.io/install.sh | VAR=value sh +``` + +#### Environment Variables + +| Environment Variable | Description | 
+|----------------------|-------------| +| **DOCKER_INSTALL** | Set to `"true"` to enable the installation of Docker images | +| **USE_AIO** | Set to `"true"` to use the all-in-one LocalAI Docker image | +| **USE_VULKAN** | Set to `"true"` to use Vulkan GPU support | +| **API_KEY** | Specify an API key for accessing LocalAI, if required | +| **PORT** | Specifies the port on which LocalAI will run (default is 8080) | +| **THREADS** | Number of processor threads the application should use. Defaults to the number of logical cores minus one | +| **VERSION** | Specifies the version of LocalAI to install. Defaults to the latest available version | +| **MODELS_PATH** | Directory path where LocalAI models are stored (default is `/usr/share/local-ai/models`) | +| **P2P_TOKEN** | Token to use for the federation or for starting workers. See [distributed inferencing documentation]({{%relref "features/distributed_inferencing" %}}) | +| **WORKER** | Set to `"true"` to make the instance a worker (p2p token is required) | +| **FEDERATED** | Set to `"true"` to share the instance with the federation (p2p token is required) | +| **FEDERATED_SERVER** | Set to `"true"` to run the instance as a federation server which forwards requests to the federation (p2p token is required) | + +#### Image Selection + +The installer will automatically detect your GPU and select the appropriate image. By default, it uses the standard images without extra Python dependencies. 
You can customize the image selection: + +- `USE_AIO=true`: Use all-in-one images that include all dependencies +- `USE_VULKAN=true`: Use Vulkan GPU support instead of vendor-specific GPU support + +#### Uninstallation + +To uninstall LocalAI installed via the script: + +```bash +curl https://localai.io/install.sh | sh -s -- --uninstall +``` + ## Manual Installation ### Download Binary diff --git a/docs/content/whats-new.md b/docs/content/whats-new.md index 04b7dc91f8f7..f3b57c17898b 100644 --- a/docs/content/whats-new.md +++ b/docs/content/whats-new.md @@ -271,7 +271,7 @@ The former, ggml-based backend has been renamed to `falcon-ggml`. ### Default pre-compiled binaries -From this release the default behavior of images has changed. Compilation is not triggered on start automatically, to recompile `local-ai` from scratch on start and switch back to the old behavior, you can set `REBUILD=true` in the environment variables. Rebuilding can be necessary if your CPU and/or architecture is old and the pre-compiled binaries are not compatible with your platform. See the [build section]({{%relref "getting-started/build" %}}) for more information. +From this release the default behavior of images has changed. Compilation is not triggered on start automatically, to recompile `local-ai` from scratch on start and switch back to the old behavior, you can set `REBUILD=true` in the environment variables. Rebuilding can be necessary if your CPU and/or architecture is old and the pre-compiled binaries are not compatible with your platform. See the [build section]({{%relref "installation/build" %}}) for more information. 
[Full release changelog](https://github.com/go-skynet/LocalAI/releases/tag/v1.21.0) @@ -352,7 +352,7 @@ We now support a vast variety of models, while being backward compatible with pr ### New features - ✨ Added support for `falcon`-based model families (7b) ( [mudler](https://github.com/mudler) ) -- ✨ Experimental support for Metal Apple Silicon GPU - ( [mudler](https://github.com/mudler) and thanks to [Soleblaze](https://github.com/Soleblaze) for testing! ). See the [build section]({{%relref "getting-started/build#Acceleration" %}}). +- ✨ Experimental support for Metal Apple Silicon GPU - ( [mudler](https://github.com/mudler) and thanks to [Soleblaze](https://github.com/Soleblaze) for testing! ). See the [build section]({{%relref "installation/build#Acceleration" %}}). - ✨ Support for token stream in the `/v1/completions` endpoint ( [samm81](https://github.com/samm81) ) - ✨ Added huggingface backend ( [Evilfreelancer](https://github.com/EvilFreelancer) ) - πŸ“· Stablediffusion now can output `2048x2048` images size with `esrgan`! 
( [mudler](https://github.com/mudler) ) diff --git a/docs/data/landing.yaml b/docs/data/landing.yaml index 95a55f465e0f..1376f16cc6aa 100644 --- a/docs/data/landing.yaml +++ b/docs/data/landing.yaml @@ -40,7 +40,7 @@ hero: ctaButton: icon: rocket_launch btnText: "Get Started" - url: "/basics/getting_started/" + url: "/installation/" cta2Button: icon: code btnText: "View on GitHub" diff --git a/docs/go.mod b/docs/go.mod index c66a63a300b2..b8e4544d7d36 100644 --- a/docs/go.mod +++ b/docs/go.mod @@ -2,4 +2,7 @@ module github.com/mudler/LocalAI/docs go 1.19 -require github.com/McShelby/hugo-theme-relearn v0.0.0-20251117214752-f69a085322cc // indirect +require ( + github.com/McShelby/hugo-theme-relearn v0.0.0-20251117214752-f69a085322cc // indirect + github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20400 // indirect +) diff --git a/docs/go.sum b/docs/go.sum index 582cbb5d1e39..a891abc884ad 100644 --- a/docs/go.sum +++ b/docs/go.sum @@ -1,2 +1,6 @@ github.com/McShelby/hugo-theme-relearn v0.0.0-20251117214752-f69a085322cc h1:8BvuabGtqXqhT4H01SS7s0zXea0B2R5ZOFEcPugMbNg= github.com/McShelby/hugo-theme-relearn v0.0.0-20251117214752-f69a085322cc/go.mod h1:mKQQdxZNIlLvAj8X3tMq+RzntIJSr9z7XdzuMomt0IM= +github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20400 h1:L6+F22i76xmeWWwrtijAhUbf3BiRLmpO5j34bgl1ggU= +github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20400/go.mod h1:uekq1D4ebeXgduLj8VIZy8TgfTjrLdSl6nPtVczso78= +github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2 v2.21100.20000/go.mod h1:mFberT6ZtcchrsDtfvJM7aAH2bDKLdOnruUHl0hlapI= +github.com/twbs/bootstrap v5.3.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= From 4ae3ab7c1dae6c8688c2759b14fd283f50d5bff6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 19 Nov 2025 21:53:43 +0100 Subject: [PATCH 4/5] Default to zen-dark Signed-off-by: Ettore Di Giacinto --- docs/hugo.toml | 2 +- docs/layouts/partials/menu-footer.html | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) 
create mode 100644 docs/layouts/partials/menu-footer.html diff --git a/docs/hugo.toml b/docs/hugo.toml index 21ba751838c3..0f415f3f8ba5 100644 --- a/docs/hugo.toml +++ b/docs/hugo.toml @@ -46,7 +46,7 @@ enableEmoji = true disableLanguageSwitchingButton = true # Theme variant - dark/blue style - themeVariant = [ 'neon', 'auto' ] + themeVariant = [ 'zen-dark', 'neon', 'auto' ] # ordersectionsby = 'weight' diff --git a/docs/layouts/partials/menu-footer.html b/docs/layouts/partials/menu-footer.html new file mode 100644 index 000000000000..d2a822617156 --- /dev/null +++ b/docs/layouts/partials/menu-footer.html @@ -0,0 +1,2 @@ +

Β© 2023-2025 Ettore Di Giacinto

+ From 8dd5820e46e9e96fe633c5ff4fb24c4258d17b11 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 19 Nov 2025 22:17:51 +0100 Subject: [PATCH 5/5] fixups Signed-off-by: Ettore Di Giacinto --- docs/content/features/GPU-acceleration.md | 2 +- docs/content/features/_index.en.md | 30 +++++++++++++ docs/content/features/audio-to-text.md | 2 +- docs/content/features/backends.md | 2 +- docs/content/features/constrained_grammars.md | 44 +++++++++++++++++-- .../features/distributed_inferencing.md | 4 +- docs/content/features/embeddings.md | 2 +- docs/content/features/gpt-vision.md | 3 +- docs/content/features/image-generation.md | 2 +- docs/content/features/mcp.md | 2 +- docs/content/features/model-gallery.md | 1 - docs/content/features/object-detection.md | 2 +- docs/content/features/openai-functions.md | 2 +- docs/content/features/reranker.md | 2 +- docs/content/features/stores.md | 1 - docs/content/features/text-generation.md | 2 +- docs/content/features/text-to-audio.md | 2 +- 17 files changed, 84 insertions(+), 21 deletions(-) diff --git a/docs/content/features/GPU-acceleration.md b/docs/content/features/GPU-acceleration.md index 4d3ba5f3a07f..7c619962b450 100644 --- a/docs/content/features/GPU-acceleration.md +++ b/docs/content/features/GPU-acceleration.md @@ -123,7 +123,7 @@ llama_init_from_file: kv self size = 512.00 MB There are a limited number of tested configurations for ROCm systems however most newer deditated GPU consumer grade devices seem to be supported under the current ROCm6 implementation. -Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "getting-started/build#Acceleration" %}}) documentation. 
+Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "installation/build#Acceleration" %}}) documentation. ### Requirements diff --git a/docs/content/features/_index.en.md b/docs/content/features/_index.en.md index 3999d8df1953..98be6d04ab54 100644 --- a/docs/content/features/_index.en.md +++ b/docs/content/features/_index.en.md @@ -6,3 +6,33 @@ icon = "lightbulb" type = "chapter" url = "/features/" +++ + +LocalAI provides a comprehensive set of features for running AI models locally. This section covers all the capabilities and functionalities available in LocalAI. + +## Core Features + +- **[Text Generation](text-generation/)** - Generate text with GPT-compatible models using various backends +- **[Image Generation](image-generation/)** - Create images with Stable Diffusion and other diffusion models +- **[Audio Processing](audio-to-text/)** - Transcribe audio to text and generate speech from text +- **[Embeddings](embeddings/)** - Generate vector embeddings for semantic search and RAG applications +- **[GPT Vision](gpt-vision/)** - Analyze and understand images with vision-language models + +## Advanced Features + +- **[OpenAI Functions](openai-functions/)** - Use function calling and tools API with local models +- **[Constrained Grammars](constrained_grammars/)** - Control model output format with BNF grammars +- **[GPU Acceleration](GPU-acceleration/)** - Optimize performance with GPU support +- **[Distributed Inference](distributed_inferencing/)** - Scale inference across multiple nodes +- **[Model Context Protocol (MCP)](mcp/)** - Enable agentic capabilities with MCP integration + +## Specialized Features + +- **[Object Detection](object-detection/)** - Detect and locate objects in images +- 
**[Reranker](reranker/)** - Improve retrieval accuracy with cross-encoder models +- **[Stores](stores/)** - Vector similarity search for embeddings +- **[Model Gallery](model-gallery/)** - Browse and install pre-configured models +- **[Backends](backends/)** - Learn about available backends and how to manage them + +## Getting Started + +To start using these features, make sure you have [LocalAI installed](/installation/) and have [downloaded some models](/getting-started/models/). Then explore the feature pages above to learn how to use each capability. diff --git a/docs/content/features/audio-to-text.md b/docs/content/features/audio-to-text.md index db8aa1e74091..2b91a8071e66 100644 --- a/docs/content/features/audio-to-text.md +++ b/docs/content/features/audio-to-text.md @@ -41,4 +41,4 @@ curl http://localhost:8080/v1/audio/transcriptions -H "Content-Type: multipart/f ## Result {"text":"My fellow Americans, this day has brought terrible news and great sadness to our country.At nine o'clock this morning, Mission Control in Houston lost contact with our Space ShuttleColumbia.A short time later, debris was seen falling from the skies above Texas.The Columbia's lost.There are no survivors.One board was a crew of seven.Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain DavidBrown, Commander William McCool, Dr. 
Kultna Shavla, and Elon Ramon, a colonel in the IsraeliAir Force.These men and women assumed great risk in the service to all humanity.In an age when spaceflight has come to seem almost routine, it is easy to overlook thedangers of travel by rocket and the difficulties of navigating the fierce outer atmosphere ofthe Earth.These astronauts knew the dangers, and they faced them willingly, knowing they had a highand noble purpose in life.Because of their courage and daring and idealism, we will miss them all the more.All Americans today are thinking as well of the families of these men and women who havebeen given this sudden shock and grief.You're not alone.Our entire nation agrees with you, and those you loved will always have the respect andgratitude of this country.The cause in which they died will continue.Mankind has led into the darkness beyond our world by the inspiration of discovery andthe longing to understand.Our journey into space will go on.In the skies today, we saw destruction and tragedy.As farther than we can see, there is comfort and hope.In the words of the prophet Isaiah, \"Lift your eyes and look to the heavens who createdall these, he who brings out the starry hosts one by one and calls them each by name.\"Because of his great power and mighty strength, not one of them is missing.The same creator who names the stars also knows the names of the seven souls we mourntoday.The crew of the shuttle Columbia did not return safely to Earth yet we can pray that all aresafely home.May God bless the grieving families and may God continue to bless America.[BLANK_AUDIO]"} -``` +``` \ No newline at end of file diff --git a/docs/content/features/backends.md b/docs/content/features/backends.md index 9da6409935bc..ef71a87075b8 100644 --- a/docs/content/features/backends.md +++ b/docs/content/features/backends.md @@ -1,5 +1,5 @@ --- -title: "Backends" +title: "βš™οΈ Backends" description: "Learn how to use, manage, and develop backends in LocalAI" weight: 4 url: 
"/backends/" diff --git a/docs/content/features/constrained_grammars.md b/docs/content/features/constrained_grammars.md index d91a22fead54..33d50c900ba5 100644 --- a/docs/content/features/constrained_grammars.md +++ b/docs/content/features/constrained_grammars.md @@ -10,12 +10,12 @@ url = "/features/constrained_grammars/" The `chat` endpoint supports the `grammar` parameter, which allows users to specify a grammar in Backus-Naur Form (BNF). This feature enables the Large Language Model (LLM) to generate outputs adhering to a user-defined schema, such as `JSON`, `YAML`, or any other format that can be defined using BNF. For more details about BNF, see [Backus-Naur Form on Wikipedia](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form). {{% notice note %}} -**Compatibility Notice:** This feature is only supported by models that use the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend. For a complete list of compatible models, refer to the [Model Compatibility](docs/reference/compatibility-table) page. For technical details, see the related pull requests: [PR #1773](https://github.com/ggerganov/llama.cpp/pull/1773) and [PR #1887](https://github.com/ggerganov/llama.cpp/pull/1887). +**Compatibility Notice:** This feature is only supported by models that use the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend. For a complete list of compatible models, refer to the [Model Compatibility]({{%relref "reference/compatibility-table" %}}) page. For technical details, see the related pull requests: [PR #1773](https://github.com/ggerganov/llama.cpp/pull/1773) and [PR #1887](https://github.com/ggerganov/llama.cpp/pull/1887). {{% /notice %}} ## Setup -To use this feature, follow the installation and setup instructions on the [LocalAI Functions](docs/features/openai-functions) page. Ensure that your local setup meets all the prerequisites specified for the llama.cpp backend. 
+To use this feature, follow the installation and setup instructions on the [LocalAI Functions]({{%relref "features/openai-functions" %}}) page. Ensure that your local setup meets all the prerequisites specified for the llama.cpp backend. ## πŸ’‘ Usage Example @@ -31,4 +31,42 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso }' ``` -In this example, the `grammar` parameter is set to a simple choice between "yes" and "no", ensuring that the model's response adheres strictly to one of these options regardless of the context. \ No newline at end of file +In this example, the `grammar` parameter is set to a simple choice between "yes" and "no", ensuring that the model's response adheres strictly to one of these options regardless of the context. + +### Example: JSON Output Constraint + +You can also use grammars to enforce JSON output format: + +```bash +curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "gpt-4", + "messages": [{"role": "user", "content": "Generate a person object with name and age"}], + "grammar": "root ::= \"{\" \"\\\"name\\\":\" string \",\\\"age\\\":\" number \"}\"\nstring ::= \"\\\"\" [a-z]+ \"\\\"\"\nnumber ::= [0-9]+" +}' +``` + +### Example: YAML Output Constraint + +Similarly, you can enforce YAML format: + +```bash +curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "gpt-4", + "messages": [{"role": "user", "content": "Generate a YAML list of fruits"}], + "grammar": "root ::= \"fruits:\" newline (\" - \" string newline)+\nstring ::= [a-z]+\nnewline ::= \"\\n\"" +}' +``` + +## Advanced Usage + +For more complex grammars, you can define multi-line BNF rules. 
The grammar parser supports: +- Alternation (`|`) +- Repetition (`*`, `+`) +- Optional elements (`?`) +- Character classes (`[a-z]`) +- String literals (`"text"`) + +## Related Features + +- [OpenAI Functions]({{%relref "features/openai-functions" %}}) - Function calling with structured outputs +- [Text Generation]({{%relref "features/text-generation" %}}) - General text generation capabilities \ No newline at end of file diff --git a/docs/content/features/distributed_inferencing.md b/docs/content/features/distributed_inferencing.md index 48f99477f53a..9b42dea59d2f 100644 --- a/docs/content/features/distributed_inferencing.md +++ b/docs/content/features/distributed_inferencing.md @@ -151,6 +151,4 @@ LOCALAI_P2P_LOGLEVEL=debug LOCALAI_P2P_LIB_LOGLEVEL=debug LOCALAI_P2P_ENABLE_LIM - If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file. - Only a single model is supported currently. - Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun. -- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343) - - +- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343) \ No newline at end of file diff --git a/docs/content/features/embeddings.md b/docs/content/features/embeddings.md index b00a45897eb9..192c4c87b3b0 100644 --- a/docs/content/features/embeddings.md +++ b/docs/content/features/embeddings.md @@ -73,4 +73,4 @@ curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json ## πŸ’‘ Examples -- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/mudler/LocalAI-examples/tree/main/query_data). +- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/mudler/LocalAI-examples/tree/main/query_data). 
\ No newline at end of file diff --git a/docs/content/features/gpt-vision.md b/docs/content/features/gpt-vision.md index 52911aaf1619..1652aac1a834 100644 --- a/docs/content/features/gpt-vision.md +++ b/docs/content/features/gpt-vision.md @@ -34,5 +34,4 @@ Grammars and function tools can be used as well in conjunction with vision APIs: All-in-One images have already shipped the llava model as `gpt-4-vision-preview`, so no setup is needed in this case. -To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI-examples/blob/main/configurations/llava/llava.yaml). - +To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI-examples/blob/main/configurations/llava/llava.yaml). \ No newline at end of file diff --git a/docs/content/features/image-generation.md b/docs/content/features/image-generation.md index a6d50cf9169c..a566bcb4a55c 100644 --- a/docs/content/features/image-generation.md +++ b/docs/content/features/image-generation.md @@ -342,4 +342,4 @@ diffusers: ```bash (echo -n '{"prompt": "spiderman surfing","size": "512x512","model":"txt2vid"}') | curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations -``` +``` \ No newline at end of file diff --git a/docs/content/features/mcp.md b/docs/content/features/mcp.md index c8f869f72ef6..26134d3df844 100644 --- a/docs/content/features/mcp.md +++ b/docs/content/features/mcp.md @@ -1,5 +1,5 @@ +++ -title = "Model Context Protocol (MCP)" +title = "πŸ”— Model Context Protocol (MCP)" weight = 20 toc = true description = "Agentic capabilities with Model Context Protocol integration" diff --git a/docs/content/features/model-gallery.md b/docs/content/features/model-gallery.md index 6847f4451387..5542a107c4aa 100644 --- a/docs/content/features/model-gallery.md +++ b/docs/content/features/model-gallery.md @@ -2,7 +2,6 @@ +++ disableToc = false title = "πŸ–ΌοΈ Model 
gallery" - weight = 18 url = '/models' +++ diff --git a/docs/content/features/object-detection.md b/docs/content/features/object-detection.md index 560310d9828e..8a124324026a 100644 --- a/docs/content/features/object-detection.md +++ b/docs/content/features/object-detection.md @@ -188,4 +188,4 @@ Additional object detection models and backends will be added to this category i - [🎨 Image generation]({{%relref "features/image-generation" %}}): Generate images with AI - [πŸ“– Text generation]({{%relref "features/text-generation" %}}): Generate text with language models - [πŸ” GPT Vision]({{%relref "features/gpt-vision" %}}): Analyze images with language models -- [πŸš€ GPU acceleration]({{%relref "features/GPU-acceleration" %}}): Optimize performance with GPU acceleration \ No newline at end of file +- [πŸš€ GPU acceleration]({{%relref "features/GPU-acceleration" %}}): Optimize performance with GPU acceleration diff --git a/docs/content/features/openai-functions.md b/docs/content/features/openai-functions.md index f4a20ab6ca42..e77b00b3da82 100644 --- a/docs/content/features/openai-functions.md +++ b/docs/content/features/openai-functions.md @@ -261,4 +261,4 @@ Grammars and function tools can be used as well in conjunction with vision APIs: ## πŸ’‘ Examples -A full e2e example with `docker-compose` is available [here](https://github.com/mudler/LocalAI-examples/tree/main/functions). +A full e2e example with `docker-compose` is available [here](https://github.com/mudler/LocalAI-examples/tree/main/functions). 
\ No newline at end of file diff --git a/docs/content/features/reranker.md b/docs/content/features/reranker.md index 8cdb18367e5f..b178594750f3 100644 --- a/docs/content/features/reranker.md +++ b/docs/content/features/reranker.md @@ -50,4 +50,4 @@ and test it with: ], "top_n": 3 }' -``` +``` \ No newline at end of file diff --git a/docs/content/features/stores.md b/docs/content/features/stores.md index 18fc750c07ae..f17490d38a19 100644 --- a/docs/content/features/stores.md +++ b/docs/content/features/stores.md @@ -2,7 +2,6 @@ +++ disableToc = false title = "πŸ’Ύ Stores" - weight = 18 url = '/stores' +++ diff --git a/docs/content/features/text-generation.md b/docs/content/features/text-generation.md index 2c339182f8db..3831f8e1fa5f 100644 --- a/docs/content/features/text-generation.md +++ b/docs/content/features/text-generation.md @@ -383,4 +383,4 @@ template: completion: | {{.Input}} -``` +``` \ No newline at end of file diff --git a/docs/content/features/text-to-audio.md b/docs/content/features/text-to-audio.md index 97df0fe12aad..d10c8ad09f9d 100644 --- a/docs/content/features/text-to-audio.md +++ b/docs/content/features/text-to-audio.md @@ -213,4 +213,4 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ }' ``` -If a `response_format` is added in the query (other than `wav`) and ffmpeg is not available, the call will fail. +If a `response_format` is added in the query (other than `wav`) and ffmpeg is not available, the call will fail. \ No newline at end of file