Merge branch 'main' into configurable-ctx
cebtenzzre committed Dec 13, 2023
2 parents 2c90389 + 3acbef1 commit 2054338
Showing 6 changed files with 36 additions and 42 deletions.
2 changes: 0 additions & 2 deletions gpt4all-backend/CMakeLists.txt
@@ -114,8 +114,6 @@ add_library(llmodel
llmodel_c.h llmodel_c.cpp
dlhandle.h
)
-target_link_libraries(llmodel PRIVATE ggml-mainline-default)
-target_compile_definitions(llmodel PRIVATE GGML_BUILD_VARIANT="default")
target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")

set_target_properties(llmodel PROPERTIES
23 changes: 22 additions & 1 deletion gpt4all-backend/llmodel.cpp
@@ -82,7 +82,7 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
static auto* libs = new std::vector<Implementation>([] () {
std::vector<Implementation> fres;

std::string impl_name_re = "(bert|llama|gptj|llamamodel-mainline)";
std::string impl_name_re = "(bert|gptj|llamamodel-mainline)";
if (requires_avxonly()) {
impl_name_re += "-avxonly";
} else {
@@ -192,6 +192,27 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
return fres;
}

+LLModel *LLModel::Implementation::constructCpuLlama() {
+const LLModel::Implementation *impl = nullptr;
+for (const auto &i : implementationList()) {
+if (i.m_buildVariant == "metal" || i.m_modelType != "LLaMA") continue;
+impl = &i;
+}
+if (!impl) {
+std::cerr << "LLModel ERROR: Could not find CPU LLaMA implementation\n";
+return nullptr;
+}
+auto fres = impl->m_construct();
+fres->m_implementation = impl;
+return fres;
+}
+
+std::vector<LLModel::GPUDevice> LLModel::Implementation::availableGPUDevices() {
+static LLModel *cpuLlama = LLModel::Implementation::constructCpuLlama(); // (memory leak)
+if (cpuLlama) { return cpuLlama->availableGPUDevices(0); }
+return {};
+}

void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
s_implementations_search_path = path;
}
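For context, a minimal caller-side sketch (not part of this commit) of the new static entry point added above. It assumes llmodel.h is on the include path and that the implementation libraries are discoverable via the default search path; the GPUDevice fields it prints are the ones declared in the llmodel.h hunk below.

#include "llmodel.h"

#include <cstdio>
#include <vector>

int main() {
    // Device enumeration now goes through the statically constructed CPU LLaMA
    // implementation (see constructCpuLlama above); an empty vector is returned
    // if no suitable implementation or device is found.
    std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
    for (const LLModel::GPUDevice &d : devices) {
        std::printf("[%d] %s (%s), heap %zu MiB\n",
                    d.index, d.name.c_str(), d.vendor.c_str(), d.heapSize / (1024 * 1024));
    }
    return 0;
}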
22 changes: 12 additions & 10 deletions gpt4all-backend/llmodel.h
@@ -15,6 +15,15 @@ class Dlhandle;
class LLModel {
public:
using Token = int32_t;

+struct GPUDevice {
+int index = 0;
+int type = 0;
+size_t heapSize = 0;
+std::string name;
+std::string vendor;
+};

class Implementation {
public:
Implementation(Dlhandle&&);
@@ -29,14 +38,16 @@ class LLModel {
static const std::vector<Implementation>& implementationList();
static const Implementation *implementation(const char *fname, const std::string& buildVariant);
static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto", int n_ctx = 2048);
+static std::vector<GPUDevice> availableGPUDevices();
static void setImplementationsSearchPath(const std::string& path);
static const std::string& implementationsSearchPath();

+private:
+static LLModel *constructCpuLlama();

bool (*m_magicMatch)(const char *fname);
LLModel *(*m_construct)();

-private:
std::string_view m_modelType;
std::string_view m_buildVariant;
Dlhandle *m_dlhandle;
@@ -58,14 +69,6 @@ class LLModel {
int32_t n_last_batch_tokens = 0;
};

-struct GPUDevice {
-int index = 0;
-int type = 0;
-size_t heapSize = 0;
-std::string name;
-std::string vendor;
-};

explicit LLModel() {}
virtual ~LLModel() {}

@@ -106,7 +109,6 @@ class LLModel {
virtual bool initializeGPUDevice(int /*device*/) { return false; }
virtual bool hasGPUDevice() { return false; }
virtual bool usingGPUDevice() { return false; }
-static std::vector<GPUDevice> availableGPUDevices();

protected:
// These are pure virtual because subclasses need to implement as the default implementation of
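Migration note for callers (illustrative, not from the commit): the static LLModel::availableGPUDevices() removed above is replaced by the Implementation-level function, while the struct is still referred to as LLModel::GPUDevice because it only moved within the class. The mysettings.cpp hunk below shows the real call site.

// Before this commit (declaration removed above):
// std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();

// After this commit:
std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();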
27 changes: 0 additions & 27 deletions gpt4all-backend/llmodel_shared.cpp
@@ -4,10 +4,6 @@
#include <iostream>
#include <unordered_set>

-#ifdef GGML_USE_KOMPUTE
-#include "ggml-vulkan.h"
-#endif

void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
size_t i = 0;
promptCtx.n_past = 0;
@@ -177,26 +173,3 @@ std::vector<float> LLModel::embedding(const std::string &/*text*/)
}
return std::vector<float>();
}

-std::vector<LLModel::GPUDevice> LLModel::availableGPUDevices()
-{
-#if defined(GGML_USE_KOMPUTE)
-std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(0);
-
-std::vector<LLModel::GPUDevice> devices;
-for(const auto& vkDevice : vkDevices) {
-LLModel::GPUDevice device;
-device.index = vkDevice.index;
-device.type = vkDevice.type;
-device.heapSize = vkDevice.heapSize;
-device.name = vkDevice.name;
-device.vendor = vkDevice.vendor;
-
-devices.push_back(device);
-}
-
-return devices;
-#else
-return std::vector<LLModel::GPUDevice>();
-#endif
-}
2 changes: 1 addition & 1 deletion gpt4all-chat/CMakeLists.txt
@@ -173,7 +173,7 @@ else()
PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
endif()
target_link_libraries(chat
-PRIVATE llmodel bert-default)
+PRIVATE llmodel)

set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
2 changes: 1 addition & 1 deletion gpt4all-chat/mysettings.cpp
@@ -64,7 +64,7 @@ MySettings::MySettings()
{
QSettings::setDefaultFormat(QSettings::IniFormat);

-std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();
+std::vector<LLModel::GPUDevice> devices = LLModel::Implementation::availableGPUDevices();
QVector<QString> deviceList{ "Auto" };
for (LLModel::GPUDevice &d : devices)
deviceList << QString::fromStdString(d.name);
