
chat: fix blank device in UI after model switch and improve Mixpanel reporting #2409

Merged · 9 commits · Jun 26, 2024
2 changes: 1 addition & 1 deletion gpt4all-backend/llama.cpp-mainline
Submodule llama.cpp-mainline updated 2 files
+32 −15 llama.cpp
+3 −4 llama.h
40 changes: 18 additions & 22 deletions gpt4all-backend/llamamodel.cpp
@@ -371,6 +371,11 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
d_ptr->model_params.main_gpu = d_ptr->device;
d_ptr->model_params.n_gpu_layers = ngl;
d_ptr->model_params.split_mode = LLAMA_SPLIT_MODE_NONE;
} else {
#ifdef GGML_USE_CUDA
std::cerr << "Llama ERROR: CUDA loadModel was called without a device\n";
return false;
#endif // GGML_USE_CUDA
}
#elif defined(GGML_USE_METAL)
(void)ngl;
@@ -383,15 +388,17 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
// always fully offload on Metal
// TODO(cebtenzzre): use this parameter to allow using more than 53% of system RAM to load a model
d_ptr->model_params.n_gpu_layers = 100;
#else
#else // !KOMPUTE && !VULKAN && !CUDA && !METAL
(void)ngl;
#endif

d_ptr->model = llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params);
d_ptr->model = llama_load_model_from_file(modelPath.c_str(), d_ptr->model_params);
if (!d_ptr->model) {
fflush(stdout);
#ifndef GGML_USE_CUDA
d_ptr->device = -1;
d_ptr->deviceName.clear();
#endif
std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
return false;
}
@@ -434,8 +441,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
llama_free_model(d_ptr->model);
d_ptr->model = nullptr;
#ifndef GGML_USE_CUDA
d_ptr->device = -1;
d_ptr->deviceName.clear();
#endif
return false;
}

@@ -723,31 +732,16 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) const
#endif
}

bool LLamaModel::hasGPUDevice() const
{
#if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
return d_ptr->device != -1;
#else
return false;
#endif
}

bool LLamaModel::usingGPUDevice() const
{
bool hasDevice;
if (!d_ptr->model)
return false;

bool usingGPU = llama_model_using_gpu(d_ptr->model);
#ifdef GGML_USE_KOMPUTE
hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
assert(!hasDevice || ggml_vk_has_device());
#elif defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
#elif defined(GGML_USE_METAL)
hasDevice = true;
#else
hasDevice = false;
assert(!usingGPU || ggml_vk_has_device());
#endif

return hasDevice;
return usingGPU;
}

const char *LLamaModel::backendName() const
@@ -760,6 +754,8 @@ const char *LLamaModel::gpuDeviceName() const
if (usingGPUDevice()) {
#if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
return d_ptr->deviceName.c_str();
#elif defined(GGML_USE_METAL)
return "Metal";
#endif
}
return nullptr;
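
With hasGPUDevice() removed above, callers lean on usingGPUDevice(), backendName(), and gpuDeviceName() alone. A minimal caller-side sketch, assuming a loaded LLModel; the reportDevice() helper is hypothetical and not part of this PR:

#include <iostream>
#include "llmodel.h"

// Hypothetical helper (illustration only): report where a loaded model is running.
static void reportDevice(const LLModel &model)
{
    const char *backend = model.backendName();   // e.g. "cpu", "kompute", "cuda", or "metal"
    const char *device  = model.gpuDeviceName(); // nullptr when no GPU device is in use
    if (model.usingGPUDevice() && device)
        std::cout << "offloading to " << device << " via the " << backend << " backend\n";
    else
        std::cout << "running on the " << backend << " backend without GPU offload\n";
}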
1 change: 0 additions & 1 deletion gpt4all-backend/llamamodel_impl.h
@@ -34,7 +34,6 @@ class LLamaModel : public LLModel {
std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0) const override;
bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
bool hasGPUDevice() const override;
bool usingGPUDevice() const override;
const char *backendName() const override;
const char *gpuDeviceName() const override;
21 changes: 14 additions & 7 deletions gpt4all-backend/llmodel.h
@@ -2,6 +2,7 @@
#define LLMODEL_H

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
@@ -57,23 +58,30 @@ class LLModel {
backend(backend), index(index), type(type), heapSize(heapSize), name(std::move(name)),
vendor(std::move(vendor)) {}

std::string selectionName() const { return m_backendNames.at(backend) + ": " + name; }
std::string reportedName() const { return name + " (" + m_backendNames.at(backend) + ")"; }
std::string selectionName() const
{
assert(backend == "cuda"s || backend == "kompute"s);
return backendName() + ": " + name;
}

std::string backendName() const { return backendIdToName(backend); }

static std::string backendIdToName(const std::string &backend) { return s_backendNames.at(backend); }

static std::string updateSelectionName(const std::string &name) {
if (name == "Auto" || name == "CPU" || name == "Metal")
return name;
auto it = std::find_if(m_backendNames.begin(), m_backendNames.end(), [&name](const auto &entry) {
auto it = std::find_if(s_backendNames.begin(), s_backendNames.end(), [&name](const auto &entry) {
return name.starts_with(entry.second + ": ");
});
if (it != m_backendNames.end())
if (it != s_backendNames.end())
return name;
return "Vulkan: " + name; // previously, there were only Vulkan devices
}

private:
static inline const std::unordered_map<std::string, std::string> m_backendNames {
{"cuda", "CUDA"}, {"kompute", "Vulkan"},
static inline const std::unordered_map<std::string, std::string> s_backendNames {
{"cpu", "CPU"}, {"metal", "Metal"}, {"cuda", "CUDA"}, {"kompute", "Vulkan"},
};
};

@@ -196,7 +204,6 @@ class LLModel {
return false;
}

virtual bool hasGPUDevice() const { return false; }
virtual bool usingGPUDevice() const { return false; }
virtual const char *backendName() const { return "cpu"; }
virtual const char *gpuDeviceName() const { return nullptr; }
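
The map rename to s_backendNames above also adds the cpu and metal entries, and updateSelectionName() upgrades device selections saved by older releases. A minimal sketch of the intended mapping, assuming GPUDevice remains a public member of LLModel; the device strings are placeholders, not real hardware names:

#include <cassert>
#include <string>
#include "llmodel.h"

int main()
{
    using GPUDevice = LLModel::GPUDevice;
    // "Auto", "CPU", and "Metal" pass through unchanged.
    assert(GPUDevice::updateSelectionName("Auto") == "Auto");
    // Selections that already carry a known backend prefix are kept as-is.
    assert(GPUDevice::updateSelectionName("CUDA: Example GPU") == "CUDA: Example GPU");
    // Bare names written by earlier versions are assumed to be Vulkan devices.
    assert(GPUDevice::updateSelectionName("Example GPU") == "Vulkan: Example GPU");
    return 0;
}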
6 changes: 0 additions & 6 deletions gpt4all-backend/llmodel_c.cpp
@@ -287,12 +287,6 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
return wrapper->llModel->initializeGPUDevice(device);
}

bool llmodel_has_gpu_device(llmodel_model model)
{
const auto *wrapper = static_cast<LLModelWrapper *>(model);
return wrapper->llModel->hasGPUDevice();
}

const char *llmodel_model_backend_name(llmodel_model model)
{
const auto *wrapper = static_cast<LLModelWrapper *>(model);
5 changes: 0 additions & 5 deletions gpt4all-backend/llmodel_c.h
@@ -291,11 +291,6 @@ bool llmodel_gpu_init_gpu_device_by_struct(llmodel_model model, const llmodel_gp
*/
bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);

/**
* @return True if a GPU device is successfully initialized, false otherwise.
*/
bool llmodel_has_gpu_device(llmodel_model model);

/**
* @return The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal".
*/
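
For bindings that previously called llmodel_has_gpu_device(), the backend name remains queryable through the C API. A minimal sketch using only llmodel_model_backend_name() as documented here; log_backend() is a hypothetical helper, not part of this PR:

#include <cstdio>
#include "llmodel_c.h"

// Hypothetical snippet (illustration only): log which llama.cpp backend a loaded model uses.
static void log_backend(llmodel_model model)
{
    const char *backend = llmodel_model_backend_name(model);
    std::printf("llama.cpp backend in use: %s\n", backend);
}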
3 changes: 0 additions & 3 deletions gpt4all-bindings/python/gpt4all/_pyllmodel.py
@@ -177,9 +177,6 @@ class LLModelGPUDevice(ctypes.Structure):
llmodel.llmodel_gpu_init_gpu_device_by_int.argtypes = [ctypes.c_void_p, ctypes.c_int32]
llmodel.llmodel_gpu_init_gpu_device_by_int.restype = ctypes.c_bool

llmodel.llmodel_has_gpu_device.argtypes = [ctypes.c_void_p]
llmodel.llmodel_has_gpu_device.restype = ctypes.c_bool

llmodel.llmodel_model_backend_name.argtypes = [ctypes.c_void_p]
llmodel.llmodel_model_backend_name.restype = ctypes.c_char_p

18 changes: 10 additions & 8 deletions gpt4all-chat/chat.cpp
@@ -64,8 +64,7 @@ void Chat::connectLLM()
connect(m_llmodel, &ChatLLM::recalcChanged, this, &Chat::handleRecalculating, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::reportDevice, this, &Chat::handleDeviceChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::reportFallbackReason, this, &Chat::handleFallbackReasonChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::loadedModelInfoChanged, this, &Chat::loadedModelInfoChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::trySwitchContextOfLoadedModelCompleted, this, &Chat::handleTrySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection);
@@ -327,16 +326,19 @@ void Chat::handleTokenSpeedChanged(const QString &tokenSpeed)
emit tokenSpeedChanged();
}

void Chat::handleDeviceChanged(const QString &device)
QString Chat::deviceBackend() const
{
m_device = device;
emit deviceChanged();
return m_llmodel->deviceBackend();
}

void Chat::handleFallbackReasonChanged(const QString &fallbackReason)
QString Chat::device() const
{
m_fallbackReason = fallbackReason;
emit fallbackReasonChanged();
return m_llmodel->device();
}

QString Chat::fallbackReason() const
{
return m_llmodel->fallbackReason();
}

void Chat::handleDatabaseResultsChanged(const QList<ResultInfo> &results)
14 changes: 8 additions & 6 deletions gpt4all-chat/chat.h
@@ -33,8 +33,9 @@ class Chat : public QObject
Q_PROPERTY(QList<QString> collectionList READ collectionList NOTIFY collectionListChanged)
Q_PROPERTY(QString modelLoadingError READ modelLoadingError NOTIFY modelLoadingErrorChanged)
Q_PROPERTY(QString tokenSpeed READ tokenSpeed NOTIFY tokenSpeedChanged);
Q_PROPERTY(QString device READ device NOTIFY deviceChanged);
Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY fallbackReasonChanged);
Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged)
Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)
Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)
Q_PROPERTY(LocalDocsCollectionsModel *collectionModel READ collectionModel NOTIFY collectionModelChanged)
// 0=no, 1=waiting, 2=working
Q_PROPERTY(int trySwitchContextInProgress READ trySwitchContextInProgress NOTIFY trySwitchContextInProgressChanged)
@@ -111,8 +112,10 @@ class Chat : public QObject
QString modelLoadingError() const { return m_modelLoadingError; }

QString tokenSpeed() const { return m_tokenSpeed; }
QString device() const { return m_device; }
QString fallbackReason() const { return m_fallbackReason; }
QString deviceBackend() const;
QString device() const;
// not loaded -> QString(), no fallback -> QString("")
QString fallbackReason() const;

int trySwitchContextInProgress() const { return m_trySwitchContextInProgress; }

@@ -149,6 +152,7 @@ public Q_SLOTS:
void fallbackReasonChanged();
void collectionModelChanged();
void trySwitchContextInProgressChanged();
void loadedModelInfoChanged();

private Q_SLOTS:
void handleResponseChanged(const QString &response);
@@ -159,8 +163,6 @@ private Q_SLOTS:
void handleRecalculating();
void handleModelLoadingError(const QString &error);
void handleTokenSpeedChanged(const QString &tokenSpeed);
void handleDeviceChanged(const QString &device);
void handleFallbackReasonChanged(const QString &device);
void handleDatabaseResultsChanged(const QList<ResultInfo> &results);
void handleModelInfoChanged(const ModelInfo &modelInfo);
void handleTrySwitchContextOfLoadedModelCompleted(int value);
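
Chat::device() and Chat::fallbackReason() now read straight from ChatLLM, and the header comment above encodes three states in one QString: null means no model is loaded, empty means no fallback occurred, and anything else is the reason for the fallback. A minimal sketch of interpreting that convention; describeFallback() is a hypothetical helper, not part of this PR:

#include <QString>

// Hypothetical helper (illustration only): turn Chat::fallbackReason() into display text.
static QString describeFallback(const QString &reason)
{
    if (reason.isNull())
        return QStringLiteral("no model loaded");                 // QString()
    if (reason.isEmpty())
        return QStringLiteral("loaded on the requested device");  // QString("")
    return QStringLiteral("fell back: ") + reason;
}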