Upgrade to latest GPT4All. Use Mistral as default offline chat model
GPT4All now supports GGUF llama.cpp chat models. The latest
GPT4All (with Mistral) is at least 3x faster.

On a MacBook Pro, responses now start in ~10s vs 30s-120s earlier.
Mistral is also a better chat model, although it hallucinates more
than llama-2.
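For context, a minimal sketch of loading a GGUF chat model with the gpt4all Python bindings (>= 2.0.0); the model name matches the new default in this commit, while the prompt and settings are illustrative only:

```python
from gpt4all import GPT4All

# Download (if missing) and load the GGUF model with default settings.
model = GPT4All(model_name="mistral-7b-instruct-v0.1.Q4_0.gguf")

with model.chat_session():
    reply = model.generate("What does the GGUF format change?", max_tokens=128)
print(reply)
```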
debanjum committed Oct 23, 2023
1 parent 6dc0df3 commit 0f1ebca
Showing 10 changed files with 84 additions and 11 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -60,8 +60,8 @@ dependencies = [
    "bs4 >= 0.0.1",
    "anyio == 3.7.1",
    "pymupdf >= 1.23.3",
-    "gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'",
-    "gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'",
+    "gpt4all >= 2.0.0; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "gpt4all >= 2.0.0; platform_system == 'Windows' or platform_system == 'Darwin'",
]
dynamic = ["version"]

69 changes: 69 additions & 0 deletions src/khoj/migrations/migrate_offline_chat_default_model.py
@@ -0,0 +1,69 @@
"""
Current format of khoj.yml
---
app:
    ...
content-type:
    ...
processor:
  conversation:
    offline-chat:
      enable-offline-chat: false
      chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
  ...
search-type:
    ...

New format of khoj.yml
---
app:
    ...
content-type:
    ...
processor:
  conversation:
    offline-chat:
      enable-offline-chat: false
      chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
  ...
search-type:
    ...
"""
import logging
from packaging import version

from khoj.utils.yaml import load_config_from_file, save_config_to_file


logger = logging.getLogger(__name__)


def migrate_offline_chat_default_model(args):
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args
    if "offline-chat" not in raw_config["processor"]["conversation"]:
        return args
    if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse("0.12.4"):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to mistral in GGUF format to use latest GPT4All
        offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
        if offline_chat_model.endswith(".bin"):
            raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

        save_config_to_file(raw_config, args.config_file)
    return args
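A minimal sketch of invoking this migration directly; the real khoj CLI builds `args` via argparse, so the Namespace and config path below are illustrative assumptions:

```python
from argparse import Namespace
from pathlib import Path

from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model

# Point the migration at an existing khoj.yml; the path here is an assumption.
args = Namespace(config_file=Path("~/.khoj/khoj.yml").expanduser())
args = migrate_offline_chat_default_model(args)
```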
4 changes: 2 additions & 2 deletions src/khoj/processor/conversation/gpt4all/chat_model.py
@@ -16,7 +16,7 @@

def extract_questions_offline(
    text: str,
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
    loaded_model: Union[Any, None] = None,
    conversation_log={},
    use_history: bool = True,
@@ -123,7 +123,7 @@ def converse_offline(
    references,
    user_query,
    conversation_log={},
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
    loaded_model: Union[Any, None] = None,
    completion_func=None,
    conversation_command=ConversationCommand.Default,
4 changes: 2 additions & 2 deletions src/khoj/processor/conversation/gpt4all/utils.py
@@ -14,9 +14,9 @@ def download_model(model_name: str):
    # Use GPU for Chat Model, if available
    try:
        model = GPT4All(model_name=model_name, device="gpu")
-        logger.debug("Loaded chat model to GPU.")
+        logger.debug(f"Loaded {model_name} chat model to GPU.")
    except ValueError:
        model = GPT4All(model_name=model_name)
-        logger.debug("Loaded chat model to CPU.")
+        logger.debug(f"Loaded {model_name} chat model to CPU.")

    return model
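For reference, an illustrative call to this helper; it prefers the GPU and falls back to CPU when GPT4All raises a ValueError for the requested device. The prompt and token budget are arbitrary:

```python
from khoj.processor.conversation.gpt4all.utils import download_model

# Downloads the model on first use, then loads it onto GPU or CPU.
chat_model = download_model("mistral-7b-instruct-v0.1.Q4_0.gguf")
print(chat_model.generate("Say hello", max_tokens=32))
```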
2 changes: 2 additions & 0 deletions src/khoj/processor/conversation/utils.py
@@ -20,9 +20,11 @@
    "gpt-4": 8192,
    "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548,
    "gpt-3.5-turbo-16k": 15000,
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": 1548,
}
model_to_tokenizer = {
    "llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer",
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": "mistralai/Mistral-7B-Instruct-v0.1",
}


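A hedged sketch of how these lookup tables could be used to keep prompts within a model's context budget; the first map's name is cut off above, so `model_to_prompt_size` is assumed here, and khoj's actual truncation logic may differ:

```python
from transformers import AutoTokenizer

from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer

chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
max_prompt_size = model_to_prompt_size[chat_model]  # 1548 tokens for this model
tokenizer = AutoTokenizer.from_pretrained(model_to_tokenizer[chat_model])

def fits_in_prompt(text: str) -> bool:
    # Count tokens with the model's tokenizer and compare against its budget.
    return len(tokenizer.encode(text)) <= max_prompt_size
```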
2 changes: 2 additions & 0 deletions src/khoj/utils/cli.py
@@ -10,6 +10,7 @@
from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema
from khoj.migrations.migrate_offline_model import migrate_offline_model
from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
+from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model


def cli(args=None):
@@ -61,6 +62,7 @@ def run_migrations(args):
        migrate_processor_conversation_schema,
        migrate_offline_model,
        migrate_offline_chat_schema,
+        migrate_offline_chat_default_model,
    ]
    for migration in migrations:
        args = migration(args)
4 changes: 2 additions & 2 deletions src/khoj/utils/constants.py
@@ -55,7 +55,7 @@
            },
            "offline-chat": {
                "enable-offline-chat": False,
-                "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
+                "chat-model": "mistral-7b-instruct-v0.1.Q4_0.gguf",
            },
            "tokenizer": None,
            "max-prompt-size": None,
@@ -132,7 +132,7 @@
            },
            "offline-chat": {
                "enable-offline-chat": False,
-                "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
+                "chat-model": "mistral-7b-instruct-v0.1.Q4_0.gguf",
            },
            "tokenizer": None,
            "max-prompt-size": None,
2 changes: 1 addition & 1 deletion src/khoj/utils/rawconfig.py
@@ -93,7 +93,7 @@ class OpenAIProcessorConfig(ConfigBase):

class OfflineChatProcessorConfig(ConfigBase):
    enable_offline_chat: Optional[bool] = False
-    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+    chat_model: Optional[str] = "mistral-7b-instruct-v0.1.Q4_0.gguf"


class ConversationProcessorConfig(ConfigBase):
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -206,7 +206,7 @@ def processor_config_offline_chat(tmp_path_factory):

    # Setup conversation processor
    processor_config = ProcessorConfig()
-    offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True)
+    offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True, chat_model="mistral-7b-instruct-v0.1.Q4_0.gguf")
    processor_config.conversation = ConversationProcessorConfig(
        offline_chat=offline_chat,
        conversation_logfile=processor_dir.joinpath("conversation_logs.json"),
2 changes: 1 addition & 1 deletion tests/test_gpt4all_chat_actors.py
@@ -24,7 +24,7 @@

from khoj.processor.conversation.utils import message_to_log

-MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+MODEL_NAME = "mistral-7b-instruct-v0.1.Q4_0.gguf"


@pytest.fixture(scope="session")
