Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/llama-vocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2347,6 +2347,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "_<EOT>"
|| t.first == "<|end_of_text|>"
|| t.first == "<end_of_utterance>" // smoldocling
|| t.first == "<|end▁of▁sentence|>" // deepseek-ocr
) {
special_eog_ids.insert(t.second);
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
Expand Down
15 changes: 12 additions & 3 deletions tools/mtmd/mtmd-cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,14 +222,18 @@ static std::string chat_add_and_format(mtmd_cli_context & ctx, common_chat_msg &

static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg) {
bool add_bos = ctx.chat_history.empty();
auto formatted_chat = chat_add_and_format(ctx, msg);
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());

mtmd_input_text text;
text.text = formatted_chat.c_str();
text.text = msg.content.c_str();
text.add_special = add_bos;
text.parse_special = true;

if (!mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
auto formatted_chat = chat_add_and_format(ctx, msg);
LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.c_str());
text.text = formatted_chat.c_str();
}

if (g_is_interrupted) return 0;

mtmd::input_chunks chunks(mtmd_input_chunks_init());
Expand Down Expand Up @@ -332,6 +336,11 @@ int main(int argc, char ** argv) {
}

} else {
if (mtmd_is_deepseekocr(ctx.ctx_vision.get())) {
LOG_ERR("\n DeepSeek-OCR doesn't support chat mode.");
return 1;
}

LOG("\n Running in chat mode, available commands:");
if (mtmd_support_vision(ctx.ctx_vision.get())) {
LOG("\n /image <path> load an image");
Expand Down
4 changes: 4 additions & 0 deletions tools/mtmd/mtmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,10 @@ int mtmd_get_audio_bitrate(mtmd_context * ctx) {
return 16000; // 16kHz
}

// Reports whether the model behind this context is DeepSeek-OCR.
// A context with no ctx_v yields false, so clip_is_deepseekocr() is only
// ever queried with a non-null ctx_v.
bool mtmd_is_deepseekocr(mtmd_context * ctx) {
    if (!ctx->ctx_v) {
        return false;
    }
    return clip_is_deepseekocr(ctx->ctx_v);
}

//
// public API functions
//
Expand Down
3 changes: 3 additions & 0 deletions tools/mtmd/mtmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ MTMD_API bool mtmd_support_audio(mtmd_context * ctx);
// return -1 if audio is not supported
MTMD_API int mtmd_get_audio_bitrate(mtmd_context * ctx);

// whether the current model is DeepSeek-OCR
// return false if ctx has no ctx_v; otherwise return what clip_is_deepseekocr() reports for it
// note: ctx itself is dereferenced without a null check, so it must be a valid context
MTMD_API bool mtmd_is_deepseekocr(mtmd_context * ctx);

// mtmd_bitmap
//
// if bitmap is image:
Expand Down