Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Telegram bot with functions tools.
- Agents can be triggered by name via HTTP or MQTT
- Incoming audio transcription using Whisper service
- Prompt placeholders: `{url:...}` and `{tool:...}` for dynamic content
- Photo messages are processed with OCR to extract text
- Photo messages and image documents are processed with OCR to extract text
- Dedicated log files for HTTP and MQTT activity
- Docker healthcheck endpoint for container monitoring
- GET `/agent/:agent` returns agent status
Expand Down
53 changes: 53 additions & 0 deletions src/handlers/onDocument.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { Context } from "telegraf";
import { Message } from "telegraf/types";
import checkAccessLevel from "./access.ts";
import onTextMessage from "./onTextMessage.ts";
import onUnsupported from "./onUnsupported.ts";
import { processImageMessage } from "../helpers/vision.ts";
import { log } from "../helpers.ts";
import { createNewContext } from "../telegram/context.ts";

/**
 * Telegram handler for messages that carry a document attachment.
 *
 * Documents whose MIME type does not start with `image/` are passed to
 * `onUnsupported`. Image documents are logged and then either:
 * - forwarded to `onTextMessage` using only the caption as text, when the
 *   caption is longer than 100 characters (OCR is skipped), or
 * - handed to `processImageMessage` with the `upload_document` chat action.
 *
 * @param ctx Telegraf context of the incoming update.
 */
export default async function onDocument(ctx: Context) {
  if (!("message" in ctx.update)) return;

  const access = await checkAccessLevel(ctx);
  if (!access) return;

  const chat = access.chat;
  const msg = access.msg as unknown as Message.DocumentMessage;

  // Only image documents are eligible for OCR; everything else is unsupported.
  const isImage = (msg.document?.mime_type || "").startsWith("image/");
  if (!isImage) {
    await onUnsupported(ctx);
    return;
  }

  const chatTitle = "title" in msg.chat ? msg.chat.title : "private_chat";
  log({
    msg: `[document] ${msg.caption || ""}`,
    logLevel: "info",
    chatId: msg.chat.id,
    chatTitle,
    role: "user",
  });

  const caption = msg.caption;
  if (!caption || caption.length <= 100) {
    // Missing or short caption: run the shared OCR pipeline.
    await processImageMessage(ctx, msg, chat, "upload_document");
    return;
  }

  // Long caption: skip OCR and treat the caption as a plain text message.
  log({
    msg: `[document] caption too long, skip ocr: ${caption.length}`,
    logLevel: "info",
    chatId: msg.chat.id,
    chatTitle,
  });

  const captionAsText = {
    ...msg,
    text: caption,
    entities: [],
  } as const;

  await onTextMessage(createNewContext(ctx, captionAsText));
}
60 changes: 2 additions & 58 deletions src/handlers/onPhoto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@ import { Context } from "telegraf";
import { Message, Update } from "telegraf/types";
import onTextMessage from "./onTextMessage.ts";
import checkAccessLevel from "./access.ts";
import { recognizeImageText } from "../helpers/vision.ts";
import { processImageMessage } from "../helpers/vision.ts";
import { log } from "../helpers";
import { useConfig } from "../config.ts";
import { sendTelegramMessage } from "../telegram/send.ts";
import { createNewContext } from "../telegram/context.ts";

// Type guard to check if update has a message
Expand Down Expand Up @@ -53,59 +51,5 @@ export default async function onPhoto(ctx: Context) {
return;
}

const config = useConfig();
const model = config?.vision?.model || "";
if (!model)
return await sendTelegramMessage(
msg.chat.id,
"Извините, обработка изображений не поддерживается",
);

// Create a new message object with the recognized text
const processPhoto = async () => {
let text = "";
try {
text = await recognizeImageText(msg, chat);
text = `Image contents: ${text}`;
} catch (error) {
const chatId = ctx.chat?.id || msg.chat?.id;
try {
const errText = `Ошибка при распознавании изображения: ${error instanceof Error ? error.message : "Неизвестная ошибка"}`;
await sendTelegramMessage(chatId || 0, errText, undefined, ctx, chat);
return;
} catch (error) {
log({
msg: error instanceof Error ? error.message : "Неизвестная ошибка",
logLevel: "error",
chatId,
chatTitle,
role: "user",
});
}
}
const caption = msg.caption ? `${msg.caption}\n\n` : "";

log({
msg: text,
logLevel: "info",
chatId: msg.chat.id,
chatTitle,
role: "user",
});

// Create a new message object with the recognized text
const newMsg = {
...msg,
text: caption + text,
entities: [],
} as const;

// Create a new context by extending the original context
const contextWithNewMessage = createNewContext(ctx, newMsg);

await onTextMessage(contextWithNewMessage);
};

// Use the original context for persistentChatAction
await ctx.persistentChatAction("upload_photo", processPhoto);
await processImageMessage(ctx, msg, chat, "upload_photo");
}
85 changes: 82 additions & 3 deletions src/helpers/vision.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
import { Context } from "telegraf";
import { Message } from "telegraf/types";
import { useBot } from "../bot.ts";
import { llmCall } from "./gpt.ts";
import { useConfig } from "../config.ts";
import { ConfigChatType } from "../types.ts";
import { sendTelegramMessage } from "../telegram/send.ts";
import { createNewContext } from "../telegram/context.ts";
import { log } from "../helpers.ts";
import onTextMessage from "../handlers/onTextMessage.ts";

/**
 * Message shapes the image helpers in this module accept: either a Telegram
 * photo message or a Telegram document message.
 */
export type ImageMessage = Message.PhotoMessage | Message.DocumentMessage;

export async function recognizeImageText(
msg: Message.PhotoMessage,
msg: ImageMessage,
chatConfig: ConfigChatType,
): Promise<string> {
const photo = msg.photo[msg.photo.length - 1];
let fileId: string;
if ("photo" in msg && msg.photo?.length) {
fileId = msg.photo[msg.photo.length - 1].file_id;
} else if ("document" in msg && msg.document) {
fileId = msg.document.file_id;
} else {
throw new Error("Не удалось получить изображение.");
}

let link;
try {
link = await useBot(chatConfig.bot_token).telegram.getFileLink(photo.file_id);
link = await useBot(chatConfig.bot_token).telegram.getFileLink(fileId);
} catch (error) {
const err = error as Error;
if (
Expand Down Expand Up @@ -58,3 +73,67 @@ export async function recognizeImageText(
throw e;
}
}

/**
 * Runs text recognition on an image message and forwards the result to the
 * text-message handler.
 *
 * Flow:
 * 1. If no vision model is configured (`config.vision.model` is empty), an
 *    apology is sent to the chat and the result of that send is returned.
 * 2. Otherwise, inside `ctx.persistentChatAction(uploadAction, ...)`, the image
 *    is recognised via `recognizeImageText`, the result is prefixed with
 *    `Image contents: ` and, together with the caption (if any), passed to
 *    `onTextMessage` as the text of a new context.
 * 3. If recognition throws, the error text is sent to the chat. If that send
 *    also fails, the failure is logged. In both cases processing stops, so
 *    a message with empty recognised text is never forwarded.
 *
 * @param ctx Telegraf context of the original update.
 * @param msg Photo or document message that carries the image.
 * @param chat Chat configuration passed through to recognition and sending.
 * @param uploadAction Chat action shown while recognition is running.
 */
export async function processImageMessage(
  ctx: Context,
  msg: ImageMessage,
  chat: ConfigChatType,
  uploadAction: "upload_photo" | "upload_document",
) {
  const config = useConfig();
  const model = config?.vision?.model || "";
  if (!model)
    return await sendTelegramMessage(
      msg.chat.id,
      "Извините, обработка изображений не поддерживается",
    );

  const chatTitle = "title" in msg.chat ? msg.chat.title : "private_chat";

  const run = async () => {
    let text: string;
    try {
      text = await recognizeImageText(msg, chat);
      text = `Image contents: ${text}`;
    } catch (error) {
      const chatId = ctx.chat?.id || msg.chat?.id;
      try {
        const errText = `Ошибка при распознавании изображения: ${
          error instanceof Error ? error.message : "Неизвестная ошибка"
        }`;
        await sendTelegramMessage(chatId || 0, errText, undefined, ctx, chat);
      } catch (error2) {
        // Notifying the user failed as well; record it instead of throwing.
        log({
          msg: error2 instanceof Error ? error2.message : "Неизвестная ошибка",
          logLevel: "error",
          chatId,
          chatTitle,
          role: "user",
        });
      }
      // Recognition failed: stop here whether or not the notification was
      // delivered, rather than forwarding a message without recognised text.
      return;
    }

    const caption = msg.caption ? `${msg.caption}\n\n` : "";

    log({
      msg: text,
      logLevel: "info",
      chatId: msg.chat.id,
      chatTitle,
      role: "user",
    });

    // Present the recognised text (preceded by the caption) as a text message.
    const newMsg = {
      ...msg,
      text: caption + text,
      entities: [],
    } as const;

    const contextWithNewMessage = createNewContext(ctx, newMsg);

    await onTextMessage(contextWithNewMessage);
  };

  // Keep the chat action visible for as long as `run` is in progress.
  await ctx.persistentChatAction(uploadAction, run);
}
5 changes: 3 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import onTextMessage from "./handlers/onTextMessage.ts";
import onPhoto from "./handlers/onPhoto.ts";
import onAudio from "./handlers/onAudio.ts";
import onUnsupported from "./handlers/onUnsupported.ts";
import onDocument from "./handlers/onDocument.ts";
import { useLastCtx } from "./helpers/lastCtx.ts";
import {
agentGetHandler,
Expand Down Expand Up @@ -88,14 +89,14 @@ async function launchBot(bot_token: string, bot_name: string) {
bot.on(message("sticker"), onUnsupported);
bot.on(message("video"), onUnsupported);
bot.on(message("video_note"), onUnsupported);
bot.on(message("document"), onUnsupported);
bot.on(message("document"), onDocument);

bot.catch((err, ctx) => {
log({
msg: `[${bot_name}] Unhandled error for update ${ctx.update.update_id}: ${err instanceof Error ? err.message : String(err)}`,
logLevel: "error",
});
if (err instanceof Error){
if (err instanceof Error) {
console.error(err.stack);
}
});
Expand Down
82 changes: 82 additions & 0 deletions tests/handlers/onDocumentMain.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { jest, describe, it, expect, beforeEach } from "@jest/globals";
import type { Context, Message } from "telegraf/types";
import type { ConfigChatType } from "../../src/types";

// Shared jest mock functions. Each mocked module below forwards its calls to
// one of these, so tests can configure and inspect them directly.
const mockCheckAccessLevel = jest.fn();
const mockProcessImageMessage = jest.fn();
const mockOnUnsupported = jest.fn();
const mockOnTextMessage = jest.fn();

// Replace the access check's default export with a forwarder to the mock.
jest.unstable_mockModule("../../src/handlers/access.ts", () => ({
__esModule: true,
default: (...args: unknown[]) => mockCheckAccessLevel(...args),
}));

// Replace the vision helper module; only `processImageMessage` is provided.
jest.unstable_mockModule("../../src/helpers/vision.ts", () => ({
processImageMessage: (...args: unknown[]) => mockProcessImageMessage(...args),
}));

// Replace the unsupported-message handler's default export.
jest.unstable_mockModule("../../src/handlers/onUnsupported.ts", () => ({
__esModule: true,
default: (...args: unknown[]) => mockOnUnsupported(...args),
}));

// Replace the text-message handler's default export.
jest.unstable_mockModule("../../src/handlers/onTextMessage.ts", () => ({
__esModule: true,
default: (...args: unknown[]) => mockOnTextMessage(...args),
}));

// Handler under test; assigned by a dynamic import in `beforeEach`.
let onDocument: typeof import("../../src/handlers/onDocument.ts").default;

/**
 * Builds a minimal stand-in for a Telegraf context around `message`.
 *
 * The stub exposes the message both directly and under `update.message`, and
 * its `persistentChatAction` simply awaits the supplied callback.
 */
function createCtx(message: Record<string, unknown>): Context {
  const runImmediately = async (
    _action: string,
    task: () => Promise<void>,
  ) => {
    await task();
  };

  const stub = {
    message,
    update: { message },
    persistentChatAction: runImmediately,
  };

  return stub as unknown as Context;
}

// Before every test: clear mock state, reset the module registry, and
// re-import the handler so `onDocument` refers to a freshly loaded module.
beforeEach(async () => {
jest.clearAllMocks();
jest.resetModules();
onDocument = (await import("../../src/handlers/onDocument.ts")).default;
});

// Behavioural tests for the document handler, run against the mocked modules.
describe("onDocument", () => {
// An `image/*` document must be passed to processImageMessage with the
// "upload_document" action and must not be treated as unsupported.
it("processes image documents", async () => {
const msg = {
chat: { id: 1, type: "private", title: "t" },
document: { file_id: "f", mime_type: "image/png" },
caption: "cap",
} as Message.DocumentMessage;
const chat = {} as ConfigChatType;
mockCheckAccessLevel.mockResolvedValue({ msg, chat });
// NOTE(review): this mock implementation calls mockOnTextMessage itself, so
// the final `toHaveBeenCalled` assertion is satisfied by the test's own
// stub — confirm that is the intent.
mockProcessImageMessage.mockImplementation(async () => {
mockOnTextMessage({ message: { text: "cap\n\nImage contents: ocr" } });
});
const ctx = createCtx(msg);
await onDocument(ctx);
expect(mockProcessImageMessage).toHaveBeenCalledWith(
ctx,
msg,
chat,
"upload_document",
);
expect(mockOnUnsupported).not.toHaveBeenCalled();
expect(mockOnTextMessage).toHaveBeenCalled();
});

// A non-image document (here a PDF) must go to onUnsupported and must never
// reach the image pipeline.
it("redirects non-image documents", async () => {
const msg = {
chat: { id: 1, type: "private", title: "t" },
document: { file_id: "f", mime_type: "application/pdf" },
} as Message.DocumentMessage;
mockCheckAccessLevel.mockResolvedValue({ msg, chat: {} as ConfigChatType });
const ctx = createCtx(msg);
await onDocument(ctx);
expect(mockOnUnsupported).toHaveBeenCalledWith(ctx);
expect(mockProcessImageMessage).not.toHaveBeenCalled();
});
});
Loading