diff --git a/README.md b/README.md
index d0d189d9..64a3252b 100644
--- a/README.md
+++ b/README.md
@@ -68,28 +68,28 @@ The following parameters are optional and can be set in the `.env` file:
 Check out the [Budget Manual](https://github.com/n3d1117/chatgpt-telegram-bot/discussions/184) for possible budget configurations.
 
 #### Additional optional configuration options
-| Parameter | Description | Default value |
-|-----------|-------------|---------------|
-| `ENABLE_QUOTING` | Whether to enable message quoting in private chats | `true` |
-| `ENABLE_IMAGE_GENERATION` | Whether to enable image generation via the `/image` command | `true` |
-| `ENABLE_TRANSCRIPTION` | Whether to enable transcriptions of audio and video messages | `true` |
-| `PROXY` | Proxy to be used for OpenAI and Telegram bot (e.g. `http://localhost:8080`) | - |
-| `OPENAI_MODEL` | The OpenAI model to use for generating responses | `gpt-3.5-turbo` |
-| `ASSISTANT_PROMPT` | A system message that sets the tone and controls the behavior of the assistant | `You are a helpful assistant.` |
-| `SHOW_USAGE` | Whether to show OpenAI token usage information after each response | `false` |
-| `STREAM` | Whether to stream responses. **Note**: incompatible, if enabled, with `N_CHOICES` higher than 1 | `true` |
-| `MAX_TOKENS` | Upper bound on how many tokens the ChatGPT API will return | `1200` for GPT-3, `2400` for GPT-4 |
-| `MAX_HISTORY_SIZE` | Max number of messages to keep in memory, after which the conversation will be summarised to avoid excessive token usage | `15` |
-| `MAX_CONVERSATION_AGE_MINUTES` | Maximum number of minutes a conversation should live since the last message, after which the conversation will be reset | `180` |
-| `VOICE_REPLY_WITH_TRANSCRIPT_ONLY` | Whether to answer to voice messages with the transcript only or with a ChatGPT response of the transcript | `false` |
-| `VOICE_REPLY_PROMPTS` | A semicolon separated list of phrases (i.e. `Hi bot;Hello chat`). If the transcript starts with any of them, it will be treated as a prompt even if `VOICE_REPLY_WITH_TRANSCRIPT_ONLY` is set to `true` | - |
-| `N_CHOICES` | Number of answers to generate for each input message. **Note**: setting this to a number higher than 1 will not work properly if `STREAM` is enabled | `1` |
-| `TEMPERATURE` | Number between 0 and 2. Higher values will make the output more random | `1.0` |
-| `PRESENCE_PENALTY` | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far | `0.0` |
-| `FREQUENCY_PENALTY` | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far | `0.0` |
-| `IMAGE_SIZE` | The DALL·E generated image size. Allowed values: `256x256`, `512x512` or `1024x1024` | `512x512` |
-| `GROUP_TRIGGER_KEYWORD` | If set, the bot in group chats will only respond to messages that start with this keyword | - |
-| `IGNORE_GROUP_TRANSCRIPTIONS` | If set to true, the bot will not process transcriptions in group chats | `true` |
+| Parameter | Description | Default value |
+|-----------|-------------|---------------|
+| `ENABLE_QUOTING` | Whether to enable message quoting in private chats | `true` |
+| `ENABLE_IMAGE_GENERATION` | Whether to enable image generation via the `/image` command | `true` |
+| `ENABLE_TRANSCRIPTION` | Whether to enable transcriptions of audio and video messages | `true` |
+| `PROXY` | Proxy to be used for OpenAI and Telegram bot (e.g. `http://localhost:8080`) | - |
+| `OPENAI_MODEL` | The OpenAI model to use for generating responses. You can find all available models [here](https://platform.openai.com/docs/models/) | `gpt-3.5-turbo` |
+| `ASSISTANT_PROMPT` | A system message that sets the tone and controls the behavior of the assistant | `You are a helpful assistant.` |
+| `SHOW_USAGE` | Whether to show OpenAI token usage information after each response | `false` |
+| `STREAM` | Whether to stream responses. **Note**: incompatible, if enabled, with `N_CHOICES` higher than 1 | `true` |
+| `MAX_TOKENS` | Upper bound on how many tokens the ChatGPT API will return | `1200` for GPT-3, `2400` for GPT-4 |
+| `MAX_HISTORY_SIZE` | Max number of messages to keep in memory, after which the conversation will be summarised to avoid excessive token usage | `15` |
+| `MAX_CONVERSATION_AGE_MINUTES` | Maximum number of minutes a conversation should live since the last message, after which the conversation will be reset | `180` |
+| `VOICE_REPLY_WITH_TRANSCRIPT_ONLY` | Whether to answer to voice messages with the transcript only or with a ChatGPT response of the transcript | `false` |
+| `VOICE_REPLY_PROMPTS` | A semicolon separated list of phrases (i.e. `Hi bot;Hello chat`). If the transcript starts with any of them, it will be treated as a prompt even if `VOICE_REPLY_WITH_TRANSCRIPT_ONLY` is set to `true` | - |
+| `N_CHOICES` | Number of answers to generate for each input message. **Note**: setting this to a number higher than 1 will not work properly if `STREAM` is enabled | `1` |
+| `TEMPERATURE` | Number between 0 and 2. Higher values will make the output more random | `1.0` |
+| `PRESENCE_PENALTY` | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far | `0.0` |
+| `FREQUENCY_PENALTY` | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far | `0.0` |
+| `IMAGE_SIZE` | The DALL·E generated image size. Allowed values: `256x256`, `512x512` or `1024x1024` | `512x512` |
+| `GROUP_TRIGGER_KEYWORD` | If set, the bot in group chats will only respond to messages that start with this keyword | - |
+| `IGNORE_GROUP_TRANSCRIPTIONS` | If set to true, the bot will not process transcriptions in group chats | `true` |
 | `BOT_LANGUAGE` | Language of general bot messages. Currently available: `en`, `de`, `ru`, `tr`, `it`, `fi`, `es`, `id`, `nl`, `zh-cn`, `zh-tw`, `vi`, `fa`, `pt-br`, `uk`. [Contribute with additional translations](https://github.com/n3d1117/chatgpt-telegram-bot/discussions/219) | `en` |
 
 Check out the [official API reference](https://platform.openai.com/docs/api-reference/chat) for more details.
diff --git a/bot/openai_helper.py b/bot/openai_helper.py
index 8f9b05e9..63b2528a 100644
--- a/bot/openai_helper.py
+++ b/bot/openai_helper.py
@@ -13,10 +13,11 @@ from calendar import monthrange
 
 # Models can be found here: https://platform.openai.com/docs/models/overview
-GPT_3_MODELS = ("gpt-3.5-turbo", "gpt-3.5-turbo-0301")
-GPT_4_MODELS = ("gpt-4", "gpt-4-0314")
-GPT_4_32K_MODELS = ("gpt-4-32k", "gpt-4-32k-0314")
-GPT_ALL_MODELS = GPT_3_MODELS + GPT_4_MODELS + GPT_4_32K_MODELS
+GPT_3_MODELS = ("gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613")
+GPT_3_16K_MODELS = ("gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613")
+GPT_4_MODELS = ("gpt-4", "gpt-4-0314", "gpt-4-0613")
+GPT_4_32K_MODELS = ("gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613")
+GPT_ALL_MODELS = GPT_3_MODELS + GPT_3_16K_MODELS + GPT_4_MODELS + GPT_4_32K_MODELS
 
 
 def default_max_tokens(model: str) -> int:
     """
@@ -25,7 +26,16 @@ def default_max_tokens(model: str) -> int:
     :param model: The model name
     :return: The default number of max tokens
     """
-    return 1200 if model in GPT_3_MODELS else 2400
+    base = 1200
+    if model in GPT_3_MODELS:
+        return base
+    elif model in GPT_4_MODELS:
+        return base * 2
+    elif model in GPT_3_16K_MODELS:
+        return base * 4
+    elif model in GPT_4_32K_MODELS:
+        return base * 8
+
 
 # Load translations
 parent_dir_path = os.path.join(os.path.dirname(__file__), os.pardir)
@@ -33,6 +43,7 @@ def default_max_tokens(model: str) -> int:
 with open(translations_file_path, 'r', encoding='utf-8') as f:
     translations = json.load(f)
 
+
 def localized_text(key, bot_language):
     """
     Return translated text for a key in specified bot_language.
@@ -272,12 +283,15 @@ async def __summarise(self, conversation) -> str:
         return response.choices[0]['message']['content']
 
     def __max_model_tokens(self):
+        base = 4096
         if self.config['model'] in GPT_3_MODELS:
-            return 4096
+            return base
+        if self.config['model'] in GPT_3_16K_MODELS:
+            return base * 4
         if self.config['model'] in GPT_4_MODELS:
-            return 8192
+            return base * 2
         if self.config['model'] in GPT_4_32K_MODELS:
-            return 32768
+            return base * 8
         raise NotImplementedError(
             f"Max tokens for model {self.config['model']} is not implemented yet."
         )
@@ -295,7 +309,7 @@ def __count_tokens(self, messages) -> int:
         except KeyError:
             encoding = tiktoken.get_encoding("gpt-3.5-turbo")
 
-        if model in GPT_3_MODELS:
+        if model in GPT_3_MODELS + GPT_3_16K_MODELS:
             tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
             tokens_per_name = -1  # if there's a name, the role is omitted
         elif model in GPT_4_MODELS + GPT_4_32K_MODELS:
diff --git a/requirements.txt b/requirements.txt
index 9f43072a..433aa5fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 python-dotenv~=1.0.0
 pydub~=0.25.1
-tiktoken==0.3.3
-openai==0.27.4
+tiktoken==0.4.0
+openai==0.27.8
 python-telegram-bot==20.2
\ No newline at end of file
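
For the README side of the change, here is a minimal `.env` sketch that opts into one of the newly supported 16k models. `OPENAI_MODEL` and `MAX_TOKENS` come straight from the table above; the credential variable names are assumptions based on the project's setup instructions, not part of this diff:

```
# Required credentials (names assumed from the project's setup docs)
OPENAI_API_KEY=sk-...
TELEGRAM_BOT_TOKEN=123456:ABC...

# Opt into one of the 16k-context models added by this change
OPENAI_MODEL=gpt-3.5-turbo-16k
# Optional override; the code above would otherwise default to 1200 * 4 = 4800
MAX_TOKENS=4800
```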
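
The new `default_max_tokens` scales a 1200-token base by model family: x1 for the 4k GPT-3.5 models, x2 for GPT-4, x4 for the 16k variants, and x8 for GPT-4-32k, mirroring how `__max_model_tokens` scales its 4096-token base. A quick sanity-check sketch, assuming it is run from the repository root with `requirements.txt` installed so that `bot/openai_helper.py` imports cleanly:

```python
# Sanity check for the per-family defaults introduced above; assumes this
# runs from the repository root with the project's dependencies installed.
from bot.openai_helper import default_max_tokens

expected = {
    "gpt-3.5-turbo": 1200,      # base
    "gpt-4": 2400,              # base * 2
    "gpt-3.5-turbo-16k": 4800,  # base * 4
    "gpt-4-32k": 9600,          # base * 8
}
for model, tokens in expected.items():
    assert default_max_tokens(model) == tokens, (model, default_max_tokens(model))
print("default_max_tokens matches all four model families")
```

Note that the rewritten function has no final `else`, so a model outside the four tuples falls through and returns `None`; callers are expected to have validated the configured model against `GPT_ALL_MODELS` before this point.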
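
The `__count_tokens` change routes the 16k models through the same per-message overheads as the other GPT-3.5 models. Those constants follow OpenAI's chat token-counting recipe; below is a standalone sketch of the scheme. The GPT-3.5 branch mirrors the hunk above, while the GPT-4 branch is filled in from the recipe rather than from this diff:

```python
import tiktoken

def num_tokens_from_messages(messages: list[dict], model: str) -> int:
    """Estimate prompt tokens for a chat request (sketch, not the bot's code)."""
    encoding = tiktoken.get_encoding("cl100k_base")  # encoding used by all chat models
    if model.startswith("gpt-3.5-turbo"):
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1    # if there's a name, the role is omitted
    else:  # gpt-4 / gpt-4-32k families
        tokens_per_message = 3
        tokens_per_name = 1
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    return num_tokens + 3  # every reply is primed with <|start|>assistant<|message|>

print(num_tokens_from_messages(
    [{"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Hello!"}],
    "gpt-3.5-turbo-16k",
))
```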
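
One pre-existing detail in the `__count_tokens` context lines above: the `KeyError` fallback calls `tiktoken.get_encoding("gpt-3.5-turbo")`, but `get_encoding` expects an *encoding* name such as `cl100k_base`, not a model name, so that fallback would itself fail if it were ever reached. A sketch of a fallback that resolves cleanly:

```python
import tiktoken

def resolve_encoding(model: str) -> tiktoken.Encoding:
    """Map a model name to its tokenizer, with a safe fallback (sketch)."""
    try:
        # encoding_for_model() takes a *model* name and raises KeyError
        # for models tiktoken does not know about.
        return tiktoken.encoding_for_model(model)
    except KeyError:
        # get_encoding() takes an *encoding* name; cl100k_base is the one
        # used by the gpt-3.5-turbo and gpt-4 families.
        return tiktoken.get_encoding("cl100k_base")

enc = resolve_encoding("some-unknown-model")
print(enc.name, len(enc.encode("Hello world")))
```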