Skip to content

Commit e0f3686

Browse files
authored
Add MessageEntity.shift_entities and MessageEntity.concatenate (#4376)
1 parent 01f6893 commit e0f3686

File tree

2 files changed

+190
-5
lines changed

2 files changed

+190
-5
lines changed

telegram/_messageentity.py

Lines changed: 142 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
import copy
2222
import itertools
23-
from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence
23+
from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence, Tuple, Union
2424

2525
from telegram import constants
2626
from telegram._telegramobject import TelegramObject
@@ -32,6 +32,8 @@
3232
if TYPE_CHECKING:
3333
from telegram import Bot
3434

35+
_SEM = Sequence["MessageEntity"]
36+
3537

3638
class MessageEntity(TelegramObject):
3739
"""
@@ -146,9 +148,7 @@ def de_json(
146148
return super().de_json(data=data, bot=bot)
147149

148150
@staticmethod
149-
def adjust_message_entities_to_utf_16(
150-
text: str, entities: Sequence["MessageEntity"]
151-
) -> Sequence["MessageEntity"]:
151+
def adjust_message_entities_to_utf_16(text: str, entities: _SEM) -> _SEM:
152152
"""Utility functionality for converting the offset and length of entities from
153153
Unicode (:obj:`str`) to UTF-16 (``utf-16-le`` encoded :obj:`bytes`).
154154
@@ -206,7 +206,7 @@ def adjust_message_entities_to_utf_16(
206206
text_slice = text[last_position:position]
207207
accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
208208
position_translation[position] = accumulated_length
209-
# get the final output entites
209+
# get the final output entities
210210
out = []
211211
for entity in entities:
212212
translated_positions = position_translation[entity.offset]
@@ -220,6 +220,143 @@ def adjust_message_entities_to_utf_16(
220220
out.append(new_entity)
221221
return out
222222

223+
@staticmethod
def shift_entities(by: Union[str, int], entities: _SEM) -> _SEM:
    """Return copies of the given entities with every offset moved by a fixed amount.

    Examples:
        Shifting by an integer amount:

        .. code-block:: python

            text = "Hello, world!"
            entities = [
                MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
                MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
            ]
            shifted_entities = MessageEntity.shift_entities(1, entities)
            await bot.send_message(
                chat_id=123,
                text="!" + text,
                entities=shifted_entities,
            )

        Shifting using a string:

        .. code-block:: python

            text = "Hello, world!"
            prefix = "𝄢"
            entities = [
                MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
                MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
            ]
            shifted_entities = MessageEntity.shift_entities(prefix, entities)
            await bot.send_message(
                chat_id=123,
                text=prefix + text,
                entities=shifted_entities,
            )

    Tip:
        The :paramref:`entities` are *not* modified in place. Each entity is copied and
        only the copy receives the new offset.

    .. versionadded:: NEXT.VERSION

    Args:
        by (:obj:`str` | :obj:`int`): The shift to apply. An integer is used as-is. For a
            string, the shift is the length of that string measured in UTF-16 code units.
        entities (Sequence[:class:`telegram.MessageEntity`]): The entities to shift.

    Returns:
        Sequence[:class:`telegram.MessageEntity`]: New entities, in the same order as the
        input, with shifted offsets.
    """
    if isinstance(by, int):
        shift_amount = by
    else:
        # Every UTF-16 code unit occupies two bytes in the ``utf-16-le`` encoding, so half
        # the byte count is the string's length in code units.
        shift_amount = len(by.encode("utf-16-le")) // 2

    def _moved(entity: "MessageEntity") -> "MessageEntity":
        # Work on a shallow copy so that the caller's entity keeps its original offset.
        clone = copy.copy(entity)
        with clone._unfrozen():
            clone.offset += shift_amount
        return clone

    return [_moved(entity) for entity in entities]
285+
286+
@classmethod
def concatenate(
    cls,
    *args: Union[Tuple[str, _SEM], Tuple[str, _SEM, bool]],
) -> Tuple[str, _SEM]:
    """Join any number of texts and merge their formatting entities accordingly.

    Tip:
        This is handy for putting a formatted prefix or suffix around an already formatted
        text. The offsets of later parts are shifted by the UTF-16 length of everything
        that precedes them.

    Examples:
        This example shows a callback function that can be used to add a prefix and suffix to
        the message in a :class:`~telegram.ext.CallbackQueryHandler`:

        .. code-block:: python

            async def prefix_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
                prefix = "𠌕 bold 𝄢 italic underlined: 𝛙𝌢𑁍 | "
                prefix_entities = [
                    MessageEntity(offset=2, length=4, type=MessageEntity.BOLD),
                    MessageEntity(offset=9, length=6, type=MessageEntity.ITALIC),
                    MessageEntity(offset=28, length=3, type=MessageEntity.UNDERLINE),
                ]
                suffix = " | 𠌕 bold 𝄢 italic underlined: 𝛙𝌢𑁍"
                suffix_entities = [
                    MessageEntity(offset=5, length=4, type=MessageEntity.BOLD),
                    MessageEntity(offset=12, length=6, type=MessageEntity.ITALIC),
                    MessageEntity(offset=31, length=3, type=MessageEntity.UNDERLINE),
                ]

                message = update.effective_message
                first = (prefix, prefix_entities, True)
                second = (message.text, message.entities)
                third = (suffix, suffix_entities, True)

                new_text, new_entities = MessageEntity.concatenate(first, second, third)
                await update.callback_query.edit_message_text(
                    text=new_text,
                    entities=new_entities,
                )

    Hint:
        The entities are *not* modified in place. The function returns a
        new sequence of objects.

    .. versionadded:: NEXT.VERSION

    Args:
        *args (Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]] | \
            Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`], :obj:`bool`]):
            Arbitrary number of tuples, each holding a text and its entities, in the order
            in which they are to be concatenated. An optional third element that is
            :obj:`True` requests that the entities of that tuple are first passed through
            :meth:`adjust_message_entities_to_utf_16`. Without it, no UTF-16 adjustment
            takes place.

    Returns:
        Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]]: The concatenated text
        and the entities of all parts, shifted to match their position in that text.
    """
    combined_text = ""
    combined_entities: List[MessageEntity] = []

    for part in args:
        part_text = part[0]
        part_entities = part[1]

        # Only a literal ``True`` in third position turns the UTF-16 adjustment on.
        wants_utf16_adjustment = len(part) > 2 and part[2] is True
        if wants_utf16_adjustment:
            part_entities = cls.adjust_message_entities_to_utf_16(part_text, part_entities)

        # Entities of this part begin after everything that has been concatenated so far.
        shifted = cls.shift_entities(combined_text, part_entities)
        combined_entities.extend(shifted)
        combined_text = combined_text + part_text

    return combined_text, combined_entities
359+
223360
ALL_TYPES: Final[List[str]] = list(constants.MessageEntityType)
224361
"""List[:obj:`str`]: A list of all available message entity types."""
225362
BLOCKQUOTE: Final[str] = constants.MessageEntityType.BLOCKQUOTE

tests/test_messageentity.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,54 @@ def test_fix_utf16(self):
103103
assert out_entity.offset == offset
104104
assert out_entity.length == length
105105

106+
@pytest.mark.parametrize("by", [6, "prefix", "𝛙𝌢𑁍"])
def test_shift_entities(self, by):
    """Check that shifting moves offsets by six, copies entities and keeps other fields.

    Each parametrized value of ``by`` is expected to result in a shift of six: the
    asserted offsets are the original ones (2 and 5) plus six.
    """
    extra_fields = {
        "url": "url",
        "user": 42,
        "language": "python",
        "custom_emoji_id": "custom_emoji_id",
    }
    first_original = MessageEntity(MessageEntity.BOLD, 2, 3, **extra_fields)
    second_original = MessageEntity(MessageEntity.BOLD, 5, 6, **extra_fields)
    originals = [first_original, second_original]

    shifted = MessageEntity.shift_entities(by, originals)

    # Offsets are moved ...
    assert shifted[0].offset == 8
    assert shifted[1].offset == 11

    # ... on new objects, not on the ones passed in ...
    assert shifted[0] is not originals[0]
    assert shifted[1] is not originals[1]

    # ... and all remaining attributes survive the copy.
    for shifted_entity in shifted:
        for field_name, expected in extra_fields.items():
            assert getattr(shifted_entity, field_name) == expected
128+
129+
def test_concatenate(self):
    """Check text joining, offset shifting and copying for three concatenated parts.

    The three parts cover the three accepted tuple shapes: flag ``True``, flag ``False``
    and no flag at all.
    """
    extra_fields = {
        "url": "url",
        "user": 42,
        "language": "python",
        "custom_emoji_id": "custom_emoji_id",
    }
    first_entity = MessageEntity(MessageEntity.BOLD, 0, 6, **extra_fields)
    second_entity = MessageEntity(MessageEntity.ITALIC, 0, 4, **extra_fields)
    third_entity = MessageEntity(MessageEntity.UNDERLINE, 3, 6, **extra_fields)
    originals = [first_entity, second_entity, third_entity]

    new_text, new_entities = MessageEntity.concatenate(
        ("prefix 𝛙𝌢𑁍 | ", [first_entity], True),
        ("text 𝛙𝌢𑁍", [second_entity], False),
        (" | suffix 𝛙𝌢𑁍", [third_entity]),
    )

    assert new_text == "prefix 𝛙𝌢𑁍 | text 𝛙𝌢𑁍 | suffix 𝛙𝌢𑁍"
    assert [entity.offset for entity in new_entities] == [0, 16, 30]

    # NOTE(review): nesting of the attribute loop inside the zip loop is assumed, since it
    # reads the loop variable - confirm against the repository.
    for before, after in zip(originals, new_entities):
        assert after is not before
        assert after.type == before.type
        for field_name, expected in extra_fields.items():
            assert getattr(after, field_name) == expected
153+
106154
def test_equality(self):
107155
a = MessageEntity(MessageEntity.BOLD, 2, 3)
108156
b = MessageEntity(MessageEntity.BOLD, 2, 3)

0 commit comments

Comments
 (0)