Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion docs/english/concepts/message-listening.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,21 @@ def say_hello_regex(say, context):
# regular expression matches are inside of context.matches
greeting = context['matches'][0]
say(f"{greeting}, how are you?")
```

## Parsing references in message text

Retrieved Slack messages can contain mrkdwn references such as user mentions, channel links, user group mentions, dates, and URLs. To parse these references from a message's `text` value, use `parse_slack_references()`:

```python
from slack_bolt.message_references import parse_slack_references


@app.message("hello")
def handle_message(message, say):
    # Keep only the user mentions; other reference types (channels, links, ...) are ignored here.
    mentioned_user_ids = [
        reference.id
        for reference in parse_slack_references(message["text"])
        if reference.type == "user"
    ]
    say(f"Found {len(mentioned_user_ids)} user mention(s)")
```

The parser only extracts what is literally present in the text, such as IDs, labels, URLs, and date fields; it does not look anything up. To retrieve the latest names or other entity details, call Web API methods such as `users.info`, `conversations.info`, or `usergroups.list`.
189 changes: 189 additions & 0 deletions slack_bolt/message_references.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
"""Utilities for parsing Slack references in message text."""

import re
from dataclasses import dataclass
from typing import List, Optional, Tuple

# Values stored in ``SlackMessageReference.type``.
USER_REFERENCE = "user"
CHANNEL_REFERENCE = "channel"
USERGROUP_REFERENCE = "usergroup"
SPECIAL_MENTION_REFERENCE = "special_mention"
DATE_REFERENCE = "date"
LINK_REFERENCE = "link"
UNKNOWN_REFERENCE = "unknown"

# Names accepted after "!" for a special mention (e.g. ``<!here>``).
_SPECIAL_MENTION_NAMES = {"here", "channel", "everyone"}

# A non-empty ``<...>`` span on a single line; group 1 is the text between the brackets.
# Nested brackets and newlines are excluded by the character class.
_ANGLE_BRACKET_REFERENCE_PATTERN = re.compile(r"<([^<>\n]+)>")

# Names exported by ``from slack_bolt.message_references import *``:
# the reference-type constants, the result dataclass, and the parsing helpers.
__all__ = [
    "CHANNEL_REFERENCE",
    "DATE_REFERENCE",
    "LINK_REFERENCE",
    "SPECIAL_MENTION_REFERENCE",
    "UNKNOWN_REFERENCE",
    "USER_REFERENCE",
    "USERGROUP_REFERENCE",
    "SlackMessageReference",
    "extract_channel_ids",
    "extract_user_ids",
    "extract_usergroup_ids",
    "parse_slack_references",
]


@dataclass(frozen=True)
class SlackMessageReference:
    """One ``<...>`` mrkdwn reference located inside a message text string.

    Instances are immutable. Only the fields relevant to the reference's
    ``type`` are populated; every other optional field stays ``None``.

    Attributes:
        type: One of the ``*_REFERENCE`` constants of this module.
        raw: The matched text, including the surrounding angle brackets.
        start: Index in the source text where ``raw`` begins.
        end: Index in the source text just past the end of ``raw``.
        id: User, channel, or user group ID, when the reference carries one.
        label: Text after the first ``|`` inside the brackets, if any.
        url: Link target, or the optional link attached to a date reference.
        special_mention: Name of a special mention such as ``here``.
        timestamp: Timestamp field of a date reference, kept as a string.
        date_format: Format field of a date reference.
        fallback: Text after ``|`` in a date reference.
    """

    # Always present.
    type: str
    raw: str
    start: int
    end: int

    # Populated depending on the kind of reference.
    id: Optional[str] = None
    label: Optional[str] = None
    url: Optional[str] = None
    special_mention: Optional[str] = None
    timestamp: Optional[str] = None
    date_format: Optional[str] = None
    fallback: Optional[str] = None


def parse_slack_references(text: str) -> List[SlackMessageReference]:
    """Return every Slack mrkdwn reference found in ``text``, in order of appearance.

    Each single-line ``<...>`` span is classified as a user mention, channel
    link, user group mention, special mention, date, link, or unknown reference.

    Only the syntax present in the text is parsed. IDs are not resolved to
    their latest names; use the Slack Web API for current entity data.
    """
    angle_bracket_matches = _ANGLE_BRACKET_REFERENCE_PATTERN.finditer(text)
    return list(map(_to_slack_message_reference, angle_bracket_matches))


def extract_user_ids(text: str) -> List[str]:
    """Return the IDs of all user mentions in ``text``, in order of appearance.

    Repeated mentions are not de-duplicated.
    """
    references = parse_slack_references(text)
    return [
        reference.id
        for reference in references
        if reference.id is not None and reference.type == USER_REFERENCE
    ]


def extract_channel_ids(text: str) -> List[str]:
    """Return the IDs of all channel references in ``text``, in order of appearance.

    Repeated references are not de-duplicated.
    """
    references = parse_slack_references(text)
    return [
        reference.id
        for reference in references
        if reference.id is not None and reference.type == CHANNEL_REFERENCE
    ]


def extract_usergroup_ids(text: str) -> List[str]:
    """Return the IDs of all user group mentions in ``text``, in order of appearance.

    Repeated mentions are not de-duplicated.
    """
    references = parse_slack_references(text)
    return [
        reference.id
        for reference in references
        if reference.id is not None and reference.type == USERGROUP_REFERENCE
    ]


def _to_slack_message_reference(match: re.Match) -> SlackMessageReference:
    """Classify one angle-bracket match and build the corresponding reference.

    The text between the brackets is split at the first ``|`` into a target
    and an optional label; the target's prefix selects the reference type.
    Anything without a recognized prefix is reported as a link.
    """
    start, end = match.span()
    raw = match.group(0)
    target, label = _split_label(match.group(1))

    # Checked in order; all three handlers accept the same keyword arguments.
    labelled_handlers = (
        ("@", _to_user_reference),
        ("#", _to_channel_reference),
        ("!subteam^", _to_usergroup_reference),
    )
    for prefix, handler in labelled_handlers:
        if target.startswith(prefix):
            return handler(target=target, label=label, raw=raw, start=start, end=end)

    # "!date^" must be tested before the bare "!" prefix, which would also match it.
    if target.startswith("!date^"):
        # For dates the text after "|" is passed as the fallback rather than a label.
        return _to_date_reference(target=target, fallback=label, raw=raw, start=start, end=end)
    if target.startswith("!"):
        return _to_special_mention_reference(target=target, label=label, raw=raw, start=start, end=end)

    return SlackMessageReference(type=LINK_REFERENCE, raw=raw, start=start, end=end, url=target, label=label)


def _to_user_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Build a user reference from an ``@``-prefixed target.

    The ID is everything after the first character of ``target``. IDs that do
    not start with ``U`` or ``W`` produce an unknown reference, which keeps
    the label but carries no ID.
    """
    candidate_id = target[1:]
    if candidate_id.startswith(("U", "W")):
        return SlackMessageReference(type=USER_REFERENCE, raw=raw, start=start, end=end, id=candidate_id, label=label)
    return SlackMessageReference(type=UNKNOWN_REFERENCE, raw=raw, start=start, end=end, label=label)


def _to_channel_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Build a channel reference from a ``#``-prefixed target.

    The ID is everything after the first character of ``target``. An empty ID
    produces an unknown reference, which keeps the label but carries no ID.
    """
    candidate_id = target[1:]
    if candidate_id:
        return SlackMessageReference(type=CHANNEL_REFERENCE, raw=raw, start=start, end=end, id=candidate_id, label=label)
    return SlackMessageReference(type=UNKNOWN_REFERENCE, raw=raw, start=start, end=end, label=label)


def _to_usergroup_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Build a user group reference from a ``!subteam^``-prefixed target.

    The ID is everything after the ``!subteam^`` prefix. An empty ID produces
    an unknown reference, which keeps the label but carries no ID.
    """
    candidate_id = target[len("!subteam^") :]
    if candidate_id:
        return SlackMessageReference(
            type=USERGROUP_REFERENCE,
            raw=raw,
            start=start,
            end=end,
            id=candidate_id,
            label=label,
        )
    return SlackMessageReference(type=UNKNOWN_REFERENCE, raw=raw, start=start, end=end, label=label)


def _to_special_mention_reference(
    *,
    target: str,
    label: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Build a special-mention reference from a ``!``-prefixed target.

    The mention name is everything after the first character of ``target``.
    Names outside ``_SPECIAL_MENTION_NAMES`` produce an unknown reference.
    """
    mention = target[1:]
    if mention in _SPECIAL_MENTION_NAMES:
        return SlackMessageReference(
            type=SPECIAL_MENTION_REFERENCE,
            raw=raw,
            start=start,
            end=end,
            label=label,
            special_mention=mention,
        )
    return SlackMessageReference(type=UNKNOWN_REFERENCE, raw=raw, start=start, end=end, label=label)


def _to_date_reference(
    *,
    target: str,
    fallback: Optional[str],
    raw: str,
    start: int,
    end: int,
) -> SlackMessageReference:
    """Build a date reference from a ``^``-separated target.

    ``target`` is read as ``<marker>^<timestamp>^<format>[^<url>]``. When the
    timestamp or the format is missing or empty, an unknown reference is
    returned, which keeps only the fallback text. An absent or empty URL is
    reported as ``None``.
    """
    # At most three splits, so any "^" inside the optional URL stays part of the URL.
    pieces = target.split("^", 3)
    # Pad with empty strings so missing trailing fields unpack as "".
    _marker, timestamp, date_format, link = (pieces + ["", "", ""])[:4]

    if not timestamp or not date_format:
        return SlackMessageReference(type=UNKNOWN_REFERENCE, raw=raw, start=start, end=end, fallback=fallback)

    return SlackMessageReference(
        type=DATE_REFERENCE,
        raw=raw,
        start=start,
        end=end,
        timestamp=timestamp,
        date_format=date_format,
        url=link or None,
        fallback=fallback,
    )


def _split_label(value: str) -> Tuple[str, Optional[str]]:
    """Split ``value`` at its first ``|`` into ``(target, label)``.

    The label is ``None`` when ``value`` contains no ``|``; it is an empty
    string when the ``|`` is the last character.
    """
    target, separator, label = value.partition("|")
    # An empty separator means no "|" was found, so there is no label at all.
    return (target, label) if separator else (value, None)
158 changes: 158 additions & 0 deletions tests/slack_bolt/test_message_references.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from slack_bolt.message_references import (
CHANNEL_REFERENCE,
DATE_REFERENCE,
LINK_REFERENCE,
SPECIAL_MENTION_REFERENCE,
UNKNOWN_REFERENCE,
USER_REFERENCE,
USERGROUP_REFERENCE,
SlackMessageReference,
extract_channel_ids,
extract_user_ids,
extract_usergroup_ids,
parse_slack_references,
)


class TestMessageReferences:
    """Tests for ``parse_slack_references`` and the ``extract_*_ids`` helpers."""

    @staticmethod
    def _located(text, raw):
        """Return the ``raw``/``start``/``end`` keyword arguments for the first ``raw`` in ``text``."""
        begin = text.index(raw)
        return {"raw": raw, "start": begin, "end": begin + len(raw)}

    def test_parse_slack_references(self):
        user = "<@U01V09UNAJZ|some_user>"
        channel = "<#C123ABC456|general>"
        usergroup = "<!subteam^SAZ94GDB8|ops>"
        here = "<!here>"
        link = "<https://example.com|docs>"
        mailto = "<mailto:perihelion@example.com|Email Perihelion>"
        date = "<!date^1392734382^{date_short}^https://example.com/|Feb 18, 2014 PST>"
        text = (
            f"Hi {user}, join {channel}, "
            f"ping {usergroup}, {here}, see {link}, "
            f"email {mailto}, "
            f"and note {date}."
        )

        references = parse_slack_references(text)

        expected = [
            SlackMessageReference(
                type=USER_REFERENCE,
                id="U01V09UNAJZ",
                label="some_user",
                **self._located(text, user),
            ),
            SlackMessageReference(
                type=CHANNEL_REFERENCE,
                id="C123ABC456",
                label="general",
                **self._located(text, channel),
            ),
            SlackMessageReference(
                type=USERGROUP_REFERENCE,
                id="SAZ94GDB8",
                label="ops",
                **self._located(text, usergroup),
            ),
            SlackMessageReference(
                type=SPECIAL_MENTION_REFERENCE,
                special_mention="here",
                **self._located(text, here),
            ),
            SlackMessageReference(
                type=LINK_REFERENCE,
                url="https://example.com",
                label="docs",
                **self._located(text, link),
            ),
            SlackMessageReference(
                type=LINK_REFERENCE,
                url="mailto:perihelion@example.com",
                label="Email Perihelion",
                **self._located(text, mailto),
            ),
            SlackMessageReference(
                type=DATE_REFERENCE,
                timestamp="1392734382",
                date_format="{date_short}",
                url="https://example.com/",
                fallback="Feb 18, 2014 PST",
                **self._located(text, date),
            ),
        ]
        assert references == expected

        # The reported span must slice back to exactly the raw reference text.
        sliced = [text[reference.start : reference.end] for reference in references]
        assert sliced == [reference.raw for reference in references]

    def test_parse_slack_references_without_labels(self):
        text = "<@W123> <#G123> <!subteam^S123> <https://example.com>"

        expected = [
            SlackMessageReference(type=USER_REFERENCE, raw="<@W123>", start=0, end=7, id="W123"),
            SlackMessageReference(type=CHANNEL_REFERENCE, raw="<#G123>", start=8, end=15, id="G123"),
            SlackMessageReference(type=USERGROUP_REFERENCE, raw="<!subteam^S123>", start=16, end=31, id="S123"),
            SlackMessageReference(
                type=LINK_REFERENCE,
                raw="<https://example.com>",
                start=32,
                end=53,
                url="https://example.com",
            ),
        ]

        assert parse_slack_references(text) == expected

    def test_extract_ids(self):
        text = "<@U111> <@W222|person> <#C111|general> <!subteam^S111|ops> <https://example.com>"

        user_ids = extract_user_ids(text)
        channel_ids = extract_channel_ids(text)
        usergroup_ids = extract_usergroup_ids(text)

        assert user_ids == ["U111", "W222"]
        assert channel_ids == ["C111"]
        assert usergroup_ids == ["S111"]

    def test_parse_unknown_references(self):
        text = "<@B123|bot> <!unknown|label> <!date^^{date_short}|fallback>"

        expected = [
            SlackMessageReference(type=UNKNOWN_REFERENCE, raw="<@B123|bot>", start=0, end=11, label="bot"),
            SlackMessageReference(type=UNKNOWN_REFERENCE, raw="<!unknown|label>", start=12, end=28, label="label"),
            SlackMessageReference(
                type=UNKNOWN_REFERENCE,
                raw="<!date^^{date_short}|fallback>",
                start=29,
                end=59,
                fallback="fallback",
            ),
        ]

        assert parse_slack_references(text) == expected

    def test_parse_date_reference_with_caret_in_url(self):
        text = "<!date^1392734382^{date_short}^https://example.com/a^b|Feb 18, 2014 PST>"

        expected = SlackMessageReference(
            type=DATE_REFERENCE,
            raw=text,
            start=0,
            end=len(text),
            timestamp="1392734382",
            date_format="{date_short}",
            url="https://example.com/a^b",
            fallback="Feb 18, 2014 PST",
        )

        assert parse_slack_references(text) == [expected]

    def test_does_not_parse_escaped_or_multiline_angle_brackets(self):
        # Only the last span is a literal, single-line "<...>"; the first is HTML-escaped
        # and the second is broken by a newline.
        text = "&lt;@U111&gt; <@U222\n> <@U333>"

        expected = [SlackMessageReference(type=USER_REFERENCE, raw="<@U333>", start=23, end=30, id="U333")]

        assert parse_slack_references(text) == expected