-
Notifications
You must be signed in to change notification settings - Fork 56
/
bluesky.py
1902 lines (1597 loc) · 60.4 KB
/
bluesky.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Bluesky source class.
* https://bsky.app/
* https://atproto.com/lexicons/app-bsky-actor
* https://github.com/bluesky-social/atproto/tree/main/lexicons/app/bsky
"""
import copy
from datetime import datetime, timezone
import json
import logging
from pathlib import Path
import re
import string
import urllib.parse
import requests
from lexrpc import Client
from lexrpc.base import Base, NSID_RE
from oauth_dropins.webutil import util
from oauth_dropins.webutil.util import trim_nulls
from . import as1
from .source import FRIENDS, html_to_text, Source, OMIT_LINK, creation_result
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Handle syntax, via https://atproto.com/specs/handle
#
# One or more dot-terminated labels followed by a final label that must start
# with a letter. This fragment ends with ``$`` but has no leading ``^``; the
# compiled patterns below prepend their own start anchor (and prefix).
HANDLE_REGEX = (
    r'([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+'
    r'[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$'
)
# Matches a whole string that is a handle.
HANDLE_PATTERN = re.compile(r'^' + HANDLE_REGEX)
# Matches a whole string that is ``did:web:`` followed by a handle-shaped
# hostname. Ports (``%3A``) and paths (``:``) don't match.
DID_WEB_PATTERN = re.compile(r'^did:web:' + HANDLE_REGEX)

# at:// URI regexp
# https://atproto.com/specs/at-uri-scheme#full-at-uri-syntax
# https://atproto.com/specs/record-key#record-key-syntax
# https://atproto.com/specs/nsid
# also see arroba.util.parse_at_uri
#
# Character class body shared by the repo and rkey groups below.
_CHARS = 'a-zA-Z0-9-.:'
# TODO: add query and fragment? they're currently unused in the protocol
# https://atproto.com/specs/at-uri-scheme#structure
#
# Compiled with re.VERBOSE, so the newlines inside the pattern are ignored.
# Named groups: ``repo`` (required), ``collection`` and ``rkey`` (both
# optional; rkey can only be present if collection is).
AT_URI_PATTERN = re.compile(rf"""
^at://
(?P<repo>[{_CHARS}]+)
(?:/(?P<collection>[a-zA-Z0-9-.]+)
(?:/(?P<rkey>[{_CHARS}]+))?)?
$""", re.VERBOSE)

# Maps AT Protocol NSID collections to path elements in bsky.app URLs.
# Used in at_uri_to_web_url.
#
# eg for mapping a URI like:
# at://did:plc:z72i7hd/app.bsky.feed.generator/mutuals
# to a frontend URL like:
# https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur/feed/mutuals
COLLECTION_TO_BSKY_APP_TYPE = {
    'app.bsky.feed.generator': 'feed',
    'app.bsky.feed.post': 'post',
    'app.bsky.graph.list': 'lists',
}
# Inverse of COLLECTION_TO_BSKY_APP_TYPE: bsky.app path element => collection
# NSID. Used in web_url_to_at_uri.
BSKY_APP_TYPE_TO_COLLECTION = {
    name: coll for coll, name in COLLECTION_TO_BSKY_APP_TYPE.items()
}

# Maximum number of images in a post's image embed.
# TODO: load from app.bsky.embed.images lexicon
# https://github.com/snarfed/atproto/blob/c0489626327e1ac9c08961ad9ce828793d0d1d43/lexicons/app/bsky/embed/images.json#L13
MAX_IMAGES = 4

# maps AS1 objectType/verb to possible output Bluesky lexicon types.
# used in from_as1 to validate its out_type argument: each key is a collection
# of AS1 types, each value is the tuple of ``$type``s those AS1 types may be
# converted to.
POST_TYPES = tuple(as1.POST_TYPES) + ('bookmark',)
FROM_AS1_TYPES = {
    as1.ACTOR_TYPES: (
        'app.bsky.actor.profile',
        'app.bsky.actor.defs#profileView',
        'app.bsky.actor.defs#profileViewBasic',
        'app.bsky.actor.defs#profileViewDetailed',
    ),
    POST_TYPES: (
        'app.bsky.feed.post',
        'app.bsky.feed.defs#feedViewPost',
        'app.bsky.feed.defs#postView',
    ),
    ('block',): (
        'app.bsky.graph.block',
    ),
    ('flag',): (
        'com.atproto.moderation.createReport#input',
    ),
    ('follow',): (
        'app.bsky.graph.follow',
    ),
    ('share',): (
        'app.bsky.feed.repost',
        'app.bsky.feed.defs#feedViewPost',
        'app.bsky.feed.defs#reasonRepost',
    ),
}

# Matches bsky.app (and staging.bsky.app) profile, post, and feed URLs.
# Compiled with re.VERBOSE, so the newlines inside the pattern are ignored.
# Named groups: ``id`` (handle or DID), and optionally ``type`` and ``tid``.
# NOTE(review): only ``post`` and ``feed`` are accepted for ``type``; the
# ``lists`` path element in COLLECTION_TO_BSKY_APP_TYPE is not matched here.
BSKY_APP_URL_RE = re.compile(r"""
^https://(staging\.)?bsky\.app
/profile/(?P<id>[^/]+)
(/(?P<type>post|feed)
/(?P<tid>[^?]+))?$
""", re.VERBOSE)

# Default PDS (used as the default ``pds`` argument of to_as1) and AppView.
DEFAULT_PDS_DOMAIN = 'bsky.social'
DEFAULT_PDS = f'https://{DEFAULT_PDS_DOMAIN}/'
DEFAULT_APPVIEW = 'https://api.bsky.app'

# label on profiles set to only show them to logged in users
# https://bsky.app/profile/safety.bsky.app/post/3khhw7s3rtx2s
# https://docs.bsky.app/docs/advanced-guides/resolving-identities#for-backend-services
# https://github.com/bluesky-social/atproto/blob/main/packages/api/docs/labels.md#label-behaviors
NO_AUTHENTICATED_LABEL = '!no-unauthenticated'

# Shared lexrpc Base instance; from_as1 passes its output through
# LEXRPC_BASE._maybe_validate.
# NOTE(review): truncate=True presumably enables the string truncation that
# from_as1's docstring describes - confirm against lexrpc.
LEXRPC_BASE = Base(truncate=True)

# Matches inline Markdown links, ie ``[text](url)``, non-greedily.
# TODO: html2text doesn't escape ]s in link text, which breaks this, eg
# <a href="http://post">ba](r</a> turns into [ba](r](http://post)
MARKDOWN_LINK_RE = re.compile(r'\[(?P<text>.*?)\]\((?P<url>.*?)\)')
def url_to_did_web(url):
    """Converts a URL to a ``did:web``.

    In AT Proto, only hostname-based web DIDs are supported. Ports and paths
    are not supported; they are discarded, and a warning is logged for each.

    https://atproto.com/specs/did

    Examples:

    * ``https://foo.com`` => ``did:web:foo.com``
    * ``https://foo.com:3000`` => ``did:web:foo.com``
    * ``https://foo.bar.com/baz/baj`` => ``did:web:foo.bar.com``

    Args:
      url (str)

    Returns:
      str: ``did:web:`` DID built from the URL's parsed hostname

    Raises:
      ValueError: if no hostname can be parsed out of ``url``
    """
    parsed = urllib.parse.urlparse(url)
    if not parsed.hostname:
        raise ValueError(f'Invalid URL: {url}')

    # Detect a port from the parsed port itself. Comparing netloc to hostname
    # misfires on upper-case hosts (hostname is lower-cased by urlparse) and on
    # userinfo (user@host), neither of which is a port.
    try:
        has_port = parsed.port is not None
    except ValueError:
        # non-numeric or out of range port; it's still a port that we drop
        has_port = True

    if has_port:
        logger.warning(f"URL {url} contained a port, which will not be included in the DID.")
    if parsed.path and parsed.path != "/":
        logger.warning(f"URL {url} contained a path, which will not be included in the DID.")

    return f'did:web:{parsed.hostname}'
def did_web_to_url(did):
    """Converts a ``did:web`` to an ``https://`` URL.

    In AT Proto, only hostname-based web DIDs are supported. DIDs with ports
    or paths are rejected as invalid.

    https://atproto.com/specs/did

    Examples:

    * ``did:web:foo.com`` => ``https://foo.com/``
    * ``did:web:foo.com%3A3000`` => INVALID
    * ``did:web:bar.com:baz:baj`` => INVALID

    Args:
      did (str)

    Returns:
      str: ``https://[host]/`` URL

    Raises:
      ValueError: if ``did`` is empty or doesn't match ``DID_WEB_PATTERN``
    """
    is_valid = bool(did) and DID_WEB_PATTERN.match(did) is not None
    if not is_valid:
        raise ValueError(f'Invalid did:web: {did}')

    hostname = urllib.parse.unquote(did.removeprefix('did:web:'))
    return f'https://{hostname}/'
def at_uri_to_web_url(uri, handle=None):
    """Converts an ``at://`` URI to a ``https://bsky.app`` URL.

    https://atproto.com/specs/at-uri-scheme

    A URI with only a repo, eg ``at://did:plc:abc`` or ``at://did:plc:abc/``,
    is converted to that user's profile URL. A URI with a collection and
    record key is converted using ``COLLECTION_TO_BSKY_APP_TYPE``.

    Args:
      uri (str): ``at://`` URI
      handle: (str): optional user handle. If not provided, defaults to the
        DID in uri.

    Returns:
      str: ``https://bsky.app`` URL, or None if ``uri`` is empty or its
      collection has no corresponding bsky.app URL type

    Raises:
      ValueError: if uri is not a string, doesn't start with ``at://``, or
        has a path that isn't exactly ``/[collection]/[rkey]``
    """
    if not uri:
        return None

    if not isinstance(uri, str) or not uri.startswith('at://'):
        raise ValueError(f'Expected at:// URI, got {uri}')

    parsed = urllib.parse.urlparse(uri)
    did = parsed.netloc

    # treat a bare trailing slash the same as no path at all
    path = parsed.path.strip('/')
    if not path:
        return f'{Bluesky.user_url(handle or did)}'

    # fail with a descriptive error, not a tuple unpacking error, when the
    # path is only a collection or has extra segments
    parts = path.split('/')
    if len(parts) != 2:
        raise ValueError(
            f'Expected at:// URI with collection and record key, got {uri}')
    collection, rkey = parts

    bsky_type = COLLECTION_TO_BSKY_APP_TYPE.get(collection)
    if not bsky_type:
        return None

    return f'{Bluesky.user_url(handle or did)}/{bsky_type}/{rkey}'
def web_url_to_at_uri(url, handle=None, did=None):
    """Converts a ``https://bsky.app`` URL to an ``at://`` URI.

    https://atproto.com/specs/at-uri-scheme

    Currently supports profile, post, and feed URLs with DIDs and handles, eg:

    * ``https://bsky.app/profile/did:plc:123abc``
    * ``https://bsky.app/profile/vito.fyi/post/3jt7sst7vok2u``
    * ``https://bsky.app/profile/bsky.app/feed/mutuals``

    If both ``handle`` and ``did`` are provided, and ``handle`` matches the
    URL, the handle in the resulting URI will be replaced with ``did``.

    Profile URLs are converted to the ``app.bsky.actor.profile`` record with
    record key ``self``.

    Args:
      url (str): ``bsky.app`` URL
      handle (str): Bluesky handle, or None
      did (str): Valid DID, or None

    Returns:
      str: ``at://`` URI, or None if ``url`` is empty

    Raises:
      ValueError: if ``url`` can't be parsed as a ``bsky.app`` profile or
        post URL
    """
    if not url:
        return None

    parsed = BSKY_APP_URL_RE.match(url)
    if parsed is None:
        raise ValueError(f"{url} doesn't look like a bsky.app profile or post URL")

    repo = parsed['id']
    assert repo

    # If a did and handle have been provided explicitly, and the URL's user
    # is that handle, use the did as the URI's repo instead.
    if did and handle and repo == handle:
        repo = did

    app_type = parsed['type']
    if not app_type:
        # profile URL
        return f'at://{repo}/app.bsky.actor.profile/self'

    collection = BSKY_APP_TYPE_TO_COLLECTION[app_type]
    record_key = parsed['tid']
    assert record_key
    return f'at://{repo}/{collection}/{record_key}'
def from_as1_to_strong_ref(obj, client=None, value=False):
    """Converts an AS1 object to an ATProto ``com.atproto.repo.strongRef``.

    Uses AS1 ``id`` or ``url``, which should be an ``at://`` URI.

    Without a ``client``, or if the URI doesn't match ``AT_URI_PATTERN``, no
    network calls are made and the returned ``cid`` is the empty string.

    Args:
      obj (dict or str): AS1 object or activity, or its id/URL as a string
      client (lexrpc.Client): optional; if provided, this will be used to make
        API calls to PDSes to fetch and populate the ``cid`` field and resolve
        handle to DID.
      value (bool): whether to include the record's ``value`` field in the
        returned object

    Returns:
      dict: ATProto ``com.atproto.repo.strongRef`` record
    """
    if isinstance(obj, dict):
        ref = obj.get('id') or as1.get_url(obj)
    else:
        ref = obj
    at_uri = Bluesky.post_id(ref) or ''

    parsed = AT_URI_PATTERN.match(at_uri)
    if not (parsed and client):
        # can't look up the record, so return the reference without a CID
        return {'uri': at_uri, 'cid': ''}

    repo, collection, rkey = parsed.groups()
    if not repo.startswith('did:'):
        # repo is a handle; resolve it to its DID
        handle = repo
        resolved = client.com.atproto.identity.resolveHandle(handle=handle)
        repo = resolved['did']
        # only replace first instance of handle in case it's also in
        # collection or rkey
        at_uri = at_uri.replace(handle, repo, 1)

    record = client.com.atproto.repo.getRecord(
        repo=repo, collection=collection, rkey=rkey)
    if not value:
        record.pop('value', None)
    return record
def from_as1_datetime(val):
    """Converts an AS1 RFC 3339 datetime string to ATProto ISO 8601.

    Bluesky requires full date and time with time zone, recommends UTC with
    Z suffix, fractional seconds.

    https://atproto.com/specs/lexicon#datetime

    Returns now (ie the current time) if the input datetime is empty or can't
    be parsed.

    Args:
      val (str): RFC 3339 datetime

    Returns:
      str: ATProto compatible ISO 8601 datetime, with millisecond precision
      and ``Z`` suffix
    """
    dt = util.now()

    if val:
        try:
            dt = util.parse_iso8601(val.strip())
        except (AttributeError, TypeError, ValueError):
            # log via this module's logger, like the rest of this file, not
            # the root logger
            logger.debug(f"Couldn't parse {val} as ISO 8601; defaulting to current time")

    if dt.tzinfo:
        # NOTE(review): as_utc is expected to return a naive datetime in UTC;
        # the assert below depends on that.
        dt = util.as_utc(dt)
    # else it's naive, assume it's UTC

    # must be naive here, since we append the Z suffix ourselves
    assert dt.utcoffset() is None, dt.utcoffset()
    return dt.isoformat(sep='T', timespec='milliseconds') + 'Z'
def base_object(obj):
    """Returns the "base" Bluesky object that an object operates on.

    If the object is a reply, repost, or like of a Bluesky post, this returns
    that post object. The id in the returned object is the AT protocol URI,
    while the URL is the bsky.app web URL.

    Looks at the ``inReplyTo``, ``object``, and ``target`` fields, in that
    order, and returns the first value that has a ``https://bsky.app/`` or
    ``at://`` URL. Values without a URL, or with any other URL, are skipped.

    Args:
      obj (dict): ActivityStreams object

    Returns:
      dict: minimal ActivityStreams object with ``id`` and ``url``, or ``{}``
      if no Bluesky base object was found

    Raises:
      ValueError: if a candidate's URL is on ``bsky.app`` but can't be
        converted by :func:`web_url_to_at_uri`, or is an ``at://`` URI that
        can't be converted by :func:`at_uri_to_web_url`
    """
    for field in ('inReplyTo', 'object', 'target'):
        for base in util.get_list(obj, field):
            url = util.get_url(base)
            if not url:
                # keep looking; a candidate without a URL shouldn't hide later
                # candidates, just like candidates with non-Bluesky URLs don't
                continue

            if url.startswith('https://bsky.app/'):
                return {
                    'id': web_url_to_at_uri(url),
                    'url': url,
                }

            if url.startswith('at://'):
                return {
                    'id': url,
                    'url': at_uri_to_web_url(url),
                }

    return {}
def from_as1(obj, out_type=None, blobs=None, client=None):
    """Converts an AS1 object to a Bluesky object.

    Converts to ``record`` types by default, eg ``app.bsky.actor.profile`` or
    ``app.bsky.feed.post``. Use ``out_type`` to convert to a different type, eg
    ``app.bsky.actor.defs#profileViewBasic`` or
    ``app.bsky.feed.defs#feedViewPost``.

    The ``objectType`` field is required.

    If a string value in an output Bluesky object is longer than its
    ``maxGraphemes`` or ``maxLength`` in its lexicon, it's truncated with an
    ``…`` ellipsis character at the end in order to fit.

    The input object is not modified; it's deep copied before postprocessing.
    Nested actors and posts are converted with recursive calls.

    Args:
      obj (dict): AS1 object or activity
      out_type (str): desired output lexicon ``$type``
      blobs (dict): optional mapping from str URL to ``blob`` dict to use in
        the returned object. If not provided, or if this doesn't have an
        ``image`` or similar URL in the input object, its output blob will be
        omitted.
      client (Bluesky or lexrpc.Client): optional; if provided, this will be
        used to make API calls to PDSes to fetch and populate CIDs for records
        referenced by replies, likes, reposts, etc.

    Returns:
      dict: ``app.bsky.*`` object

    Raises:
      ValueError: if the ``objectType`` or ``verb`` fields are missing or
        unsupported, if ``out_type`` isn't supported for the input's type, or
        if a follow, block, or flag has no object
      NotImplementedError: if a tag has a non-zero ``startIndex`` and the
        object has ``content_is_html`` set
    """
    if isinstance(client, Bluesky):
        client = client._client

    activity = obj
    inner_obj = as1.get_object(activity)
    verb = activity.get('verb') or 'post'
    if inner_obj and verb == 'post':
        # unwrap post activities and convert the inner object instead
        obj = inner_obj

    type = as1.object_type(obj)
    if not type:
        raise ValueError('Missing objectType or verb')

    actor = as1.get_object(activity, 'actor')
    if blobs is None:
        blobs = {}

    # validate out_type
    if out_type:
        for in_types, out_types in FROM_AS1_TYPES.items():
            if type in in_types and out_type in out_types:
                break
        else:
            raise ValueError(f"{type} {verb} doesn't support out_type {out_type}")

    # extract @-mention links in HTML text
    obj = copy.deepcopy(obj)
    Source.postprocess_object(obj, mentions=True)

    ret = None

    # TODO: once we're on Python 3.10, switch this to a match statement!
    if type in as1.ACTOR_TYPES:
        # avatar and banner. banner is featured image, if available
        avatar = util.get_url(obj, 'image')
        banner = None
        for img in util.get_list(obj, 'image'):
            url = img.get('url')
            if img.get('objectType') == 'featured' and url:
                banner = url
                break

        ret = {
            'displayName': obj.get('displayName'),
            'description': html_to_text(obj.get('summary')),
            'avatar': blobs.get(avatar),
            'banner': blobs.get(banner),
        }
        if not out_type or out_type == 'app.bsky.actor.profile':
            ret = trim_nulls({**ret, '$type': 'app.bsky.actor.profile'})
            return LEXRPC_BASE._maybe_validate('app.bsky.actor.profile', 'record', ret)

        # everything below is for the app.bsky.actor.defs#profileView* types,
        # which need a DID and handle and use URLs, not blobs, for images
        url = as1.get_url(obj)
        id = obj.get('id')
        if not url and id:
            parsed = util.parse_tag_uri(id)
            if parsed:
                # This is only really formatted as a URL to keep url_to_did_web happy.
                url = f'https://{parsed[0]}'

        did_web = ''
        if id and id.startswith('did:web:'):
            did_web = id
        else:
            try:
                did_web = url_to_did_web(url)
            except ValueError as e:
                logger.info(f"Couldn't generate did:web: {e}")

        # handles must be hostnames
        # https://atproto.com/specs/handle
        username = obj.get('username')
        parsed = urllib.parse.urlparse(url)
        domain = parsed.netloc
        did_web_bare = did_web.removeprefix('did:web:')
        handle = (username if username and HANDLE_PATTERN.match(username)
                  else did_web_bare if ':' not in did_web_bare
                  else domain if domain
                  else '')

        ret.update({
            '$type': out_type,
            # TODO: more specific than domain, many users will be on shared domains
            'did': id if id and id.startswith('did:') else did_web,
            'handle': handle,
            'avatar': avatar,
            'banner': banner,
        })
        # WARNING: this includes description, which isn't technically in this
        # #profileViewBasic. hopefully clients should just ignore it!
        # https://atproto.com/specs/lexicon#authority-and-control
        ret = trim_nulls(ret, ignore=('did', 'handle'))

    elif type == 'share':
        if not out_type or out_type == 'app.bsky.feed.repost':
            ret = {
                '$type': 'app.bsky.feed.repost',
                'subject': from_as1_to_strong_ref(inner_obj, client=client),
                'createdAt': from_as1_datetime(obj.get('published')),
            }
        elif out_type == 'app.bsky.feed.defs#reasonRepost':
            ret = {
                '$type': 'app.bsky.feed.defs#reasonRepost',
                'by': from_as1(actor, out_type='app.bsky.actor.defs#profileViewBasic'),
                'indexedAt': from_as1_datetime(None),
            }
        elif out_type == 'app.bsky.feed.defs#feedViewPost':
            ret = {
                '$type': 'app.bsky.feed.defs#feedViewPost',
                'post': from_as1(inner_obj, out_type='app.bsky.feed.defs#postView'),
                'reason': from_as1(obj, out_type='app.bsky.feed.defs#reasonRepost'),
            }

    elif type == 'like':
        ret = {
            '$type': 'app.bsky.feed.like',
            'subject': from_as1_to_strong_ref(inner_obj, client=client),
            'createdAt': from_as1_datetime(obj.get('published')),
        }

    elif type in ('follow', 'block'):
        if not inner_obj:
            # name the actual activity type; this branch handles blocks too
            raise ValueError(f'{type} activity requires actor and object')
        ret = {
            '$type': f'app.bsky.graph.{type}',
            'subject': inner_obj.get('id'),  # DID
            'createdAt': from_as1_datetime(obj.get('published')),
        }

    elif type == 'flag':
        if not inner_obj:
            raise ValueError('flag activity requires object')
        ret = {
            '$type': 'com.atproto.moderation.createReport#input',
            'subject': {
                '$type': 'com.atproto.repo.strongRef',
                **from_as1_to_strong_ref(inner_obj, client=client),
            },
            # https://github.com/bluesky-social/atproto/blob/main/lexicons/com/atproto/moderation/defs.json#
            'reasonType': 'com.atproto.moderation.defs#reasonOther',
            # https://github.com/bluesky-social/atproto/blob/651d4c2a3447525c68d3bf1b8492bdafb0a88c66/lexicons/com/atproto/moderation/createReport.json#L21
            'reason': (obj.get('content') or obj.get('summary') or '')[:2000],
        }

    elif verb == 'post' and type in POST_TYPES:
        # convert text from HTML and truncate
        src = Bluesky('unused')
        content = obj.get('content')
        text = obj.get('summary') or content or obj.get('displayName') or ''
        full_text = html_to_text(text, ignore_links=False, inline_links=True)

        # extract links and convert to plain text
        # TODO: unify into as1 or source
        # https://github.com/snarfed/granary/issues/729
        link_tags = []
        while link := MARKDOWN_LINK_RE.search(full_text):
            start, end = link.span()
            if not link['text'].startswith(('@', '#')):
                link_tags.append({
                    'objectType': 'link',
                    'displayName': link['text'],
                    'url': link['url'],
                    'startIndex': start,
                    'length': len(link['text']),
                })
            # replace the Markdown link with just its text. every iteration
            # removes one link, so this loop terminates.
            full_text = full_text[:start] + link['text'] + full_text[end:]

        text = src.truncate(full_text, None, OMIT_LINK)
        truncated = text != full_text

        # convert index-based tags to facets
        facets = []
        for tag in util.get_list(obj, 'tags') + link_tags:
            # displayName may be present but null
            name = (tag.get('displayName') or '').strip().lstrip('@#')
            # note that this rebinds the outer type. that's ok because it
            # isn't read again before it's reassigned at the end.
            type = tag.get('objectType')
            if name and not type:
                type = 'hashtag'

            url = tag.get('url')
            if not url and type != 'hashtag':
                continue

            facet = {
                '$type': 'app.bsky.richtext.facet',
            }
            try:
                start = int(tag['startIndex'])
                if start and obj.get('content_is_html'):
                    raise NotImplementedError('HTML content is not supported with index tags')
                end = start + int(tag['length'])

                facet['index'] = {
                    # convert indices from Unicode chars to UTF-8 encoded bytes
                    # https://github.com/snarfed/atproto/blob/5b0c2d7dd533711c17202cd61c0e101ef3a81971/lexicons/app/bsky/richtext/facet.json#L34
                    'byteStart': len(full_text[:start].encode()),
                    'byteEnd': len(full_text[:end].encode()),
                }
            except (KeyError, ValueError, IndexError, TypeError):
                # no usable startIndex/length; fall back to searching for the
                # tag's name in the text below
                pass

            if type == 'hashtag':
                facet['features'] = [{
                    '$type': 'app.bsky.richtext.facet#tag',
                    'tag': name,
                }]

            elif type == 'mention':
                # extract and if necessary resolve DID
                did = None
                if url.startswith('did:'):
                    did = url
                else:
                    if match := AT_URI_PATTERN.match(url):
                        did = match.group('repo')
                    elif match := BSKY_APP_URL_RE.match(url):
                        did = match.group('id')
                    if did and client and not did.startswith('did:'):
                        did = client.com.atproto.identity.resolveHandle(handle=did)['did']

                if not did:
                    continue

                facet['features'] = [{
                    '$type': 'app.bsky.richtext.facet#mention',
                    'did': did,
                }]

            else:
                facet['features'] = [{
                    '$type': 'app.bsky.richtext.facet#link',
                    'uri': url,
                }]

            if name and 'index' not in facet:
                # use displayName to guess index at first location found in text. note
                # that #/@ character for mentions is included in index end.
                #
                # can't use \b for word boundaries here because that only includes
                # alphanumerics, and Bluesky hashtags can include emoji
                prefix = '#' if type == 'hashtag' else '@' if type == 'mention' else ''
                # can't use \b at beginning/end because # and @ and emoji aren't
                # word-constituent chars
                bound = fr'[\s{string.punctuation.replace("-", "")}]'
                # escape the name so that regexp metacharacters in it, eg . + ( ),
                # are matched literally instead of breaking or widening the search
                name_re = re.escape(name)
                match = re.search(fr'(^|{bound})({prefix}{name_re})($|{bound})', text,
                                  flags=re.IGNORECASE)
                if not match and type == 'mention' and '@' in name:
                    # try without @[server] suffix
                    username = re.escape(name.split('@')[0])
                    match = re.search(fr'(^|\s)({prefix}{username})($|\s)', text)

                if match:
                    facet['index'] = {
                        'byteStart': len(full_text[:match.start(2)].encode()),
                        'byteEnd': len(full_text[:match.end(2)].encode()),
                    }

            # skip or trim this facet if it's off the end of content that got truncated
            index = facet.get('index')
            if not index:
                continue

            text_len = len(text.encode())
            if truncated:
                text_len -= len('…'.encode())

            if index.get('byteStart', 0) >= text_len:
                continue
            if index.get('byteEnd', 0) > text_len:
                index['byteEnd'] = text_len

            if facet not in facets:
                facets.append(facet)

        # images. *_embed variables are the #view forms, used in postView;
        # *_record_embed variables are the record forms, used in the post record.
        images_embed = images_record_embed = None
        images = as1.get_objects(obj, 'image')
        if images:
            images_embed = {
                '$type': 'app.bsky.embed.images#view',
                'images': [{
                    '$type': 'app.bsky.embed.images#viewImage',
                    'thumb': img.get('url'),
                    'fullsize': img.get('url'),
                    'alt': img.get('displayName') or '',
                } for img in images[:MAX_IMAGES]],
            }
            images_record_embed = {
                '$type': 'app.bsky.embed.images',
                'images': [{
                    '$type': 'app.bsky.embed.images#image',
                    'image': blobs.get(util.get_url(img)) or {},
                    'alt': img.get('displayName') or '',
                } for img in images[:MAX_IMAGES]],
            }

        # article/note attachments
        record_embed = record_record_embed = external_embed = external_record_embed = None

        attachments = util.get_list(obj, 'attachments')
        if truncated:
            if url := as1.get_url(obj):
                # override attachments, use one link to original post instead
                attachments = [{
                    'objectType': 'link',
                    'url': url,
                    'displayName': f'Original post on {util.domain_from_link(url)}',
                }]

        for att in attachments:
            if att.get('objectType') not in ('article', 'link', 'note'):
                continue

            id = att.get('id') or ''
            url = att.get('url') or ''
            if (id.startswith('at://') or id.startswith(Bluesky.BASE_URL) or
                    url.startswith('at://') or url.startswith(Bluesky.BASE_URL)):
                # quoted Bluesky post
                embed = from_as1(att).get('post') or {}
                embed['value'] = embed.pop('record', None)
                record_embed = {
                    '$type': 'app.bsky.embed.record#view',
                    'record': {
                        **embed,
                        '$type': 'app.bsky.embed.record#viewRecord',
                        # override these so that trim_nulls below will remove them
                        'likeCount': None,
                        'replyCount': None,
                        'repostCount': None,
                    },
                }
                record_record_embed = {
                    '$type': 'app.bsky.embed.record',
                    'record': from_as1_to_strong_ref(att, client=client),
                }
            else:
                # external link
                external_record_embed = {
                    '$type': 'app.bsky.embed.external',
                    'external': {
                        '$type': 'app.bsky.embed.external#external',
                        'uri': url or id,
                        'title': att.get('displayName'),
                        'description': att.get('summary') or att.get('content') or '',
                    }
                }
                external_embed = {
                    '$type': 'app.bsky.embed.external#view',
                    'external': {
                        **external_record_embed['external'],
                        '$type': 'app.bsky.embed.external#viewExternal',
                        'thumb': util.get_first(att, 'image'),
                    },
                }

        # from here on, embed is the view form and record_embed is the record form
        if record_embed and (images_embed or external_embed):
            embed = {
                '$type': 'app.bsky.embed.recordWithMedia#view',
                'record': record_embed,
                'media': images_embed or external_embed,
            }
            record_embed = {
                '$type': 'app.bsky.embed.recordWithMedia',
                'record': record_record_embed,
                'media': images_record_embed or external_record_embed,
            }
        else:
            embed = record_embed or images_embed or external_embed
            record_embed = (record_record_embed or images_record_embed
                            or external_record_embed)

        # in reply to
        reply = None
        in_reply_to = base_object(obj)
        if in_reply_to:
            parent_ref = from_as1_to_strong_ref(in_reply_to, client=client, value=True)
            # if the parent is itself a reply, reuse its root; otherwise the
            # parent is the root
            root_ref = (parent_ref.pop('value', {}).get('reply', {}).get('root')
                        or parent_ref)
            reply = {
                '$type': 'app.bsky.feed.post#replyRef',
                'root': root_ref,
                'parent': parent_ref,
            }

        ret = trim_nulls({
            '$type': 'app.bsky.feed.post',
            'text': text,
            'createdAt': from_as1_datetime(obj.get('published')),
            'embed': record_embed,
            'facets': facets,
            'reply': reply,
        }, ignore=('alt', 'createdAt', 'cid', 'description', 'text', 'title', 'uri'))

        if not out_type or out_type == 'app.bsky.feed.post':
            return LEXRPC_BASE._maybe_validate('app.bsky.feed.post', 'record', ret)

        # author
        author = as1.get_object(obj, 'author')
        author.setdefault('objectType', 'person')
        author = from_as1(author, out_type='app.bsky.actor.defs#profileViewBasic')

        ret = trim_nulls({
            '$type': 'app.bsky.feed.defs#postView',
            'uri': obj.get('id') or obj.get('url') or '',
            'cid': '',
            'record': ret,
            'author': author,
            'embed': embed,
            'replyCount': 0,
            'repostCount': 0,
            'likeCount': 0,
            'indexedAt': from_as1_datetime(None),
        }, ignore=('author', 'createdAt', 'cid', 'description', 'indexedAt',
                   'record', 'text', 'title', 'uri'))

        if out_type == 'app.bsky.feed.defs#postView':
            pass
        elif out_type == 'app.bsky.feed.defs#feedViewPost':
            ret = {
                '$type': out_type,
                'post': ret,
            }
        else:
            # out_type was validated against FROM_AS1_TYPES above
            assert False, "shouldn't happen"

    elif type == 'collection':
        ret = {
            '$type': 'app.bsky.graph.list',
            'purpose': 'app.bsky.graph.defs#curatelist',
            'name': obj.get('displayName') or obj.get('id'),
            'description': obj.get('summary'),
            'avatar': blobs.get(util.get_url(obj, 'image')),
            'createdAt': from_as1_datetime(obj.get('published')),
        }

    elif verb == 'add':
        ret = {
            '$type': 'app.bsky.graph.listitem',
            'subject': inner_obj.get('id'),
            'list': activity.get('target'),
            'createdAt': from_as1_datetime(obj.get('published')),
        }

    else:
        raise ValueError(f'AS1 object has unknown objectType {type} verb {verb}')

    # validate against the output type's lexicon. createReport is a procedure,
    # not a record, so it's validated as that procedure's input.
    nsid = ret.get('$type')
    type = 'record'
    if nsid == 'com.atproto.moderation.createReport#input':
        nsid = 'com.atproto.moderation.createReport'
        type = 'input'

    if nsid:
        return LEXRPC_BASE._maybe_validate(nsid, type, ret)

    return ret
def to_as1(obj, type=None, uri=None, repo_did=None, repo_handle=None,
pds=DEFAULT_PDS):
"""Converts a Bluesky object to an AS1 object.
Args:
obj (dict): ``app.bsky.*`` object
type (str): optional ``$type`` to parse with, only used if ``obj['$type']``
is unset
uri (str): optional ``at://`` URI of this object. Used to populate the
``id`` and ``url`` fields for some object types, eg posts.
repo_did (str): optional DID of the repo this object is from. Required to
generate image URLs.
repo_handle (str): optional handle of the user whose repo this object is from
pds (str): base URL of the PDS that currently serves this object's repo.
Required to generate image URLs. Defaults to ``https://bsky.social/``.
Returns:
dict: AS1 object, or None if the record doesn't correspond to an AS1 object,
eg "not found" records
Raises:
ValueError: if the ``$type`` field is missing or unsupported
"""
if not obj:
return {}
type = obj.get('$type') or type
if not type:
raise ValueError('Bluesky object missing $type field')
uri_repo = uri_bsky_url = None
if uri:
uri_bsky_url = at_uri_to_web_url(uri)
if not uri.startswith('at://'):
raise ValueError('Expected at:// uri, got {uri}')
if parsed := AT_URI_PATTERN.match(uri):
uri_repo = parsed.group(1)
# for nested to_as1 calls, if necessary
kwargs = {'repo_did': repo_did, 'repo_handle': repo_handle, 'pds': pds}
# TODO: once we're on Python 3.10, switch this to a match statement!
if type in ('app.bsky.actor.profile',
'app.bsky.actor.defs#profileView',
'app.bsky.actor.defs#profileViewBasic',
'app.bsky.actor.defs#profileViewDetailed',
'app.bsky.feed.generator',
):
images = [{'url': obj.get('avatar')}]
banner = obj.get('banner')
if banner:
images.append({'url': obj.get('banner'), 'objectType': 'featured'})
handle = obj.get('handle')
did = obj.get('did')
if type == 'app.bsky.actor.profile':
if not handle:
handle = repo_handle
if not did:
did = repo_did
urls = []
if handle:
urls.append(Bluesky.user_url(handle))
if not util.domain_or_parent_in(handle, [DEFAULT_PDS_DOMAIN]):
urls.append(f'https://{handle}/')
if did and did.startswith('did:web:'):
urls.append(did_web_to_url(did))
if type == 'app.bsky.feed.generator':
if uri_bsky_url:
urls.insert(0, uri_bsky_url)
ret = {
'objectType': 'service',
'id': uri,
'author': repo_did,
'generator': did,
}
else:
ret = {
'objectType': 'person',
'id': did,
'username': obj.get('handle') or repo_handle,
}
ret.update({
'url': util.dedupe_urls(urls),
'displayName': obj.get('displayName'),
# TODO: for app.bsky.feed.generator, use descriptionFacets
'summary': util.linkify(obj.get('description') or '', pretty=True),
'image': images,
'published': obj.get('createdAt'),
})
# avatar and banner are blobs in app.bsky.actor.profile; convert to URLs
if type in ('app.bsky.actor.profile', 'app.bsky.feed.generator'):
repo_did = repo_did or did
if repo_did and pds:
for img in ret['image']:
img['url'] = blob_to_url(blob=img['url'], repo_did=repo_did, pds=pds)
else:
ret['image'] = []
# convert public view opt-out to unlisted AS1 audience targeting
# https://docs.bsky.app/docs/advanced-guides/resolving-identities#for-backend-services
# https://activitystrea.ms/specs/json/targeting/1.0/
labels = (obj.get('labels', {}).get('values', [])
if type == 'app.bsky.actor.profile'
else obj.get('labels', []))
for label in labels:
if label.get('val') == NO_AUTHENTICATED_LABEL and not label.get('neg'):
ret['to'] = [{
'objectType': 'group',
'alias': '@unlisted',
}]
elif type == 'app.bsky.feed.post':
text = obj.get('text', '')
# convert facets to tags
tags = []
for facet in obj.get('facets', []):
tag = {}
for feat in facet.get('features', []):
if feat.get('$type') == 'app.bsky.richtext.facet#link':
tag.update({
'objectType': 'article',
'url': feat.get('uri'),
})
elif feat.get('$type') == 'app.bsky.richtext.facet#mention':
tag.update({
'objectType': 'mention',
'url': Bluesky.user_url(feat.get('did')),
})
elif feat.get('$type') == 'app.bsky.richtext.facet#tag':
tag.update({