[twitter] fix extraction (x.com)
soimort committed May 17, 2024
commit 57f6502 (1 parent: a4d34ff)
Showing 3 changed files with 10 additions and 6 deletions.
README.md (1 addition, 1 deletion)
@@ -376,7 +376,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | Site | URL | Videos? | Images? | Audios? |
 | :--: | :-- | :-----: | :-----: | :-----: |
 | **YouTube** | <https://www.youtube.com/> |✓| | |
-| **Twitter** | <https://twitter.com/> |✓|✓| |
+| **X (Twitter)** | <https://x.com/> |✓|✓| |
 | VK | <http://vk.com/> |✓|✓| |
 | Vine | <https://vine.co/> |✓| | |
 | Vimeo | <https://vimeo.com/> |✓| | |
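With the host-mapping change in src/you_get/common.py below, the documented `--url`/`-u` mode applies to x.com links as well; for instance, an invocation along the lines of `you-get --url https://x.com/<screen_name>/status/<id>` (placeholder URL) should list the downloadable resource URLs for the tweet rather than downloading them.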
src/you_get/common.py (6 additions, 2 deletions)
@@ -113,6 +113,7 @@
     'veoh' : 'veoh',
     'vine' : 'vine',
     'vk' : 'vk',
+    'x' : 'twitter',
     'xiaokaxiu' : 'yixia',
     'xiaojiadianvideo' : 'fc2video',
     'ximalaya' : 'ximalaya',
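The single new entry above is what routes x.com to the existing twitter extractor. As a rough illustration (not you-get's actual url_to_module logic, and with a hypothetical helper name), a SITES-style table is consulted by the domain label of the URL's host:

```python
from urllib.parse import urlparse

# Toy excerpt of a SITES-style table; the real one maps many more hosts.
SITES = {
    'twitter': 'twitter',
    'vine':    'vine',
    'vk':      'vk',
    'x':       'twitter',  # new: x.com reuses the twitter extractor
}

def extractor_name(url):
    """Illustrative lookup: second-level domain -> extractor module name."""
    host = urlparse(url).hostname or ''
    parts = host.split('.')
    key = parts[-2] if len(parts) >= 2 else host  # 'x' from 'x.com'
    return SITES.get(key)

assert extractor_name('https://x.com/u/status/1') == 'twitter'
assert extractor_name('https://twitter.com/u/status/1') == 'twitter'
```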
@@ -1856,9 +1857,12 @@ def url_to_module(url):
         )
     else:
         try:
-            location = get_location(url) # t.co isn't happy with fake_headers
+            try:
+                location = get_location(url) # t.co isn't happy with fake_headers
+            except:
+                location = get_location(url, headers=fake_headers)
         except:
-            location = get_location(url, headers=fake_headers)
+            location = get_location(url, headers=fake_headers, get_method='GET')
 
         if location and location != url and not location.startswith('/'):
             return url_to_module(location)
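The new nesting above amounts to a three-step fallback when resolving a URL to its final location: try without headers first (t.co rejects the fake headers), then with browser-like headers, and finally with headers and an explicit GET. A minimal standalone sketch of that chain, with illustrative helper names rather than you-get's actual get_location:

```python
import urllib.request

FAKE_HEADERS = {'User-Agent': 'Mozilla/5.0'}  # placeholder browser-like headers

def resolve_redirect(url, headers=None, method='HEAD'):
    """Follow redirects and return the final URL; raises on HTTP errors."""
    req = urllib.request.Request(url, headers=headers or {}, method=method)
    with urllib.request.urlopen(req) as resp:
        return resp.geturl()

def resolve_with_fallbacks(url):
    # 1) plain request: some shorteners (t.co) refuse the fake headers
    try:
        return resolve_redirect(url)
    except Exception:
        pass
    # 2) retry with browser-like headers for hosts that block the default UA
    try:
        return resolve_redirect(url, headers=FAKE_HEADERS)
    except Exception:
        pass
    # 3) last resort: force a GET for hosts that misbehave on HEAD requests
    return resolve_redirect(url, headers=FAKE_HEADERS, method='GET')
```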
src/you_get/extractors/twitter.py (3 additions, 3 deletions)
@@ -34,9 +34,9 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
                              **kwargs)
         return
 
-    m = re.match('^https?://(mobile\.)?twitter\.com/([^/]+)/status/(\d+)', url)
+    m = re.match('^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url)
     assert m
-    screen_name, item_id = m.group(2), m.group(3)
+    screen_name, item_id = m.group(3), m.group(4)
     page_title = "{} [{}]".format(screen_name, item_id)
 
     # FIXME: this API won't work for protected or nsfw contents
@@ -77,6 +77,6 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
     # TODO: should we deal with quoted tweets?
 
 
-site_info = "Twitter.com"
+site_info = "X.com"
 download = twitter_download
 download_playlist = playlist_not_supported('twitter')
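Because the updated pattern adds a capture group for the host, the screen name and status id move from groups 2 and 3 to groups 3 and 4, which is why the group indices change alongside the regex. A quick standalone check of the new pattern (raw strings and made-up URLs, for illustration only):

```python
import re

# The commit's updated pattern, written as a raw string here to avoid
# Python's invalid-escape-sequence warnings.
STATUS_RE = re.compile(r'^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)')

for url in (
    'https://twitter.com/example_user/status/1234567890',      # hypothetical URLs
    'https://x.com/example_user/status/1234567890',
    'https://mobile.twitter.com/example_user/status/1234567890',
):
    m = STATUS_RE.match(url)
    assert m
    # group(1) is the optional 'mobile.' prefix, group(2) the host name,
    # so the screen name and tweet id are now groups 3 and 4.
    screen_name, item_id = m.group(3), m.group(4)
    print(screen_name, item_id)  # -> example_user 1234567890
```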
