[twitter] fix extraction (x.com)
soimort committed May 17, 2024
commit 57f6502 (1 parent: a4d34ff)
Showing 3 changed files with 10 additions and 6 deletions.
README.md (1 addition, 1 deletion)
@@ -376,7 +376,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
 | Site | URL | Videos? | Images? | Audios? |
 | :--: | :-- | :-----: | :-----: | :-----: |
 | **YouTube** | <https://www.youtube.com/> |✓| | |
-| **Twitter** | <https://twitter.com/> |✓|✓| |
+| **X (Twitter)** | <https://x.com/> |✓|✓| |
 | VK | <http://vk.com/> |✓|✓| |
 | Vine | <https://vine.co/> |✓| | |
 | Vimeo | <https://vimeo.com/> |✓| | |
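With the host-mapping change in src/you_get/common.py below, the documented `--url`/`-u` mode applies to x.com links as well; for instance, an invocation along the lines of `you-get --url https://x.com/<screen_name>/status/<id>` (placeholder URL) should list the downloadable resource URLs for the tweet rather than downloading them.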
src/you_get/common.py (6 additions, 2 deletions)
@@ -113,6 +113,7 @@
     'veoh' : 'veoh',
     'vine' : 'vine',
     'vk' : 'vk',
+    'x' : 'twitter',
     'xiaokaxiu' : 'yixia',
     'xiaojiadianvideo' : 'fc2video',
     'ximalaya' : 'ximalaya',
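The single new entry above is what routes x.com to the existing twitter extractor. As a rough illustration (not you-get's actual url_to_module logic, and with a hypothetical helper name), a SITES-style table is consulted by the domain label of the URL's host:

```python
from urllib.parse import urlparse

# Toy excerpt of a SITES-style table; the real one maps many more hosts.
SITES = {
    'twitter': 'twitter',
    'vine':    'vine',
    'vk':      'vk',
    'x':       'twitter',  # new: x.com reuses the twitter extractor
}

def extractor_name(url):
    """Illustrative lookup: second-level domain -> extractor module name."""
    host = urlparse(url).hostname or ''
    parts = host.split('.')
    key = parts[-2] if len(parts) >= 2 else host  # 'x' from 'x.com'
    return SITES.get(key)

assert extractor_name('https://x.com/u/status/1') == 'twitter'
assert extractor_name('https://twitter.com/u/status/1') == 'twitter'
```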
@@ -1856,9 +1857,12 @@ def url_to_module(url):
         )
     else:
         try:
-            location = get_location(url) # t.co isn't happy with fake_headers
+            try:
+                location = get_location(url) # t.co isn't happy with fake_headers
+            except:
+                location = get_location(url, headers=fake_headers)
         except:
-            location = get_location(url, headers=fake_headers)
+            location = get_location(url, headers=fake_headers, get_method='GET')
 
         if location and location != url and not location.startswith('/'):
             return url_to_module(location)
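The new nesting above amounts to a three-step fallback when resolving a URL to its final location: try without headers first (t.co rejects the fake headers), then with browser-like headers, and finally with headers and an explicit GET. A minimal standalone sketch of that chain, with illustrative helper names rather than you-get's actual get_location:

```python
import urllib.request

FAKE_HEADERS = {'User-Agent': 'Mozilla/5.0'}  # placeholder browser-like headers

def resolve_redirect(url, headers=None, method='HEAD'):
    """Follow redirects and return the final URL; raises on HTTP errors."""
    req = urllib.request.Request(url, headers=headers or {}, method=method)
    with urllib.request.urlopen(req) as resp:
        return resp.geturl()

def resolve_with_fallbacks(url):
    # 1) plain request: some shorteners (t.co) refuse the fake headers
    try:
        return resolve_redirect(url)
    except Exception:
        pass
    # 2) retry with browser-like headers for hosts that block the default UA
    try:
        return resolve_redirect(url, headers=FAKE_HEADERS)
    except Exception:
        pass
    # 3) last resort: force a GET for hosts that misbehave on HEAD requests
    return resolve_redirect(url, headers=FAKE_HEADERS, method='GET')
```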
src/you_get/extractors/twitter.py (3 additions, 3 deletions)
@@ -34,9 +34,9 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
                              **kwargs)
         return
 
-    m = re.match('^https?://(mobile\.)?twitter\.com/([^/]+)/status/(\d+)', url)
+    m = re.match('^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)', url)
     assert m
-    screen_name, item_id = m.group(2), m.group(3)
+    screen_name, item_id = m.group(3), m.group(4)
     page_title = "{} [{}]".format(screen_name, item_id)
 
     # FIXME: this API won't work for protected or nsfw contents
@@ -77,6 +77,6 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
     # TODO: should we deal with quoted tweets?
 
 
-site_info = "Twitter.com"
+site_info = "X.com"
 download = twitter_download
 download_playlist = playlist_not_supported('twitter')
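Because the updated pattern adds a capture group for the host, the screen name and status id move from groups 2 and 3 to groups 3 and 4, which is why the group indices change alongside the regex. A quick standalone check of the new pattern (raw strings and made-up URLs, for illustration only):

```python
import re

# The commit's updated pattern, written as a raw string here to avoid
# Python's invalid-escape-sequence warnings.
STATUS_RE = re.compile(r'^https?://(mobile\.)?(x|twitter)\.com/([^/]+)/status/(\d+)')

for url in (
    'https://twitter.com/example_user/status/1234567890',      # hypothetical URLs
    'https://x.com/example_user/status/1234567890',
    'https://mobile.twitter.com/example_user/status/1234567890',
):
    m = STATUS_RE.match(url)
    assert m
    # group(1) is the optional 'mobile.' prefix, group(2) the host name,
    # so the screen name and tweet id are now groups 3 and 4.
    screen_name, item_id = m.group(3), m.group(4)
    print(screen_name, item_id)  # -> example_user 1234567890
```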
