soimort · jdnx · Apr 29, 2022 · May 15, 2022 · May 15, 2022 · May 15, 2022
diff --git a/src/you_get/common.py b/src/you_get/common.py
@@ -344,7 +344,7 @@ def undeflate(data):
 
 # an http.client implementation of get_content()
 # because urllib does not support "Connection: keep-alive"
-def getHttps(host, url, headers, debuglevel=0):
+def getHttps(host, url, headers, gzip=True, deflate=False, debuglevel=0):
     import http.client
 
     conn = http.client.HTTPSConnection(host)
@@ -353,8 +353,10 @@ def getHttps(host, url, headers, debuglevel=0):
     resp = conn.getresponse()
 
     data = resp.read()
-    data = ungzip(data)
-    #data = undeflate(data)
+    if gzip:
+        data = ungzip(data)
+    if deflate:
+        data = undeflate(data)
 
     return str(data, encoding='utf-8')
 
@@ -1654,7 +1656,7 @@ def print_version():
     download_grp.add_argument('--itag', help=argparse.SUPPRESS)
 
     download_grp.add_argument('-m', '--m3u8', action='store_true', default=False,
-        help = 'download vide using an m3u8 url')
+        help = 'download video using an m3u8 url')
 
 
     parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS)

diff --git a/src/you_get/extractors/tiktok.py b/src/you_get/extractors/tiktok.py
@@ -5,24 +5,27 @@
 from ..common import *
 
 def tiktok_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
-    while True:
-        m = re.match('https://([^/]+)(/.*)', url)
-        host = m.group(1)
-        if host == 'www.tiktok.com':  # canonical URL reached
-            url = m.group(2).split('?')[0]
-            vid = url.split('/')[3]  # should be a string of numbers
-            break
-        else:
-            url = get_location(url)
-
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
         'Accept-Encoding': 'gzip, deflate',
         'Accept': '*/*',
         'Connection': 'keep-alive'  # important
     }
 
+    m = re.match('(https?://)?([^/]+)(/.*)', url)
+    host = m.group(2)
+    if host != 'www.tiktok.com':  # non-canonical URL: fetch it and scrape the canonical URL from the response
+        html = getHttps(host, url, headers=headers, gzip=False)
+        url = r1(r'(https://www.tiktok.com/[^?"]+)', html)
+        # re-parse the canonical URL so host and path refer to www.tiktok.com
+        m = re.match('(https?://)?([^/]+)(/.*)', url)
+        host = m.group(2)
+
+    url = m.group(3).split('?')[0]
+    vid = url.split('/')[3]  # should be a string of numbers
+
     html = getHttps(host, url, headers=headers)
+
     data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \
         r1(r'<script id="SIGI_STATE" type="application/json">(.*?)</script>', html)
     info = json.loads(data)