[weverse] add extractors

mikf · Nov 4, 2023 · 3b60dab · 3b60dab
1 parent caf31e7
commit 3b60dab
Show file tree

Hide file tree

Showing 3 changed files with 201 additions and 0 deletions.
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -3613,6 +3613,26 @@ Description
     Download video files.
 
 
+extractor.weverse.access-token
+------------------------------
+Type
+    ``string``
+Default
+    ``null``
+Description
+    Your Weverse account access token.
+
+    The token can be found in the ``we2_access_token`` cookie in the
+    ``.weverse.io`` cookie domain after logging in to your account.
+
+    An invalid or expired value
+    will result in ``401 Unauthorized`` errors.
+
+    If this option is unset and no ``we2_access_token`` cookie is available,
+    an extra HTTP request will be sent with your ``username`` and ``password``
+    to attempt to fetch a new token.
+
+
 extractor.ytdl.enabled
 ----------------------
 Type

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
@@ -168,6 +168,7 @@
     "webmshare",
     "webtoons",
     "weibo",
+    "weverse",
     "wikiart",
     "wikifeet",
     "xhamster",

diff --git a/gallery_dl/extractor/weverse.py b/gallery_dl/extractor/weverse.py
@@ -0,0 +1,180 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://weverse.io/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+import binascii
+import hashlib
+import hmac
+import time
+import urllib.parse
+import uuid
+
+# optional scheme and optional "m." subdomain, followed by the weverse.io host
+BASE_PATTERN = r"(?:https?://)?(?:m\.)?weverse\.io"
+# BASE_PATTERN plus one captured path segment (used as community keyword)
+COMMUNITY_PATTERN = BASE_PATTERN + r"/(\w+)"
+
+# 32 lowercase hexadecimal characters
+MEMBER_ID_PATTERN = r"([a-f0-9]{32})"
+# one digit, a hyphen, and nine digits, e.g. "1-123456789"
+POST_ID_PATTERN = r"(\d-\d{9})"
+
+
+class WeverseExtractor(Extractor):
+    """Base class for weverse extractors"""
+    category = "weverse"
+    cookies_domain = ".weverse.io"
+    cookies_names = ("we2_access_token",)
+    root = "https://weverse.io"
+    request_interval = (1.0, 2.0)
+
+    def _init(self):
+        self.login()
+        if self.access_token:
+            self.api = WeverseAPI(self, self.access_token)
+
+    def login(self):
+        if self.config("access-token"):
+            self.access_token = self.config("access-token")
+            return
+
+        if not self.cookies_check(self.cookies_names):
+            username, password = self._get_auth_info()
+            if username:
+                self.cookies_update(
+                    self._login_impl(username, password), self.cookies_domain)
+
+        self.access_token = self.cookies.get(self.cookies_names[0])
+
+    @cache(maxage=365*24*3600, keyarg=1)
+    def _login_impl(self, username, password):
+        endpoint = ("https://accountapi.weverse.io"
+                    "/web/api/v2/auth/token/by-credentials")
+        data = {"email": username, "password": password}
+        headers = {
+            "x-acc-app-secret": "5419526f1c624b38b10787e5c10b2a7a",
+            "x-acc-app-version": "2.2.20-alpha.0",
+            "x-acc-language": "en",
+            "x-acc-service-id": "weverse",
+            "x-acc-trace-id": str(uuid.uuid64())
+        }
+        res = self.request(
+            endpoint, method="POST", data=data, headers=headers).json()
+        if "accessToken" not in res:
+            raise exception.AuthenticationError()
+        return {self.cookies_names[0]: res["accessToken"]}
+
+
+class WeversePostExtractor(WeverseExtractor):
+    """Extractor for weverse posts"""
+    subcategory = "post"
+    directory_fmt = ("{category}", "{community[communityName]}",
+                     "{author_name}", "{postId}")
+    filename_fmt = "{category}_{filename}.{extension}"
+    archive_fmt = "{postId}"
+    pattern = COMMUNITY_PATTERN + r"/(?:artist|fanpost)/" + POST_ID_PATTERN
+    example = "https://weverse.io/abcdef/artist/1-123456789"
+
+    def __init__(self, match):
+        WeverseExtractor.__init__(self, match)
+        self.community_keyword = match.group(1)
+        self.post_id = match.group(2)
+
+    def items(self):
+        data = self.api.post(self.post_id)
+
+        if "publishedAt" in data:
+            data["date"] = text.parse_timestamp(data["publishedAt"] / 1000)
+
+        extension = data["extension"]
+        attachments = data["attachment"]
+
+        # skip posts with no media
+        if extension in [None, {}] and attachments in [None, {}]:
+            return
+
+        del data["extension"]
+        del data["attachment"]
+
+        author = data["author"]
+        data["author_name"] = author.get("artistOfficialProfile", {}).get(
+            "officialName") or author["profileName"]
+
+        yield Message.Directory, data
+        for type in attachments:
+            if type == "photo":
+                for photo in attachments[type].values():
+                    url = photo["url"]
+                    data["filename"] = photo["photoId"]
+                    data["extension"] = text.ext_from_url(url)
+                    yield Message.Url, url, data
+            if type == "video":
+                for video in attachments[type].values():
+                    best_video = self.api.video(video["videoId"])
+                    url = best_video["url"]
+                    data["filename"] = video["videoId"]
+                    data["extension"] = text.ext_from_url(url)
+                    yield Message.Url, url, data
+
+
+class WeverseAPI():
+    """Interface for the Weverse API"""
+
+    BASE_API_URL = "https://global.apis.naver.com"
+
+    def __init__(self, extractor, access_token):
+        self.extractor = extractor
+        self.headers = {"Authorization": "Bearer " + access_token}
+
+    def _endpoint_with_params(self, endpoint, params):
+        params_delimiter = "?"
+        if "?" in endpoint:
+            params_delimiter = "&"
+        return endpoint + params_delimiter + urllib.parse.urlencode(
+            query=params)
+
+    def _message_digest(self, endpoint, params, timestamp):
+        key = "1b9cb6378d959b45714bec49971ade22e6e24e42".encode()
+        url = self._endpoint_with_params(endpoint, params)
+        message = "{}{}".format(url[:255], timestamp).encode()
+        hash = hmac.new(key, message, hashlib.sha1).digest()
+        return binascii.b2a_base64(hash).rstrip().decode()
+
+    def post(self, post_id):
+        endpoint = "/post/v1.0/post-{}".format(post_id)
+        params = {"fieldSet": "postV1"}
+        return self._call(endpoint, params)
+
+    def video(self, video_id):
+        endpoint = "/cvideo/v1.0/cvideo-{}/downloadInfo".format(video_id)
+        videos = self._call(endpoint)["downloadInfo"]
+        best_video = max(videos, key=lambda video: video["resolution"])
+        return best_video
+
+    def _call(self, endpoint, params=None):
+        if params is None:
+            params = {}
+        params = util.combine_dict({
+            "appId": "be4d79eb8fc7bd008ee82c8ec4ff6fd4",
+            "language": "en",
+            "platform": "WEB",
+            "wpf": "pc"
+        }, params)
+        timestamp = int(time.time() * 1000)
+        message_digest = self._message_digest(endpoint, params, timestamp)
+        params = util.combine_dict(params, {
+            "wmsgpad": timestamp,
+            "wmd": message_digest
+        })
+        while True:
+            try:
+                return self.extractor.request(
+                    self.BASE_API_URL + "/weverse/wevweb" + endpoint,
+                    params=params, headers=self.headers,
+                ).json()
+            except exception.HttpError as exc:
+                self.extractor.log.warning(exc)
+                return