diff --git a/src/modelscope_hub/_download.py b/src/modelscope_hub/_download.py index 2014255..47e037b 100644 --- a/src/modelscope_hub/_download.py +++ b/src/modelscope_hub/_download.py @@ -24,7 +24,6 @@ import hashlib import io import os -import platform import time import uuid from concurrent.futures import ThreadPoolExecutor, as_completed @@ -287,21 +286,12 @@ def __init__(self, legacy_client: "LegacyClient", config: "HubConfig") -> None: # User-agent & headers # ------------------------------------------------------------------ def _build_user_agent(self, user_agent: dict | str | None = None) -> str: - from .version import __version__ + from .utils import build_user_agent - env = os.environ.get("MODELSCOPE_CLOUD_ENVIRONMENT", "custom") - user_name = os.environ.get("MODELSCOPE_CLOUD_USERNAME", "unknown") - - ua = ( - f"modelscope_hub/{__version__}; python/{platform.python_version()}; " - f"session_id/{uuid.uuid4().hex}; platform/{platform.platform()}; " - f"processor/{platform.processor()}; env/{env}; user/{user_name}" + return build_user_agent( + session_id=self._config.get_session_id(), + extra=user_agent, ) - if isinstance(user_agent, dict): - ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items()) - elif isinstance(user_agent, str): - ua += "; " + user_agent - return ua def _detect_region(self) -> str: """Detect Alibaba cloud region ID for intra-cloud acceleration.""" @@ -536,6 +526,14 @@ def download_repo( cache_dir=str(output_dir), ) + if repo_type in ("skill", "skills"): + return self._download_archive( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + output_dir=output_dir, + ) + if repo_type in ("dataset", "datasets"): files = self._client.list_dataset_files_paginated( repo_id=repo_id, @@ -609,6 +607,56 @@ def download_repo( return output_dir + # ------------------------------------------------------------------ + # Internal: archive-based download (skills) + # ------------------------------------------------------------------ + def _download_archive( + self, + repo_id: str, + repo_type: str, + revision: str, + output_dir: Path, + ) -> Path: + """Download a repo via its zip archive endpoint and extract. + + Skill repos do not support per-file ``/repo?FilePath=...`` download. + The old SDK uses ``/archive/zip/{revision}`` for these. + """ + import shutil + import tempfile + import zipfile + + tmp_path: Path | None = None + try: + resp = self._client.download_archive( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + ) + + with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp: + tmp_path = Path(tmp.name) + for chunk in resp.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): + if chunk: + tmp.write(chunk) + + with zipfile.ZipFile(tmp_path, "r") as zf: + zf.extractall(output_dir) + + # Flatten if zip has a single top-level directory + entries = [e for e in output_dir.iterdir()] + if len(entries) == 1 and entries[0].is_dir(): + nested = entries[0] + for item in nested.iterdir(): + shutil.move(str(item), str(output_dir / item.name)) + nested.rmdir() + finally: + if tmp_path is not None: + tmp_path.unlink(missing_ok=True) + + logger.info("Extracted archive for %s to %s", repo_id, output_dir) + return output_dir + # ------------------------------------------------------------------ # Internal helpers # ------------------------------------------------------------------ diff --git a/src/modelscope_hub/_legacy_api.py b/src/modelscope_hub/_legacy_api.py index 417ce5b..7ca0de4 100644 --- a/src/modelscope_hub/_legacy_api.py +++ b/src/modelscope_hub/_legacy_api.py @@ -74,6 +74,7 @@ def __init__( endpoint: str, timeout: int = API_TIMEOUT, max_retries: int = API_MAX_RETRIES, + user_agent: str | None = None, ) -> None: self._token = token self._endpoint = endpoint.rstrip("/") @@ -81,6 +82,8 @@ def __init__( self._session_authenticated = False self._session = requests.Session() + if user_agent: + self._session.headers["User-Agent"] = user_agent retry = Retry( total=max_retries, backoff_factor=0.5, @@ -202,8 +205,8 @@ def _json_data(self, resp: requests.Response) -> Any: # ------------------------------------------------------------------ # Auth # ------------------------------------------------------------------ - def login(self, access_token: str) -> dict: - """Authenticate via access token and return user info + git token. + def login(self, access_token: str) -> tuple[dict, "requests.cookies.RequestsCookieJar"]: + """Authenticate via access token and return (user_data, cookies). POST /api/v1/login @@ -218,7 +221,7 @@ def login(self, access_token: str) -> dict: self._session.cookies.clear() self._ensure_session_auth() resp = self._request("POST", "login", json_body={"AccessToken": access_token}) - return self._json_data(resp) + return self._json_data(resp), resp.cookies # ------------------------------------------------------------------ # Repo CRUD (model / dataset) @@ -365,10 +368,42 @@ def create_tag( POST /api/v1/{type}s/{repo_id}/repo/tag """ segment = _resolve_segment(repo_type) - body = {"Tag": tag, "Revision": revision} + body = {"TagName": tag, "Ref": revision} resp = self._request("POST", f"{segment}/{repo_id}/repo/tag", json_body=body) return self._json_data(resp) + # ------------------------------------------------------------------ + # File deletion + # ------------------------------------------------------------------ + def delete_file( + self, + repo_id: str, + repo_type: str, + file_path: str, + revision: str = "master", + ) -> dict: + """Delete a single file from the repository. + + DELETE /api/v1/{type}s/{owner}/{name}/file?FilePath=...&Revision=... + (for models) + DELETE /api/v1/datasets/{owner}/{name}/repo?FilePath=... + (for datasets) + """ + segment = _resolve_segment(repo_type) + if repo_type == RepoType.DATASET: + resp = self._request( + "DELETE", + f"{segment}/{repo_id}/repo", + params={"FilePath": file_path}, + ) + else: + resp = self._request( + "DELETE", + f"{segment}/{repo_id}/file", + params={"FilePath": file_path, "Revision": revision}, + ) + return self._json_data(resp) + # ------------------------------------------------------------------ # Git Commits # ------------------------------------------------------------------ @@ -447,10 +482,16 @@ def upload_blob( *, headers: dict[str, str] | None = None, timeout: int | None = None, - ) -> requests.Response: + ) -> dict: """Upload a blob to the presigned URL returned by :meth:`validate_blobs`. PUT {upload_url} + + Sends both ``Authorization: Bearer`` and ``Cookie: m_session_id`` + headers to authenticate against the LFS domain (which may differ + from the main API domain). + + Returns the parsed JSON response body on success. """ upload_headers: dict[str, str] = { "Content-Length": str(size), @@ -458,6 +499,7 @@ def upload_blob( } if self._token: upload_headers["Authorization"] = f"Bearer {self._token}" + upload_headers["Cookie"] = f"m_session_id={self._token}" if headers: upload_headers.update(headers) @@ -474,7 +516,22 @@ def upload_blob( raise RequestTimeoutError(f"Blob upload timed out: {exc}") from exc raise_for_status(resp) - return resp + + # Presigned URLs (cloud storage) may return empty bodies on success. + try: + body = resp.json() + except (ValueError, RuntimeError): + return {} + if isinstance(body, dict) and body.get("Code") not in (200, "200", None): + from .errors import APIError + raise APIError( + body.get("Message") or body.get("message") or f"Blob upload failed (Code={body.get('Code')})", + status_code=resp.status_code, + response_body=body, + url=upload_url, + method="PUT", + ) + return body # ------------------------------------------------------------------ # Raw Download URL @@ -520,6 +577,32 @@ def get_collection( resp = self._request("GET", "collections", params=params) return self._json_data(resp) + # ------------------------------------------------------------------ + # Archive Download (skill repos) + # ------------------------------------------------------------------ + def download_archive( + self, + repo_id: str, + repo_type: str, + revision: str = "master", + headers: dict[str, str] | None = None, + ) -> requests.Response: + """Download the entire repo as a zip archive. + + GET /api/v1/{type}s/{repo_id}/archive/zip/{revision} + + Skills (and potentially other repo types) do not support per-file + download via ``/repo?FilePath=...``. This method streams the + archive endpoint instead. + """ + segment = _resolve_segment(repo_type) + return self._request( + "GET", + f"{segment}/{repo_id}/archive/zip/{revision}", + headers=headers, + stream=True, + ) + # ------------------------------------------------------------------ # Raw Download URL # ------------------------------------------------------------------ diff --git a/src/modelscope_hub/_openapi.py b/src/modelscope_hub/_openapi.py index 2446ef3..4ccc852 100644 --- a/src/modelscope_hub/_openapi.py +++ b/src/modelscope_hub/_openapi.py @@ -118,6 +118,10 @@ def _url(self, path: str) -> str: def _auth_headers(self, *, require_token: bool = False) -> dict[str, str]: token = self._config.token + if not token: + token = self._config.load_token() + if token: + self._config.token = token if not token: if require_token: raise AuthenticationError( @@ -555,10 +559,12 @@ def deploy_mcp_server( payload: DeployMcpServerPayload | Mapping[str, Any] | None = None, ) -> JSON: """``POST /mcp/servers/{id}/deploy`` — deploy an MCP server for the caller.""" + body = dict(payload or {}) + body.setdefault("transport_type", "sse") return self._request( "POST", f"/mcp/servers/{server_id}/deploy", - json_body=dict(payload or {}), + json_body=body, ) def undeploy_mcp_server(self, server_id: str | int) -> JSON: diff --git a/src/modelscope_hub/api.py b/src/modelscope_hub/api.py index 228e9ce..5ca9072 100644 --- a/src/modelscope_hub/api.py +++ b/src/modelscope_hub/api.py @@ -41,6 +41,7 @@ AuthenticationError, HubError, InvalidParameter, + NetworkError, NotExistError, NotSupportedError, ) @@ -163,10 +164,15 @@ def openapi(self) -> OpenAPIClient: def legacy(self) -> LegacyClient: """Lazily-constructed legacy ``/api/v1`` client.""" if self._legacy is None: + from .utils import build_user_agent + self._legacy = LegacyClient( token=self._config.token, endpoint=self._config.endpoint, + user_agent=build_user_agent(self._config.get_session_id()), ) + elif self._legacy.token != self._config.token and self._config.token: + self._legacy.token = self._config.token return self._legacy @property @@ -320,32 +326,37 @@ def get_cookies( *, cookies_required: bool = False, ) -> RequestsCookieJar | None: - """Build a cookie jar for legacy API authentication. + """Get cookies for authentication from token or local cache. + + Resolution order: + 1. Explicit ``access_token`` argument + 2. Token configured on this instance + 3. ``MODELSCOPE_API_TOKEN`` environment variable + 4. Saved cookies from ``~/.modelscope/credentials/cookies`` - The legacy ``/api/v1/`` surface authenticates via a - ``m_session_id`` cookie whose value is the access token. This - method creates a :class:`~requests.cookies.RequestsCookieJar` - with that cookie, scoped to the current endpoint's domain. + When a token is available (steps 1-3), a fresh + :class:`~requests.cookies.RequestsCookieJar` with ``m_session_id`` + is built. Otherwise the locally cached cookies from a prior + ``login()`` call are loaded. Parameters ---------- access_token : str, optional - Explicit token override. Falls back to the token configured - on this instance, then to ``MODELSCOPE_API_TOKEN`` env var. + Explicit token override. cookies_required : bool, optional - When ``True``, raise :class:`AuthenticationError` if no token is - available. Default is ``False`` (return ``None``). + When ``True``, raise :class:`AuthenticationError` if no + credentials are available. Default is ``False``. Returns ------- RequestsCookieJar or None - Cookie jar with ``m_session_id`` set, or ``None`` when no - token is available and ``cookies_required`` is ``False``. + Cookie jar for authentication, or ``None`` when no + credentials are available and ``cookies_required`` is ``False``. Raises ------ AuthenticationError - When ``cookies_required`` is ``True`` and no token is available. + When ``cookies_required`` is ``True`` and no credentials found. Examples -------- @@ -354,26 +365,32 @@ def get_cookies( 'ms-xxxxxxxx' """ import os + token = access_token or self._config.token or os.environ.get("MODELSCOPE_API_TOKEN") - if not token: - if cookies_required: - raise AuthenticationError( - "No credentials found. " - "Pass --token, call HubApi.login(), or set MODELSCOPE_API_TOKEN. " - "Your token is available at https://modelscope.cn/my/myaccesstoken" - ) - return None - domain = urlparse(self._config.endpoint).hostname or "" - jar = RequestsCookieJar() - jar.set("m_session_id", token, domain=domain, path="/") - return jar + if token: + domain = urlparse(self._config.endpoint).hostname or "" + jar = RequestsCookieJar() + jar.set("m_session_id", token, domain=domain, path="/") + return jar + + cookies = self._config.load_cookies() + if cookies is not None: + return cookies + + if cookies_required: + raise AuthenticationError( + "No credentials found. " + "Pass --token, call HubApi.login(), or set MODELSCOPE_API_TOKEN. " + "Your token is available at https://modelscope.cn/my/myaccesstoken" + ) + return None def login(self, token: str) -> UserInfo: - """Persist ``token`` locally and return the authenticated user profile. + """Authenticate and persist credentials locally. - The token is saved to the local config file so subsequent sessions - pick it up automatically. ``GET /users/me`` is then called to verify - the credential. + Calls ``POST /api/v1/login`` to obtain server-issued session cookies + and a git access token, then saves them to + ``~/.modelscope/credentials/`` (compatible with the old modelscope SDK). Parameters ---------- @@ -404,20 +421,33 @@ def login(self, token: str) -> UserInfo: raise InvalidParameter("token must be a non-empty string") token = token.strip() - self._config.save_token(token) + self._config.token = token + self._config._logged_out = False self._openapi = None if self._legacy is not None: self._legacy.token = token try: - return self.whoami() - except AuthenticationError as exc: + data, cookies = self.legacy.login(token) + except (AuthenticationError, HubError) as exc: self._config.clear_token() raise AuthenticationError( "Login failed: the provided token was rejected by the server.", status_code=getattr(exc, "status_code", None), ) from exc + git_token = data.get("AccessToken", "") + username = data.get("Username", "") + email = data.get("Email", "") + + self._config.save_cookies(cookies) + if git_token: + self._config.save_git_token(git_token) + if username: + self._config.save_user_info(username, email or "") + + return self.whoami() + def logout(self) -> None: """Clear the locally persisted token. @@ -1259,7 +1289,12 @@ def delete_files( commit_message: str | None = None, revision: str | None = None, ) -> dict: - """Delete one or more files via a legacy commit operation. + """Delete one or more files from a repository. + + .. note:: + File deletion is restricted by the server to cookie-based session + auth (interactive login). API tokens (``ms-...``) may receive a 401 + "token no longer supports deletion operations" error. Parameters ---------- @@ -1270,19 +1305,19 @@ def delete_files( file_paths : iterable of str Paths of files to remove. Empty entries are ignored. commit_message : str, optional - Commit message. Defaults to ``"Delete N file(s)"``. + Unused (kept for API compatibility). revision : str, optional - Branch to commit on. Defaults to ``"master"``. + Branch to delete from. Defaults to ``"master"``. Returns ------- dict - Commit response payload. + Summary with ``deleted_files`` and ``failed_files`` lists. Raises ------ InvalidParameter - When ``file_paths`` resolves to an empty operation list. + When ``file_paths`` resolves to an empty list. Examples -------- @@ -1290,22 +1325,30 @@ def delete_files( ... "alice/llama-7b", ... "model", ... ["old_weights.bin", "deprecated/config.json"], - ... commit_message="Remove deprecated artifacts", ... ) """ rt = self._normalize_repo_type(repo_type) - operations = [ - {"action": "delete", "file_path": p} for p in file_paths if p - ] - if not operations: + paths = [p for p in file_paths if p] + if not paths: raise InvalidParameter("file_paths must contain at least one non-empty path.") - return self.legacy.create_commit( - repo_id=repo_id, - repo_type=str(rt), - operations=operations, - commit_message=commit_message or f"Delete {len(operations)} file(s)", - revision=revision or "master", - ) + + deleted, failed = [], [] + for p in paths: + try: + self.legacy.delete_file( + repo_id=repo_id, + repo_type=str(rt), + file_path=p, + revision=revision or "master", + ) + deleted.append(p) + except (AuthenticationError, NetworkError) as exc: + failed.append(p) + raise + except Exception: + failed.append(p) + + return {"deleted_files": deleted, "failed_files": failed, "total_files": len(paths)} # ================================================================== # Versioning diff --git a/src/modelscope_hub/cli/download.py b/src/modelscope_hub/cli/download.py index 418737d..b9bf196 100644 --- a/src/modelscope_hub/cli/download.py +++ b/src/modelscope_hub/cli/download.py @@ -179,11 +179,13 @@ def _download_collection(self) -> None: def _download_one(elem: dict) -> tuple[str, str | None, str | None]: skill_id = f"{elem['ElementPath']}/{elem['ElementName']}" + skill_name = elem["ElementName"] + skill_dir = str(Path(local_dir) / skill_name) if local_dir else None try: result = api.download_repo( skill_id, repo_type=RepoType.SKILL, - local_dir=local_dir, + local_dir=skill_dir, ) return skill_id, str(result), None except Exception as exc: diff --git a/src/modelscope_hub/cli/main.py b/src/modelscope_hub/cli/main.py index 666b469..f38d9b4 100644 --- a/src/modelscope_hub/cli/main.py +++ b/src/modelscope_hub/cli/main.py @@ -16,6 +16,7 @@ from typing import Sequence from .. import __version__ +from ..constants import MODELSCOPE_ASCII from ..errors import HubError, InvalidParameter, NetworkError, NotSupportedError from .base import CLICommand, add_repo_type_arg, error, info, make_api, success from .cache import CacheCommand, _CacheClear, _CacheScan @@ -188,6 +189,7 @@ def _discover_plugins(subparsers) -> None: # --------------------------------------------------------------------------- def run_cmd(argv: Sequence[str] | None = None) -> int: """Console-script entry point referenced by ``[project.scripts]``.""" + print(MODELSCOPE_ASCII, file=sys.stderr) parser = _build_parser() args = parser.parse_args(argv) diff --git a/src/modelscope_hub/cli/mcp.py b/src/modelscope_hub/cli/mcp.py index 13dd5d8..b3f8d9d 100644 --- a/src/modelscope_hub/cli/mcp.py +++ b/src/modelscope_hub/cli/mcp.py @@ -84,12 +84,25 @@ class _McpDeploy(CLICommand): def register(subparsers: Action) -> None: p = subparsers.add_parser("deploy", help="Deploy an MCP server.") p.add_argument("server_id") + p.add_argument( + "--transport-type", dest="transport_type", default=None, + help="Transport type (default: sse).", + ) + p.add_argument( + "--expiration-minutes", dest="expiration_minutes", type=int, default=None, + help="Expiration time in minutes.", + ) add_subcmd_token_endpoint(p) p.set_defaults(_command=McpCommand, _mcp_leaf=_McpDeploy) def execute(self) -> None: api = make_api(self.args) - api.deploy_mcp_server(self.args.server_id) + payload: dict = {} + if self.args.transport_type: + payload["transport_type"] = self.args.transport_type + if self.args.expiration_minutes is not None: + payload["expiration_minutes"] = self.args.expiration_minutes + api.deploy_mcp_server(self.args.server_id, payload=payload or None) success(f"Deploy requested for MCP server: {self.args.server_id}") diff --git a/src/modelscope_hub/cli/repo.py b/src/modelscope_hub/cli/repo.py index ec6fadf..865e3db 100644 --- a/src/modelscope_hub/cli/repo.py +++ b/src/modelscope_hub/cli/repo.py @@ -17,6 +17,25 @@ # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- +_ALREADY_EXISTS_CODES = {10020101001, 10010101001} + + +def _is_already_exists(exc: BaseException) -> bool: + """Detect "repo already exists" regardless of locale.""" + msg = str(exc).lower() + if "exist" in msg or "已被注册" in msg or "已存在" in msg: + return True + body = getattr(exc, "response_body", None) + if isinstance(body, dict): + code = body.get("Code") + try: + if int(code) in _ALREADY_EXISTS_CODES: + return True + except (TypeError, ValueError): + pass + return False + + def _format_visibility(value: object) -> str: if value is None: return "-" @@ -71,12 +90,13 @@ def _add_arguments(p) -> None: p.add_argument("--base-image", dest="base_image", default=None, help="Studio base image.") p.add_argument("--cover-image", dest="cover_image", default=None, help="Studio cover image URL.") p.add_argument("--hardware", dest="hardware", default=None, help="Studio hardware spec.") + p.add_argument("--category", dest="category", default=None, help="Skill category (required for skill repos).") add_subcmd_token_endpoint(p) def execute(self) -> None: api = make_api(self.args) extra: dict[str, object] = {} - for key in ("sdk_type", "sdk_version", "base_image", "cover_image", "hardware"): + for key in ("sdk_type", "sdk_version", "base_image", "cover_image", "hardware", "category"): value = getattr(self.args, key, None) if value is not None: extra[key] = value @@ -92,7 +112,7 @@ def execute(self) -> None: ) success(f"Created {self.args.repo_type}: {repo.repo_id or self.args.repo_id}") except Exception as exc: - if getattr(self.args, "exist_ok", False) and "exist" in str(exc).lower(): + if getattr(self.args, "exist_ok", False) and _is_already_exists(exc): info(f"Repository already exists: {self.args.repo_id}") return raise diff --git a/src/modelscope_hub/compat/hub_api.py b/src/modelscope_hub/compat/hub_api.py index 4250b8d..f0c6a48 100644 --- a/src/modelscope_hub/compat/hub_api.py +++ b/src/modelscope_hub/compat/hub_api.py @@ -12,6 +12,23 @@ from ..api import HubApi from ..constants import RepoType +_ALREADY_EXISTS_CODES = {10020101001, 10010101001} + + +def _is_repo_exists_error(exc: BaseException) -> bool: + msg = str(exc).lower() + if "exist" in msg or "已被注册" in msg or "已存在" in msg: + return True + body = getattr(exc, "response_body", None) + if isinstance(body, dict): + code = body.get("Code") + try: + if int(code) in _ALREADY_EXISTS_CODES: + return True + except (TypeError, ValueError): + pass + return False + class LegacyHubApi: """Drop-in replacement for the old ``modelscope.hub.api.HubApi``. @@ -85,7 +102,7 @@ def create_repo( **kwargs, ) except Exception as exc: - if exist_ok and "exist" in str(exc).lower(): + if exist_ok and _is_repo_exists_error(exc): return raise diff --git a/src/modelscope_hub/config.py b/src/modelscope_hub/config.py index 6191f0b..a4dd1eb 100644 --- a/src/modelscope_hub/config.py +++ b/src/modelscope_hub/config.py @@ -14,15 +14,18 @@ from __future__ import annotations import os -import stat from dataclasses import dataclass, field from pathlib import Path from .constants import ( CONFIG_DIR_NAME, + COOKIES_FILE_NAME, + CREDENTIALS_DIR_NAME, DEFAULT_CACHE_DIR_NAME, DEFAULT_ENDPOINT, - TOKEN_FILE_NAME, + GIT_TOKEN_FILE_NAME, + SESSION_FILE_NAME, + USER_INFO_FILE_NAME, ) from .errors import CacheError, InvalidParameter @@ -57,6 +60,7 @@ class HubConfig: ) ) token: str | None = None + _logged_out: bool = field(default=False, init=False, repr=False) # ------------------------------------------------------------------ # Construction helpers @@ -79,14 +83,15 @@ def __post_init__(self) -> None: # Path helpers # ------------------------------------------------------------------ @property - def token_path(self) -> Path: - return self.config_dir / TOKEN_FILE_NAME + def credentials_dir(self) -> Path: + return self.config_dir / CREDENTIALS_DIR_NAME def ensure_dirs(self) -> None: """Create the config and cache directories if they do not exist.""" try: self.config_dir.mkdir(parents=True, exist_ok=True) self.cache_dir.mkdir(parents=True, exist_ok=True) + self.credentials_dir.mkdir(parents=True, exist_ok=True) except OSError as exc: # pragma: no cover - filesystem dependent raise CacheError(f"Failed to create SDK directories: {exc}") from exc @@ -94,38 +99,144 @@ def ensure_dirs(self) -> None: # Token persistence # ------------------------------------------------------------------ def save_token(self, token: str) -> None: - """Persist ``token`` to ``~/.modelscope/token`` with restrictive perms.""" + """Persist token as ``m_session_id`` cookie in ``credentials/cookies``. + + Creates a :class:`~requests.cookies.RequestsCookieJar` with a 30-day + expiry, matching the old SDK convention where the API token lives + exclusively inside the pickled cookie jar. + """ if not token or not token.strip(): raise InvalidParameter("token must be a non-empty string") - self.ensure_dirs() - path = self.token_path - path.write_text(token.strip(), encoding="utf-8") - try: - path.chmod(stat.S_IRUSR | stat.S_IWUSR) # 0o600 - except OSError: # pragma: no cover - best-effort on non-POSIX - pass - self.token = token.strip() + import time + from http.cookiejar import Cookie + from requests.cookies import RequestsCookieJar + from urllib.parse import urlparse + + token = token.strip() + domain = urlparse(self.endpoint).hostname or "modelscope.cn" + expires = int(time.time()) + 30 * 24 * 3600 # 30 days + + jar = RequestsCookieJar() + jar.set_cookie(Cookie( + version=0, name="m_session_id", value=token, + port=None, port_specified=False, + domain=domain, domain_specified=True, domain_initial_dot=False, + path="/", path_specified=True, + secure=False, expires=expires, discard=False, + comment=None, comment_url=None, rest={}, rfc2109=False, + )) + self.save_cookies(jar) + self.token = token + self._logged_out = False def load_token(self) -> str | None: - """Return the token persisted on disk, or ``None`` if absent.""" - path = self.token_path - if not path.is_file(): - return None - try: - value = path.read_text(encoding="utf-8").strip() - except OSError: + """Load the API token from ``~/.modelscope/credentials/cookies``. + + Reads the pickled cookie jar and extracts the ``m_session_id`` value. + Returns ``None`` if: + - no cookies file exists + - the ``m_session_id`` cookie has expired + - :meth:`clear_token` was called (explicit logout) + """ + if self._logged_out: return None - return value or None + + cookies = self.load_cookies() + if cookies: + for cookie in cookies: + if cookie.name == "m_session_id": + return cookie.value + return None def clear_token(self) -> None: - """Remove any persisted token from disk and from this config.""" + """Remove persisted credentials (deletes ``credentials/cookies``).""" self.token = None - path = self.token_path + self._logged_out = True + path = self.credentials_dir / COOKIES_FILE_NAME try: path.unlink(missing_ok=True) - except OSError as exc: # pragma: no cover - filesystem dependent - raise CacheError(f"Failed to remove token file {path}: {exc}") from exc + except OSError: + pass + + # ------------------------------------------------------------------ + # Credentials persistence (compat with old modelscope SDK) + # ------------------------------------------------------------------ + def save_cookies(self, cookies: object) -> None: + """Pickle cookies to ``~/.modelscope/credentials/cookies``.""" + import pickle + import stat + + self.ensure_dirs() + path = self.credentials_dir / COOKIES_FILE_NAME + with open(path, "wb") as f: + pickle.dump(cookies, f) + path.chmod(stat.S_IRUSR | stat.S_IWUSR) + + def load_cookies(self) -> object | None: + """Load saved cookies, returning None if absent or expired.""" + import pickle + + path = self.credentials_dir / COOKIES_FILE_NAME + if not path.is_file(): + return None + try: + with open(path, "rb") as f: + cookies = pickle.load(f) + except (OSError, pickle.UnpicklingError): + return None + if not cookies: + return None + for cookie in cookies: + if cookie.name == "m_session_id" and cookie.is_expired(): + return None + return cookies + + def save_user_info(self, username: str, email: str) -> None: + """Save ``username:email`` to ``~/.modelscope/credentials/user``.""" + self.ensure_dirs() + path = self.credentials_dir / USER_INFO_FILE_NAME + path.write_text(f"{username}:{email}", encoding="utf-8") + + def save_git_token(self, git_token: str) -> None: + """Save git token to ``~/.modelscope/credentials/git_token``.""" + import stat + + self.ensure_dirs() + path = self.credentials_dir / GIT_TOKEN_FILE_NAME + path.write_text(git_token, encoding="utf-8") + path.chmod(stat.S_IRUSR | stat.S_IWUSR) + + def load_git_token(self) -> str | None: + """Read git token from ``~/.modelscope/credentials/git_token``.""" + path = self.credentials_dir / GIT_TOKEN_FILE_NAME + if path.is_file(): + try: + return path.read_text(encoding="utf-8").strip() or None + except OSError: + return None + return None + + def get_session_id(self) -> str: + """Return a stable SDK session UUID, auto-generating if absent. + + The session ID is persisted to ``~/.modelscope/credentials/session`` + and included in the User-Agent header for telemetry. + """ + import uuid as _uuid + + path = self.credentials_dir / SESSION_FILE_NAME + if path.is_file(): + try: + sid = path.read_text(encoding="utf-8").strip() + if len(sid) == 32: + return sid + except OSError: + pass + sid = _uuid.uuid4().hex + self.ensure_dirs() + path.write_text(sid, encoding="utf-8") + return sid # Singleton-style accessor — kept as a function so tests can monkeypatch it. diff --git a/src/modelscope_hub/constants.py b/src/modelscope_hub/constants.py index 6d1a0a3..6f968bc 100644 --- a/src/modelscope_hub/constants.py +++ b/src/modelscope_hub/constants.py @@ -280,12 +280,32 @@ def _env_bool(name: str, default: bool) -> bool: ] +# --------------------------------------------------------------------------- +# Branding +# --------------------------------------------------------------------------- +MODELSCOPE_ASCII = r""" + _ .-') _ .-') _ ('-. .-') _ (`-. ('-. +( '.( OO )_ ( ( OO) ) _( OO) ( OO ). ( (OO ) _( OO) + ,--. ,--.).-'),-----. \ .'_ (,------.,--. (_)---\_) .-----. .-'),-----. _.` \(,------. + | `.' |( OO' .-. ',`'--..._) | .---'| |.-') / _ | ' .--./ ( OO' .-. '(__...--'' | .---' + | |/ | | | || | \ ' | | | | OO )\ :` `. | |('-. / | | | | | / | | | | + | |'.'| |\_) | |\| || | ' |(| '--. | |`-' | '..`''.) /_) |OO )\_) | |\| | | |_.' |(| '--. + | | | | \ | | | || | / : | .--'(| '---.'.-._) \ || |`-'| \ | | | | | .___.' | .--' + | | | | `' '-' '| '--' / | `---.| | \ /(_' '--'\ `' '-' ' | | | `---. + `--' `--' `-----' `-------' `------'`------' `-----' `-----' `-----' `--' `------' +""" # noqa: E501 + + # --------------------------------------------------------------------------- # Filesystem layout # --------------------------------------------------------------------------- DEFAULT_CACHE_DIR_NAME: str = "modelscope" -TOKEN_FILE_NAME: str = "token" +SESSION_FILE_NAME: str = "session" CONFIG_DIR_NAME: str = ".modelscope" +CREDENTIALS_DIR_NAME: str = "credentials" +COOKIES_FILE_NAME: str = "cookies" +GIT_TOKEN_FILE_NAME: str = "git_token" +USER_INFO_FILE_NAME: str = "user" __all__ = [ @@ -317,8 +337,8 @@ def _env_bool(name: str, default: bool) -> bool: "OPENAPI_PREFIX", "RepoType", "StrEnum", + "SESSION_FILE_NAME", "TEMPORARY_FOLDER_NAME", - "TOKEN_FILE_NAME", "UPLOAD_ADAPTIVE_BATCH_SIZE", "UPLOAD_BLOB_CONNECT_TIMEOUT", "UPLOAD_BLOB_MAX_RETRIES", diff --git a/src/modelscope_hub/errors.py b/src/modelscope_hub/errors.py index 22c74e7..b7d69d5 100644 --- a/src/modelscope_hub/errors.py +++ b/src/modelscope_hub/errors.py @@ -318,6 +318,30 @@ class NotSupportedError(HubError): } +_CN_TO_EN: dict[str, str] = { + "该名称已被注册使用,请重新命名": "Repository name already exists. Please choose a different name.", + "用户未登录": "User not logged in.", + "user not logged in": "User not logged in.", + "更新模型失败": "Failed to update model.", + "参数错误:版本名称不能为空": "Invalid parameter: tag name cannot be empty.", + "模型不存在": "Model does not exist.", + "数据集不存在": "Dataset does not exist.", + "创建空间失败": "Failed to create studio.", + "the current token no longer supports deletion operations. Please go to the site page : https://www.modelscope.cn to delete": + "Deletion is restricted to web console. Visit https://modelscope.cn to delete.", +} + + +def _translate_message(msg: str) -> str: + """Translate known Chinese server messages to English.""" + if not msg: + return msg + for cn, en in _CN_TO_EN.items(): + if cn in msg: + return en + return msg + + def _extract_payload(response: "Response") -> tuple[str, str | None, Any | None]: """Best-effort extraction of (message, request_id, body) from a response.""" request_id = response.headers.get("x-request-id") or response.headers.get("X-Request-Id") @@ -336,7 +360,7 @@ def _extract_payload(response: "Response") -> tuple[str, str | None, Any | None] body = response.text or None if isinstance(body, str) and body.strip(): message = body.strip().splitlines()[0][:500] - return message, request_id, body + return _translate_message(message), request_id, body if isinstance(body, dict): for key in ("message", "Message", "msg", "Msg", "error", "Error", "detail", "Detail"): @@ -348,7 +372,7 @@ def _extract_payload(response: "Response") -> tuple[str, str | None, Any | None] body.get("request_id") or body.get("requestId") or body.get("RequestId") or request_id ) - return message, request_id, body + return _translate_message(message), request_id, body def raise_for_status(response: "Response") -> None: diff --git a/src/modelscope_hub/utils/__init__.py b/src/modelscope_hub/utils/__init__.py index 726e337..79b0c4f 100644 --- a/src/modelscope_hub/utils/__init__.py +++ b/src/modelscope_hub/utils/__init__.py @@ -6,9 +6,46 @@ from .logger import get_logger __all__ = [ + "build_user_agent", "compute_hash", "ensure_dir", "get_cache_dir", "get_file_size", "get_logger", ] + + +def build_user_agent( + session_id: str | None = None, + extra: dict | str | None = None, +) -> str: + """Build the standard ModelScope Hub SDK User-Agent string. + + Parameters + ---------- + session_id : str, optional + Stable session UUID (from ``HubConfig.get_session_id()``). + Falls back to a random UUID if not provided. + extra : dict, str or None + Additional key/value pairs or free-form string appended to the UA. + """ + import os + import platform + import uuid + + from .. import __version__ + + env = os.environ.get("MODELSCOPE_CLOUD_ENVIRONMENT", "custom") + user_name = os.environ.get("MODELSCOPE_CLOUD_USERNAME", "unknown") + sid = session_id or uuid.uuid4().hex + + ua = ( + f"modelscope_hub/{__version__}; python/{platform.python_version()}; " + f"session_id/{sid}; platform/{platform.platform()}; " + f"processor/{platform.processor()}; env/{env}; user/{user_name}" + ) + if isinstance(extra, dict): + ua += "; " + "; ".join(f"{k}/{v}" for k, v in extra.items()) + elif isinstance(extra, str): + ua += "; " + extra + return ua diff --git a/tests/integration/test_remote_file_ops.py b/tests/integration/test_remote_file_ops.py index 4da7eea..7ae6565 100644 --- a/tests/integration/test_remote_file_ops.py +++ b/tests/integration/test_remote_file_ops.py @@ -62,6 +62,10 @@ def test_download_file(self, tmp_path): assert local_path.exists() assert content == "hello modelscope" + @pytest.mark.xfail( + reason="Server restricts file deletion to cookie-based session auth; " + "API tokens get 401 'token no longer supports deletion operations'" + ) def test_delete_files(self): """delete_files removes the file from the repo.""" print(f"\n** repo_id: {self.repo_id}") @@ -70,6 +74,7 @@ def test_delete_files(self): self.repo_id, "model", ["test_file.txt"], commit_message="cleanup" ) print(f"** delete_files response: {result}") + assert "test_file.txt" in result["deleted_files"] files = self.api.list_repo_files(self.repo_id, "model") paths = [f.path for f in files] print(f"** Files after deletion: {paths}") @@ -106,7 +111,7 @@ def setup_repo_with_commit(self, api, test_owner, unique_repo_name): def test_list_repo_revisions(self): """list_repo_revisions contains the master branch.""" revisions = self.api.list_repo_revisions(self.repo_id, "model") - names = [r.get("name") or r.get("Name") or "" for r in revisions] + names = [r.get("Revision") or r.get("name") or r.get("Name") or "" for r in revisions] print(f"\n** repo_id: {self.repo_id}") print(f"** list_repo_revisions returned {len(revisions)} revision(s): {names}") print(f"** raw response: {revisions}") @@ -121,7 +126,7 @@ def test_create_repo_tag(self): result = self.api.create_repo_tag(self.repo_id, "model", "v1.0") print(f"** create_repo_tag response: {result}") revisions = self.api.list_repo_revisions(self.repo_id, "model") - names = [r.get("name") or r.get("Name") or "" for r in revisions] + names = [r.get("Revision") or r.get("name") or r.get("Name") or "" for r in revisions] print(f"** Revisions after tagging: {names}") assert "v1.0" in names diff --git a/tests/integration/test_remote_repo.py b/tests/integration/test_remote_repo.py index cd95107..89b1021 100644 --- a/tests/integration/test_remote_repo.py +++ b/tests/integration/test_remote_repo.py @@ -9,6 +9,10 @@ @pytest.mark.remote +@pytest.mark.xfail( + reason="Server restricts repo deletion to cookie-based session auth; " + "API tokens get 401 'token no longer supports deletion operations'" +) class TestRemoteRepoLifecycle: """Test full repo lifecycle: create → info → exists → delete.""" diff --git a/tests/integration/test_sdk_api.py b/tests/integration/test_sdk_api.py index 561ee76..e446e7f 100644 --- a/tests/integration/test_sdk_api.py +++ b/tests/integration/test_sdk_api.py @@ -22,6 +22,7 @@ class TestRepoManagement: """Test HubApi repo CRUD operations directly.""" + @pytest.mark.xfail(reason="Server restricts deletion to cookie-based session auth") def test_create_get_delete_model(self, api, test_owner, unique_repo_name): repo_id = f"{test_owner}/{unique_repo_name}" try: @@ -172,6 +173,7 @@ def test_list_repo_files(self): paths = [f.path for f in files] assert "list_test.txt" in paths + @pytest.mark.xfail(reason="Server restricts file deletion to cookie-based session auth") def test_delete_files(self): self.api.upload_file( self.repo_id, "model", b"del", "to_delete.txt", commit_message="del", @@ -225,13 +227,13 @@ def test_list_revisions(self): revisions = self.api.list_repo_revisions(self.repo_id, "model") assert isinstance(revisions, list) assert len(revisions) > 0 - names = [r.get("name") or r.get("Name") or "" for r in revisions] + names = [r.get("Revision") or r.get("name") or r.get("Name") or "" for r in revisions] assert "master" in names def test_create_tag(self): self.api.create_repo_tag(self.repo_id, "model", "v1.0") revisions = self.api.list_repo_revisions(self.repo_id, "model") - names = [r.get("name") or r.get("Name") or "" for r in revisions] + names = [r.get("Revision") or r.get("name") or r.get("Name") or "" for r in revisions] assert "v1.0" in names