Skip to content
76 changes: 62 additions & 14 deletions src/modelscope_hub/_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import hashlib
import io
import os
import platform
import time
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
Expand Down Expand Up @@ -287,21 +286,12 @@ def __init__(self, legacy_client: "LegacyClient", config: "HubConfig") -> None:
# User-agent & headers
# ------------------------------------------------------------------
def _build_user_agent(self, user_agent: dict | str | None = None) -> str:
    """Return the User-Agent string for requests issued by this downloader.

    Formatting is delegated to ``build_user_agent`` from ``.utils`` so the
    string is assembled in a single place rather than re-built here.

    Args:
        user_agent: Optional caller-supplied additions, forwarded untouched
            as ``extra``. How a ``dict`` versus a ``str`` is rendered is
            decided by ``build_user_agent``.

    Returns:
        The User-Agent header value.
    """
    # Imported lazily, mirroring the function-scope import this method
    # has always used for its package-local dependencies.
    from .utils import build_user_agent

    # The session id comes from the shared config instead of a fresh
    # ``uuid4`` generated on every call.
    return build_user_agent(
        session_id=self._config.get_session_id(),
        extra=user_agent,
    )

def _detect_region(self) -> str:
"""Detect Alibaba cloud region ID for intra-cloud acceleration."""
Expand Down Expand Up @@ -536,6 +526,14 @@ def download_repo(
cache_dir=str(output_dir),
)

if repo_type in ("skill", "skills"):
return self._download_archive(
repo_id=repo_id,
repo_type=repo_type,
revision=revision,
output_dir=output_dir,
)

if repo_type in ("dataset", "datasets"):
files = self._client.list_dataset_files_paginated(
repo_id=repo_id,
Expand Down Expand Up @@ -609,6 +607,56 @@ def download_repo(

return output_dir

# ------------------------------------------------------------------
# Internal: archive-based download (skills)
# ------------------------------------------------------------------
def _download_archive(
    self,
    repo_id: str,
    repo_type: str,
    revision: str,
    output_dir: Path,
) -> Path:
    """Download a repo via its zip archive endpoint and extract it.

    Skill repos do not support per-file ``/repo?FilePath=...`` download.
    The old SDK uses ``/archive/zip/{revision}`` for these.

    The archive is streamed to a private temporary directory, extracted
    there, and only then merged into ``output_dir``. If the archive
    consists of a single top-level directory, that wrapper directory is
    dropped so its contents land directly in ``output_dir``.

    Args:
        repo_id: Repository identifier passed to the archive endpoint.
        repo_type: Repository type passed to the archive endpoint.
        revision: Revision to download.
        output_dir: Destination directory. Created if missing; existing
            content is kept and files with the same name are overwritten.

    Returns:
        ``output_dir``.

    Raises:
        zipfile.BadZipFile: If the downloaded payload is not a valid zip.
    """
    import shutil
    import tempfile
    import zipfile

    resp = self._client.download_archive(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
    )

    # Everything transient lives under one temp dir that is removed on
    # exit, whether or not download/extraction succeeds.
    with tempfile.TemporaryDirectory() as work_dir:
        work = Path(work_dir)
        archive_path = work / "archive.zip"
        staging = work / "extracted"

        try:
            with open(archive_path, "wb") as fh:
                for chunk in resp.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    if chunk:
                        fh.write(chunk)
        finally:
            # Streamed responses hold their connection until closed;
            # release it even when the transfer fails midway.
            resp.close()

        staging.mkdir()
        with zipfile.ZipFile(archive_path, "r") as zf:
            zf.extractall(staging)

        # Decide on flattening from the archive's own layout (the staging
        # dir), not from output_dir, which may already contain files.
        entries = list(staging.iterdir())
        if len(entries) == 1 and entries[0].is_dir():
            root = entries[0]
        else:
            root = staging

        # Merge into the destination; creates output_dir when absent and
        # tolerates an entry named like the wrapper directory itself.
        shutil.copytree(root, output_dir, dirs_exist_ok=True)

    logger.info("Extracted archive for %s to %s", repo_id, output_dir)
    return output_dir

# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------
Expand Down
95 changes: 89 additions & 6 deletions src/modelscope_hub/_legacy_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,16 @@ def __init__(
endpoint: str,
timeout: int = API_TIMEOUT,
max_retries: int = API_MAX_RETRIES,
user_agent: str | None = None,
) -> None:
self._token = token
self._endpoint = endpoint.rstrip("/")
self._timeout = timeout
self._session_authenticated = False

self._session = requests.Session()
if user_agent:
self._session.headers["User-Agent"] = user_agent
retry = Retry(
total=max_retries,
backoff_factor=0.5,
Expand Down Expand Up @@ -202,8 +205,8 @@ def _json_data(self, resp: requests.Response) -> Any:
# ------------------------------------------------------------------
# Auth
# ------------------------------------------------------------------
def login(self, access_token: str) -> dict:
"""Authenticate via access token and return user info + git token.
def login(self, access_token: str) -> tuple[dict, "requests.cookies.RequestsCookieJar"]:
"""Authenticate via access token and return (user_data, cookies).

POST /api/v1/login

Expand All @@ -218,7 +221,7 @@ def login(self, access_token: str) -> dict:
self._session.cookies.clear()
self._ensure_session_auth()
resp = self._request("POST", "login", json_body={"AccessToken": access_token})
return self._json_data(resp)
return self._json_data(resp), resp.cookies

# ------------------------------------------------------------------
# Repo CRUD (model / dataset)
Expand Down Expand Up @@ -365,10 +368,42 @@ def create_tag(
POST /api/v1/{type}s/{repo_id}/repo/tag
"""
segment = _resolve_segment(repo_type)
body = {"Tag": tag, "Revision": revision}
body = {"TagName": tag, "Ref": revision}
resp = self._request("POST", f"{segment}/{repo_id}/repo/tag", json_body=body)
return self._json_data(resp)

# ------------------------------------------------------------------
# File deletion
# ------------------------------------------------------------------
def delete_file(
    self,
    repo_id: str,
    repo_type: str,
    file_path: str,
    revision: str = "master",
) -> dict:
    """Remove one file from a repository.

    Endpoints used:

    * datasets: ``DELETE {segment}/{repo_id}/repo?FilePath=...``
      (``revision`` is not sent)
    * everything else: ``DELETE {segment}/{repo_id}/file?FilePath=...&Revision=...``

    Args:
        repo_id: Repository identifier placed in the URL path.
        repo_type: Repository type; resolved to a URL segment and compared
            against ``RepoType.DATASET`` to pick the endpoint.
        file_path: Path of the file to delete, sent as ``FilePath``.
        revision: Sent as ``Revision`` for non-dataset repos only.

    Returns:
        The value produced by ``self._json_data`` for the response.
    """
    segment = _resolve_segment(repo_type)

    # Pick the route and query first, then issue a single request.
    if repo_type == RepoType.DATASET:
        route = f"{segment}/{repo_id}/repo"
        query = {"FilePath": file_path}
    else:
        route = f"{segment}/{repo_id}/file"
        query = {"FilePath": file_path, "Revision": revision}

    resp = self._request("DELETE", route, params=query)
    return self._json_data(resp)

# ------------------------------------------------------------------
# Git Commits
# ------------------------------------------------------------------
Expand Down Expand Up @@ -447,17 +482,24 @@ def upload_blob(
*,
headers: dict[str, str] | None = None,
timeout: int | None = None,
) -> requests.Response:
) -> dict:
"""Upload a blob to the presigned URL returned by :meth:`validate_blobs`.

PUT {upload_url}

Sends both ``Authorization: Bearer`` and ``Cookie: m_session_id``
headers to authenticate against the LFS domain (which may differ
from the main API domain).

Returns the parsed JSON response body on success.
"""
upload_headers: dict[str, str] = {
"Content-Length": str(size),
"X-Request-ID": uuid.uuid4().hex,
}
if self._token:
upload_headers["Authorization"] = f"Bearer {self._token}"
upload_headers["Cookie"] = f"m_session_id={self._token}"
if headers:
upload_headers.update(headers)

Expand All @@ -474,7 +516,22 @@ def upload_blob(
raise RequestTimeoutError(f"Blob upload timed out: {exc}") from exc

raise_for_status(resp)
return resp

# Presigned URLs (cloud storage) may return empty bodies on success.
try:
body = resp.json()
except (ValueError, RuntimeError):
return {}
if isinstance(body, dict) and body.get("Code") not in (200, "200", None):
from .errors import APIError
raise APIError(
body.get("Message") or body.get("message") or f"Blob upload failed (Code={body.get('Code')})",
status_code=resp.status_code,
response_body=body,
url=upload_url,
method="PUT",
)
return body

# ------------------------------------------------------------------
# Raw Download URL
Expand Down Expand Up @@ -520,6 +577,32 @@ def get_collection(
resp = self._request("GET", "collections", params=params)
return self._json_data(resp)

# ------------------------------------------------------------------
# Archive Download (skill repos)
# ------------------------------------------------------------------
def download_archive(
    self,
    repo_id: str,
    repo_type: str,
    revision: str = "master",
    headers: dict[str, str] | None = None,
) -> requests.Response:
    """Fetch a whole repository as a streamed zip archive.

    GET /api/v1/{type}s/{repo_id}/archive/zip/{revision}

    Skills (and potentially other repo types) cannot be downloaded file by
    file through ``/repo?FilePath=...``, so the archive endpoint is used
    instead.

    Args:
        repo_id: Repository identifier placed in the URL path.
        repo_type: Repository type, resolved to its URL segment.
        revision: Revision placed at the end of the archive path.
        headers: Optional extra headers forwarded to the request.

    Returns:
        The response obtained with ``stream=True``; the body has not been
        read yet, so the caller is expected to consume it.
    """
    segment = _resolve_segment(repo_type)
    archive_route = f"{segment}/{repo_id}/archive/zip/{revision}"
    response = self._request("GET", archive_route, headers=headers, stream=True)
    return response

# ------------------------------------------------------------------
# Raw Download URL
# ------------------------------------------------------------------
Expand Down
8 changes: 7 additions & 1 deletion src/modelscope_hub/_openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ def _url(self, path: str) -> str:

def _auth_headers(self, *, require_token: bool = False) -> dict[str, str]:
token = self._config.token
if not token:
token = self._config.load_token()
if token:
self._config.token = token
if not token:
if require_token:
raise AuthenticationError(
Expand Down Expand Up @@ -555,10 +559,12 @@ def deploy_mcp_server(
payload: DeployMcpServerPayload | Mapping[str, Any] | None = None,
) -> JSON:
"""``POST /mcp/servers/{id}/deploy`` — deploy an MCP server for the caller."""
body = dict(payload or {})
body.setdefault("transport_type", "sse")
return self._request(
"POST",
f"/mcp/servers/{server_id}/deploy",
json_body=dict(payload or {}),
json_body=body,
)

def undeploy_mcp_server(self, server_id: str | int) -> JSON:
Expand Down
Loading