2 changes: 1 addition & 1 deletion docs/tutorials/create_a_dataset.ipynb
@@ -42,7 +42,7 @@
" ),\n",
" \"Permeability\": ColumnAnnotation(\n",
" description=\"MDR1-MDCK efflux ratio (B-A/A-B)\", \n",
" user_attributes={\"Unit\": \"\tmL/min/kg\"}\n",
" user_attributes={\"Unit\": \"mL/min/kg\"}\n",
" )\n",
" },\n",
" \n",
108 changes: 108 additions & 0 deletions docs/tutorials/create_a_model.ipynb
@@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A model in Polaris centralizes all data about a method and can be attached to different results.\n",
"\n",
"## Create a Model\n",
"\n",
"To create a model, you need to instantiate the `Model` class. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from polaris.model import Model\n",
"\n",
"# Create a new Model Card\n",
"model = Model(\n",
" name=\"MolGPS\", \n",
" description=\"Graph transformer foundation model for molecular modeling\",\n",
" code_url=\"https://github.com/datamol-io/graphium\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Share your model\n",
"Want to share your model with the community? Upload it to the Polaris Hub!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.upload_to_hub(owner=\"your-username\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Attach a model with a result\n",
"\n",
"The model card can then be attached to a newly created result on upload."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from polaris import load_benchmark, load_model\n",
"\n",
"# Load a benchmark\n",
"benchmark = load_benchmark(\"polaris/hello-world-benchmark\")\n",
"\n",
"# Get the results\n",
"results = benchmark.evaluate(...)\n",
"\n",
"# Attach it to the result\n",
"results.model = load_model(\"recursion/MolGPS\")\n",
"\n",
"# Upload the results\n",
"results.upload_to_hub(owner=\"your-username\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"\n",
"The End. "
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
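Pulled out of the notebook cells, the end-to-end flow this new tutorial walks through looks roughly like the following standalone script. This is a consolidated sketch of the calls shown above; the benchmark slug, model artifact ID, and username are placeholders, and `benchmark.evaluate(...)` still needs real predictions.

```python
from polaris import load_benchmark, load_model
from polaris.model import Model

# Describe the method once as a model card (values are illustrative)
model = Model(
    name="MolGPS",
    description="Graph transformer foundation model for molecular modeling",
    code_url="https://github.com/datamol-io/graphium",
)

# Share the model card on the Polaris Hub
model.upload_to_hub(owner="your-username")

# Run a benchmark and attach a hosted model card to the results
benchmark = load_benchmark("polaris/hello-world-benchmark")
results = benchmark.evaluate(...)  # pass your predictions here
results.model = load_model("recursion/MolGPS")

# Upload the results, now linked to the model
results.upload_to_hub(owner="your-username")
```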
4 changes: 2 additions & 2 deletions polaris/dataset/_subset.py
@@ -1,4 +1,4 @@
from copy import deepcopy
from copy import copy
from typing import Callable, Iterable, Literal, Sequence

import numpy as np
@@ -225,7 +225,7 @@ def as_dataframe(self) -> pd.DataFrame:

def copy(self) -> Self:
"""Returns a copy of the subset."""
return deepcopy(self)
return copy(self)

def extend_inputs(self, input_cols: Iterable[str] | str) -> Self:
"""
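The `_subset.py` change above swaps the deep copy in `Subset.copy()` for a shallow one, so a copied subset now shares the objects it references (such as the underlying dataset) instead of duplicating them. A toy sketch of that semantic difference, using a stand-in class rather than Polaris's actual `Subset`:

```python
from copy import copy, deepcopy


class SubsetSketch:
    """Stand-in for a subset that references a (potentially large) dataset."""

    def __init__(self, dataset: dict, indices: list[int]):
        self.dataset = dataset
        self.indices = indices


original = SubsetSketch(dataset={"smiles": ["CCO", "CCN", "CCC"]}, indices=[0, 2])

shallow = copy(original)    # new SubsetSketch, same dataset object
deep = deepcopy(original)   # new SubsetSketch, dataset duplicated as well

assert shallow.dataset is original.dataset      # shared reference after the change
assert deep.dataset is not original.dataset     # independent duplicate, as before
```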
11 changes: 8 additions & 3 deletions polaris/evaluate/_metadata.py
@@ -1,6 +1,6 @@
from datetime import datetime

from pydantic import Field, PrivateAttr
from pydantic import Field, PrivateAttr, computed_field

from polaris._artifact import BaseArtifactModel
from polaris.utils.dict2html import dict2html
@@ -20,8 +20,8 @@ class ResultsMetadataV1(BaseArtifactModel):
"""

# Additional metadata
github_url: HttpUrlString | None = None
paper_url: HttpUrlString | None = None
github_url: HttpUrlString | None = Field(None, alias="code_url")
paper_url: HttpUrlString | None = Field(None, alias="report_url")
contributors: list[HubUser] = Field(default_factory=list)

# Private attributes
@@ -52,6 +52,11 @@ class ResultsMetadataV2(BaseArtifactModel):
# Private attributes
_created_at: datetime = PrivateAttr(default_factory=datetime.now)

@computed_field
@property
def model_artifact_id(self) -> str:
return self.model.artifact_id

def _repr_html_(self) -> str:
return dict2html(self.model_dump())

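The `_metadata.py` diff does two things: `ResultsMetadataV1` now also accepts the Hub-facing names `code_url` and `report_url` as aliases for `github_url` and `paper_url`, and `ResultsMetadataV2` gains a `model_artifact_id` computed field that is included whenever the metadata is serialized. A minimal standalone Pydantic v2 sketch of both mechanisms; the class and extra field below are illustrative, not Polaris's own definitions:

```python
from pydantic import BaseModel, ConfigDict, Field, computed_field


class MetadataSketch(BaseModel):
    # Allow the "model_"-prefixed computed field below without a namespace warning
    model_config = ConfigDict(protected_namespaces=())

    github_url: str | None = Field(None, alias="code_url")
    name: str = "MolGPS"

    @computed_field
    @property
    def model_artifact_id(self) -> str:
        # Derived at serialization time, not stored as a regular field
        return f"your-username/{self.name.lower()}"


meta = MetadataSketch(code_url="https://github.com/datamol-io/graphium")

print(meta.github_url)                 # the alias populates the canonical field
print(meta.model_dump(by_alias=True))  # contains 'code_url' and 'model_artifact_id'
```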
40 changes: 29 additions & 11 deletions polaris/hub/client.py
@@ -24,7 +24,7 @@
from polaris.competition import CompetitionSpecification
from polaris.model import Model
from polaris.dataset import Dataset, DatasetV1, DatasetV2
from polaris.evaluate import BenchmarkResults, CompetitionPredictions
from polaris.evaluate import BenchmarkResultsV1, BenchmarkResultsV2, CompetitionPredictions
from polaris.hub.external_client import ExternalAuthClient
from polaris.hub.oauth import CachedTokenAuth
from polaris.hub.settings import PolarisHubSettings
@@ -265,7 +265,7 @@ def list_datasets(self, limit: int = 100, offset: int = 0) -> list[str]:
offset: The offset from which to start returning datasets.

Returns:
A list of dataset names in the format `owner/dataset_name`.
A list of dataset names in the format `owner/dataset_slug`.
"""
with track_progress(description="Fetching datasets", total=1):
# Step 1: Fetch enough v2 datasets to cover the offset and limit
@@ -383,22 +383,23 @@ def _get_v2_dataset(self, owner: str | HubOwner, slug: str) -> DatasetV2:
return dataset

def list_benchmarks(self, limit: int = 100, offset: int = 0) -> list[str]:
"""List all available benchmarks on the Polaris Hub.
"""List all available benchmarks (v1 and v2) on the Polaris Hub.
We prioritize v2 benchmarks over v1 benchmarks.

Args:
limit: The maximum number of benchmarks to return.
offset: The offset from which to start returning benchmarks.

Returns:
A list of benchmark names in the format `owner/benchmark_name`.
A list of benchmark names in the format `owner/benchmark_slug`.
"""
with track_progress(description="Fetching benchmarks", total=1):
# Step 1: Fetch enough v2 benchmarks to cover the offset and limit
v2_json_response = self._base_request_to_hub(
url="/v2/benchmark", method="GET", params={"limit": limit, "offset": offset}
).json()
v2_data = v2_json_response["data"]
v2_benchmarks = [f"{HubOwner(**benchmark['owner'])}/{benchmark['name']}" for benchmark in v2_data]
v2_benchmarks = [benchmark["artifactId"] for benchmark in v2_data]

# If v2 benchmarks satisfy the limit, return them
if len(v2_benchmarks) == limit:
@@ -416,7 +417,7 @@ def list_benchmarks(self, limit: int = 100, offset: int = 0) -> list[str]:
},
).json()
v1_data = v1_json_response["data"]
v1_benchmarks = [f"{HubOwner(**benchmark['owner'])}/{benchmark['name']}" for benchmark in v1_data]
v1_benchmarks = [benchmark["artifactId"] for benchmark in v1_data]

# Combine the v2 and v1 benchmarks
combined_benchmarks = v2_benchmarks + v1_benchmarks
@@ -491,11 +492,11 @@ def _get_v2_benchmark(self, owner: str | HubOwner, slug: str) -> BenchmarkV2Spec
with StorageSession(self, "read", BenchmarkV2Specification.urn_for(owner, slug)) as storage:
split = {label: storage.get_file(label) for label in response_data.get("split", {}).keys()}

return BenchmarkV2Specification(**response_data, split=split)
return BenchmarkV2Specification(**{**response_data, "split": split})

def upload_results(
self,
results: BenchmarkResults,
results: BenchmarkResultsV1 | BenchmarkResultsV2,
access: AccessType = "private",
owner: HubOwner | str | None = None,
):
@@ -911,6 +912,24 @@ def submit_competition_predictions(
)
return response

def list_models(self, limit: int = 100, offset: int = 0) -> list[str]:
"""List all available models on the Polaris Hub.

Args:
limit: The maximum number of models to return.
offset: The offset from which to start returning models.

Returns:
A list of model names in the format `owner/model_slug`.
"""
with track_progress(description="Fetching models", total=1):
json_response = self._base_request_to_hub(
url="/v2/model", method="GET", params={"limit": limit, "offset": offset}
).json()
models = [model["artifactId"] for model in json_response["data"]]

return models

def get_model(self, artifact_id: str) -> Model:
url = f"/v2/model/{artifact_id}"
response = self._base_request_to_hub(url=url, method="GET")
@@ -947,9 +966,8 @@ def upload_model(
model_json = model.model_dump(by_alias=True, exclude_none=True)

# Make a request to the Hub
response = self._base_request_to_hub(
url="/v2/model", method="POST", json={"access": access, **model_json}
)
url = f"/v2/model/{model.artifact_id}"
response = self._base_request_to_hub(url=url, method="PUT", json={"access": access, **model_json})

# Inform the user about where to find their newly created artifact.
model_url = urljoin(self.settings.hub_url, response.headers.get("Content-Location"))
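Together, the client changes give models the same lifecycle as other Hub artifacts: listings return `owner/slug` artifact IDs straight from the Hub response, and uploads target `PUT /v2/model/{artifact_id}`. A rough usage sketch, assuming a default-configured `PolarisHubClient` can be constructed directly and that authentication is handled separately; the artifact IDs and model values are placeholders:

```python
from polaris.hub.client import PolarisHubClient
from polaris.model import Model

client = PolarisHubClient()

# Listings now return artifact IDs in the "owner/slug" format
print(client.list_models(limit=10))
print(client.list_benchmarks(limit=10))  # v2 benchmarks are listed before v1

# Fetch a single model card by its artifact ID
model = client.get_model("recursion/MolGPS")

# Upload (or update) a model card; the request is a PUT to /v2/model/{artifact_id}
new_model = Model(
    name="my-model",
    description="A small baseline model",
    code_url="https://github.com/your-username/your-repo",
)
client.upload_model(new_model, access="private")
```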