From 636e20c80743aaaeb6ab6e477ceeaa6d17a40338 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Mon, 24 Feb 2025 08:02:24 -0500 Subject: [PATCH 01/13] Alias github_url to code_url --- polaris/evaluate/_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris/evaluate/_metadata.py b/polaris/evaluate/_metadata.py index 2ab38e21..cabc7ec9 100644 --- a/polaris/evaluate/_metadata.py +++ b/polaris/evaluate/_metadata.py @@ -20,7 +20,7 @@ class ResultsMetadataV1(BaseArtifactModel): """ # Additional metadata - github_url: HttpUrlString | None = None + github_url: HttpUrlString | None = Field(None, alias="code_url") paper_url: HttpUrlString | None = None contributors: list[HubUser] = Field(default_factory=list) From 2dcf287668656b7e9ae90d4cda6e602ebe328196 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Mon, 24 Feb 2025 09:34:06 -0500 Subject: [PATCH 02/13] wip tutorial --- docs/tutorials/create_a_model.ipynb | 136 ++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 docs/tutorials/create_a_model.ipynb diff --git a/docs/tutorials/create_a_model.ipynb b/docs/tutorials/create_a_model.ipynb new file mode 100644 index 00000000..94cb65f9 --- /dev/null +++ b/docs/tutorials/create_a_model.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A model in Polaris centralizes all data about a method and can be attached to different results.\n", + "\n", + "## Create a Model\n", + "\n", + "To create a model, you need to instantiate the `Model` class. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from polaris.model import Model\n", + "\n", + "# Create a new Model Card\n", + "model = Model(\n", + " name=\"MolGPS\", \n", + " description=\"Graph transformer foundation model for molecular modeling\",\n", + " code_url=\"https://github.com/datamol-io/graphium\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Share your model\n", + "Want to share your model with the community? Upload it to the Polaris Hub!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.upload_to_hub(owner=\"your-username\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Attach a model with a result\n", + "\n", + "The model card can then be attached to a newly created result on upload." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polaris as po\n", + "\n", + "# Load a benchmark\n", + "benchmark = po.load_benchmark(\"polaris/hello-world\")\n", + "\n", + "# Get the results\n", + "results = benchmark.evaluate(...)\n", + "\n", + "# Attach it to the result\n", + "results.model = po.load_model(\"recursion/MolGPS\")\n", + "\n", + "# Upload the results\n", + "results.upload_to_hub(owner=\"dummy-user\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, you could also do" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the results\n", + "results = benchmark.evaluate(...)\n", + "\n", + "# Attach it to the result\n", + "results.model = Model(\n", + " name=\"MolGPS\",\n", + " owner=\"recursion\",\n", + " description=\"Graph transformer foundation model for molecular modeling\",\n", + " code_url=\"https://github.com/datamol-io/graphium\"\n", + ")\n", + "\n", + "# 
Upload the results\n", + "results.upload_to_hub(owner=\"dummy-user\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "The End. " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 3444f647cdaf660588ff7953f715cea4a9c2e138 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Mon, 24 Feb 2025 10:58:59 -0500 Subject: [PATCH 03/13] Alias paper_url to report_url --- polaris/evaluate/_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris/evaluate/_metadata.py b/polaris/evaluate/_metadata.py index cabc7ec9..effe6931 100644 --- a/polaris/evaluate/_metadata.py +++ b/polaris/evaluate/_metadata.py @@ -21,7 +21,7 @@ class ResultsMetadataV1(BaseArtifactModel): # Additional metadata github_url: HttpUrlString | None = Field(None, alias="code_url") - paper_url: HttpUrlString | None = None + paper_url: HttpUrlString | None = Field(None, alias="report_url") contributors: list[HubUser] = Field(default_factory=list) # Private attributes From 1bef77278667165742cb0d77cd5b27acf6c50e41 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Mon, 24 Feb 2025 12:07:33 -0500 Subject: [PATCH 04/13] Add list_models method in client --- polaris/hub/client.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index e3b43d4b..dcfd7103 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -891,6 +891,24 @@ def submit_competition_predictions( ) return response + def list_models(self, limit: int = 100, offset: int = 0) -> list[str]: + """List all available models on the 
Polaris Hub. + + Args: + limit: The maximum number of models to return. + offset: The offset from which to start returning models. + + Returns: + A list of model names in the format `owner/model_slug`. + """ + with track_progress(description="Fetching models", total=1): + json_response = self._base_request_to_hub( + url="/v2/model", method="GET", params={"limit": limit, "offset": offset} + ).json() + models = [model["artifactId"] for model in json_response["data"]] + + return models + def get_model(self, artifact_id: str) -> Model: url = f"/v2/model/{artifact_id}" response = self._base_request_to_hub(url=url, method="GET") From 19cef26501144405b2ddd1d584201aa168a905c9 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Mon, 24 Feb 2025 12:07:58 -0500 Subject: [PATCH 05/13] Update docstring --- polaris/hub/client.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index dcfd7103..421420ca 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -265,7 +265,7 @@ def list_datasets(self, limit: int = 100, offset: int = 0) -> list[str]: offset: The offset from which to start returning datasets. Returns: - A list of dataset names in the format `owner/dataset_name`. + A list of dataset names in the format `owner/dataset_slug`. """ with track_progress(description="Fetching datasets", total=1): # Step 1: Fetch enough v2 datasets to cover the offset and limit @@ -383,14 +383,15 @@ def _get_v2_dataset(self, owner: str | HubOwner, name: str) -> DatasetV2: return dataset def list_benchmarks(self, limit: int = 100, offset: int = 0) -> list[str]: - """List all available benchmarks on the Polaris Hub. + """List all available benchmarks (v1 and v2) on the Polaris Hub. + We prioritize v2 benchmarks over v1 benchmarks. Args: limit: The maximum number of benchmarks to return. offset: The offset from which to start returning benchmarks.
Returns: - A list of benchmark names in the format `owner/benchmark_name`. + A list of benchmark names in the format `owner/benchmark_slug`. """ with track_progress(description="Fetching benchmarks", total=1): # Step 1: Fetch enough v2 benchmarks to cover the offset and limit From 62eb98c5361c8635f7da7eec24e87ca25ad0e90f Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Mon, 24 Feb 2025 12:08:10 -0500 Subject: [PATCH 06/13] Make use of artifactId --- polaris/hub/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 421420ca..2e64720d 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -399,7 +399,7 @@ def list_benchmarks(self, limit: int = 100, offset: int = 0) -> list[str]: url="/v2/benchmark", method="GET", params={"limit": limit, "offset": offset} ).json() v2_data = v2_json_response["data"] - v2_benchmarks = [f"{HubOwner(**benchmark['owner'])}/{benchmark['name']}" for benchmark in v2_data] + v2_benchmarks = [benchmark["artifactId"] for benchmark in v2_data] # If v2 benchmarks satisfy the limit, return them if len(v2_benchmarks) == limit: @@ -417,7 +417,7 @@ def list_benchmarks(self, limit: int = 100, offset: int = 0) -> list[str]: }, ).json() v1_data = v1_json_response["data"] - v1_benchmarks = [f"{HubOwner(**benchmark['owner'])}/{benchmark['name']}" for benchmark in v1_data] + v1_benchmarks = [benchmark["artifactId"] for benchmark in v1_data] # Combine the v2 and v1 benchmarks combined_benchmarks = v2_benchmarks + v1_benchmarks From 93d7af16c2cebadbc9b0dc8adb9ac493f1ac4b03 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Tue, 25 Feb 2025 07:26:55 -0500 Subject: [PATCH 07/13] fix url --- polaris/hub/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 2e64720d..e4bfcb34 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -946,9 +946,8 @@ def upload_model( model_json = 
model.model_dump(by_alias=True, exclude_none=True) # Make a request to the Hub - response = self._base_request_to_hub( - url="/v2/model", method="POST", json={"access": access, **model_json} - ) + url = f"/v2/model/{model.artifact_id}" + response = self._base_request_to_hub(url=url, method="PUT", json={"access": access, **model_json}) # Inform the user about where to find their newly created artifact. model_url = urljoin(self.settings.hub_url, response.headers.get("Content-Location")) From abdcdbd4a68e17c597a5263ec8a39ee1f73c9e5f Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Tue, 25 Feb 2025 07:58:36 -0500 Subject: [PATCH 08/13] update tutorial --- docs/tutorials/create_a_dataset.ipynb | 2 +- docs/tutorials/create_a_model.ipynb | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/tutorials/create_a_dataset.ipynb b/docs/tutorials/create_a_dataset.ipynb index d237c9dc..04857d01 100644 --- a/docs/tutorials/create_a_dataset.ipynb +++ b/docs/tutorials/create_a_dataset.ipynb @@ -42,7 +42,7 @@ " ),\n", " \"Permeability\": ColumnAnnotation(\n", " description=\"MDR1-MDCK efflux ratio (B-A/A-B)\", \n", - " user_attributes={\"Unit\": \"\tmL/min/kg\"}\n", + " user_attributes={\"Unit\": \"mL/min/kg\"}\n", " )\n", " },\n", " \n", diff --git a/docs/tutorials/create_a_model.ipynb b/docs/tutorials/create_a_model.ipynb index 94cb65f9..f3890af1 100644 --- a/docs/tutorials/create_a_model.ipynb +++ b/docs/tutorials/create_a_model.ipynb @@ -59,19 +59,19 @@ "metadata": {}, "outputs": [], "source": [ - "import polaris as po\n", + "from polaris import load_benchmark, load_model\n", "\n", "# Load a benchmark\n", - "benchmark = po.load_benchmark(\"polaris/hello-world\")\n", + "benchmark = load_benchmark(\"polaris/hello-world-benchmark\")\n", "\n", "# Get the results\n", "results = benchmark.evaluate(...)\n", "\n", "# Attach it to the result\n", - "results.model = po.load_model(\"recursion/MolGPS\")\n", + "results.model = 
load_model(\"recursion/MolGPS\")\n", "\n", "# Upload the results\n", - "results.upload_to_hub(owner=\"dummy-user\")" + "results.upload_to_hub(owner=\"your-username\")" ] }, { @@ -99,7 +99,7 @@ ")\n", "\n", "# Upload the results\n", - "results.upload_to_hub(owner=\"dummy-user\")" + "results.upload_to_hub(owner=\"your-username\")" ] }, { From 59273171a75cd5804515ce03f5a556af5ae47896 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Wed, 26 Feb 2025 14:01:43 -0500 Subject: [PATCH 09/13] fix error: BenchmarkV2Specification() got multiple values for keyword argument 'split' --- polaris/hub/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index e4bfcb34..86fce335 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -492,7 +492,7 @@ def _get_v2_benchmark(self, owner: str | HubOwner, name: str) -> BenchmarkV2Spec with StorageSession(self, "read", BenchmarkV2Specification.urn_for(owner, name)) as storage: split = {label: storage.get_file(label) for label in response_data.get("split", {}).keys()} - return BenchmarkV2Specification(**response_data, split=split) + return BenchmarkV2Specification(**{**response_data, "split": split}) def upload_results( self, From 9d3385aced2bab2d32aa705403f87bd948034d8f Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Wed, 26 Feb 2025 17:40:50 -0500 Subject: [PATCH 10/13] add model_artifact_id computed property --- polaris/evaluate/_metadata.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/polaris/evaluate/_metadata.py b/polaris/evaluate/_metadata.py index effe6931..b0f9957d 100644 --- a/polaris/evaluate/_metadata.py +++ b/polaris/evaluate/_metadata.py @@ -1,6 +1,6 @@ from datetime import datetime -from pydantic import Field, PrivateAttr +from pydantic import Field, PrivateAttr, computed_field from polaris._artifact import BaseArtifactModel from polaris.utils.dict2html import dict2html @@ -52,6 +52,11 @@ class 
ResultsMetadataV2(BaseArtifactModel): # Private attributes _created_at: datetime = PrivateAttr(default_factory=datetime.now) + @computed_field + @property + def model_artifact_id(self) -> str: + return self.model.artifact_id + def _repr_html_(self) -> str: return dict2html(self.model_dump()) From 624b0921e96127e9f7b9fc7aa9284ac94a9d6d6c Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Thu, 27 Feb 2025 10:33:59 -0500 Subject: [PATCH 11/13] fix typing --- polaris/hub/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 86fce335..78e5c7ed 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -24,7 +24,7 @@ from polaris.competition import CompetitionSpecification from polaris.model import Model from polaris.dataset import Dataset, DatasetV1, DatasetV2 -from polaris.evaluate import BenchmarkResults, CompetitionPredictions +from polaris.evaluate import BenchmarkResultsV1, BenchmarkResultsV2, CompetitionPredictions from polaris.hub.external_client import ExternalAuthClient from polaris.hub.oauth import CachedTokenAuth from polaris.hub.settings import PolarisHubSettings @@ -496,7 +496,7 @@ def _get_v2_benchmark(self, owner: str | HubOwner, name: str) -> BenchmarkV2Spec def upload_results( self, - results: BenchmarkResults, + results: BenchmarkResultsV1 | BenchmarkResultsV2, access: AccessType = "private", owner: HubOwner | str | None = None, ): From 36d702cbdc6db26cf4a4f425121c0d04e024d84f Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Thu, 27 Feb 2025 10:46:20 -0500 Subject: [PATCH 12/13] update tutorial --- docs/tutorials/create_a_model.ipynb | 30 +---------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/docs/tutorials/create_a_model.ipynb b/docs/tutorials/create_a_model.ipynb index f3890af1..c98c0956 100644 --- a/docs/tutorials/create_a_model.ipynb +++ b/docs/tutorials/create_a_model.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - 
"execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -74,34 +74,6 @@ "results.upload_to_hub(owner=\"your-username\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alternatively, you could also do" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the results\n", - "results = benchmark.evaluate(...)\n", - "\n", - "# Attach it to the result\n", - "results.model = Model(\n", - " name=\"MolGPS\",\n", - " owner=\"recursion\",\n", - " description=\"Graph transformer foundation model for molecular modeling\",\n", - " code_url=\"https://github.com/datamol-io/graphium\"\n", - ")\n", - "\n", - "# Upload the results\n", - "results.upload_to_hub(owner=\"your-username\")" - ] - }, { "cell_type": "markdown", "metadata": {}, From c07a41c3409a1b31aef0dbe2821d01cc02771ab9 Mon Sep 17 00:00:00 2001 From: Honore Hounwanou Date: Thu, 27 Feb 2025 12:36:10 -0500 Subject: [PATCH 13/13] Fix maximum recursion depth exceeded by making a shallow copy instead of a deepcopy Co-authored-by: Cas Wognum Co-authored-by: Julien St-Laurent --- polaris/dataset/_subset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polaris/dataset/_subset.py b/polaris/dataset/_subset.py index 6d1efda3..51955981 100644 --- a/polaris/dataset/_subset.py +++ b/polaris/dataset/_subset.py @@ -1,4 +1,4 @@ -from copy import deepcopy +from copy import copy from typing import Callable, Iterable, List, Literal, Sequence import numpy as np @@ -225,7 +225,7 @@ def as_dataframe(self) -> pd.DataFrame: def copy(self) -> Self: """Returns a copy of the subset.""" - return deepcopy(self) + return copy(self) def extend_inputs(self, input_cols: Iterable[str] | str) -> Self: """