24 changes: 15 additions & 9 deletions README.md
@@ -49,20 +49,26 @@ This library is a Python client to interact with the [Polaris Hub](https://polar
```python
import polaris as po

# Download a benchmark (the associated dataset will be transparently downloaded)
benchmark = po.load_benchmark("org_or_user/name")
# Load the benchmark from the Hub
benchmark = po.load_benchmark("polaris/hello_world_benchmark")

# Retrieve the splits
# Get the train and test data-loaders
train, test = benchmark.get_train_test_split()

# Work your magic!
y_pred = ...
# Use the training data to train your model
# Get the input as an array with 'train.inputs' and 'train.targets'
# Or simply iterate over the train object.
for x, y in train:
...

# Run the evaluation procedure
results = benchmark.evaluate(y_pred)
# Work your magic to accurately predict the test set
predictions = [0.0 for x in test]

# Upload your results to the hub
results.upload_to_hub()
# Evaluate your predictions
results = benchmark.evaluate(predictions)

# Submit your results
results.upload_to_hub(owner="dummy-user")
```
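
To make the training step in the snippet above concrete, here is a standalone sketch of how the split could feed a scikit-learn model. It assumes `train.inputs` and `train.targets` are array-like (as the comments suggest), that each input is a string such as a SMILES that still needs featurization, and that scikit-learn and NumPy are installed; the `featurize` helper below is a hypothetical stand-in for a real descriptor pipeline, not part of the PR.

```python
import numpy as np
import polaris as po
from sklearn.ensemble import RandomForestRegressor

# Hypothetical stand-in featurizer: map any input to a fixed-length numeric vector.
def featurize(x, size: int = 16) -> np.ndarray:
    codes = [float(ord(c)) for c in str(x)][:size]
    return np.array(codes + [0.0] * (size - len(codes)))

benchmark = po.load_benchmark("polaris/hello_world_benchmark")
train, test = benchmark.get_train_test_split()

# 'train.inputs' / 'train.targets' expose the raw arrays referenced in the comments above.
X_train = np.stack([featurize(x) for x in train.inputs])
y_train = np.asarray(train.targets)
model = RandomForestRegressor().fit(X_train, y_train)

# The test loader only yields inputs, so predict sample by sample.
predictions = [float(model.predict(featurize(x).reshape(1, -1))[0]) for x in test]

results = benchmark.evaluate(predictions)
results.upload_to_hub(owner="dummy-user")
```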

## Documentation
8 changes: 8 additions & 0 deletions docs/api/load.md
@@ -0,0 +1,8 @@

::: polaris.load_dataset

---

::: polaris.load_benchmark

---
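
Since this new page only holds the mkdocstrings directives above, here is a short orientation sketch of the two documented entry points; the dataset slug below is a placeholder, not necessarily an artifact that exists on the Hub.

```python
import polaris as po

# Load a standalone dataset by its "owner/name" slug (placeholder slug here).
dataset = po.load_dataset("polaris/hello-world-dataset")

# Load a benchmark; the associated dataset is downloaded transparently.
benchmark = po.load_benchmark("polaris/hello_world_benchmark")
```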
21 changes: 17 additions & 4 deletions docs/quickstart.md
@@ -24,13 +24,26 @@ If all you care about is to partake in a benchmark that is hosted on the hub, it
```python
import polaris as po

benchmark = po.load_benchmark("org_or_user/name")
# Load the benchmark from the Hub
benchmark = po.load_benchmark("polaris/hello_world_benchmark")

# Get the train and test data-loaders
train, test = benchmark.get_train_test_split()

y_pred = ... # Work your magic!
# Use the training data to train your model
# Get the input as an array with 'train.inputs' and 'train.targets'
# Or simply iterate over the train object.
for x, y in train:
...

# Work your magic to accurately predict the test set
predictions = [0.0 for x in test]

# Evaluate your predictions
results = benchmark.evaluate(predictions)

results = benchmark.evaluate(y_pred)
results.upload_to_hub()
# Submit your results
results.upload_to_hub(owner="dummy-user")
```

That's all it takes to partake in a benchmark. No complicated, custom data-loaders or evaluation protocols. With just a few lines of code, you can feel confident that you are properly evaluating your model and focus on what you do best: solving the hard problems in our domain!
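
The results object also carries metadata fields such as `name`, `description`, `tags`, `github_url`, and `paper_url` (visible in the pretty-printed result in the tutorial notebook below). A sketch of annotating them before uploading; direct attribute assignment on the results model and the URL shown are assumptions, not part of the PR.

```python
# Annotate the result before submitting it (field names taken from the result
# repr in the tutorial notebook; assigning them directly is assumed to be allowed).
results.name = "hello-world-baseline"
results.description = "A trivial constant-prediction baseline."
results.github_url = "https://github.com/my-user/my-baseline"  # placeholder URL

results.upload_to_hub(owner="dummy-user", access="private")
```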
31 changes: 12 additions & 19 deletions docs/tutorials/basics.ipynb
@@ -63,7 +63,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2023-11-06 17:37:18.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mlogin\u001b[0m:\u001b[36m262\u001b[0m - \u001b[1mYou are already logged in to the Polaris Hub as lu-valencelabs (lu@valencediscovery.com). Set `overwrite=True` to force re-authentication.\u001b[0m\n"
"\u001b[32m2023-11-27 14:54:08.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mlogin\u001b[0m:\u001b[36m262\u001b[0m - \u001b[1mYou are already logged in to the Polaris Hub as cwognum (cas@valencediscovery.com). Set `overwrite=True` to force re-authentication.\u001b[0m\n"
]
}
],
@@ -285,7 +285,7 @@
{
"data": {
"text/html": [
"<table border=\"1\"><tr><th>name</th><td>None</td></tr><tr><th>description</th><td></td></tr><tr><th>tags</th><td></td></tr><tr><th>user_attributes</th><td></td></tr><tr><th>owner</th><td>None</td></tr><tr><th>benchmark_name</th><td>hello_world_benchmark</td></tr><tr><th>benchmark_owner</th><td><table border=\"1\"><tr><th>slug</th><td>polaris</td></tr><tr><th>organization_id</th><td>org_2WG9hRFgKNIRtGw4orsMPcr1F4S</td></tr><tr><th>user_id</th><td>None</td></tr><tr><th>owner</th><td>org_2WG9hRFgKNIRtGw4orsMPcr1F4S</td></tr></table></td></tr><tr><th>github_url</th><td>None</td></tr><tr><th>paper_url</th><td>None</td></tr><tr><th>contributors</th><td>None</td></tr><tr><th>results</th><td><table border=\"1\"><thead><tr><th>Test set</th><th>Target label</th><th>Metric</th><th>Score</th></tr></thead><tbody><tr><td>test</td><td>SOL</td><td>mean_squared_error</td><td>2.6875139821</td></tr><tr><td>test</td><td>SOL</td><td>mean_absolute_error</td><td>1.2735690161</td></tr></tbody></table></td></tr></table>"
"<table border=\"1\"><tr><th>name</th><td>None</td></tr><tr><th>description</th><td></td></tr><tr><th>tags</th><td></td></tr><tr><th>user_attributes</th><td></td></tr><tr><th>owner</th><td>None</td></tr><tr><th>benchmark_name</th><td>hello_world_benchmark</td></tr><tr><th>benchmark_owner</th><td><table border=\"1\"><tr><th>slug</th><td>polaris</td></tr><tr><th>external_id</th><td>org_2WG9hRFgKNIRtGw4orsMPcr1F4S</td></tr><tr><th>type</th><td>organization</td></tr></table></td></tr><tr><th>github_url</th><td>None</td></tr><tr><th>paper_url</th><td>None</td></tr><tr><th>contributors</th><td>None</td></tr><tr><th>artifact_id</th><td>None</td></tr><tr><th>benchmark_artifact_id</th><td>polaris/hello-world-benchmark</td></tr><tr><th>results</th><td><table border=\"1\"><thead><tr><th>Test set</th><th>Target label</th><th>Metric</th><th>Score</th></tr></thead><tbody><tr><td>test</td><td>SOL</td><td>mean_squared_error</td><td>2.6875139821</td></tr><tr><td>test</td><td>SOL</td><td>mean_absolute_error</td><td>1.2735690161</td></tr></tbody></table></td></tr></table>"
],
"text/plain": [
"{\n",
@@ -297,13 +297,14 @@
" \"benchmark_name\": \"hello_world_benchmark\",\n",
" \"benchmark_owner\": {\n",
" \"slug\": \"polaris\",\n",
" \"organization_id\": \"org_2WG9hRFgKNIRtGw4orsMPcr1F4S\",\n",
" \"user_id\": null,\n",
" \"owner\": \"org_2WG9hRFgKNIRtGw4orsMPcr1F4S\"\n",
" \"external_id\": \"org_2WG9hRFgKNIRtGw4orsMPcr1F4S\",\n",
" \"type\": \"organization\"\n",
" },\n",
" \"github_url\": null,\n",
" \"paper_url\": null,\n",
" \"contributors\": null,\n",
" \"artifact_id\": null,\n",
" \"benchmark_artifact_id\": \"polaris/hello-world-benchmark\",\n",
" \"results\": [\n",
" {\n",
" \"Test set\": \"test\",\n",
@@ -341,7 +342,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 15,
"id": "a601f415-c563-4efe-94c3-0d44f3fd6576",
"metadata": {},
"outputs": [],
@@ -362,7 +363,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 16,
"id": "60cbf4b9-8514-480d-beda-8a50e5f7c9a6",
"metadata": {
"scrolled": true
@@ -372,16 +373,16 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/lu.zhu/miniconda3/envs/pov3/lib/python3.11/site-packages/pydantic/main.py:309: UserWarning: Pydantic serializer warnings:\n",
"/home/cas/micromamba/envs/polaris/lib/python3.12/site-packages/pydantic/main.py:308: UserWarning: Pydantic serializer warnings:\n",
" Expected `url` but got `str` - serialized value may not be as expected\n",
" Expected `url` but got `str` - serialized value may not be as expected\n",
" return self.__pydantic_serializer__.to_python(\n",
"\u001b[32m2023-11-06 17:38:06.152\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mupload_results\u001b[0m:\u001b[36m413\u001b[0m - \u001b[32m\u001b[1mYour result has been successfully uploaded to the Hub. View it here: https://polarishub.io/benchmarks/polaris/hello_world_benchmark/YYH033LKM1BaT8byAC5Jc\u001b[0m\n"
"\u001b[32m2023-11-27 14:54:46.649\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mupload_results\u001b[0m:\u001b[36m428\u001b[0m - \u001b[32m\u001b[1mYour result has been successfully uploaded to the Hub. View it here: https://polarishub.io/benchmarks/polaris/hello_world_benchmark/ns4JrC3hQNK9M1hbVPchy\u001b[0m\n"
]
}
],
"source": [
"client.upload_results(results)\n",
"client.upload_results(results, owner=\"cwognum\")\n",
"client.close()"
]
},
@@ -396,14 +397,6 @@
"\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0868ff53-7a42-4e4c-bae4-29fb04c513c7",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -422,7 +415,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.12.0"
}
},
"nbformat": 4,
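
The notebook above uploads through the lower-level client (`client.upload_results(results, owner="cwognum")` followed by `client.close()`). Below is a sketch of the same flow using the client as a context manager, mirroring the pattern the `upload_to_hub` helpers in this PR use internally; it assumes the client's constructor arguments all have usable defaults, and the owner slug is a placeholder.

```python
from polaris.hub.client import PolarisHubClient

# 'results' is assumed to be a BenchmarkResults object, e.g. from benchmark.evaluate(...).
# Context-manager form: close() is handled automatically on exit.
with PolarisHubClient() as client:
    client.upload_results(results, access="private", owner="my-user")
```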
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -22,6 +22,7 @@ nav:
- Custom Datasets and Benchmarks: tutorials/custom_dataset_benchmark.ipynb
# - Creating Datasets with zarr: tutorials/dataset_zarr.ipynb
- API Reference:
- Load: api/load.md
- Core:
- Dataset: api/dataset.md
- Benchmark: api/benchmark.md
5 changes: 3 additions & 2 deletions polaris/benchmark/_base.py
@@ -22,7 +22,7 @@
from polaris.utils.dict2html import dict2html
from polaris.utils.errors import InvalidBenchmarkError, PolarisChecksumError
from polaris.utils.misc import listit
from polaris.utils.types import AccessType, DataFormat, PredictionsType, SplitType
from polaris.utils.types import AccessType, DataFormat, HubOwner, PredictionsType, SplitType

ColumnsType = Union[str, list[str]]

@@ -371,6 +371,7 @@ def upload_to_hub(
settings: Optional[PolarisHubSettings] = None,
cache_auth_token: bool = True,
access: Optional[AccessType] = "private",
owner: Optional[Union[HubOwner, str]] = None,
**kwargs: dict,
):
"""
@@ -382,7 +383,7 @@
with PolarisHubClient(
env_file=env_file, settings=settings, cache_auth_token=cache_auth_token, **kwargs
) as client:
return client.upload_benchmark(self, access)
return client.upload_benchmark(self, access=access, owner=owner)

def to_json(self, destination: str) -> str:
"""Save the benchmark to a destination directory as a JSON file.
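
With `owner` now threaded through `BenchmarkSpecification.upload_to_hub`, a caller can name the owning user or organization at upload time. A minimal usage sketch; the slug is a placeholder and `benchmark` is assumed to be an already-constructed `BenchmarkSpecification`.

```python
# If benchmark.owner is not already set, the owner argument is now required;
# a plain string slug is accepted and coerced to a HubOwner downstream.
benchmark.upload_to_hub(access="public", owner="my-org")
```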
5 changes: 3 additions & 2 deletions polaris/dataset/_dataset.py
@@ -23,7 +23,7 @@
from polaris.utils.dict2html import dict2html
from polaris.utils.errors import InvalidDatasetError, PolarisChecksumError
from polaris.utils.io import get_zarr_root, robust_copy
from polaris.utils.types import AccessType, HttpUrlString, License
from polaris.utils.types import AccessType, HttpUrlString, HubOwner, License

# Constants
_SUPPORTED_TABLE_EXTENSIONS = ["parquet"]
@@ -201,6 +201,7 @@ def upload_to_hub(
settings: Optional[PolarisHubSettings] = None,
cache_auth_token: bool = True,
access: Optional[AccessType] = "private",
owner: Optional[Union[HubOwner, str]] = None,
**kwargs: dict,
):
"""
@@ -212,7 +213,7 @@
with PolarisHubClient(
env_file=env_file, settings=settings, cache_auth_token=cache_auth_token, **kwargs
) as client:
return client.upload_dataset(self, access)
return client.upload_dataset(self, access=access, owner=owner)

@classmethod
def from_zarr(cls, path: str) -> "Dataset":
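
`Dataset.upload_to_hub` gains the same `owner` pass-through; combined with the `from_zarr` constructor visible in the surrounding context, a typical call might look as follows. The zarr path and slug are placeholders, and `Dataset` is assumed to be re-exported from `polaris.dataset`.

```python
from polaris.dataset import Dataset

# Build a dataset from a local zarr archive, then upload it under an explicit owner.
dataset = Dataset.from_zarr("path/to/my_dataset.zarr")
dataset.upload_to_hub(access="private", owner="my-org")
```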
3 changes: 2 additions & 1 deletion polaris/evaluate/_results.py
@@ -182,6 +182,7 @@ def upload_to_hub(
settings: Optional[PolarisHubSettings] = None,
cache_auth_token: bool = True,
access: Optional[AccessType] = "private",
owner: Optional[Union[HubOwner, str]] = None,
**kwargs: dict,
):
"""
@@ -193,7 +194,7 @@
with PolarisHubClient(
env_file=env_file, settings=settings, cache_auth_token=cache_auth_token, **kwargs
) as client:
return client.upload_results(self, access)
return client.upload_results(self, access=access, owner=owner)

def _repr_dict_(self) -> dict:
"""Utility function for pretty-printing to the command line and jupyter notebooks"""
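
Since the new parameter is typed `Optional[Union[HubOwner, str]]`, both a plain slug and an explicit `HubOwner` are accepted. A sketch of the two call forms; the slugs are placeholders and `results` is assumed to be an existing `BenchmarkResults`.

```python
from polaris.utils.types import HubOwner

# Either form works: a plain slug string...
results.upload_to_hub(owner="my-user")

# ...or an explicit HubOwner, which is what a string slug is coerced into downstream.
results.upload_to_hub(owner=HubOwner(slug="my-org"), access="public")
```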
51 changes: 48 additions & 3 deletions polaris/hub/client.py
@@ -373,7 +373,12 @@ def get_benchmark(self, owner: Union[str, HubOwner], name: str) -> BenchmarkSpec
)
return benchmark_cls(**response)

def upload_results(self, results: BenchmarkResults, access: AccessType = "private"):
def upload_results(
self,
results: BenchmarkResults,
access: AccessType = "private",
owner: Optional[Union[HubOwner, str]] = None,
):
"""Upload the results to the Polaris Hub.
Info: Owner
Expand All @@ -395,9 +400,19 @@ def upload_results(self, results: BenchmarkResults, access: AccessType = "privat
Args:
results: The results to upload.
access: Grant public or private access to result
owner: Which Hub user or organization owns the artifact.
Optional if and only if the `results.owner` attribute is set.
"""

# Get the serialized model data-structure

if results.owner is None:
if owner is None:
raise ValueError(
"The `owner` argument must be specified if the `results.owner` attribute is not set."
)
results.owner = owner if isinstance(owner, HubOwner) else HubOwner(slug=owner)

result_json = results.model_dump(by_alias=True, exclude_none=True)

# Make a request to the hub
@@ -414,7 +429,11 @@ def upload_results(self, results: BenchmarkResults, access: AccessType = "privat
return response

def upload_dataset(
self, dataset: Dataset, access: AccessType = "private", timeout: TimeoutTypes = (10, 200)
self,
dataset: Dataset,
access: AccessType = "private",
timeout: TimeoutTypes = (10, 200),
owner: Optional[Union[HubOwner, str]] = None,
):
"""Upload the dataset to the Polaris Hub.
@@ -432,8 +451,21 @@ def upload_dataset(
dataset: The dataset to upload.
access: Grant public or private access to result
timeout: Request timeout values. User can modify the value when uploading large dataset as needed.
This can be a single value with the timeout in seconds for all IO operations, or a more granular
tuple with (connect_timeout, write_timeout). The type of the timeout parameter comes from `httpx`.
Since datasets can get large, you may need to increase the write timeout for larger datasets.
See also: https://www.python-httpx.org/advanced/#timeout-configuration
owner: Which Hub user or organization owns the artifact.
Optional if and only if the `dataset.owner` attribute is set.
"""

if dataset.owner is None:
if owner is None:
raise ValueError(
"The `owner` argument must be specified if the `dataset.owner` attribute is not set."
)
dataset.owner = owner if isinstance(owner, HubOwner) else HubOwner(slug=owner)

# Get the serialized data-model
# We exclude the table as it handled separately and the cache_dir as it is user-specific
dataset_json = dataset.model_dump(exclude={"cache_dir", "table"}, exclude_none=True, by_alias=True)
@@ -500,7 +532,12 @@ def upload_dataset(

return response

def upload_benchmark(self, benchmark: BenchmarkSpecification, access: AccessType = "private"):
def upload_benchmark(
self,
benchmark: BenchmarkSpecification,
access: AccessType = "private",
owner: Optional[Union[HubOwner, str]] = None,
):
"""Upload the benchmark to the Polaris Hub.
Info: Owner
@@ -520,7 +557,15 @@ def upload_benchmark(self, benchmark: BenchmarkSpecification, access: AccessType
Args:
benchmark: The benchmark to upload.
access: Grant public or private access to result
owner: Which Hub user or organization owns the artifact.
Optional if and only if the `benchmark.owner` attribute is set.
"""
if benchmark.owner is None:
if owner is None:
raise ValueError(
"The `owner` argument must be specified if the `benchmark.owner` attribute is not set."
)
benchmark.owner = owner if isinstance(owner, HubOwner) else HubOwner(slug=owner)

# Get the serialized data-model
# We exclude the dataset as we expect it to exist on the hub already.
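
The owner-resolution check is repeated verbatim in `upload_results`, `upload_dataset`, and `upload_benchmark`. Below is a standalone sketch of that shared logic; the helper name `_resolve_owner` is hypothetical and not part of the PR.

```python
from typing import Optional, Union

from polaris.utils.types import HubOwner


def _resolve_owner(
    artifact_owner: Optional[HubOwner], owner: Optional[Union[HubOwner, str]]
) -> HubOwner:
    """Hypothetical helper mirroring the check repeated in the three upload methods."""
    if artifact_owner is not None:
        return artifact_owner
    if owner is None:
        raise ValueError(
            "The `owner` argument must be specified if the artifact's owner attribute is not set."
        )
    return owner if isinstance(owner, HubOwner) else HubOwner(slug=owner)


# Coercion behaviour on a plain slug:
print(_resolve_owner(None, "my-org"))  # -> HubOwner(slug='my-org') (exact repr may differ)

# With the client itself, the updated upload_dataset signature also allows a more
# granular timeout for large archives, per the new docstring, e.g.:
#   client.upload_dataset(dataset, access="private", timeout=(10, 600), owner="my-org")
```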