Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] add hydroshare as a repo provider #967

Merged
merged 14 commits into from Feb 3, 2020
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion binderhub/app.py 100644 → 100755
Expand Up @@ -33,7 +33,7 @@
from .main import MainHandler, ParameterizedMainHandler, LegacyRedirectHandler
from .repoproviders import (GitHubRepoProvider, GitRepoProvider,
GitLabRepoProvider, GistRepoProvider,
ZenodoProvider, FigshareProvider)
ZenodoProvider, FigshareProvider, HydroshareProvider)
from .metrics import MetricsHandler

from .utils import ByteSpecification, url_path_join
Expand Down Expand Up @@ -393,6 +393,7 @@ def _add_slash(self, proposal):
'gl': GitLabRepoProvider,
'zenodo': ZenodoProvider,
'figshare': FigshareProvider,
'hydroshare': HydroshareProvider,
},
config=True,
help="""
Expand Down
3 changes: 2 additions & 1 deletion binderhub/event-schemas/launch.json
Expand Up @@ -12,7 +12,8 @@
"GitLab",
"Git",
"Zenodo",
"Figshare"
"Figshare",
"Hydroshare"
],
"description": "Provider for the repository being launched"
},
Expand Down
3 changes: 2 additions & 1 deletion binderhub/main.py
Expand Up @@ -16,7 +16,8 @@
"gl": "GitLab",
"git": "Git repo",
"zenodo": "Zenodo",
"figshare": "Figshare"
"figshare": "Figshare",
"hydroshare": "Hydroshare"
}


Expand Down
47 changes: 46 additions & 1 deletion binderhub/repoproviders.py 100644 → 100755
Expand Up @@ -7,7 +7,7 @@
Note: When adding a new repo provider, add it to the allowed values for
repo providers in event-schemas/launch.json.
"""
from datetime import timedelta
from datetime import timedelta, datetime, timezone
import json
import os
import time
Expand Down Expand Up @@ -250,6 +250,51 @@ def get_build_slug(self):
return "figshare-{}".format(self.record_id)


class HydroshareProvider(RepoProvider):
    """Provide contents of a Hydroshare resource

    Users must provide a spec consisting of the Hydroshare resource id.
    Any spec that contains the 32-character lowercase hexadecimal id is
    accepted (for example a resource URL or a ``10.4211/hs.<id>`` DOI).
    """
    name = Unicode("Hydroshare")
    url_regex = re.compile(r".*([0-9a-f]{32}).*")

    def _parse_resource_id(self, spec):
        """Return the 32-character resource id embedded in ``spec``.

        Raises:
            ValueError: if ``spec`` contains no 32-character hex id.
        """
        match = self.url_regex.match(spec)
        if not match:
            raise ValueError("The specified Hydroshare resource id was not recognized.")
        return match.group(1)

    @staticmethod
    def _parse_modified_timestamp(json_body):
        """Return the resource's "modified" date as a whole-second epoch string.

        ``json_body`` is the JSON text of the science-metadata response; its
        ``dates`` list is searched for the entry whose ``type`` is
        ``"modified"``.

        Raises:
            ValueError: if the metadata carries no "modified" entry.
        """
        metadata = json.loads(json_body)
        # Supply a default: a bare StopIteration escaping into the enclosing
        # generator-based coroutine would not surface as a meaningful error.
        modified = next(
            (item for item in metadata["dates"] if item["type"] == "modified"),
            None,
        )
        if modified is None:
            raise ValueError(
                "The Hydroshare resource metadata does not contain a modified date."
            )
        # Keep only the leading "YYYY-MM-DDTHH:MM:SS" (19 characters). This
        # drops fractional seconds and any timezone suffix whether or not a
        # "." is present.
        # NOTE(review): the value is interpreted as UTC, following the
        # original assumption that Hydroshare always reports the same
        # timezone -- confirm against the Hydroshare API.
        stamp = modified["start_date"][:19]
        parsed_date = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
        epoch = parsed_date.replace(tzinfo=timezone.utc).timestamp()
        # truncate the timestamp to whole seconds
        return str(int(epoch))

    @gen.coroutine
    def get_resolved_ref(self):
        """Resolve the spec to ``<resource_id>.v<modified-epoch-seconds>``.

        Fetches the resource's science metadata from Hydroshare and stores the
        result in ``self.record_id``, which ``get_build_slug`` reads.
        """
        client = AsyncHTTPClient()
        self.resource_id = self._parse_resource_id(self.spec)
        req = HTTPRequest(
            "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements".format(self.resource_id),
            user_agent="BinderHub",
        )
        r = yield client.fetch(req)
        # The last-modified time (second resolution) acts as the version, so
        # the resolved ref changes whenever the resource is edited.
        self.record_id = "{}.v{}".format(
            self.resource_id, self._parse_modified_timestamp(r.body)
        )
        return self.record_id

    def get_repo_url(self):
        """Return the canonical Hydroshare URL for the resource in the spec."""
        self.resource_id = self._parse_resource_id(self.spec)
        return "https://www.hydroshare.org/resource/{}".format(self.resource_id)

    def get_build_slug(self):
        """Return the build slug; requires ``get_resolved_ref`` to have run."""
        return "hydroshare-{}".format(self.record_id)


class GitRepoProvider(RepoProvider):
"""Bare bones git repo provider.

Expand Down
7 changes: 6 additions & 1 deletion binderhub/static/js/index.js
Expand Up @@ -89,6 +89,11 @@ function updateRepoText() {
$("#ref").prop("disabled", true);
$("label[for=ref]").prop("disabled", true);
}
else if (provider === "hydroshare") {
text = "Hydroshare resource id or URL";
$("#ref").prop("disabled", true);
$("label[for=ref]").prop("disabled", true);
}
$("#repository").attr('placeholder', text);
$("label[for=repository]").text(text);
$("#ref").attr('placeholder', tag_text);
Expand All @@ -112,7 +117,7 @@ function getBuildFormValues() {
}

var ref = $('#ref').val().trim() || 'master';
if (providerPrefix === 'zenodo' || providerPrefix === 'figshare') {
if (providerPrefix === 'zenodo' || providerPrefix === 'figshare' || providerPrefix === 'hydroshare') {
ref = "";
}
var path = $('#filepath').val().trim();
Expand Down
1 change: 1 addition & 0 deletions binderhub/templates/index.html
Expand Up @@ -56,6 +56,7 @@ <h4 id="form-header" class='row'>Build and launch a repository</h4>
<li class="dropdown-item" value="git"><a href="#">Git repository</a></li>
<li class="dropdown-item" value="zenodo"><a href="#">Zenodo DOI</a></li>
<li class="dropdown-item" value="figshare"><a href="#">Figshare DOI</a></li>
<li class="dropdown-item" value="hydroshare"><a href="#">Hydroshare resource</a></li>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am unsure about the wording here, because for Zenodo and Figshare, we also support plain Zenodo URLs (they are just patched through), and it seems the Hydroshare support in repo2docker also supports DOIs...

@betatim What wording should BinderHub use here? Favour "DOI" or "resource" (or "record"). I'd tend towards only talking about DOIs externally, but not failing when direct resource URLs are provided.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used resource because it is a HydroShare term. I'm fine highlighting the DOI, but I do think it'd be good to point to a resource id as well, because resources don't have a functional DOI until they publish the resource. Let me know what you'd like, and I can update it in this PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if there's a provider-specific wording, we can just use that since a user familiar with it will be expecting that word (as long as it's not too long or complex)

</ul>
</div>
</div>
Expand Down
31 changes: 30 additions & 1 deletion binderhub/tests/test_repoproviders.py 100644 → 100755
Expand Up @@ -6,7 +6,8 @@

from binderhub.repoproviders import (
tokenize_spec, strip_suffix, GitHubRepoProvider, GitRepoProvider,
GitLabRepoProvider, GistRepoProvider, ZenodoProvider, FigshareProvider
GitLabRepoProvider, GistRepoProvider, ZenodoProvider, FigshareProvider,
HydroshareProvider
)


Expand Down Expand Up @@ -67,6 +68,34 @@ async def test_figshare():
assert repo_url == spec


async def test_hydroshare():
    """A Hydroshare resource URL resolves to a versioned ref, slug and repo URL."""
    resource_url = 'https://www.hydroshare.org/resource/142c59757ed54de1816777828c9716e7'
    hydroshare = HydroshareProvider(spec=resource_url)

    # Resolve first: the build slug is derived from the record id stored here.
    resolved = await hydroshare.get_resolved_ref()
    assert resolved == '142c59757ed54de1816777828c9716e7.v1545934606'

    assert hydroshare.get_build_slug() == 'hydroshare-142c59757ed54de1816777828c9716e7.v1545934606'
    # A plain resource URL is returned unchanged as the repo URL.
    assert hydroshare.get_repo_url() == resource_url


async def test_hydroshare_doi():
    """A Hydroshare DOI resolves to the same kind of ref, slug and resource URL."""
    doi = '10.4211/hs.b8f6eae9d89241cf8b5904033460af61'
    hydroshare = HydroshareProvider(spec=doi)

    # Resolve first: the build slug is derived from the record id stored here.
    resolved = await hydroshare.get_resolved_ref()
    assert resolved == 'b8f6eae9d89241cf8b5904033460af61.v1565445792'

    assert hydroshare.get_build_slug() == 'hydroshare-b8f6eae9d89241cf8b5904033460af61.v1565445792'
    # The DOI is mapped onto the canonical resource URL.
    assert hydroshare.get_repo_url() == 'https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61'


@pytest.mark.github_api
def test_github_ref():
provider = GitHubRepoProvider(spec='jupyterhub/zero-to-jupyterhub-k8s/v0.4')
Expand Down