Skip to content

Commit

Permalink
✨ Add access_urls to indexd models to point to gen3 locations
Browse files Browse the repository at this point in the history
  • Loading branch information
dankolbman committed Dec 12, 2018
1 parent e6ba013 commit 9be2d69
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 3 deletions.
2 changes: 2 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ class Config:
# Credentials used to authenticate against the indexd service
INDEXD_USER = os.environ.get('INDEXD_USER', 'test')
INDEXD_PASS = os.environ.get('INDEXD_PASS', 'test')

# Base url of the gen3 data endpoint; used by the model's access_urls
# property to resolve internal s3:// urls to downloadable gen3 locations
GEN3_URL = os.environ.get('GEN3_URL', 'gen3')

# Optional bucket service integration; both default to disabled (None)
BUCKET_SERVICE_URL = os.environ.get('BUCKET_SERVICE_URL', None)
BUCKET_SERVICE_TOKEN = os.environ.get('BUCKET_SERVICE_TOKEN', None)
# SNS topic ARN for emitting events; None disables publishing
SNS_EVENT_ARN = os.environ.get('SNS_EVENT_ARN', None)
Expand Down
21 changes: 20 additions & 1 deletion dataservice/api/common/model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from flask import abort
from flask import abort, current_app
from requests.exceptions import HTTPError
import sqlalchemy.types as types
from sqlalchemy import event, inspect
Expand Down Expand Up @@ -105,6 +105,25 @@ def constructor(self):
# Update fields from indexd
self.merge_indexd()

@property
def access_urls(self):
    """
    Links out to gen3 data endpoints used to download the files
    themselves.

    Urls that are already ``https://`` are considered valid gen3
    locations and passed through unchanged. Urls using the ``s3://``
    protocol are assumed to be internal files and are resolved to our
    gen3 service using this file's latest indexd did.
    """
    # The resolved gen3 location is the same for every s3 url of this
    # file, so build it once up front
    gen3_location = (f'{current_app.config["GEN3_URL"]}'
                     f'/data/{self.latest_did}')
    return [gen3_location if location.startswith('s3://') else location
            for location in self.urls]

def merge_indexd(self):
"""
If the document matching this object's latest_did cannot be found in
Expand Down
1 change: 1 addition & 0 deletions dataservice/api/common/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def check_unknown_fields(self, data, original_data):

class IndexdFileSchema(Schema):
    """Marshmallow fields for file metadata stored in indexd."""
    # Raw storage locations (s3:// or https://) registered in indexd
    urls = ma.List(ma.Str(), required=True)
    # Read-only list derived from urls by the model's access_urls
    # property; dump_only so it is never accepted on input
    access_urls = ma.List(ma.Str(), dump_only=True)
    # Access control list for the file
    acl = ma.List(ma.Str(), required=False)
    file_name = ma.Str()
    # Checksums keyed by algorithm name, e.g. {'md5': '...'}
    hashes = ma.Dict(required=True)
Expand Down
1 change: 0 additions & 1 deletion tests/genomic_file/test_genomic_file_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ def gf_se(bs, strategy='wgs'):
# file has been derived from, in this case, 'wgs' and 'wxs'
# assert set(gf3.experiment_strategy) == {'wxs', 'wgs'}


# TODO Check that file is not deleted if deletion on indexd fails

def _create_save_genomic_files(self):
Expand Down
14 changes: 13 additions & 1 deletion tests/genomic_file/test_genomic_file_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,18 @@ def test_filter_by_bs(client, indexd):
assert gf['external_id'] in _ids


def test_access_urls(client):
    """
    The access_urls field should be derived from the urls field by
    replacing s3:// locations with gen3 http locations and passing
    https:// locations through unchanged
    """
    rgs, gfs, studies = _create_all_entities()
    # Fetch the serialized form of the first genomic file of the first study
    gf = list(gfs.values())[0][0]
    gf = client.get(f'/genomic-files/{gf.kf_id}').json['results']
    # s3://mybucket/key resolves to <GEN3_URL>/data/<did>; the https url
    # is passed through as-is (no placeholders, so no f-string needed)
    assert gf['access_urls'] == [f'gen3/data/{gf["latest_did"]}',
                                 'https://gen3.something.com/did']


def _new_genomic_file(client, include_seq_exp=True):
""" Creates a genomic file """
body = {
Expand Down Expand Up @@ -494,7 +506,7 @@ def _create_all_entities():
participant=p)
gf = GenomicFile(
external_id='study{}-gf{}'.format(j, i),
urls=['s3://mybucket/key'],
urls=['s3://mybucket/key', 'https://gen3.something.com/did'],
hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
study_gfs.append(gf)
b.genomic_files.append(gf)
Expand Down

0 comments on commit 9be2d69

Please sign in to comment.