Object.get_or_create_by_full_path() #331

Merged
merged 8 commits into from
Apr 24, 2020
7 changes: 3 additions & 4 deletions examples/import_data.py
@@ -11,7 +11,7 @@
dataset_name = 'SampleDataset'

# Create a dataset
-dataset = solvebio.Dataset.get_or_create_by_full_path(
+dataset = solvebio.Object.get_or_create_by_full_path(
'{0}:/{1}/{2}'.format(vault.name, path, dataset_name),
)

@@ -27,7 +27,7 @@

# Prints updates as the data is processed
# and indexed into SolveBio
-imp.follow()
+dataset.activity(follow=True)

#
# You now have data!
@@ -49,5 +49,4 @@
dataset_id=dataset.id,
data_records=new_records
)

-imp.follow()
+dataset.activity(follow=True)
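Reviewer note: for anyone updating their own scripts against this change, here is a minimal sketch of the new example flow. The vault name, folder, and records below are placeholders, and an API key is assumed to be configured already.

import solvebio

solvebio.login()  # assumes credentials are already set up

# Hypothetical full path: '<vault>:/<folder>/<dataset name>'
full_path = 'my-vault:/SampleImport/SampleDataset'
dataset = solvebio.Object.get_or_create_by_full_path(full_path)

# Import a couple of records, then follow the dataset's activity
# until the import has been processed and indexed.
imp = solvebio.DatasetImport.create(
    dataset_id=dataset.id,
    data_records=[{'gene': 'BRCA2'}, {'gene': 'CFTR'}],
)
dataset.activity(follow=True)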
25 changes: 14 additions & 11 deletions solvebio/cli/data.py
@@ -13,9 +13,12 @@

import solvebio

-from solvebio import Task
from solvebio import Vault
from solvebio import Object
+from solvebio import Task
+from solvebio import Dataset
+from solvebio import DatasetImport
+from solvebio import DatasetTemplate
from solvebio.utils.files import check_gzip_path
from solvebio.errors import SolveError
from solvebio.errors import NotFoundError
@@ -159,11 +162,11 @@ def _create_template_from_file(template_file, dry_run=False):
sys.exit(1)

if dry_run:
-template = solvebio.DatasetTemplate(**template_json)
+template = DatasetTemplate(**template_json)
print("A new dataset template will be created from: {0}"
.format(template_file))
else:
-template = solvebio.DatasetTemplate.create(**template_json)
+template = DatasetTemplate.create(**template_json)
print("A new dataset template was created with id: {0}"
.format(template.id))

@@ -191,7 +194,7 @@ def create_dataset(args, template=None):

try:
# Fail if a dataset already exists.
-solvebio.Dataset.get_by_full_path(full_path)
+Object.get_by_full_path(full_path, assert_type='dataset')
print('A dataset already exists at path: {0}'.format(full_path))
sys.exit(1)
except NotFoundError:
@@ -204,8 +207,8 @@
pass
elif args.template_id:
try:
-template = solvebio.DatasetTemplate.retrieve(args.template_id)
-except solvebio.SolveError as e:
+template = DatasetTemplate.retrieve(args.template_id)
+except SolveError as e:
if e.status_code != 404:
raise e
print("No template with ID {0} found!".format(args.template_id))
@@ -258,7 +261,7 @@ def create_dataset(args, template=None):
print("Metadata: {}".format(metadata))
return

-return solvebio.Dataset.get_or_create_by_full_path(
+return Dataset.get_or_create_by_full_path(
full_path,
capacity=args.capacity,
fields=fields,
@@ -396,8 +399,8 @@ def import_file(args):

if args.template_id:
try:
-template = solvebio.DatasetTemplate.retrieve(args.template_id)
-except solvebio.SolveError as e:
+template = DatasetTemplate.retrieve(args.template_id)
+except SolveError as e:
if e.status_code != 404:
raise e
print("No template with ID {0} found!".format(args.template_id))
@@ -412,7 +415,7 @@ def import_file(args):
dataset = create_dataset(args, template=template)
else:
try:
-dataset = solvebio.Dataset.get_by_full_path(full_path)
+dataset = Object.get_by_full_path(full_path, assert_type='dataset')
except solvebio.errors.NotFoundError:
print("Dataset not found: {0}".format(full_path))
print("Tip: use the --create-dataset flag "
@@ -444,7 +447,7 @@ def import_file(args):
kwargs.update(template.import_params)

# Create the import
-import_ = solvebio.DatasetImport.create(
+import_ = DatasetImport.create(
dataset_id=dataset.id,
commit_mode=args.commit_mode,
**kwargs
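Reviewer note: the CLI now resolves datasets through Object.get_by_full_path with assert_type='dataset' rather than Dataset.get_by_full_path. A small sketch of that lookup pattern; full_path below is a placeholder for a real vault path.

from solvebio import Object
from solvebio.errors import NotFoundError

full_path = 'my-vault:/imports/my-dataset'  # hypothetical

try:
    # assert_type='dataset' is meant to reject an object at this path
    # that is not a dataset (e.g. a folder or an uploaded file).
    dataset = Object.get_by_full_path(full_path, assert_type='dataset')
except NotFoundError:
    # Mirrors import_file(): tell the user and suggest --create-dataset.
    print('Dataset not found: {0}'.format(full_path))
    raise SystemExit(1)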
18 changes: 18 additions & 0 deletions solvebio/errors.py
@@ -59,5 +59,23 @@ def __init__(self, message=None, response=None):
if self.json_body:
self.message += ' %s' % self.json_body

+# TODO
+# NOTE: there are other keys that can appear in some errors besides
+# 'detail' and 'non_field_errors'. For instance, 'manifest' is a key
+# when uploading with a manifest that has an invalid file format, and
+# it carries a very useful error message that currently gets lost.
+# Is there any harm in just handling every error key here
+# (both keys with list values and those without)?
+# Possible implementation:
+#
+# for k, v in list(self.json_body.items()):
+#     if isinstance(v, list):
+#         self.message += ' %s Errors: %s' % (k, ', '.join(v))
+#     else:
+#         self.message += ' %s Errors: %s' % (k, v)
+#     del self.json_body[k]

def __str__(self):
return self.message
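Reviewer note on the TODO above: a self-contained sketch of what generic error-key handling could look like. The helper name and the sample payload are hypothetical and not part of this PR.

def flatten_error_body(json_body):
    """Fold every key of an API error body into a single message string."""
    parts = []
    for key, value in json_body.items():
        if isinstance(value, list):
            parts.append('%s Errors: %s' % (key, ', '.join(str(v) for v in value)))
        else:
            parts.append('%s Errors: %s' % (key, value))
    return ' '.join(parts)

# Example: flatten_error_body({'manifest': ['Invalid file format.']})
# returns 'manifest Errors: Invalid file format.'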
104 changes: 12 additions & 92 deletions solvebio/resource/dataset.py
@@ -3,7 +3,6 @@

from ..client import client
from ..query import Query
-from ..errors import NotFoundError

from .solveobject import convert_to_solve_object
from .apiresource import CreateableAPIResource
@@ -67,86 +66,11 @@ def get_by_full_path(cls, full_path, **kwargs):

@classmethod
def get_or_create_by_full_path(cls, full_path, **kwargs):
-from solvebio import Vault
from solvebio import Object
-
-_client = kwargs.pop('client', None) or cls._client or client
-create_vault = kwargs.pop('create_vault', False)
-create_folders = kwargs.pop('create_folders', True)
-
-try:
-return Dataset.get_by_full_path(full_path, assert_type='dataset',
-client=_client)
-except NotFoundError:
-pass
-
-# Dataset not found, create it step-by-step
-full_path, parts = Object.validate_full_path(full_path, client=_client)
-
-if create_vault:
-vault = Vault.get_or_create_by_full_path(
-'{0}:{1}'.format(parts['domain'], parts['vault']),
-client=_client)
-else:
-vaults = Vault.all(account_domain=parts['domain'],
-name=parts['vault'],
-client=_client)
-if len(vaults.solve_objects()) == 0:
-raise Exception(
-'Vault does not exist with name {0}:{1}'.format(
-parts['domain'], parts['vault'])
-)
-vault = vaults.solve_objects()[0]
-
-# Create the folders to hold the dataset if they do not already exist.
-object_path = parts['path']
-curr_path = os.path.dirname(object_path)
-folders_to_create = []
-new_folders = []
-id_map = {'/': None}
-
-while curr_path != '/':
-try:
-obj = Object.get_by_path(curr_path,
-vault_id=vault.id,
-assert_type='folder',
-client=_client)
-id_map[curr_path] = obj.id
-break
-except NotFoundError:
-if not create_folders:
-raise Exception('Folder {} does not exist. Pass '
-'create_folders=True to auto-create '
-'missing folders')
-
-folders_to_create.append(curr_path)
-curr_path = '/'.join(curr_path.split('/')[:-1])
-if curr_path == '':
-break
-
-for folder in reversed(folders_to_create):
-new_folder = Object.create(
-object_type='folder',
-vault_id=vault.id,
-filename=os.path.basename(folder),
-parent_object_id=id_map[os.path.dirname(folder)],
-client=_client
-)
-new_folders.append(new_folder)
-id_map[folder] = new_folder.id
-
-if os.path.dirname(object_path) == '/':
-parent_folder_id = None
-elif new_folders:
-parent_folder_id = new_folders[-1].id
-else:
-parent_folder_id = id_map[os.path.dirname(object_path)]
-
-return Dataset.create(name=os.path.basename(object_path),
-vault_id=vault.id,
-vault_parent_object_id=parent_folder_id,
-client=_client,
-**kwargs)
+# Assert this is a dataset
+kwargs['assert_type'] = 'dataset'
+kwargs['object_type'] = 'dataset'
+return Object.get_or_create_by_full_path(full_path, **kwargs)

def saved_queries(self, **params):
from solvebio import SavedQuery
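Reviewer note: Dataset.get_or_create_by_full_path is now a thin wrapper that pins the object type and delegates to Object. A sketch of how the two entry points relate after this change, using a placeholder path and assuming credentials are already configured (e.g. via solvebio.login()).

from solvebio import Dataset, Object

full_path = 'my-vault:/folder/my-dataset'  # hypothetical

# Both calls should resolve (or create) the same dataset: the Dataset
# classmethod forces assert_type/object_type to 'dataset' and hands
# everything else off to Object.get_or_create_by_full_path.
ds_a = Dataset.get_or_create_by_full_path(full_path)
ds_b = Object.get_or_create_by_full_path(full_path, object_type='dataset')
assert ds_a.id == ds_b.id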
@@ -166,9 +90,8 @@ def saved_queries(self, **params):

def fields(self, name=None, **params):
if 'fields_url' not in self:
-raise Exception(
-'Please use Dataset.retrieve({ID}) before looking '
-'up fields')
+# Dataset object may not have been retrieved. Grab it.
+self.refresh()

if name:
params.update({
@@ -190,18 +113,16 @@ def template(self, **params):

def template(self, **params):
if 'template_url' not in self:
-raise Exception(
-'Please use Dataset.retrieve({ID}) before retrieving '
-'a template')
+# Dataset object may not have been retrieved. Grab it.
+self.refresh()

response = self._client.get(self.template_url, params)
return convert_to_solve_object(response, client=self._client)

def commits(self, **params):
if 'commits_url' not in self:
-raise Exception(
-'Please use Dataset.retrieve({ID}) before looking '
-'up commits')
+# Dataset object may not have been retrieved. Grab it.
+self.refresh()

response = self._client.get(self.commits_url, params)
results = convert_to_solve_object(response, client=self._client)
@@ -214,9 +135,8 @@ def imports(self, **params):

def imports(self, **params):
if 'imports_url' not in self:
-raise Exception(
-'Please use Dataset.retrieve({ID}) before looking '
-'up imports')
+# Dataset object may not have been retrieved. Grab it.
+self.refresh()

response = self._client.get(self.imports_url, params)
results = convert_to_solve_object(response, client=self._client)
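Reviewer note: fields(), template(), commits() and imports() now call self.refresh() when the corresponding *_url attribute is missing, instead of asking the caller to run Dataset.retrieve() first. A sketch of the resulting usage, with a placeholder path:

from solvebio import Dataset

# A dataset looked up by path may come back without its detail URLs
# (fields_url, commits_url, ...); the helpers above now refresh the
# object themselves the first time one of those URLs is needed.
dataset = Dataset.get_by_full_path('my-vault:/folder/my-dataset')
for field in dataset.fields():
    print(field)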