Exercise 1 - Answers for BioBlend
=================================

**Goal**: Upload a file to a new history, import a workflow and run it on the uploaded dataset.

1) Create a `GalaxyInstance` object.

In [1]:
import os
import time
from pprint import pprint

import bioblend.galaxy

# Base URL of the Galaxy server this notebook talks to.
server = 'https://usegalaxy.eu/'
# Read the API key from the GALAXY_API_KEY environment variable so the secret
# never has to be saved inside the notebook. When the variable is unset this
# falls back to the original empty placeholder, which can be filled in by hand.
api_key = os.environ.get('GALAXY_API_KEY', '')
# Entry point for all the API calls made in the following cells.
gi = bioblend.galaxy.GalaxyInstance(url=server, key=api_key)

2) Create a new Galaxy history.

In [2]:
# Create an empty history on the server. The returned dict carries the 'id'
# that the later cells pass around as the history id.
history_name = 'New history'
new_hist = gi.histories.create_history(name=history_name)
pprint(new_hist)

{'annotation': None,
 'contents_url': '/api/histories/e271a7b8bccea52a/contents',
 'create_time': '2020-07-17T04:25:03.259316',
 'deleted': False,
 'empty': True,
 'genome_build': None,
 'id': 'e271a7b8bccea52a',
 'importable': False,
 'model_class': 'History',
 'name': 'New history',
 'published': False,
 'purged': False,
 'size': 0,
 'slug': None,
 'state': 'new',
 'state_details': {'discarded': 0,
                   'empty': 0,
                   'error': 0,
                   'failed_metadata': 0,
                   'new': 0,
                   'ok': 0,
                   'paused': 0,
                   'queued': 0,
                   'running': 0,
                   'setting_metadata': 0,
                   'upload': 0},
 'state_ids': {'discarded': [],
               'empty': [],
               'error': [],
               'failed_metadata': [],
               'new': [],
               'ok': [],
               'paused': [],
               'queued': [],
               'running': [],

3) **Upload** the local file "test-data/1.txt" to a new dataset in the created history using `tools.upload_file()`.

In [3]:
# Upload the local file into the history created above. The response lists
# the upload job under 'jobs' and the new dataset under 'outputs'.
upload_path = "test-data/1.txt"
ret = gi.tools.upload_file(upload_path, new_hist['id'])
pprint(ret)

{'implicit_collections': [],
 'jobs': [{'create_time': '2020-07-17T04:25:05.509737',
           'exit_code': None,
           'galaxy_version': '20.05',
           'history_id': 'e271a7b8bccea52a',
           'id': 'bbd44e69cb8906b5a6fa37dca1500cac',
           'model_class': 'Job',
           'state': 'new',
           'tool_id': 'upload1',
           'update_time': '2020-07-17T04:25:05.575750'}],
 'output_collections': [],
 'outputs': [{'create_time': '2020-07-17T04:25:05.329030',
              'data_type': 'galaxy.datatypes.data.Data',
              'deleted': False,
              'file_ext': 'auto',
              'file_size': 0,
              'genome_build': '?',
              'hda_ldda': 'hda',
              'hid': 1,
              'history_content_type': 'dataset',
              'history_id': 'e271a7b8bccea52a',
              'id': 'bbd44e69cb8906b5d5e5972574c432d6',
              'metadata_dbkey': '?',
              'misc_blurb': None,
              'misc_info': None,
          

4) Find the new uploaded dataset, either from the dict returned by `tools.upload_file()` or from the history contents.

In [4]:
# The upload response lists the datasets it created under 'outputs'; take the
# first entry as the uploaded dataset.
# NOTE: `ret` is reassigned later by the workflow invocation, so this cell
# only works while `ret` still holds the upload response.
hda = ret['outputs'][0]
pprint(hda)

{'create_time': '2020-07-17T04:25:05.329030',
 'data_type': 'galaxy.datatypes.data.Data',
 'deleted': False,
 'file_ext': 'auto',
 'file_size': 0,
 'genome_build': '?',
 'hda_ldda': 'hda',
 'hid': 1,
 'history_content_type': 'dataset',
 'history_id': 'e271a7b8bccea52a',
 'id': 'bbd44e69cb8906b5d5e5972574c432d6',
 'metadata_dbkey': '?',
 'misc_blurb': None,
 'misc_info': None,
 'model_class': 'HistoryDatasetAssociation',
 'name': '1.txt',
 'output_name': 'output0',
 'peek': None,
 'purged': False,
 'state': 'queued',
 'tags': [],
 'update_time': '2020-07-17T04:25:05.467633',
 'uuid': 'f0d1b2c7-711f-4c36-8313-2a48e3d2e813',
 'validated_state': 'unknown',
 'validated_state_message': None,
 'visible': True}


5) **Import a workflow** from the local file "test-data/convert_to_tab.ga" using `workflows.import_workflow_from_local_path()`.

In [5]:
# Import the workflow definition stored in a local .ga file. The response is
# a short summary of the stored workflow, including its 'id'.
workflow_path = 'test-data/convert_to_tab.ga'
wf = gi.workflows.import_workflow_from_local_path(workflow_path)
pprint(wf)

{'annotations': [],
 'create_time': '2020-07-17T04:25:31.761870',
 'deleted': False,
 'id': '3c211ce0994f752f',
 'latest_workflow_uuid': '814e6545-db9e-4dc0-b9c4-1c0f9ed68910',
 'model_class': 'StoredWorkflow',
 'name': 'Convert to tab',
 'number_of_steps': 2,
 'owner': 'sloc',
 'published': False,
 'tags': [],
 'update_time': '2020-07-17T04:25:31.761889',
 'url': '/api/workflows/3c211ce0994f752f'}


6) View the details of the imported workflow using `workflows.show_workflow()`.

In [6]:
# Replace the import summary held in `wf` with the detailed description of the
# same workflow, which adds its 'inputs' and 'steps'. The 'id' key is kept,
# so later cells can still use wf['id'].
workflow_id = wf['id']
wf = gi.workflows.show_workflow(workflow_id)
pprint(wf)

{'annotation': None,
 'create_time': '2020-07-17T04:25:31.761870',
 'deleted': False,
 'id': '3c211ce0994f752f',
 'inputs': {'0': {'label': 'Input Dataset',
                  'uuid': '671bca4e-0b76-4a6f-a0a2-70219df56576',
                  'value': ''}},
 'latest_workflow_uuid': '814e6545-db9e-4dc0-b9c4-1c0f9ed68910',
 'model_class': 'StoredWorkflow',
 'name': 'Convert to tab',
 'owner': 'sloc',
 'published': False,
 'steps': {'0': {'annotation': None,
                 'id': 0,
                 'input_steps': {},
                 'tool_id': None,
                 'tool_inputs': {'optional': False},
                 'tool_version': None,
                 'type': 'data_input'},
           '1': {'annotation': None,
                 'id': 1,
                 'input_steps': {'input': {'source_step': 0,
                                           'step_output': 'output'}},
                 'tool_id': 'Convert characters1',
                 'tool_inputs': {'__page__': 0,
                     

7) **Run** the imported workflow on the uploaded dataset **inside the same history** using `workflows.invoke_workflow()`.

In [7]:
# Feed the uploaded dataset to workflow input step 0 (labelled
# "Input Dataset" in the workflow details) and write the results into the
# same history.
# NOTE: this rebinds `ret`, which until now held the upload response.
dataset_ref = {'id': hda['id'], 'src': 'hda'}
inputs = {0: dataset_ref}
ret = gi.workflows.invoke_workflow(
    wf['id'],
    inputs=inputs,
    history_id=new_hist['id'],
)
pprint(ret)

{'create_time': '2020-07-17T04:25:39.737083',
 'history_id': 'e271a7b8bccea52a',
 'id': '485d14fb52e0061d',
 'model_class': 'WorkflowInvocation',
 'state': 'new',
 'update_time': '2020-07-17T04:25:39.737098',
 'uuid': '91e340ce-c7e5-11ea-b4a9-005056ba55fb',
 'workflow_id': '433b301945a92553'}


8) Get the id of the output dataset from the workflow. You can get this from the history, or by getting the job id from `gi.workflows.show_invocation()` and then the output dataset id from the job using `gi.jobs.show_job()`. You'll need to wait until the workflow jobs have been scheduled.

In [8]:
# A fresh invocation starts in state 'new', and its steps only get a job id
# once Galaxy has scheduled them. Poll until scheduling has finished instead
# of reading the steps straight away, so the notebook survives "Run All".
invocation = gi.workflows.show_invocation(wf['id'], ret['id'])
for _ in range(150):  # give up after roughly 5 minutes (150 polls x 2 s)
    if invocation['state'] not in ('new', 'ready'):
        break
    time.sleep(2)
    invocation = gi.workflows.show_invocation(wf['id'], ret['id'])
if invocation['state'] != 'scheduled':
    raise RuntimeError(
        "Workflow invocation %s was not scheduled (state: %s)"
        % (ret['id'], invocation['state'])
    )
# Step 0 is the data input; step 1 is the tool step whose job produces the
# converted dataset.
job_id = invocation['steps'][1]['job_id']
pprint(gi.jobs.show_job(job_id))

{'command_version': '',
 'create_time': '2020-07-17T04:25:53.562134',
 'exit_code': 0,
 'galaxy_version': '20.05',
 'history_id': 'e271a7b8bccea52a',
 'id': 'bbd44e69cb8906b5e4d3e25446af7c40',
 'inputs': {'input': {'id': 'bbd44e69cb8906b5d5e5972574c432d6',
                      'src': 'hda',
                      'uuid': 'f0d1b2c7-711f-4c36-8313-2a48e3d2e813'}},
 'model_class': 'Job',
 'outputs': {'out_file1': {'id': 'bbd44e69cb8906b5803d34e5d8b8c823',
                           'src': 'hda',
                           'uuid': '83e8f7b7-5cf8-410d-bee4-55ec067fa24e'}},
 'params': {'__input_ext': '"txt"',
            '__workflow_invocation_uuid__': '"91e340cec7e511eab4a9005056ba55fb"',
            'chromInfo': '"/cvmfs/data.galaxyproject.org/managed/len/ucsc/?.len"',
            'condense': '"true"',
            'convert_from': '"s"',
            'dbkey': '"?"',
            'strip': '"true"'},
 'state': 'ok',
 'tool_id': 'Convert characters1',
 'update_time': '2020-07-17T04:28:00.095472'

In [9]:
# Look the job up again and pull out the id of its 'out_file1' output.
# Note that `hda2` holds only the dataset id (a string), not a full dataset
# dict like `hda`.
job_details = gi.jobs.show_job(job_id)
hda2 = job_details['outputs']['out_file1']['id']
hda2

'bbd44e69cb8906b5803d34e5d8b8c823'

9) Create a `bioblend.galaxy.tools.ToolClient` object.

In [10]:
# Build a stand-alone client for the tools API on top of the existing
# GalaxyInstance.
# NOTE(review): `gi.tools`, used earlier for the upload, is presumably an
# equivalent client - confirm against the BioBlend documentation.
tc = bioblend.galaxy.tools.ToolClient(gi)

10) Find the basic **Sort** tool, for sorting tabular data, by calling `get_tools()` and using the `name` parameter. Look for the correct tool in the list and find the tool id. 

In [11]:
# List the tools named 'Sort'. Several tools share that name (for example a
# tabular sorter and a BAM sorter), so use each entry's 'description' to pick
# the right one and note its 'id'.
pprint(tc.get_tools(name='Sort'))

[{'description': 'data in ascending or descending order',
  'edam_operations': [],
  'edam_topics': [],
  'form_style': 'regular',
  'hidden': '',
  'id': 'sort1',
  'is_workflow_compatible': True,
  'labels': [],
  'link': '/tool_runner?tool_id=sort1',
  'min_width': -1,
  'model_class': 'Tool',
  'name': 'Sort',
  'panel_section_id': 'filter_and_sort',
  'panel_section_name': 'Filter and Sort',
  'target': 'galaxy_main',
  'version': '1.1.0',
  'xrefs': []},
 {'description': 'BAM dataset',
  'edam_operations': [],
  'edam_topics': [],
  'form_style': 'regular',
  'hidden': '',
  'id': 'toolshed.g2.bx.psu.edu/repos/devteam/samtools_sort/samtools_sort/2.0',
  'is_workflow_compatible': True,
  'labels': [],
  'link': '/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fdevteam%2Fsamtools_sort%2Fsamtools_sort%2F2.0',
  'min_width': -1,
  'model_class': 'Tool',
  'name': 'Sort',
  'panel_section_id': 'sam_bam',
  'panel_section_name': 'SAM/BAM',
  'target': 'galaxy_main',
  'tool_shed_r

In [12]:
# Id of the basic Sort tool ('data in ascending or descending order'), taken
# from the tool listing above.
sort_tool = 'sort1'

11) Check the tool inputs by calling `show_tool()` with `io_details=True`. It might be useful to open Galaxy and compare the inputs you see here to the tool inputs in the Galaxy UI.

In [13]:
# Fetch the description of the Sort tool. With io_details=True the result
# includes an 'inputs' list; each entry's 'name' is the key to use when
# building the inputs for run_tool().
tool_details = tc.show_tool(sort_tool, io_details=True)
pprint(tool_details)

{'description': 'data in ascending or descending order',
 'edam_operations': [],
 'edam_topics': [],
 'form_style': 'regular',
 'hidden': '',
 'id': 'sort1',
 'inputs': [{'argument': None,
             'edam': {'edam_data': ['data_0006'],
                      'edam_formats': ['format_3475']},
             'extensions': ['tabular'],
             'help': '',
             'hidden': False,
             'is_dynamic': False,
             'label': 'Sort Dataset',
             'model_class': 'DataToolParameter',
             'multiple': False,
             'name': 'input',
             'optional': False,
             'options': {'hda': [], 'hdca': []},
             'refresh_on_change': True,
             'type': 'data',
             'value': None},
            {'argument': None,
             'data_ref': 'input',
             'display': None,
             'help': '',
             'hidden': False,
             'is_dynamic': True,
             'label': 'on column',
             'model_class': 'C

11) Run the sort tool to sort the tabular dataset so that the first column is in descending numerical order, by calling `run_tool()`. This requires the history id, the tool id, and a `tool_inputs` parameter which should be a dict mapping tool input names to tool input values. 

In [14]:
# Tool inputs keyed by input name: 'input' references the converted dataset
# by id, 'column' selects the first column and 'order' asks for descending
# order.
# NOTE(review): the numerical sort flavour is not set here, so it presumably
# comes from the tool's default - confirm in the show_tool() output.
sort_inputs = {
    'input': {'id': hda2, 'src': 'hda'},
    'column': "c1",
    'order': 'DESC',
}
tc.run_tool(new_hist['id'], sort_tool, sort_inputs)

{'outputs': [{'id': 'bbd44e69cb8906b54e297c09ca4f749e',
   'hda_ldda': 'hda',
   'uuid': 'c10ba6de-5b5b-40c9-bc4b-c4758b71dc5c',
   'hid': 3,
   'file_ext': 'tabular',
   'peek': None,
   'model_class': 'HistoryDatasetAssociation',
   'name': 'Sort on data 2',
   'deleted': False,
   'purged': False,
   'visible': True,
   'state': 'new',
   'history_content_type': 'dataset',
   'file_size': 0,
   'create_time': '2020-07-17T04:30:58.621547',
   'update_time': '2020-07-17T04:30:58.689511',
   'data_type': 'galaxy.datatypes.tabular.Tabular',
   'genome_build': '?',
   'validated_state': 'unknown',
   'validated_state_message': None,
   'misc_info': None,
   'misc_blurb': 'queued',
   'tags': [],
   'history_id': 'e271a7b8bccea52a',
   'metadata_dbkey': '?',
   'metadata_data_lines': 4,
   'metadata_comment_lines': None,
   'metadata_columns': 2,
   'metadata_column_types': ['int', 'str'],
   'metadata_column_names': None,
   'metadata_delimiter': '\t',
   'output_name': 'out_file1'}],
 '

12) View the results on the Galaxy server with your web browser.