In [1]:
%load_ext autoreload
%autoreload

In [2]:
import time
import re
import os
import requests
import subprocess
import tarfile
import json
import pandas as pd
from datetime import datetime
import shutil
from tqdm._tqdm_notebook import tqdm_notebook as tqdm 

In [3]:
import sys
sys.path.append('../')

In [4]:
from gdc.download import gdc_tool_download, api_download_iterative, api_download_batch

Custom libraries

In [5]:
GDC_CLIENT_TOOL = '/Users/portizdegalisteo/Google\ Drive/Master/TFM/Pablo\ Ortiz/gdc-client'
DATA_ENDPOINT = "https://api.gdc.cancer.gov/data/"
DATA_DIR = 'data'

RESULTS_FILE = 'test_results.csv'
RESULTS_FIELDS = ['dataset', 'n_files', 'avg_size', 'total_size', 'tool', 
                  'stream', 'iterative', 'time', 'speed', 'multiprocess', 'status']

DEFAULT_CHUNK_SIZE = 1

TMP_DIR = os.path.abspath(os.path.join(DATA_DIR, 'tmp'))

Tests

In [6]:
tests = [
         {'dataset': 'dataset_4.csv', 'tool': 'gdc'},
         {'dataset': 'dataset_4.csv', 'tool': 'api', 'stream': True, 'iterative': True},
         {'dataset': 'dataset_4.csv', 'tool': 'api', 'stream': False, 'iterative': True},
         
         {'dataset': 'dataset_5.csv', 'tool': 'gdc'},
         {'dataset': 'dataset_5.csv', 'tool': 'api', 'stream': True, 'iterative': False},
         {'dataset': 'dataset_5.csv', 'tool': 'api', 'stream': False, 'iterative': False},
    
         {'dataset': 'dataset_6.csv', 'tool': 'api', 'stream': True, 'iterative': True},
         {'dataset': 'dataset_6.csv', 'tool': 'api', 'stream': True, 'iterative': True, 'multiprocess': 2},
         {'dataset': 'dataset_6.csv', 'tool': 'api', 'stream': True, 'iterative': True, 'multiprocess': 4},
         {'dataset': 'dataset_6.csv', 'tool': 'api', 'stream': True, 'iterative': True, 'multiprocess': 8},
         {'dataset': 'dataset_6.csv', 'tool': 'api', 'stream': True, 'iterative': True, 'multiprocess': 16},
    
    
         {'dataset': 'dataset_1.csv', 'tool': 'gdc'},
         {'dataset': 'dataset_1.csv', 'tool': 'api', 'stream': True, 'iterative': False},
         {'dataset': 'dataset_1.csv', 'tool': 'api', 'stream': True, 'iterative': True},
         {'dataset': 'dataset_2.csv', 'tool': 'api', 'stream': False, 'iterative': False},
         {'dataset': 'dataset_2.csv', 'tool': 'api', 'stream': False, 'iterative': True},

         
         {'dataset': 'dataset_2.csv', 'tool': 'gdc'},
         {'dataset': 'dataset_2.csv', 'tool': 'api', 'stream': True, 'iterative': False},
         {'dataset': 'dataset_2.csv', 'tool': 'api', 'stream': True, 'iterative': True},
         {'dataset': 'dataset_2.csv', 'tool': 'api', 'stream': False, 'iterative': False},
         {'dataset': 'dataset_2.csv', 'tool': 'api', 'stream': False, 'iterative': True},

         
         {'dataset': 'dataset_3.csv', 'tool': 'gdc'},
         {'dataset': 'dataset_3.csv', 'tool': 'api', 'stream': True, 'iterative': False},
         {'dataset': 'dataset_3.csv', 'tool': 'api', 'stream': True, 'iterative': True},
         {'dataset': 'dataset_3.csv', 'tool': 'api', 'stream': False, 'iterative': False},
         {'dataset': 'dataset_3.csv', 'tool': 'api', 'stream': False, 'iterative': True}
        ]

Auxiliary functions

In [107]:
def clear_dir(directory):
    
    if os.path.exists(TMP_DIR):
        shutil.rmtree(TMP_DIR)

    os.mkdir(TMP_DIR)
    
def data_summary(df, printed=True):
    
    n_files = len(df)
    avg_size = round(df['file_size'].mean(), 2)
    total_size = round(df['file_size'].sum(), 2)
    
    if printed:
        print('Number of files: {0:>8}'.format(n_files))
        print('Avg size (MB):{0:>11.2f}'.format(avg_size))
        print('Total size (MB):{0:>9.2f}'.format(total_size))
    
    return {'n_files': n_files, 'avg_size': avg_size, 'total_size': total_size}

def run_test(test):
    
    print('Running test...\n')
    for x in test:
        print ('\t', x,': ', test[x], sep='')
    print()
    
    df = pd.read_csv(os.path.join(DATA_DIR, test['dataset']), sep='|')
    
    results = data_summary(df, printed=True)
    results = {**test, **results}
    print()
    
    time_start = time.time()
    
    chunk_size = test['chunk_size'] if 'chunk_size' in test else DEFAULT_CHUNK_SIZE        

    try:

        if test['tool'] == 'gdc':
            gdc_tool_download(df, TMP_DIR, GDC_CLIENT_TOOL)
        elif (test['tool'] == 'api') & (test['iterative'] is True):
            multiprocess = False if 'multiprocess' not in test else test['multiprocess']
            api_download_iterative(df, TMP_DIR, stream=test['stream'], chunk_size=chunk_size, 
                                   multiprocess=multiprocess)        
        elif (test['tool'] == 'api') & (test['iterative'] is False):
            api_download_batch(df, TMP_DIR, stream=test['stream'], chunk_size=chunk_size)
        else:
            raise ValueError('Invalid test parameters combination')

    except Exception as e: 
        print('ERROR!')
        print(type(e).__name__, e.args) 
        results['status'] = 'ERROR'

    else:

        print('OK\n')

        time_elapsed = round(time.time() - time_start, 2)
        speed = round(results['total_size'] / time_elapsed, 2)

        results['time'] = time_elapsed
        results['speed'] = speed
        results['status'] = 'OK'

        print('Time: {}s'.format(time_elapsed))
        print('Speed: {}MB/s'.format(round(results['total_size'] / time_elapsed, 2)))
    
    print('-' * 80 + '\n')
        
    return results

Clear tmp dir

In [108]:
clear_dir(TMP_DIR)

## Datasets Info

In [109]:
datasets = sorted(list(set([x['dataset'] for x in tests])))
datasets = {x: pd.read_csv(os.path.join(DATA_DIR, x), sep='|') for x in datasets}

In [110]:
for name,df in datasets.items():
    print('Dataset:', name)
    data_summary(df)
    print()

Dataset: dataset_1.csv
Number of files:       10
Avg size (MB):       0.44
Total size (MB):     4.36

Dataset: dataset_2.csv
Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18

Dataset: dataset_3.csv
Number of files:        4
Avg size (MB):     199.11
Total size (MB):   796.45

Dataset: dataset_4.csv
Number of files:        1
Avg size (MB):    1503.25
Total size (MB):  1503.25

Dataset: dataset_5.csv
Number of files:        3
Avg size (MB):    3280.28
Total size (MB):  9840.83

Dataset: dataset_6.csv
Number of files:       29
Avg size (MB):      41.52
Total size (MB):  1204.07



## Run Tests

In [111]:
with open(RESULTS_FILE, 'w') as f:
    f.write('|'.join(RESULTS_FIELDS) + '\n')

In [112]:
for test in tests:
    
    results = run_test(test)

    clear_dir(TMP_DIR)

    with open(RESULTS_FILE, 'a') as f:
        f.write('|'.join([str(results.get(x, '')) for x in RESULTS_FIELDS]) + '\n')

Running test...

	dataset: dataset_4.csv
	tool: gdc

Number of files:        1
Avg size (MB):    1503.25
Total size (MB):  1503.25

100% [#############################################] Time: 0:04:49   5.19 MB/s 
[32mSuccessfully downloaded[0m: 1

OK

Time: 297.32s
Speed: 5.06MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_4.csv
	tool: api
	stream: True
	iterative: True

Number of files:        1
Avg size (MB):    1503.25
Total size (MB):  1503.25



HBox(children=(IntProgress(value=0, description='Files', max=1, style=ProgressStyle(description_width='initial…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7889-01Z-00-DX1.6D4EE9ED-5AE0-4AC2-B75C-2B26F562D346.…

OK

Time: 479.87s
Speed: 3.13MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_4.csv
	tool: api
	stream: False
	iterative: True

Number of files:        1
Avg size (MB):    1503.25
Total size (MB):  1503.25



HBox(children=(IntProgress(value=0, description='Files', max=1, style=ProgressStyle(description_width='initial…

OK

Time: 496.55s
Speed: 3.03MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_5.csv
	tool: gdc

Number of files:        3
Avg size (MB):    3280.28
Total size (MB):  9840.83

100% [#############################################] Time: 0:11:27   4.92 MB/s 
100% [#############################################] Time: 0:12:26   4.18 MB/s 
100% [#############################################] Time: 0:13:11   4.21 MB/s 
[32mSuccessfully downloaded[0m: 3

OK

Time: 2259.63s
Speed: 4.36MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_5.csv
	tool: api
	stream: True
	iterative: False

Number of files:        3
Avg size (MB):    3280.28
Total size (MB):  9840.83

 

HBox(children=(IntProgress(value=0, description='gdc_download_20190424_233542.354135.tar.gz', max=7751546, sty…

OK

Time: 3071.39s
Speed: 3.2MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_5.csv
	tool: api
	stream: False
	iterative: False

Number of files:        3
Avg size (MB):    3280.28
Total size (MB):  9840.83

ERROR!
OSError (22, 'Invalid argument')
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_6.csv
	tool: api
	stream: True
	iterative: True

Number of files:       29
Avg size (MB):      41.52
Total size (MB):  1204.07



HBox(children=(IntProgress(value=0, description='Files', max=29, style=ProgressStyle(description_width='initia…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-8126-01A-01-TS1.098769c8-f708-45d8-af6e-96747786553a.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7651-01A-01-TS1.93957f30-932b-4a10-a325-d5eaeaf990a5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQA-11A-01-TSA.C5F0A098-9D08-46AA-9468-CD9D7C47C906.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A774-11A-01-TSA.79454D5B-6D4E-4E1F-BE79-C745E8FD09E4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7926-01Z-00-DX1.b3bf02d3-bad0-4451-9c39-b0593f19154c.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7647-01A-01-TS1.60574eeb-7935-4607-9ca8-5e1c21684ea5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7649-01A-01-TS1.bbd1df8b-ea7f-473d-8898-ce66f8c7a9c9.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ1-11A-01-TSA.44562F27-B936-4A9D-A176-0A6FA6E1832F.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77E-11A-01-TSA.D46122D7-776B-4346-8918-7C38CD6A77C0.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7646-01A-01-TS1.15de8678-3471-493d-b49a-1e28ab861713.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPY-11A-01-TSA.088BA594-513D-40BB-90FA-A01BDA16C584.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A5-01A-01-TS1.3BB38FAB-0506-4223-B017-10425E9329D7.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQJ-11A-01-TS1.27AB61A2-4F5B-4B8E-AC5D-AE1D0E5C7C7A.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-6880-01A-01-TS1.b92d57ab-e336-45cb-919f-6fdf5fbacb39.…

 

HBox(children=(IntProgress(value=0, description='TCGA-H6-8124-11A-01-TS1.3be454f5-83ee-4cd0-8a00-f341ce8f3ba2.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77G-11A-01-TSA.18AC9F41-1554-4C39-BDA0-2D139AE487AD.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A779-11A-01-TS1.C060211E-6AFE-4D64-AC9C-D1B3D5633CC4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7887-01A-01-TS1.8ce76657-42b6-48a8-ba12-c2b697173e10.…

 

HBox(children=(IntProgress(value=0, description='TCGA-YB-A89D-11A-01-TS1.F56832D3-37EC-49D5-8803-7132516B25EE.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPQ-11A-01-TS1.22B96FE6-C6AB-465F-8F09-83BA9F32609B.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ6-11A-01-TSA.9BFA20EC-9D05-413E-8385-405829239799.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7644-01A-01-TS1.8fcd72ca-6fb6-4c0a-9dca-9c13bc4272e8.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-7276-01A-01-TS1.010dd1db-9ead-4399-84b9-5381e189054e.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A3-11A-01-TS1.29B3EF24-5DA2-4353-848D-7E5B63A1C226.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-8001-01A-01-TS1.d3738e44-a683-4e1f-9794-b0f76ee0bd61.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7923-01A-01-TS1.e1b46c76-7d5f-434e-b613-1ca91c043f06.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-11A-01-TS1.725380e8-0c30-4507-83a5-5e726e2cc533.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-01A-01-TS1.493daee4-9604-4259-8611-120a6bc241b3.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77J-11A-01-TS1.27A65F65-2CBA-49B7-8F6F-639A0F7AC912.…

OK

Time: 555.96s
Speed: 2.17MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_6.csv
	tool: api
	stream: True
	iterative: True
	multiprocess: 2

Number of files:       29
Avg size (MB):      41.52
Total size (MB):  1204.07



HBox(children=(IntProgress(value=0, description='Files', max=29, style=ProgressStyle(description_width='initia…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7651-01A-01-TS1.93957f30-932b-4a10-a325-d5eaeaf990a5.…

HBox(children=(IntProgress(value=0, description='TCGA-IB-8126-01A-01-TS1.098769c8-f708-45d8-af6e-96747786553a.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQA-11A-01-TSA.C5F0A098-9D08-46AA-9468-CD9D7C47C906.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A774-11A-01-TSA.79454D5B-6D4E-4E1F-BE79-C745E8FD09E4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7926-01Z-00-DX1.b3bf02d3-bad0-4451-9c39-b0593f19154c.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7647-01A-01-TS1.60574eeb-7935-4607-9ca8-5e1c21684ea5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7649-01A-01-TS1.bbd1df8b-ea7f-473d-8898-ce66f8c7a9c9.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ1-11A-01-TSA.44562F27-B936-4A9D-A176-0A6FA6E1832F.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77E-11A-01-TSA.D46122D7-776B-4346-8918-7C38CD6A77C0.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7646-01A-01-TS1.15de8678-3471-493d-b49a-1e28ab861713.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPY-11A-01-TSA.088BA594-513D-40BB-90FA-A01BDA16C584.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A5-01A-01-TS1.3BB38FAB-0506-4223-B017-10425E9329D7.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQJ-11A-01-TS1.27AB61A2-4F5B-4B8E-AC5D-AE1D0E5C7C7A.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-6880-01A-01-TS1.b92d57ab-e336-45cb-919f-6fdf5fbacb39.…

 

HBox(children=(IntProgress(value=0, description='TCGA-H6-8124-11A-01-TS1.3be454f5-83ee-4cd0-8a00-f341ce8f3ba2.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77G-11A-01-TSA.18AC9F41-1554-4C39-BDA0-2D139AE487AD.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A779-11A-01-TS1.C060211E-6AFE-4D64-AC9C-D1B3D5633CC4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7887-01A-01-TS1.8ce76657-42b6-48a8-ba12-c2b697173e10.…

 

HBox(children=(IntProgress(value=0, description='TCGA-YB-A89D-11A-01-TS1.F56832D3-37EC-49D5-8803-7132516B25EE.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPQ-11A-01-TS1.22B96FE6-C6AB-465F-8F09-83BA9F32609B.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ6-11A-01-TSA.9BFA20EC-9D05-413E-8385-405829239799.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7644-01A-01-TS1.8fcd72ca-6fb6-4c0a-9dca-9c13bc4272e8.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-7276-01A-01-TS1.010dd1db-9ead-4399-84b9-5381e189054e.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A3-11A-01-TS1.29B3EF24-5DA2-4353-848D-7E5B63A1C226.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-8001-01A-01-TS1.d3738e44-a683-4e1f-9794-b0f76ee0bd61.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7923-01A-01-TS1.e1b46c76-7d5f-434e-b613-1ca91c043f06.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-11A-01-TS1.725380e8-0c30-4507-83a5-5e726e2cc533.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-01A-01-TS1.493daee4-9604-4259-8611-120a6bc241b3.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77J-11A-01-TS1.27A65F65-2CBA-49B7-8F6F-639A0F7AC912.…

OK

Time: 246.69s
Speed: 4.88MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_6.csv
	tool: api
	stream: True
	iterative: True
	multiprocess: 4

Number of files:       29
Avg size (MB):      41.52
Total size (MB):  1204.07



HBox(children=(IntProgress(value=0, description='Files', max=29, style=ProgressStyle(description_width='initia…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-8126-01A-01-TS1.098769c8-f708-45d8-af6e-96747786553a.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7651-01A-01-TS1.93957f30-932b-4a10-a325-d5eaeaf990a5.…

HBox(children=(IntProgress(value=0, description='TCGA-US-A774-11A-01-TSA.79454D5B-6D4E-4E1F-BE79-C745E8FD09E4.…

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQA-11A-01-TSA.C5F0A098-9D08-46AA-9468-CD9D7C47C906.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7926-01Z-00-DX1.b3bf02d3-bad0-4451-9c39-b0593f19154c.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7647-01A-01-TS1.60574eeb-7935-4607-9ca8-5e1c21684ea5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7649-01A-01-TS1.bbd1df8b-ea7f-473d-8898-ce66f8c7a9c9.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ1-11A-01-TSA.44562F27-B936-4A9D-A176-0A6FA6E1832F.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77E-11A-01-TSA.D46122D7-776B-4346-8918-7C38CD6A77C0.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7646-01A-01-TS1.15de8678-3471-493d-b49a-1e28ab861713.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPY-11A-01-TSA.088BA594-513D-40BB-90FA-A01BDA16C584.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A5-01A-01-TS1.3BB38FAB-0506-4223-B017-10425E9329D7.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQJ-11A-01-TS1.27AB61A2-4F5B-4B8E-AC5D-AE1D0E5C7C7A.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-6880-01A-01-TS1.b92d57ab-e336-45cb-919f-6fdf5fbacb39.…

 

HBox(children=(IntProgress(value=0, description='TCGA-H6-8124-11A-01-TS1.3be454f5-83ee-4cd0-8a00-f341ce8f3ba2.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77G-11A-01-TSA.18AC9F41-1554-4C39-BDA0-2D139AE487AD.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A779-11A-01-TS1.C060211E-6AFE-4D64-AC9C-D1B3D5633CC4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7887-01A-01-TS1.8ce76657-42b6-48a8-ba12-c2b697173e10.…

 

HBox(children=(IntProgress(value=0, description='TCGA-YB-A89D-11A-01-TS1.F56832D3-37EC-49D5-8803-7132516B25EE.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPQ-11A-01-TS1.22B96FE6-C6AB-465F-8F09-83BA9F32609B.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ6-11A-01-TSA.9BFA20EC-9D05-413E-8385-405829239799.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7644-01A-01-TS1.8fcd72ca-6fb6-4c0a-9dca-9c13bc4272e8.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-7276-01A-01-TS1.010dd1db-9ead-4399-84b9-5381e189054e.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A3-11A-01-TS1.29B3EF24-5DA2-4353-848D-7E5B63A1C226.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-8001-01A-01-TS1.d3738e44-a683-4e1f-9794-b0f76ee0bd61.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7923-01A-01-TS1.e1b46c76-7d5f-434e-b613-1ca91c043f06.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-11A-01-TS1.725380e8-0c30-4507-83a5-5e726e2cc533.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-01A-01-TS1.493daee4-9604-4259-8611-120a6bc241b3.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77J-11A-01-TS1.27A65F65-2CBA-49B7-8F6F-639A0F7AC912.…

OK

Time: 206.66s
Speed: 5.83MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_6.csv
	tool: api
	stream: True
	iterative: True
	multiprocess: 8

Number of files:       29
Avg size (MB):      41.52
Total size (MB):  1204.07



HBox(children=(IntProgress(value=0, description='Files', max=29, style=ProgressStyle(description_width='initia…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-8126-01A-01-TS1.098769c8-f708-45d8-af6e-96747786553a.…

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQA-11A-01-TSA.C5F0A098-9D08-46AA-9468-CD9D7C47C906.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7647-01A-01-TS1.60574eeb-7935-4607-9ca8-5e1c21684ea5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7926-01Z-00-DX1.b3bf02d3-bad0-4451-9c39-b0593f19154c.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7651-01A-01-TS1.93957f30-932b-4a10-a325-d5eaeaf990a5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ1-11A-01-TSA.44562F27-B936-4A9D-A176-0A6FA6E1832F.…

HBox(children=(IntProgress(value=0, description='TCGA-US-A774-11A-01-TSA.79454D5B-6D4E-4E1F-BE79-C745E8FD09E4.…

HBox(children=(IntProgress(value=0, description='TCGA-IB-7649-01A-01-TS1.bbd1df8b-ea7f-473d-8898-ce66f8c7a9c9.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77E-11A-01-TSA.D46122D7-776B-4346-8918-7C38CD6A77C0.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7646-01A-01-TS1.15de8678-3471-493d-b49a-1e28ab861713.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPY-11A-01-TSA.088BA594-513D-40BB-90FA-A01BDA16C584.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A5-01A-01-TS1.3BB38FAB-0506-4223-B017-10425E9329D7.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQJ-11A-01-TS1.27AB61A2-4F5B-4B8E-AC5D-AE1D0E5C7C7A.…

 

HBox(children=(IntProgress(value=0, description='TCGA-H6-8124-11A-01-TS1.3be454f5-83ee-4cd0-8a00-f341ce8f3ba2.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-6880-01A-01-TS1.b92d57ab-e336-45cb-919f-6fdf5fbacb39.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77G-11A-01-TSA.18AC9F41-1554-4C39-BDA0-2D139AE487AD.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A779-11A-01-TS1.C060211E-6AFE-4D64-AC9C-D1B3D5633CC4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7887-01A-01-TS1.8ce76657-42b6-48a8-ba12-c2b697173e10.…

 

HBox(children=(IntProgress(value=0, description='TCGA-YB-A89D-11A-01-TS1.F56832D3-37EC-49D5-8803-7132516B25EE.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPQ-11A-01-TS1.22B96FE6-C6AB-465F-8F09-83BA9F32609B.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ6-11A-01-TSA.9BFA20EC-9D05-413E-8385-405829239799.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7644-01A-01-TS1.8fcd72ca-6fb6-4c0a-9dca-9c13bc4272e8.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-7276-01A-01-TS1.010dd1db-9ead-4399-84b9-5381e189054e.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-8001-01A-01-TS1.d3738e44-a683-4e1f-9794-b0f76ee0bd61.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A3-11A-01-TS1.29B3EF24-5DA2-4353-848D-7E5B63A1C226.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7923-01A-01-TS1.e1b46c76-7d5f-434e-b613-1ca91c043f06.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-11A-01-TS1.725380e8-0c30-4507-83a5-5e726e2cc533.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-01A-01-TS1.493daee4-9604-4259-8611-120a6bc241b3.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77J-11A-01-TS1.27A65F65-2CBA-49B7-8F6F-639A0F7AC912.…

OK

Time: 273.68s
Speed: 4.4MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_6.csv
	tool: api
	stream: True
	iterative: True
	multiprocess: 16

Number of files:       29
Avg size (MB):      41.52
Total size (MB):  1204.07



HBox(children=(IntProgress(value=0, description='Files', max=29, style=ProgressStyle(description_width='initia…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7651-01A-01-TS1.93957f30-932b-4a10-a325-d5eaeaf990a5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ1-11A-01-TSA.44562F27-B936-4A9D-A176-0A6FA6E1832F.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A774-11A-01-TSA.79454D5B-6D4E-4E1F-BE79-C745E8FD09E4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7646-01A-01-TS1.15de8678-3471-493d-b49a-1e28ab861713.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7926-01Z-00-DX1.b3bf02d3-bad0-4451-9c39-b0593f19154c.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77G-11A-01-TSA.18AC9F41-1554-4C39-BDA0-2D139AE487AD.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77E-11A-01-TSA.D46122D7-776B-4346-8918-7C38CD6A77C0.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPY-11A-01-TSA.088BA594-513D-40BB-90FA-A01BDA16C584.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A5-01A-01-TS1.3BB38FAB-0506-4223-B017-10425E9329D7.…

HBox(children=(IntProgress(value=0, description='TCGA-IB-8126-01A-01-TS1.098769c8-f708-45d8-af6e-96747786553a.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQA-11A-01-TSA.C5F0A098-9D08-46AA-9468-CD9D7C47C906.…

HBox(children=(IntProgress(value=0, description='TCGA-F2-6880-01A-01-TS1.b92d57ab-e336-45cb-919f-6fdf5fbacb39.…

HBox(children=(IntProgress(value=0, description='TCGA-H6-8124-11A-01-TS1.3be454f5-83ee-4cd0-8a00-f341ce8f3ba2.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7649-01A-01-TS1.bbd1df8b-ea7f-473d-8898-ce66f8c7a9c9.…

HBox(children=(IntProgress(value=0, description='TCGA-2L-AAQJ-11A-01-TS1.27AB61A2-4F5B-4B8E-AC5D-AE1D0E5C7C7A.…

HBox(children=(IntProgress(value=0, description='TCGA-IB-7647-01A-01-TS1.60574eeb-7935-4607-9ca8-5e1c21684ea5.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A779-11A-01-TS1.C060211E-6AFE-4D64-AC9C-D1B3D5633CC4.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7887-01A-01-TS1.8ce76657-42b6-48a8-ba12-c2b697173e10.…

 

HBox(children=(IntProgress(value=0, description='TCGA-YB-A89D-11A-01-TS1.F56832D3-37EC-49D5-8803-7132516B25EE.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAQ6-11A-01-TSA.9BFA20EC-9D05-413E-8385-405829239799.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7644-01A-01-TS1.8fcd72ca-6fb6-4c0a-9dca-9c13bc4272e8.…

 

HBox(children=(IntProgress(value=0, description='TCGA-F2-7276-01A-01-TS1.010dd1db-9ead-4399-84b9-5381e189054e.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FB-AAPQ-11A-01-TS1.22B96FE6-C6AB-465F-8F09-83BA9F32609B.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A3-11A-01-TS1.29B3EF24-5DA2-4353-848D-7E5B63A1C226.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7923-01A-01-TS1.e1b46c76-7d5f-434e-b613-1ca91c043f06.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-11A-01-TS1.725380e8-0c30-4507-83a5-5e726e2cc533.…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5924-01A-01-TS1.493daee4-9604-4259-8611-120a6bc241b3.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-8001-01A-01-TS1.d3738e44-a683-4e1f-9794-b0f76ee0bd61.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77J-11A-01-TS1.27A65F65-2CBA-49B7-8F6F-639A0F7AC912.…

OK

Time: 261.88s
Speed: 4.6MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_1.csv
	tool: gdc

Number of files:       10
Avg size (MB):       0.44
Total size (MB):     4.36

100% [#############################################] Time: 0:00:01   0.57  B/s 
100% [#############################################] Time: 0:00:01   0.57  B/s 
100% [#############################################] Time: 0:00:01   0.61  B/s 
100% [#############################################] Time: 0:00:01 286.57 kB/s 
100% [#############################################] Time: 0:00:01 213.01 kB/s 
[32mSuccessfully downloaded[0m: 10

OK

Time: 14.4s
Speed: 0.3MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_1.csv
	tool: api
	stream: True
	iterative: False

Number of files:       10
Avg size (MB):       0.44
Total size (MB):     4.36

 

HBox(children=(IntProgress(value=0, description='gdc_download_20190425_014656.258701.tar.gz', max=5120, style=…

OK

Time: 3.58s
Speed: 1.22MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_1.csv
	tool: api
	stream: True
	iterative: True

Number of files:       10
Avg size (MB):       0.44
Total size (MB):     4.36



HBox(children=(IntProgress(value=0, description='Files', max=10, style=ProgressStyle(description_width='initia…

 

HBox(children=(IntProgress(value=0, description='a2a33be8-232b-44bf-a003-349017a5bc5a.FPKM.txt.gz', max=532, s…

 

HBox(children=(IntProgress(value=0, description='dd2f0366-10f3-40e4-90d5-c5c2c7a65289.FPKM-UQ.txt.gz', max=532…

 

HBox(children=(IntProgress(value=0, description='4172e3f8-3578-4f33-9168-6f8c2b8d0783.FPKM.txt.gz', max=543, s…

 

HBox(children=(IntProgress(value=0, description='c1f4dcd4-26b5-4b90-9564-44e03242e8e2.htseq.counts.gz', max=26…

 

HBox(children=(IntProgress(value=0, description='7051e52f-069d-48d5-966e-064a01bf2725.FPKM-UQ.txt.gz', max=522…

 

HBox(children=(IntProgress(value=0, description='69675771-14b4-4edf-bbda-63c38051ca1c.htseq.counts.gz', max=25…

 

HBox(children=(IntProgress(value=0, description='51d0123d-f082-4ce1-8f06-e3dafd224a88.htseq.counts.gz', max=25…

 

HBox(children=(IntProgress(value=0, description='0be94b2f-fccb-4482-b0ea-695c101aa65a.FPKM-UQ.txt.gz', max=502…

 

HBox(children=(IntProgress(value=0, description='7d0fbed4-b5e1-4633-ac04-e0ee614b90ee.FPKM-UQ.txt.gz', max=512…

 

HBox(children=(IntProgress(value=0, description='e228d8be-74db-4b34-90d6-5d757f15310b.FPKM-UQ.txt.gz', max=543…

OK

Time: 13.19s
Speed: 0.33MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_2.csv
	tool: api
	stream: False
	iterative: False

Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18

OK

Time: 29.51s
Speed: 3.6MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_2.csv
	tool: api
	stream: False
	iterative: True

Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18



HBox(children=(IntProgress(value=0, description='Files', max=4, style=ProgressStyle(description_width='initial…

OK

Time: 37.75s
Speed: 2.81MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_2.csv
	tool: gdc

Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18

100% [#############################################] Time: 0:00:07   4.17 MB/s 
100% [#############################################] Time: 0:00:06   4.06 MB/s 
100% [#############################################] Time: 0:00:05   4.13 MB/s 
100% [#############################################] Time: 0:00:05   4.45 MB/s 
[32mSuccessfully downloaded[0m: 4

OK

Time: 31.87s
Speed: 3.33MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_2.csv
	tool: api
	stream: True
	iterative: False

Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18

 

HBox(children=(IntProgress(value=0, description='gdc_download_20190425_014851.802503.tar.gz', max=83637, style…

OK

Time: 26.93s
Speed: 3.94MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_2.csv
	tool: api
	stream: True
	iterative: True

Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18



HBox(children=(IntProgress(value=0, description='Files', max=4, style=ProgressStyle(description_width='initial…

 

HBox(children=(IntProgress(value=0, description='TCGA-HZ-7926-01Z-00-DX1.b3bf02d3-bad0-4451-9c39-b0593f19154c.…

 

HBox(children=(IntProgress(value=0, description='TCGA-US-A77G-11A-01-TSA.18AC9F41-1554-4C39-BDA0-2D139AE487AD.…

 

HBox(children=(IntProgress(value=0, description='TCGA-IB-7644-01A-01-TS1.8fcd72ca-6fb6-4c0a-9dca-9c13bc4272e8.…

 

HBox(children=(IntProgress(value=0, description='TCGA-HV-A5A3-11A-01-TS1.29B3EF24-5DA2-4353-848D-7E5B63A1C226.…

OK

Time: 38.04s
Speed: 2.79MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_2.csv
	tool: api
	stream: False
	iterative: False

Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18

OK

Time: 28.52s
Speed: 3.72MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_2.csv
	tool: api
	stream: False
	iterative: True

Number of files:        4
Avg size (MB):      26.54
Total size (MB):   106.18



HBox(children=(IntProgress(value=0, description='Files', max=4, style=ProgressStyle(description_width='initial…

OK

Time: 41.77s
Speed: 2.54MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_3.csv
	tool: gdc

Number of files:        4
Avg size (MB):     199.11
Total size (MB):   796.45

100% [#############################################] Time: 0:00:40   4.92 MB/s 
100% [#############################################] Time: 0:00:37   5.33 MB/s 
100% [#############################################] Time: 0:00:38   5.25 MB/s 
100% [#############################################] Time: 0:00:39   5.06 MB/s 
[32mSuccessfully downloaded[0m: 4

OK

Time: 163.97s
Speed: 4.86MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_3.csv
	tool: api
	stream: True
	iterative: False

Number of files:        4
Avg size (MB):     199.11
Total size (MB):   796.45

 

HBox(children=(IntProgress(value=0, description='gdc_download_20190425_015351.222525.tar.gz', max=627358, styl…

OK

Time: 186.29s
Speed: 4.28MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_3.csv
	tool: api
	stream: True
	iterative: True

Number of files:        4
Avg size (MB):     199.11
Total size (MB):   796.45



HBox(children=(IntProgress(value=0, description='Files', max=4, style=ProgressStyle(description_width='initial…

 

HBox(children=(IntProgress(value=0, description='TCGA-FZ-5920-01A-01-TS1.ed875f57-3afb-4d20-8e2d-e250ddb6c462.…

 

HBox(children=(IntProgress(value=0, description='TCGA-3A-A9I9-01A-01-TS1.B7DF9D8A-653C-4748-8B02-5FDCB3ED9EB6.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2J-AAB8-01A-01-TSA.62840521-53CE-4FE7-B653-892E62CFC996.…

 

HBox(children=(IntProgress(value=0, description='TCGA-2J-AABA-01A-02-TS2.5D2E8749-33EE-40B8-8D84-591FC71EBE03.…

OK

Time: 311.05s
Speed: 2.56MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_3.csv
	tool: api
	stream: False
	iterative: False

Number of files:        4
Avg size (MB):     199.11
Total size (MB):   796.45

OK

Time: 249.02s
Speed: 3.2MB/s
--------------------------------------------------------------------------------

Running test...

	dataset: dataset_3.csv
	tool: api
	stream: False
	iterative: True

Number of files:        4
Avg size (MB):     199.11
Total size (MB):   796.45



HBox(children=(IntProgress(value=0, description='Files', max=4, style=ProgressStyle(description_width='initial…

OK

Time: 320.47s
Speed: 2.49MB/s
--------------------------------------------------------------------------------

