In [None]:
import os
import synapseclient
from tqdm import tqdm

def get_table_info(synID):
    """
    Get table information from file view 

    Args:
         synID: File view Synapse ID
    """
    token = os.environ.get('SYNAPSE_AUTH_TOKEN')
    if not token:
        raise ValueError("Set env variable: export SYNAPSE_AUTH_TOKEN=your_token_here")

    syn = synapseclient.Synapse()
    syn.login(authToken=token)

    query_results = syn.tableQuery("SELECT * FROM " + synID)
    syn_df = query_results.asDataFrame()
    
    return syn, syn_df

def download_data(synID, dest_folder):
    """
    Downloading training dataset

    Args:
         synID: File view Synapse ID
         dest_folder: Directory path to download training dataset
    """
    syn, syn_df = get_table_info(synID)
    for index, row in tqdm(syn_df.iterrows()):
        syn.get(row['id'], downloadLocation=dest_folder)
    print('Downloaded all the files successfully...')

# download_data('syn41833327', 'data/solicited/')    # Download solicited coughs only
# download_data('syn41833579', 'data/longitudinal/') # Download longitudinal coughs only
# download_data('syn41833579', 'data/metadata/') # Download longitudinal coughs only

In [6]:
syn, df = get_table_info('syn41833579')
print(df.columns.tolist())
print(df.shape)
print(df.head(3))

Welcome, CupSurg!



  query_results = syn.tableQuery("SELECT * FROM " + synID)

Create CSV FileHandle: 100%|██████████| 1.43M/1.43M [00:14<00:00, 99.9kit/s]             
Downloading files:  97%|█████████▋| 314M/322M [02:20<00:03, 2.23MB/s, syn41833579] 

[syn41833579]: Downloaded to /Users/aida/.synapseCache/422/169333422/SYNAPSE_TABLE_QUERY_169333422.csv


Downloading files: 100%|██████████| 322M/322M [02:22<00:00, 2.27MB/s, syn41833579]


['id', 'name', 'createdOn', 'createdBy', 'etag', 'type', 'currentVersion', 'parentId', 'benefactorId', 'projectId', 'modifiedOn', 'modifiedBy', 'dataFileHandleId', 'dataFileSizeBytes', 'dataFileMD5Hex', 'dataFileConcreteType', 'dataFileBucket', 'dataFileKey']
(714922, 18)
                                                          id  \
40392638_1_f03e0be0-5ff7-4159-9c5e-61ea4610189d  syn40392638   
40392639_1_468a2154-74bf-442d-8c87-4fa8f47648ef  syn40392639   
40392640_1_be3319bb-afde-4c30-b438-f028cb5b17c3  syn40392640   

                                                                          name  \
40392638_1_f03e0be0-5ff7-4159-9c5e-61ea4610189d  1620627631190-recording-1.wav   
40392639_1_468a2154-74bf-442d-8c87-4fa8f47648ef  1620627635670-recording-1.wav   
40392640_1_be3319bb-afde-4c30-b438-f028cb5b17c3  1620633332149-recording-1.wav   

                                                     createdOn  createdBy  \
40392638_1_f03e0be0-5ff7-4159-9c5e-61ea4610189d  1666064958022  

In [7]:
syn, df = get_table_info('syn41833327')  # solicited file view
print("Parent IDs:")
print(df['parentId'].unique())

Welcome, CupSurg!



  query_results = syn.tableQuery("SELECT * FROM " + synID)

Create CSV FileHandle: 100%|██████████| 19.5k/19.5k [00:01<00:00, 12.9kit/s]            
Downloading files:  95%|█████████▌| 4.19M/4.40M [00:03<00:00, 1.29MB/s, syn41833327]

[syn41833327]: Downloaded to /Users/aida/.synapseCache/453/169333453/SYNAPSE_TABLE_QUERY_169333453.csv


Downloading files: 100%|██████████| 4.40M/4.40M [00:03<00:00, 1.26MB/s, syn41833327]

Parent IDs:
['syn40358494']





In [11]:
children = list(syn.getChildren('syn31472953'))
for c in children:
    print(f"{c['name']:>40s}  {c['id']}  {c['type']}")

  children = list(syn.getChildren('syn31472953'))



                      CODA Challenge SC1  syn51366229  org.sagebionetworks.repo.model.table.TableEntity
CODA Challenge SC1 Final Results (Original)  syn51196305  org.sagebionetworks.repo.model.table.TableEntity
        CODA Challenge SC2 Final Results  syn51196438  org.sagebionetworks.repo.model.table.TableEntity
                                    Data  syn38943763  org.sagebionetworks.repo.model.Folder
                            General Info  syn46738742  org.sagebionetworks.repo.model.Folder
                 Leaderboard SC1 Ranking  syn50970432  org.sagebionetworks.repo.model.table.TableEntity
                 Leaderboard SC2 Ranking  syn49581544  org.sagebionetworks.repo.model.table.TableEntity
                                    Logs  syn31476674  org.sagebionetworks.repo.model.Folder
                 Longitudinal Train View  syn41833579  org.sagebionetworks.repo.model.table.EntityView
                                New-Logs  syn64951434  org.sagebionetworks.repo.model.Folder
  

In [12]:
children = list(syn.getChildren('syn38943763'))
for c in children:
    print(f"{c['name']:>40s}  {c['id']}  {c['type']}")

  children = list(syn.getChildren('syn38943763'))



                                   Train  syn39711065  org.sagebionetworks.repo.model.Folder


In [13]:
syn.get('participant_data.csv_syn31472953', downloadLocation='data/metadata/')
syn.get('sound_to_participant.csv_syn31472953', downloadLocation='data/metadata/')
syn.get('data_dictionary.csv_syn31472953', downloadLocation='data/metadata/')

  syn.get('participant_data.csv_syn31472953', downloadLocation='data/metadata/')



SynapseFileNotFoundError: The parameter participant_data.csv_syn31472953 is neither a local file path  or a valid entity id

In [14]:
children = list(syn.getChildren('syn38943763'))
for c in children:
    print(f"{c['name']:>40s}  {c['id']}")

  children = list(syn.getChildren('syn38943763'))



                                   Train  syn39711065


In [15]:
children = list(syn.getChildren('syn39711065'))
for c in children:
    print(f"{c['name']:>40s}  {c['id']}")

  children = list(syn.getChildren('syn39711065'))



                               meta_data  syn39711415
                                raw_data  syn39711400


In [16]:
children = list(syn.getChildren('syn39711415'))
for c in children:
    print(f"{c['name']:>40s}  {c['id']}")

  children = list(syn.getChildren('syn39711415'))



  CODA TB Challenge data dictionary.xlsx  syn41743692
                                Clinical  syn53710097
                          Cough Metadata  syn53710098


In [17]:
syn.get('syn41743692', downloadLocation='data/metadata/')  # data dictionary
syn.get('syn53710097', downloadLocation='data/metadata/')   # Clinical
syn.get('syn53710098', downloadLocation='data/metadata/')   # Cough Metadata

  syn.get('syn41743692', downloadLocation='data/metadata/')  # data dictionary

Downloading files: 100%|██████████| 19.7k/19.7k [00:00<00:00, 31.0kB/s, syn41743692]

[syn41743692]: Downloaded to /Users/aida/code/development/tb-datasets/data/metadata/CODA TB Challenge data dictionary_update.xlsx


Downloading files: 100%|██████████| 19.7k/19.7k [00:00<00:00, 30.9kB/s, syn41743692]
  syn.get('syn53710097', downloadLocation='data/metadata/')   # Clinical

  syn.get('syn53710098', downloadLocation='data/metadata/')   # Cough Metadata



Folder(etag='cb23fa81-57a5-4284-9cbc-d0de0bdc77ba', id='syn53710098', modifiedBy='273959', concreteType='org.sagebionetworks.repo.model.Folder', createdBy='273959', modifiedOn='2024-02-29T18:02:27.483Z', createdOn='2024-02-29T18:02:27.483Z', name='Cough Metadata', parentId='syn39711415')

In [18]:
for sid, name in [('syn53710097', 'Clinical'), ('syn53710098', 'Cough Metadata')]:
    children = list(syn.getChildren(sid))
    print(f"\n=== {name} ===")
    for c in children:
        print(f"  {c['name']:>50s}  {c['id']}")

  children = list(syn.getChildren(sid))




=== Clinical ===
                      CODA_TB_Clinical_Meta_Info.csv  syn41604915
              CODA_TB_additional_variables_train.csv  syn52357041

=== Cough Metadata ===
                   CODA_TB_Longitudnal_Meta_Info.csv  syn41604935
                     CODA_TB_Solicited_Meta_Info.csv  syn41604939


In [19]:
for sid in ['syn41604915', 'syn52357041', 'syn41604935', 'syn41604939']:
    syn.get(sid, downloadLocation='data/metadata/')
    print(f"Downloaded {sid}")

  syn.get(sid, downloadLocation='data/metadata/')

Downloading files: 100%|██████████| 79.8k/79.8k [00:00<00:00, 98.4kB/s, syn41604915]

[syn41604915]: Downloaded to /Users/aida/code/development/tb-datasets/data/metadata/CODA_TB_Clinical_Meta_Info.csv


Downloading files: 100%|██████████| 79.8k/79.8k [00:00<00:00, 97.9kB/s, syn41604915]


Downloaded syn41604915


Downloading files: 100%|██████████| 74.6k/74.6k [00:00<00:00, 96.8kB/s, syn52357041]

[syn52357041]: Downloaded to /Users/aida/code/development/tb-datasets/data/metadata/CODA_TB_additional_variables_train.csv


Downloading files: 100%|██████████| 74.6k/74.6k [00:00<00:00, 96.2kB/s, syn52357041]


Downloaded syn52357041


Downloading files: 100%|██████████| 44.2M/44.2M [00:20<00:00, 2.16MB/s, syn41604935]

[syn41604935]: Downloaded to /Users/aida/code/development/tb-datasets/data/metadata/CODA_TB_Longitudnal_Meta_Info.csv


Downloading files: 100%|██████████| 44.2M/44.2M [00:20<00:00, 2.16MB/s, syn41604935]


Downloaded syn41604935


Downloading files: 100%|██████████| 604k/604k [00:00<00:00, 612kB/s, syn41604939]

[syn41604939]: Downloaded to /Users/aida/code/development/tb-datasets/data/metadata/CODA_TB_Solicited_Meta_Info.csv


Downloading files: 100%|██████████| 604k/604k [00:00<00:00, 611kB/s, syn41604939]

Downloaded syn41604939



