In [3]:
## This script will demonstrate how to get imaging/item metadata from something that has been indexed on the DSA
### Will also demonstrate how we can POST metadata to a specific image/item as well as how to pull internal imaging
### details
import girder_client
import dsaSecrets as s  ## Put the API Key in here

gc = girder_client.GirderClient(apiUrl="https://imaging.htan.dev/girder/api/v1")
gc.authenticate(apiKey=s.dsaApiKey)

{'_id': '5ebee66965f992910fa19570'}

In [4]:
## The API is documented at https://imaging.htan.dev/girder/api/v1

### Conventions for the S3/Google Bucket Sync for the DCC
## All of the raw data is synchronized into a collection called DCC_BucketSync
## Each bucket has been registered in this collection using the bucket name

## Get the ID for the DCC_BucketSync collection

#https://imaging.htan.dev/girder/api/v1/collection?text=DCC_Bucket_Sync&limit=50&sort=name&sortdir=1

DCC_collection = gc.get("collection?text='DCC_Bucket_Sync'")[0]
## And now get a list of folders, each one representing a separate S3/GCS bucket

## List folders in this collection; since this is the root of the collection, I also have to tell the system
# the parentFolderType = 'collection'; this is a girderism 

##This yields a generator, for convenience I like making a dictionary

bucketFolderdict = {}
for f in gc.listFolder(DCC_collection['_id'],parentFolderType='collection'):
    print(f['name'])
    bucketFolderdict[f['name']] = f

htan-dcc-bu
htan-dcc-center-a
htan-dcc-chop
htan-dcc-dfci
htan-dcc-dsa-test
htan-dcc-duke
htan-dcc-hms
htan-dcc-htapp
htan-dcc-msk
htan-dcc-ohsu
htan-dcc-pcapp
htan-dcc-sardana-dsaSync
htan-dcc-stanford
htan-dcc-tma-tnp
htan-dcc-vanderbilt
htan-dcc-washu


In [5]:
## Now let's investigate the OHSU folder as it has an interpretable folder structure

ohsuFolderDict = {}
for of in gc.listFolder(bucketFolderdict['htan-dcc-ohsu']['_id']):
    print(of['name'])
    ohsuFolderDict[of['name']] = of

3341471
3413795
biospecimen
bulk_dnaseq_level_1
bulk_rnaseq_level_1
clinical_demographics
clinical_diagnosis
clinical_exposure
clinical_family_history
clinical_follow_up
clinical_therapy
em_level_1
em_level_2
em_level_3
em_level_4
imaging_level_1
imaging_level_2
nanostring_level_1
nanostring_level_2
nanostring_level_3
rppa_level_2
rppa_level_3
rppa_level_4


In [6]:
## Let's inspect the first item in the OHSU imaging_level_2 folder
ohsu_image_level_2_items = list(gc.listItem(ohsuFolderDict['imaging_level_2']['_id']))
print(ohsu_image_level_2_items[0])

{'_id': '60403e803c5a0f47ead21e7f', '_modelType': 'item', 'baseParentId': '5fa99a0051de21dd08ca7dfa', 'baseParentType': 'collection', 'created': '2021-03-04T01:57:20.643000+00:00', 'creatorId': '5ebee66965f992910fa19570', 'description': '', 'folderId': '60403e803c5a0f47ead21e7e', 'largeImage': {'fileId': '60403e803c5a0f47ead21e80', 'sourceName': 'ometiff'}, 'meta': {'htanMeta': {'Acquisition_Method_Type': 'Biopsy', 'Age_at_Diagnosis': '63', 'Bucket_url': 's3://htan-dcc-ohsu/imaging_level_2/BEMS267865_Scene-001.ome.tif', 'Center': 'HTAN OHSU', 'DimensionOrder': 'XYZCT', 'Ethnicity': 'not hispanic or latino', 'File_Format': 'OME-TIFF', 'File_size': '7537466355', 'Fixative_Type': 'Formalin', 'GCS_timestamp': '', 'Gender': 'female', 'HTAN_Biospecimen_ID': 'HTA9_1_51', 'HTAN_Center': 'OHSU', 'HTAN_Data_File_ID': 'HTA9_1_19373', 'HTAN_Parent_ID': 'HTA9_1_34', 'HTAN_Participant_ID': 'HTA9_1', 'Histologic_Morphology_Code': 'Not Reported', 'Imaging_Assay_Type': 'CyCIF', 'Morphology': '8500/3', 

In [7]:
## This is the metadata for the first item I obtained from Synapse
sampleMetaData = {
    "Acquisition_Method_Type": "Biopsy",
    "Age_at_Diagnosis": "63",
    "Bucket_url": "s3://htan-dcc-ohsu/imaging_level_2/BEMS267865_Scene-001.ome.tif",
    "Center": "HTAN OHSU",
    "DimensionOrder": "XYZCT",
    "Ethnicity": "not hispanic or latino",
    "File_Format": "OME-TIFF",
    "File_size": "7537466355",
    "Fixative_Type": "Formalin",
    "GCS_timestamp": "",
    "Gender": "female",
    "HTAN_Biospecimen_ID": "HTA9_1_51",
    "HTAN_Center": "OHSU",
    "HTAN_Data_File_ID": "HTA9_1_19373",
    "HTAN_Parent_ID": "HTA9_1_34",
    "HTAN_Participant_ID": "HTA9_1",
    "Histologic_Morphology_Code": "Not Reported",
    "Imaging_Assay_Type": "CyCIF",
    "Morphology": "8500/3",
    "PhysicalSizeX": "0.325",
    "PhysicalSizeXUnit": "µm",
    "PhysicalSizeY": "0.325",
    "PhysicalSizeYUnit": "µm",
    "Preservation_Method": "Formalin fixed paraffin embedded - FFPE",
    "Primary_Diagnosis": "Infiltrating duct carcinoma NOS",
    "Race": "white",
    "S3_timestamp": "2021-02-13 23:02:15+00:00",
    "Site_of_Resection_or_Biopsy": "Liver",
    "SizeC": "40",
    "SizeT": "1",
    "SizeX": "8001",
    "SizeY": "20001",
    "SizeZ": "12",
    "Storage_Method": "Cut slide",
    "SynapseID": "syn24829473",
    "Synapse_file_path": "imaging_level_2/BEMS267865_Scene-001.ome.tif",
    "Tissue_or_Organ_of_Origin": "Breast NOS",
    "Tseries": "No",
    "Tumor_Grade": "G3",
    "Tumor_Tissue_Type": "Metastatic",
    "Vital_Status": "Alive",
    "Year_of_Diagnosis": "2010",
    "Zstack": "No"
}

In [8]:
## By convention I am going to put this type of metadata in the htanMeta as the root key..
gc.addMetadataToItem(ohsu_image_level_2_items[0]['_id'],{'htanMeta': sampleMetaData})

# You can also browse the DSA link below and see the metadata we just added to the item
# https://imaging.htan.dev/girder/#item/60403e803c5a0f47ead21e7f

{'_id': '60403e803c5a0f47ead21e7f',
 '_modelType': 'item',
 'baseParentId': '5fa99a0051de21dd08ca7dfa',
 'baseParentType': 'collection',
 'created': '2021-03-04T01:57:20.643000+00:00',
 'creatorId': '5ebee66965f992910fa19570',
 'description': '',
 'folderId': '60403e803c5a0f47ead21e7e',
 'largeImage': {'fileId': '60403e803c5a0f47ead21e80', 'sourceName': 'ometiff'},
 'meta': {'htanMeta': {'Acquisition_Method_Type': 'Biopsy',
   'Age_at_Diagnosis': '63',
   'Bucket_url': 's3://htan-dcc-ohsu/imaging_level_2/BEMS267865_Scene-001.ome.tif',
   'Center': 'HTAN OHSU',
   'DimensionOrder': 'XYZCT',
   'Ethnicity': 'not hispanic or latino',
   'File_Format': 'OME-TIFF',
   'File_size': '7537466355',
   'Fixative_Type': 'Formalin',
   'GCS_timestamp': '',
   'Gender': 'female',
   'HTAN_Biospecimen_ID': 'HTA9_1_51',
   'HTAN_Center': 'OHSU',
   'HTAN_Data_File_ID': 'HTA9_1_19373',
   'HTAN_Parent_ID': 'HTA9_1_34',
   'HTAN_Participant_ID': 'HTA9_1',
   'Histologic_Morphology_Code': 'Not Reported'

In [None]:
htanMeta_imageMetaDataKeys = 


#    'Imaging_Assay_Type': 'CyCIF',
#    'PhysicalSizeX': '0.325',
#    'PhysicalSizeXUnit': 'µm',
#    'PhysicalSizeY': '0.325',
#    'PhysicalSizeYUnit': 'µm',
#    'SizeC': '40',
#    'SizeT': '1',
#    'SizeX': '8001',
#    'SizeY': '20001',
#    'SizeZ': '12',
#    'Zstack': 'No'}},
#  'name': 'BEMS267865_Scene-001.ome.tif',

 


In [17]:
## May also parse out the internal metadata here;  need to write a function to pull out specific internal metadata
## We need to validate and/or extract to make sure the supplied metadata from Synapse is consistent

#itemInternalMetadata = gc.get("item/%s/tiles/internal_metadata" % ohsu_image_level_2_items[2]['_id'])
gc.get("item/%s/tiles/internal_metadata" % '60403e803c5a0f47ead21e7f')
#parseOmeTiffToHtanSpec(itemInternalMetadata)

HttpError: HTTP error 400: GET https://imaging.htan.dev/girder/api/v1/item/60403e803c5a0f47ead21e7f/tiles/internal_metadata
Response text: {"message": "Not a recognized OME Tiff", "type": "rest"}

In [14]:
ohsu_image_level_2_items[0]

{'_id': '60403e803c5a0f47ead21e7f',
 '_modelType': 'item',
 'baseParentId': '5fa99a0051de21dd08ca7dfa',
 'baseParentType': 'collection',
 'created': '2021-03-04T01:57:20.643000+00:00',
 'creatorId': '5ebee66965f992910fa19570',
 'description': '',
 'folderId': '60403e803c5a0f47ead21e7e',
 'largeImage': {'fileId': '60403e803c5a0f47ead21e80', 'sourceName': 'ometiff'},
 'meta': {'htanMeta': {'Acquisition_Method_Type': 'Biopsy',
   'Age_at_Diagnosis': '63',
   'Bucket_url': 's3://htan-dcc-ohsu/imaging_level_2/BEMS267865_Scene-001.ome.tif',
   'Center': 'HTAN OHSU',
   'DimensionOrder': 'XYZCT',
   'Ethnicity': 'not hispanic or latino',
   'File_Format': 'OME-TIFF',
   'File_size': '7537466355',
   'Fixative_Type': 'Formalin',
   'GCS_timestamp': '',
   'Gender': 'female',
   'HTAN_Biospecimen_ID': 'HTA9_1_51',
   'HTAN_Center': 'OHSU',
   'HTAN_Data_File_ID': 'HTA9_1_19373',
   'HTAN_Parent_ID': 'HTA9_1_34',
   'HTAN_Participant_ID': 'HTA9_1',
   'Histologic_Morphology_Code': 'Not Reported'

In [10]:
def parseOmeTiffToHtanSpec(dsa_internal_metadata):
    ### This expects the output from the DSA endpoint item/%s/tiles/internal_metadata
    ### and parses out the fields needed to compare against the provided DCC metadata
    if 'omeinfo' in dsa_internal_metadata:
        print("Found OME Metadata")
        omeImageData = dsa_internal_metadata['omeinfo']['Image']
        
        if len(omeImageData) == 1:
            omeImageData = omeImageData[0]
        else:
            ## Throw an exception and/or return None... not sure what to do if there is more than one image returned
            print("More than one image detected; not sure what this means!")
            return None

In [12]:
## This actually is an array.. for now I am assuming the array has a length of 1... will add a check

imageMetaDataKey = ['PhysicalSizeX','PhysicalSizeXUnit','PhysicalSizeY','PhysicalSizeYUnit','SignificantBits','SizeC','SizeT','SizeX','SizeY','SizeZ']

itemInternalMetadata['omeinfo']['Image'][0]

## The 'TiffData' is actually extractef by the DSA I believe... and not part of the OME XML file 

NameError: name 'itemInternalMetadata' is not defined

In [None]:
itemIn