In [1]:
import os

# Google Cloud project ID used by the rest of the notebook (it is passed to the
# BigQuery client when that client is created).
my_ProjectID = "data-idc"

# Also expose the project ID as the GCP_PROJECT_ID environment variable.
os.environ["GCP_PROJECT_ID"] = my_ProjectID

In [2]:
# Authenticate the current user through the Colab runtime. This import only
# exists inside Google Colab.
# NOTE(review): presumably these credentials are what the BigQuery client and
# gsutil calls further down rely on — confirm.
from google.colab import auth
auth.authenticate_user()

In [3]:
from google.cloud import bigquery

# BigQuery client bound to the project ID configured in the first cell.
bq_client = bigquery.Client(project=my_ProjectID)

In [4]:
# Patient whose rows are selected; change this value to build a different cohort.
patient_id = "C3L-01063"

# Select the study/series/instance UIDs, the instance size and the GCS URL of
# every row of `dicom_all` that belongs to the patient. The patient ID is bound
# as the named query parameter @patient_id instead of being written into the
# SQL text, so the query string itself never has to be edited or escaped.
selection_query = """
  SELECT
    StudyInstanceUID,
    SeriesInstanceUID,
    SOPInstanceUID,
    instance_size,
    gcs_url
  FROM
    `bigquery-public-data.idc_current.dicom_all`
  WHERE
    PatientID = @patient_id
"""

selection_job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("patient_id", "STRING", patient_id),
    ]
)

# Run the query, wait for it to finish, and load the rows into a pandas DataFrame.
selection_result = bq_client.query(selection_query, job_config=selection_job_config)
selection_df = selection_result.result().to_dataframe()

In [5]:
# Add up the per-instance sizes and divide by 1024**3.
# NOTE: assuming instance_size is in bytes, the result is in binary gigabytes
# (GiB), even though the printed label reads "Gb".
total_size = selection_df["instance_size"].sum()
size_gb = total_size / 1024**3
print(f"Cohort size on disk: {size_gb} Gb")

Cohort size on disk: 0.25197976268827915 Gb


In [None]:
# NOTE(review): this cell contained only `%%bigquery --project=$my_ProjectID`
# with no query body, so the cell magic had nothing to run. It is kept as a
# no-op placeholder: either add the magic line followed by a SQL query, or
# delete the cell.

In [7]:
def get_idc_viewer_url(studyUID, seriesUID=None):
  """Build an IDC viewer URL for a study, optionally narrowed to one series.

  Args:
    studyUID: StudyInstanceUID string appended to the viewer base URL.
    seriesUID: optional SeriesInstanceUID string. When it is not None it is
      appended as the `seriesInstanceUID` query parameter.

  Returns:
    The viewer URL as a string.
  """
  study_url = "https://viewer.imaging.datacommons.cancer.gov/viewer/" + studyUID
  if seriesUID is None:
    return study_url
  return study_url + "?seriesInstanceUID=" + seriesUID

# Fail early with a clear message when the selection matched nothing, instead
# of hitting an opaque lookup error on the first row below.
if selection_df.empty:
  raise ValueError("selection_df is empty: the selection query returned no rows")

# Take the study and series UIDs from the first row by position. `.iloc[0]`
# does not depend on the frame having a row labelled 0, and no filtering is
# needed because the first row always belongs to its own study.
first_row = selection_df.iloc[0]
my_StudyInstanceUID = first_row["StudyInstanceUID"]
my_SeriesInstanceUID = first_row["SeriesInstanceUID"]

print("URL to view the entire study:")
print(get_idc_viewer_url(my_StudyInstanceUID))

URL to view the entire study:
https://viewer.imaging.datacommons.cancer.gov/viewer/2.25.84514367312977501343770431102582089990


In [8]:
import os
os.environ["DOWNLOAD_DEST"] = "/content/IDC_downloads"
os.environ["MANIFEST"] = "/content/idc_manifest.txt"
     

!mkdir -p ${DOWNLOAD_DEST}
!echo "gsutil cp $* $DOWNLOAD_DEST" > gsutil_download.sh
!chmod +x gsutil_download.sh
     

# creating a manifest file for the subsequent download of files
selection_df["gcs_url"].to_csv(os.environ["MANIFEST"], header=False, index=False)

In [9]:
!cat ${MANIFEST} | gsutil -m cp -I ${DOWNLOAD_DEST}

Copying gs://public-datasets-idc/4eef2feb-7cff-471b-9358-467e77656cd9.dcm...
/ [0 files][    0.0 B/100.8 MiB]                                                Copying gs://public-datasets-idc/a17aca9b-cecf-421c-9514-a40cccba222a.dcm...
Copying gs://public-datasets-idc/3eaab5a2-96fb-4f88-a940-ab983c9630f2.dcm...
Copying gs://public-datasets-idc/b8c54e0c-377f-444b-9472-46bbc6b5759e.dcm...
Copying gs://public-datasets-idc/925a4e17-aa7d-4834-a406-918132a48109.dcm...
Copying gs://public-datasets-idc/865f522b-db0e-4b6d-8d1b-38dbe12b8e3a.dcm...
Copying gs://public-datasets-idc/d0d2ed69-915e-41f3-8f19-f5f4f348b8c1.dcm...
Copying gs://public-datasets-idc/850589b3-1165-42be-a961-3f8d35350fee.dcm...
Copying gs://public-datasets-idc/fcbf11d4-1d0c-4e79-abd9-f0a9e125b38d.dcm...
Copying gs://public-datasets-idc/3dcc74bc-ba81-43f9-8c00-031e1a1d40af.dcm...
Copying gs://public-datasets-idc/b524e94f-dcb0-45ec-a87e-f52813a47002.dcm...
Copying gs://public-datasets-idc/a6ba5aff-a652-49bf-ab24-d1cae23566be.dc

In [10]:
!ls IDC_downloads

3dcc74bc-ba81-43f9-8c00-031e1a1d40af.dcm
3eaab5a2-96fb-4f88-a940-ab983c9630f2.dcm
4eef2feb-7cff-471b-9358-467e77656cd9.dcm
850589b3-1165-42be-a961-3f8d35350fee.dcm
865f522b-db0e-4b6d-8d1b-38dbe12b8e3a.dcm
925a4e17-aa7d-4834-a406-918132a48109.dcm
a17aca9b-cecf-421c-9514-a40cccba222a.dcm
a6ba5aff-a652-49bf-ab24-d1cae23566be.dcm
b524e94f-dcb0-45ec-a87e-f52813a47002.dcm
b8c54e0c-377f-444b-9472-46bbc6b5759e.dcm
d0d2ed69-915e-41f3-8f19-f5f4f348b8c1.dcm
fcbf11d4-1d0c-4e79-abd9-f0a9e125b38d.dcm
