# Download ADNI images that have been uploaded to the Azure imaging database

### Prerequisites
* Locate your imaging files: local paths, cloud storage services (e.g., Microsoft Azure)

In [None]:
import requests
import pandas as pd
import pydicom
from pathlib import Path
from urllib3.filepost import encode_multipart_formdata, choose_boundary
from azure.identity import DefaultAzureCredential

In [None]:
# Install the Azure CLI package, which provides the `az` command used in the cells below
!pip install azure-cli

In [None]:
# Sign in to Azure with the CLI (run this before requesting an access token)
!az login

In [None]:
# Run the az command and capture its output
captured_output = !az account get-access-token --resource https://dicom.healthcareapis.azure.com --query accessToken --output tsv

# The output is a special IPython.utils.text.SList object which behaves like a list.
# If the command outputs a single line (like an access token), it will be the first element.
bearer_token = captured_output[2].strip()

In [None]:
# Connectivity check: issue one authenticated GET against the studies endpoint
# and display the resulting response object as the cell output.
base_dicomweb_url = 'https://imgextpoc-adnidicom.dicom.azurehealthcareapis.com/v2'
dicomweb_study_query_url = f"{base_dicomweb_url}/studies/"

# Plain JSON is requested here; the bearer token comes from the az CLI cell.
headers = dict(
    Accept="application/json",
    Authorization='Bearer ' + bearer_token,
)

response = requests.get(dicomweb_study_query_url, data="", headers=headers)
response

In [None]:
# Extract all metadata from DICOM server
# To avoid repetitive information, we extract the metadata of only one instance per series
import requests
import pandas as pd

# Function to fetch metadata for the first instance in a given series
def fetch_series_metadata(base_url, study_uid, series_uid, headers):
    """Return the DICOM JSON metadata of one instance of a series.

    The instance list of the series is queried first; the ``/metadata``
    resource of the first instance returned is then fetched.

    Args:
        base_url: Root URL of the DICOMweb service (without trailing slash).
        study_uid: Study Instance UID that contains the series.
        series_uid: Series Instance UID to sample.
        headers: HTTP headers sent with both requests.

    Returns:
        The parsed JSON body of the metadata response, or an empty dict when
        the series has no usable instance or a request fails. A message is
        printed in each of those cases.
    """
    series_url = f"{base_url}/studies/{study_uid}/series/{series_uid}"

    # Only the first instance is used, so ask the server for a single result
    # instead of downloading a full page of instance records.
    instances_response = requests.get(
        f"{series_url}/instances", params={"limit": 1}, headers=headers
    )

    # 204 No Content means an empty result set; there is no body to parse and
    # it is not an error.
    if instances_response.status_code == 204:
        print(f"No instances found for series {series_uid}")
        return {}
    if instances_response.status_code != 200:
        print(f"Error fetching instances for series {series_uid}: {instances_response.status_code}")
        return {}

    instances = instances_response.json()
    if not instances:
        print(f"No instances found for series {series_uid}")
        return {}

    # Tag 00080018 is read as the instance identifier. A record lacking it
    # must not abort the caller's whole crawl, so treat it like any other
    # per-series failure.
    try:
        instance_uid = instances[0]['00080018']['Value'][0]
    except (KeyError, IndexError, TypeError):
        print(f"No instance UID (00080018) returned for series {series_uid}")
        return {}

    # Fetch metadata for the selected instance
    instance_metadata_url = f"{series_url}/instances/{instance_uid}/metadata"
    instance_metadata_response = requests.get(instance_metadata_url, headers=headers)
    if instance_metadata_response.status_code != 200:
        print(f"Error fetching metadata for instance {instance_uid}: {instance_metadata_response.status_code}")
        return {}
    return instance_metadata_response.json()

# Function to extract all metadata
def extract_all_metadata(base_url, headers):
    """Collect one metadata record per series for every study on the server.

    Studies are listed page by page. For each study its series are listed,
    and ``fetch_series_metadata`` is called for each series.

    Args:
        base_url: Root URL of the DICOMweb service (without trailing slash).
        headers: HTTP headers sent with every request.

    Returns:
        A pandas DataFrame with one row per series for which metadata was
        obtained, with columns ``StudyUID``, ``SeriesUID`` and ``Metadata``.
        The frame is empty when nothing could be retrieved.
    """
    all_metadata = []
    limit = 200  # page size requested for the study listing
    offset = 0
    studies_url = f"{base_url}/studies"  # identical for every page

    while True:
        # Fetch the list of studies with pagination
        studies_response = requests.get(
            studies_url, params={"limit": limit, "offset": offset}, headers=headers
        )
        if studies_response.status_code == 204:
            # No content: the previous page was the last one
            break
        if studies_response.status_code != 200:
            print(f"Error fetching studies: {studies_response.status_code}")
            break

        studies = studies_response.json()
        if not studies:
            # A 200 with an empty list also means "no more studies"; without
            # this check the loop would keep requesting pages forever.
            break

        # Loop through each study and fetch series within it
        for study in studies:
            study_uid = study['0020000D']['Value'][0]

            # Fetch the list of series in the current study
            series_url = f"{base_url}/studies/{study_uid}/series"
            series_response = requests.get(series_url, headers=headers)
            if series_response.status_code == 204:
                # Study without series: nothing to record
                continue
            if series_response.status_code != 200:
                # Report the failure instead of skipping the study silently
                print(f"Error fetching series for study {study_uid}: {series_response.status_code}")
                continue

            # Extract metadata for each series and add to the list
            for series in series_response.json():
                series_uid = series['0020000E']['Value'][0]
                series_metadata = fetch_series_metadata(base_url, study_uid, series_uid, headers)
                if series_metadata:
                    all_metadata.append(
                        {"StudyUID": study_uid, "SeriesUID": series_uid, "Metadata": series_metadata}
                    )

        # Update offset for next batch of studies
        offset += limit

    # Convert the list of records to a DataFrame
    return pd.DataFrame(all_metadata)

In [None]:
# Request settings for the full crawl: same service root as the connection
# check, but this time responses are requested as DICOM JSON.
base_dicomweb_url = 'https://imgextpoc-adnidicom.dicom.azurehealthcareapis.com/v2'
headers = {'Authorization': f'Bearer {bearer_token}', 'Accept': 'application/dicom+json'}

# Crawl the server and show how many series records were collected.
metadata_df = extract_all_metadata(base_url=base_dicomweb_url, headers=headers)
metadata_df.shape

In [None]:
# Inspect the raw metadata stored for the first series record
metadata_df['Metadata'].iloc[0]

In [None]:
# Persist the raw per-series metadata. The target folder is created first so
# the write does not fail when ./files/ADNI does not exist yet.
all_metadata_path = Path('./files/ADNI/all_metadata.pkl')
all_metadata_path.parent.mkdir(parents=True, exist_ok=True)
metadata_df.to_pickle(all_metadata_path)

In [None]:
# Print the DataFrame dimensions, then display its first rows as the cell output
print(metadata_df.shape)
metadata_df.head()

In [None]:
# Function to flatten the nested dictionary of metadata
def flatten_metadata(row):
    """Expand one series record into a list of per-attribute records.

    Args:
        row: Mapping-like object with ``StudyUID``, ``SeriesUID`` and
            ``Metadata`` entries, where ``Metadata`` is an iterable of dicts
            that map a tag to an attribute dict.

    Returns:
        A list with one dict per tag, holding the keys ``StudyUID``,
        ``SeriesUID``, ``Tag``, ``vr`` and ``Value``. List values are joined
        into a single comma-separated string; a missing ``vr`` or ``Value``
        becomes ``None``.
    """

    def as_text(raw):
        # Multi-valued attributes arrive as lists; collapse them to one string
        return ', '.join(map(str, raw)) if isinstance(raw, list) else raw

    return [
        {
            'StudyUID': row['StudyUID'],
            'SeriesUID': row['SeriesUID'],
            'Tag': tag_id,
            'vr': element.get('vr'),
            'Value': as_text(element.get('Value')),
        }
        for dataset in row['Metadata']
        for tag_id, element in dataset.items()
    ]

# Build the long-format table: each row of metadata_df contributes one record
# per attribute returned by flatten_metadata.
flat_metadata_list = [
    record
    for _, series_row in metadata_df.iterrows()
    for record in flatten_metadata(series_row)
]

flat_metadata_df = pd.DataFrame(flat_metadata_list)
flat_metadata_df.shape

In [None]:
# Display the first rows of the long-format metadata table
flat_metadata_df.head()

In [None]:
# Persist the long-format metadata table. The target folder is created first
# so the write does not fail when ./files/ADNI does not exist yet.
long_metadata_path = Path('./files/ADNI/all_metadata_long.pkl')
long_metadata_path.parent.mkdir(parents=True, exist_ok=True)
flat_metadata_df.to_pickle(long_metadata_path)