# Upload DICOM metadata

* Download the DICOM metadata you transformed to OMOP CDM
* Download ODBC Driver 18 from web <https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/install-microsoft-odbc-driver-sql-server-macos?view=sql-server-ver16>
* OMOP CDM instance with the connection string and authentication information. If your database access limits IP addresses, make sure to add your IP address before running the connection strings.
* OMOP CDM Person table should be updated with demographic information, if available, before running this code. If not, you can upload the Person table using DICOM Patient metadata information.
* Install a Python database driver package, e.g., pyodbc (the connection cell in this notebook uses psycopg2).

In [3]:
# Read the two pickled staging tables (image occurrences and image features)
import pandas as pd

image_occurrence, image_feature = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        './files/OMOP CDM Staging/image_occurrence_staging_v3.pkl',
        './files/ADNI/image_series_metadata_v3.pkl',
    )
)

In [11]:
# Show (rows, columns) of each staging table as a quick sanity check
print(*(staging_table.shape for staging_table in (image_occurrence, image_feature)))

(4756, 17) (283948, 22)


In [5]:
# Replace missing concept_id_y entries with 0; this column is later inserted
# as measurement.value_as_concept_id, so it should not carry NaN.
image_feature['concept_id_y'] = image_feature.loc[:, 'concept_id_y'].fillna(0)

In [16]:
# Open the database connection and create the cursor used by the cells below.
# The empty strings are placeholders: fill in your own connection details.
import psycopg2

conn = psycopg2.connect(
    **{
        "database": "",
        "user": "",
        "password": "",
        "host": "",
        "port": "",
        "connect_timeout": 6000,
    }
)
cursor = conn.cursor()

In [None]:
# Update PROCEDURE_OCCURRENCE: one row per staged image occurrence
sql = '''
    INSERT INTO dbo.procedure_occurrence (procedure_occurrence_id, person_id, procedure_concept_id, procedure_date, procedure_type_concept_id) 
    VALUES (%s,%s,%s,%s,%s)
    '''

# Staging columns, in the same order as the INSERT column list above.
# NOTE(review): procedure_type_concept_id is populated from
# visit_type_concept_id -- confirm this mapping is intended.
procedure_source_columns = (
    'procedure_occurrence_id',
    'person_id',
    'modality_concept_id',
    'image_occurrence_date',
    'visit_type_concept_id',
)

for _, staged_row in image_occurrence.iterrows():
    params = tuple(staged_row[column] for column in procedure_source_columns)
    cursor.execute(sql, params)

# Commit once, after every row has been sent
conn.commit()

In [None]:
# Update VISIT_OCCURRENCE: one row per staged image occurrence
sql = '''
    INSERT INTO dbo.visit_occurrence (visit_occurrence_id, person_id, visit_concept_id, visit_start_date, visit_end_date, visit_type_concept_id) 
    VALUES (%s,%s,%s,%s,%s,%s)
    '''

# Staging columns, in the same order as the INSERT column list above.
# image_occurrence_date appears twice: it is used as both the visit start
# date and the visit end date.
visit_source_columns = (
    'visit_occurrence_id',
    'person_id',
    'visit_concept_id',
    'image_occurrence_date',
    'image_occurrence_date',
    'visit_type_concept_id',
)

for _, staged_row in image_occurrence.iterrows():
    params = tuple(staged_row[column] for column in visit_source_columns)
    cursor.execute(sql, params)

# Commit once, after every row has been sent
conn.commit()

In [None]:
# Update IMAGE_OCCURRENCE: one row per staged image occurrence
sql = '''
    INSERT INTO dbo.image_occurrence (image_occurrence_id, person_id, procedure_occurrence_id, anatomic_site_concept_id, wadors_uri, image_occurrence_date, image_study_uid, image_series_uid, modality_concept_id) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
    '''

# Staging columns, in the same order as the INSERT column list above.
# StudyUID / SeriesUID feed image_study_uid / image_series_uid.
image_occurrence_source_columns = (
    'image_occurrence_id',
    'person_id',
    'procedure_occurrence_id',
    'anatomic_site_concept_id',
    'wadors_uri',
    'image_occurrence_date',
    'StudyUID',
    'SeriesUID',
    'modality_concept_id',
)

for _, staged_row in image_occurrence.iterrows():
    params = tuple(staged_row[column] for column in image_occurrence_source_columns)
    cursor.execute(sql, params)

# Commit once, after every row has been sent
conn.commit()

In [9]:
# Largest image_feature_event_id in the staging table (this column is used
# as measurement_id and image_feature_id in the inserts below)
image_feature.loc[:, 'image_feature_event_id'].max()

283948

In [12]:
# Update MEASUREMENT for numeric values
sql = '''
    INSERT INTO dbo.measurement (measurement_id, person_id, measurement_concept_id, measurement_date, measurement_type_concept_id, value_as_number, value_as_concept_id, measurement_source_value, measurement_source_concept_id) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
'''

# Define batch size
batch_size = 20000
batch = []

# Only rows that actually carry a numeric value are inserted by this cell
numeric_features = image_feature[~image_feature['value_as_number'].isna()]

# Iterate through the filtered DataFrame
for _, row in numeric_features.iterrows():
    # Append row data to the batch (order matches the INSERT column list;
    # concept_id_x is used for both measurement_concept_id and
    # measurement_source_concept_id)
    batch.append((
        row['image_feature_event_id'],
        row['person_id'],
        row['concept_id_x'],
        row['image_occurrence_date'],
        row['image_feature_event_type_id'],
        row['value_as_number'],
        row['concept_id_y'],
        row['measurement_source_value'],
        row['concept_id_x']
    ))

    # Send a full batch and start a new one
    if len(batch) >= batch_size:
        cursor.executemany(sql, batch)
        conn.commit()  # Commit after each batch
        batch = []

# Send whatever is left over. This replaces the former in-loop test
# `index == len(image_feature) - 1`, which compared the index label of a
# row from the *filtered* frame against the length of the *unfiltered* one:
# when the last row of image_feature has no numeric value (or the index is
# not 0..n-1), that test never fired and the final partial batch was lost.
if batch:
    cursor.executemany(sql, batch)
    conn.commit()
    batch = []

In [14]:
# Update MEASUREMENT for non-numeric values
sql = '''
    INSERT INTO dbo.measurement (measurement_id, person_id, measurement_concept_id, measurement_date, measurement_type_concept_id, value_as_concept_id, measurement_source_value, measurement_source_concept_id) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
'''

# Define batch size
batch_size = 20000
batch = []

# Only rows without a numeric value are inserted by this cell; the
# value_as_number column is therefore omitted from the INSERT
non_numeric_features = image_feature[image_feature['value_as_number'].isna()]

# Iterate through the filtered DataFrame
for _, row in non_numeric_features.iterrows():
    # Append row data to the batch (order matches the INSERT column list;
    # concept_id_x is used for both measurement_concept_id and
    # measurement_source_concept_id)
    batch.append((
        row['image_feature_event_id'],
        row['person_id'],
        row['concept_id_x'],
        row['image_occurrence_date'],
        row['image_feature_event_type_id'],
        row['concept_id_y'],
        row['measurement_source_value'],
        row['concept_id_x']
    ))

    # Send a full batch and start a new one
    if len(batch) >= batch_size:
        cursor.executemany(sql, batch)
        conn.commit()  # Commit after each batch
        batch = []

# Send whatever is left over. This replaces the former in-loop test
# `index == len(image_feature) - 1`, which compared the index label of a
# row from the *filtered* frame against the length of the *unfiltered* one:
# when the last row of image_feature has a numeric value (or the index is
# not 0..n-1), that test never fired and the final partial batch was lost.
if batch:
    cursor.executemany(sql, batch)
    conn.commit()
    batch = []

In [17]:
# Update IMAGE_FEATURE
sql = '''
    INSERT INTO dbo.image_feature (image_feature_id, person_id, image_occurrence_id, image_feature_event_id,
      image_feature_event_field_concept_id, image_feature_concept_id, image_feature_type_concept_id, anatomic_site_concept_id) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
'''

# Define batch size
batch_size = 20000
batch = []

# Iterate through the DataFrame
for _, row in image_feature.iterrows():
    # Append row data to the batch (order matches the INSERT column list).
    # image_feature_event_id is used both as image_feature_id and as
    # image_feature_event_id.
    batch.append((
        row['image_feature_event_id'],
        row['person_id'],
        row['image_occurrence_id'],
        row['image_feature_event_id'],
        row['image_feature_event_field_id'],
        row['concept_id_x'],
        row['image_feature_event_type_id'],
        row['anatomic_site_concept_id'],
    ))

    # Send a full batch and start a new one
    if len(batch) >= batch_size:
        cursor.executemany(sql, batch)
        conn.commit()  # Commit after each batch
        batch = []

# Send whatever is left over. This replaces the former in-loop test
# `index == len(image_feature) - 1`, which only fired when the DataFrame
# had a default 0..n-1 index; with any other index the final partial batch
# was silently never inserted.
if batch:
    cursor.executemany(sql, batch)
    conn.commit()
    batch = []

In [18]:
# Release database resources: the cursor first, then its connection
for db_resource in (cursor, conn):
    db_resource.close()