# Part I: Build Medical Imaging Extension

### Prerequisites
* Download ODBC Driver 18 from web <https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/install-microsoft-odbc-driver-sql-server-macos?view=sql-server-ver16>
* OMOP CDM instance with the connection string and authentication information.
* Install SQL processing package, i.e., pyodbc.

In [6]:
import psycopg2
import pandas as pd

conn = psycopg2.connect(
    database="postgres",
    user="",
    password="",
    host="",
    port="5432",
    connect_timeout = 6000
)

## Steps
### Tables to update
1. Vocabulary
2. Concept_class
3. Concept
4. Concept_relationship

### Tables to create
1. Image_occurrence
2. Image_feature

> **Clinical domain tables and imaging tables will be filled after extracting DICOM images**

In [8]:
# Update VOCABULARY
sql = '''
    INSERT INTO adni.vocabulary (vocabulary_id, vocabulary_name, vocabulary_reference, vocabulary_version, vocabulary_concept_id)
    VALUES ('DICOM', 'Digital Imaging and Communications in Medicine (National Electrical Manufacturers Association)',  'https://www.dicomstandard.org/current', 'NEMA Standard PS3', 2128000000)
    '''
cursor = conn.cursor()
cursor.execute(sql)
conn.commit()

In [9]:
# Update CONCEPT_CLASS
sql = '''
    INSERT INTO adni.CONCEPT_CLASS (concept_class_id, concept_class_name, concept_class_concept_id)
    VALUES ('DICOM Attributes', 'DICOM Attributes', 2128000001),
           ('DICOM Value Sets', 'DICOM Value Sets', 2128000002)
    '''
cursor.execute(sql)
conn.commit()
cursor.close()

In [None]:
# Before inserting concepts into the Concept table, let's verify the data types
cursor = conn.cursor()

# SQL to get column data types
sql = """
SELECT COLUMN_NAME, DATA_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'CONCEPT' AND TABLE_SCHEMA = 'adni'
"""

# Execute the query
cursor.execute(sql)

# Fetch and print the results
for row in cursor.fetchall():
    print(f"Column: {row.COLUMN_NAME}, Data Type: {row.DATA_TYPE}")

# Clean up
cursor.close()
conn.close()

In [9]:
import pandas as pd

# Load the file for DICOM attributes and value sets
omop_table_staging = pd.read_csv('./files/omop_table_staging.csv')

# Adjust its data types
omop_table_staging['valid_end_date'] = pd.to_datetime('2099-12-31')
omop_table_staging['valid_start_date'] = pd.to_datetime('1993-01-01')
omop_table_staging['standard_concept'] = ' '
omop_table_staging['invalid_reason'] = ' '

# make sure string values have datatype of str
varchar_columns = ['concept_name', 'domain_id', 'vocabulary_id', 'concept_class_id', 'standard_concept', 'concept_code', 'invalid_reason']
for col in varchar_columns:
    omop_table_staging[col] = omop_table_staging[col].astype(str)

# handle NULLs for SQL 
omop_table_staging = omop_table_staging.where(pd.notnull(omop_table_staging), None)

In [10]:
#### Update the staging file to reformat DICOM tags since when pulling attributes, parentheses and commas are not used.
attributes = omop_table_staging[omop_table_staging['concept_class_id'] == 'DICOM Attributes'].copy()

def reformat_attribute(attribute):
    return attribute.translate({ord(i): None for i in '()'}).replace(',', '')

# Apply the reformatting function and create a new column
attributes['concept_code_reformatted'] = attributes['concept_code'].apply(reformat_attribute)

# Convert the reformatted concept codes to a list
selected_attributes = attributes['concept_code_reformatted'].tolist()

In [11]:
omop_table_staging['concept_code_reformatted'] = omop_table_staging['concept_code'].apply(reformat_attribute)

In [12]:
omop_table_staging.head()

Unnamed: 0,concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,standard_concept,concept_code,valid_start_date,valid_end_date,invalid_reason,concept_code_reformatted
0,2128000010,Length to End,Measurement,DICOM,DICOM Attributes,,"(0008,0001)",1993-01-01,2099-12-31,,80001
1,2128000011,Specific Character Set,Measurement,DICOM,DICOM Attributes,,"(0008,0005)",1993-01-01,2099-12-31,,80005
2,2128000012,Image Type,Measurement,DICOM,DICOM Attributes,,"(0008,0008)",1993-01-01,2099-12-31,,80008
3,2128000013,Instance Creation Date,Measurement,DICOM,DICOM Attributes,,"(0008,0012)",1993-01-01,2099-12-31,,80012
4,2128000014,Instance Creation Time,Measurement,DICOM,DICOM Attributes,,"(0008,0013)",1993-01-01,2099-12-31,,80013


In [13]:
cursor = conn.cursor()

sql = '''
    DELETE FROM adni.concept
    WHERE concept_id > 2000000000
'''
cursor.execute(sql)
conn.commit()

In [14]:
# Update CONCEPT
#cursor = conn.cursor()

sql = '''
    INSERT INTO adni.concept (concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,standard_concept,concept_code,valid_start_date,valid_end_date,invalid_reason) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    '''
for index, row in omop_table_staging.iterrows():
    cursor.execute(sql, (row['concept_id'], row['concept_name'], row['domain_id'], row['vocabulary_id'], row['concept_class_id'], row['standard_concept'], row['concept_code_reformatted'],
     row['valid_start_date'], row['valid_end_date'],row['invalid_reason']))

conn.commit()
cursor.close()
conn.close()

In [15]:
# check if DICOM concepts are well established
conn = psycopg2.connect(
    database="postgres",
    user="",
    password="",
    host="",
    port="5432",
    connect_timeout = 6000
)
cursor = conn.cursor()

sql_query = "SELECT * FROM adni.concept WHERE concept_id > 2000000000"
df_concept = pd.read_sql_query(sql_query, conn)
cursor.close()
conn.close()

  df_concept = pd.read_sql_query(sql_query, conn)


In [16]:
df_concept

Unnamed: 0,concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,standard_concept,concept_code,valid_start_date,valid_end_date,invalid_reason
0,2128000010,Length to End,Measurement,DICOM,DICOM Attributes,,00080001,1993-01-01,2099-12-31,
1,2128000011,Specific Character Set,Measurement,DICOM,DICOM Attributes,,00080005,1993-01-01,2099-12-31,
2,2128000012,Image Type,Measurement,DICOM,DICOM Attributes,,00080008,1993-01-01,2099-12-31,
3,2128000013,Instance Creation Date,Measurement,DICOM,DICOM Attributes,,00080012,1993-01-01,2099-12-31,
4,2128000014,Instance Creation Time,Measurement,DICOM,DICOM Attributes,,00080013,1993-01-01,2099-12-31,
...,...,...,...,...,...,...,...,...,...,...
8042,2128011218,Plane through Posterior Extent,Measurement,DICOM,DICOM Value Sets,,128129,1993-01-01,2099-12-31,
8043,2128011219,Plane through Anterior Extent,Measurement,DICOM,DICOM Value Sets,,128128,1993-01-01,2099-12-31,
8044,2128011220,Plane through Center,Measurement,DICOM,DICOM Value Sets,,128130,1993-01-01,2099-12-31,
8045,2128011221,Plane through Inferior Extent,Measurement,DICOM,DICOM Value Sets,,128121,1993-01-01,2099-12-31,


In [2]:
import psycopg2
conn = psycopg2.connect(
    database="postgres",
    user="",
    password="",
    host="",
    port="5432",
    connect_timeout = 6000
)
cursor = conn.cursor()

In [3]:
# Create Image_occurrence table

# postgre sql does not have varchar(max) so we will use varchar(10000)

ddl_statement = """
CREATE TABLE adni.image_occurrence(
    image_occurrence_id integer NOT NULL,
    person_id integer NOT NULL,
    procedure_occurrence_id integer NOT NULL,
    visit_occurrence_id integer,
    anatomic_site_concept_id integer,
    wadors_uri varchar(10000),
    local_path varchar(10000),
    image_occurrence_date date NOT NULL,
    image_study_uid varchar(10000) NOT NULL,
    image_series_uid varchar(10000) NOT NULL,
    modality_concept_id integer NOT NULL
);
"""

cursor = conn.cursor()
cursor.execute(ddl_statement)
conn.commit()

In [4]:
# Create Image_feature table

# postgre sql does not have varchar(max) so we will use varchar(10000)
# also don't have datetime datatype so we will use timestamp

ddl_statement = """
CREATE TABLE adni.image_feature(
    image_feature_id integer NOT NULL,
    person_id integer NOT NULL,
    image_occurrence_id integer NOT NULL,
    image_feature_event_field_concept_id integer,
    image_feature_event_id integer,
    image_feature_concept_id integer NOT NULL,
    image_feature_type_concept_id integer NOT NULL,
    image_finding_concept_id integer,
    image_finding_id integer,
    anatomic_site_concept_id integer,
    alg_system varchar(10000),
    alg_datetime timestamp
);
"""

cursor.execute(ddl_statement)
conn.commit()

# Clean up
cursor.close()
conn.close()