# Transform Imaging Features from Segmentation Algorithm Results

After a cohort was selected in Atlas using DICOM concepts, its images were run through the OpenMAP T1 segmentation algorithm, and the result file was added to the `files` folder. This notebook transforms those results and uploads them.

* Prerequisites
  * DICOM custom concepts loaded to OMOP CDM
  * Imaging extension tables are updated for the images used for the algorithm

In [3]:
# Read the OpenMAP brain-volume segmentation results into a DataFrame.
# Later cells treat `study_id` / `series_id` as identifiers and every other
# column as one brain-region volume.
import pandas as pd

volumes_csv = './files/OpenMAP/cohort1_volumes_0726.csv'
new_vol = pd.read_csv(volumes_csv)

In [16]:
# OpenMAP uses its own set of brain regions that are not in the standard
# concepts, so they will be added as custom concepts. Every column other than
# the two identifier columns is taken as a region name (file order is kept).
id_columns = ('series_id', 'study_id')
neuroanatomics = list(filter(lambda name: name not in id_columns, new_vol.columns))
# Display how many regions were found.
len(neuroanatomics)

280

In [17]:
# Give each region a custom concept_id, counting up from 2129000000 in the
# same order as `neuroanatomics`.
first_concept_id = 2129000000
neuroanatomics_id = list(range(first_concept_id, first_concept_id + len(neuroanatomics)))

# One anatomic-site concept row per region. The region name is used as both
# concept_name and concept_code; the remaining fields are constants that
# pandas broadcasts to every row.
concept_fields = dict(
    concept_id=neuroanatomics_id,
    concept_name=neuroanatomics,
    domain_id="Spec Anatomic Site",
    vocabulary_id="0",
    concept_class_id="Body Structure",
    concept_code=neuroanatomics,
    valid_start_date="2024-06-01",
    valid_end_date="2099-12-31",
)
neuro_concept_ids = pd.DataFrame(concept_fields)

neuro_concept_ids.head()

Unnamed: 0,concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,concept_code,valid_start_date,valid_end_date
0,2129000000,SFG_L,Spec Anatomic Site,0,Body Structure,SFG_L,2024-06-01,2099-12-31
1,2129000001,SFG_R,Spec Anatomic Site,0,Body Structure,SFG_R,2024-06-01,2099-12-31
2,2129000002,SFG_PFC_L,Spec Anatomic Site,0,Body Structure,SFG_PFC_L,2024-06-01,2099-12-31
3,2129000003,SFG_PFC_R,Spec Anatomic Site,0,Body Structure,SFG_PFC_R,2024-06-01,2099-12-31
4,2129000004,SFG_pole_L,Spec Anatomic Site,0,Body Structure,SFG_pole_L,2024-06-01,2099-12-31


In [18]:
# Derive one "Volume Estimation of <region>" Measurement concept per anatomic
# concept. Each id is the anatomic concept_id + 1000, so the two id ranges
# only stay disjoint while there are at most 1000 regions.
neuro_vol_concept_ids = pd.DataFrame()
neuro_vol_concept_ids['concept_id'] = neuro_concept_ids['concept_id'] + 1000
neuro_vol_concept_ids['concept_name'] = neuro_concept_ids['concept_name'].apply(lambda x: f"Volume Estimation of {x}")
neuro_vol_concept_ids['domain_id'] = 'Measurement'
# Keep vocabulary_id a string, matching the "0" used in neuro_concept_ids, so
# the concatenated concept table has a single consistent type in this column.
neuro_vol_concept_ids['vocabulary_id'] = "0"
neuro_vol_concept_ids['concept_class_id'] = 'Clinical Observation'
neuro_vol_concept_ids['concept_code'] = neuro_concept_ids['concept_code'].apply(lambda x: f"Volume Estimation of {x}")
neuro_vol_concept_ids['valid_start_date'] = "2024-06-01"
neuro_vol_concept_ids['valid_end_date'] = "2099-12-31"
neuro_vol_concept_ids.head()

Unnamed: 0,concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,concept_code,valid_start_date,valid_end_date
0,2129001000,Volume Estimation of SFG_L,Measurement,0,Clinical Observation,Volume Estimation of SFG_L,2024-06-01,2099-12-31
1,2129001001,Volume Estimation of SFG_R,Measurement,0,Clinical Observation,Volume Estimation of SFG_R,2024-06-01,2099-12-31
2,2129001002,Volume Estimation of SFG_PFC_L,Measurement,0,Clinical Observation,Volume Estimation of SFG_PFC_L,2024-06-01,2099-12-31
3,2129001003,Volume Estimation of SFG_PFC_R,Measurement,0,Clinical Observation,Volume Estimation of SFG_PFC_R,2024-06-01,2099-12-31
4,2129001004,Volume Estimation of SFG_pole_L,Measurement,0,Clinical Observation,Volume Estimation of SFG_pole_L,2024-06-01,2099-12-31


In [None]:
# Stack the anatomic-site concepts and the volume-measurement concepts into a
# single frame (each part keeps its own row labels).
concept_frames = [neuro_concept_ids, neuro_vol_concept_ids]
new_concepts = pd.concat(concept_frames)
print(new_concepts.shape)
new_concepts.head()

In [27]:
# Write the combined concept rows to the staging area. The DataFrame index is
# left out: after the concat it is only each source frame's row number
# (duplicated across the two parts) and is not one of the concept columns.
new_concepts.to_csv('./files/OMOP CDM Staging/concept_vol_brain.csv', index=False)

## Transform brain segmentation values

In [None]:
# Reshape wide -> long: one row per (study, series, region), with the region
# column name in `brain_vol_name` and its cell in `value`.
new_vol_long = new_vol.melt(
    id_vars=['study_id', 'series_id'],
    var_name='brain_vol_name',
    value_name='value',
)
new_vol_long.head()

In [7]:
# Sanity check: display the number of distinct series in the long table.
series_ids = new_vol_long['series_id']
series_ids.nunique()

550

In [None]:
# Attach the anatomic concept_id to every row by matching the region name
# against concept_code, then derive the matching volume concept id using the
# same +1000 offset that was used when the volume concepts were built.
code_to_concept = neuro_concept_ids[['concept_code', 'concept_id']]
new_vol_long = new_vol_long.merge(
    code_to_concept,
    how='left',
    left_on='brain_vol_name',
    right_on='concept_code',
)
new_vol_long['vol_concept_id'] = new_vol_long['concept_id'].add(1000)
new_vol_long.head()

In [None]:
# Import image_occurrence and the current maximum ids from the database

# Connect to the database
import psycopg2

conn = psycopg2.connect(
    database="",
    user="",
    password="",
    host="",
    port="",
    connect_timeout = 6000
)

# The queries run inside try/finally so the connection is closed even when
# one of them raises. No cursor is opened here: pandas issues the queries
# through the connection itself.
try:
    # Full image_occurrence table; used to map each series to its occurrence,
    # date and person.
    image_occurrence = pd.read_sql_query("select * from dbo.image_occurrence", conn)

    # Current highest ids (1x1 DataFrames), used as the starting point when
    # numbering the new rows.
    image_feature_id_max = pd.read_sql_query("select max(image_feature_id) from dbo.image_feature", conn)
    measurement_id_max = pd.read_sql_query("select max(measurement_id) from dbo.measurement", conn)
finally:
    # close the connection
    conn.close()

In [None]:
# Look up occurrence id, date and person for every row by matching series_id
# to image_series_uid. This is a left join, so rows whose series has no match
# are kept with missing values in the added columns.
occurrence_columns = ['image_series_uid', 'image_occurrence_id', 'image_occurrence_date', 'person_id']
new_vol_long = new_vol_long.merge(
    image_occurrence[occurrence_columns],
    how='left',
    left_on='series_id',
    right_on='image_series_uid',
)
new_vol_long.head()

In [18]:
# Unwrap the single-cell query results into plain ints. max() over an empty
# table comes back as NULL (None/NaN), which int() cannot convert, so treat
# that case as 0; the new ids are numbered from this value + 1.
measurement_max_cell = measurement_id_max.iloc[0, 0]
image_feature_max_cell = image_feature_id_max.iloc[0, 0]
measurement_id_max = 0 if pd.isna(measurement_max_cell) else int(measurement_max_cell)
image_feature_id_max = 0 if pd.isna(image_feature_max_cell) else int(image_feature_max_cell)

In [None]:
# NOTE(review): 1147330 and 32880 are hard-coded concept ids - presumably the
# field concept for measurement.measurement_id and the feature type concept;
# confirm against the vocabulary.
new_vol_long['image_feature_event_field_concept_id'] = 1147330
new_vol_long['image_feature_type_concept_id'] = 32880

# Number the new rows consecutively, continuing after the current maximum id
# of each target table.
row_count = len(new_vol_long)
first_measurement_id = measurement_id_max + 1
first_image_feature_id = image_feature_id_max + 1
new_vol_long['measurement_id'] = range(first_measurement_id, first_measurement_id + row_count)
new_vol_long['image_feature_id'] = range(first_image_feature_id, first_image_feature_id + row_count)
new_vol_long.head()

## Upload the transformed datasets

In [None]:
import psycopg2

# Connection settings are left blank in this notebook.
connection_settings = {
    "database": "",
    "user": "",
    "password": "",
    "host": "",
    "port": "",
    "connect_timeout": 6000,
}

# Open the connection and the cursor shared by the upload cells below.
conn = psycopg2.connect(**connection_settings)
cursor = conn.cursor()

In [None]:
# Update CONCEPT: insert every new concept row, one statement per row, and
# commit once at the end.

sql = '''
    INSERT INTO dbo.concept (concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,concept_code,valid_start_date,valid_end_date) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
    '''
# Column order must match the column list in the INSERT statement.
concept_columns = [
    'concept_id',
    'concept_name',
    'domain_id',
    'vocabulary_id',
    'concept_class_id',
    'concept_code',
    'valid_start_date',
    'valid_end_date',
]
for _, concept in new_concepts.iterrows():
    cursor.execute(sql, tuple(concept[name] for name in concept_columns))

conn.commit()

In [None]:
# Update MEASUREMENT: insert one measurement row per (series, region) volume,
# sending the rows in batches and committing after each batch.
sql = '''
    INSERT INTO dbo.measurement (measurement_id, person_id, measurement_concept_id, measurement_date, measurement_type_concept_id, value_as_number, measurement_source_value, measurement_source_concept_id) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
'''

# Define batch size
batch_size = 20000
batch = []

# Iterate through the DataFrame
for _, row in new_vol_long.iterrows():
    # Append row data to the batch, in the same order as the INSERT columns.
    # NOTE(review): `value` is passed for both value_as_number and
    # measurement_source_value - confirm that is intended.
    batch.append((row['measurement_id'], row['person_id'], row['vol_concept_id'], row['image_occurrence_date'], row['image_feature_type_concept_id'], row['value'], row['value'], row['vol_concept_id']))

    # Send a full batch as soon as it is ready
    if len(batch) == batch_size:
        cursor.executemany(sql, batch)
        conn.commit()  # Commit after each batch
        batch = []  # Clear the batch for the next set of rows

# Send whatever is left over. Doing this after the loop (rather than comparing
# the index label with len - 1) means the final partial batch is inserted no
# matter how the DataFrame is indexed.
if batch:
    cursor.executemany(sql, batch)
    conn.commit()
    batch = []

In [None]:
# Update IMAGE_FEATURE: insert one image_feature row per (series, region)
# volume, sending the rows in batches and committing after each batch.
sql = '''
    INSERT INTO dbo.image_feature (image_feature_id, person_id, image_occurrence_id, image_feature_event_id,
      image_feature_event_field_concept_id, image_feature_concept_id, image_feature_type_concept_id, anatomic_site_concept_id) 
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
'''

# Define batch size
batch_size = 20000
batch = []

# Iterate through the DataFrame
for _, row in new_vol_long.iterrows():
    # Append row data to the batch, in the same order as the INSERT columns:
    # measurement_id goes into image_feature_event_id, the volume concept is
    # the feature concept and the anatomic concept is the anatomic site.
    batch.append((row['image_feature_id'], row['person_id'], row['image_occurrence_id'], row['measurement_id'],
                  row['image_feature_event_field_concept_id'], row['vol_concept_id'], row['image_feature_type_concept_id'], row['concept_id']))

    # Send a full batch as soon as it is ready
    if len(batch) == batch_size:
        cursor.executemany(sql, batch)
        conn.commit()  # Commit after each batch
        batch = []  # Clear the batch for the next set of rows

# Send whatever is left over. Doing this after the loop (rather than comparing
# the index label with len - 1) means the final partial batch is inserted no
# matter how the DataFrame is indexed.
if batch:
    cursor.executemany(sql, batch)
    conn.commit()
    batch = []

In [None]:
# Release the database resources: the cursor first, then the connection.
for handle in (cursor, conn):
    handle.close()