# Part II: Transform Non-Image Data into OMOP CDM

This part can be skipped if your non-imaging data already exists in OMOP CDM. In that case, go directly to the `Part II: Transform Image Data` notebook.

### Prerequisites
* Download and install the Microsoft ODBC Driver 18 for SQL Server: <https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/install-microsoft-odbc-driver-sql-server-macos?view=sql-server-ver16>
* An OMOP CDM instance, along with its connection string and authentication information. If your database restricts access by IP address, make sure your IP address is allowed before running the connection code.
* Install a Python package for SQL access over ODBC, e.g., `pyodbc`.
* The files or database where the non-image data is stored: demographics, clinical assessments, labs, visits, etc.

In [None]:
# !!! redact before publishing !!!

# Open the connection to the OMOP CDM database
import pyodbc

# Connection settings; replace the <...> placeholders with real credentials
driver = '{ODBC Driver 18 for SQL Server}'
server = 'tcp:ohdsicdm.database.windows.net'
database = 'ohdsicdm'
username = '<username>'
password = '<password>'

# Assemble the ODBC connection string as KEY=value pairs separated by ';'
conn_str = ';'.join([
    'DRIVER=' + driver,
    'SERVER=' + server,
    'DATABASE=' + database,
    'UID=' + username,
    'PWD=' + password,
])

# Connect, then grab a cursor for the statements executed in later cells
conn = pyodbc.connect(conn_str)
cursor = conn.cursor()

### Patient demographic data
- Downloaded the ADNI demographic file (`PTDEMOG_28Mar2024.csv`) and saved it in a local folder (`./files/`).
- This data will be transformed and loaded into the OMOP Person table.

In [None]:
import pandas as pd

# Fields of the demographics export used to build the staging table
required_columns = ['PTID', 'PTGENDER', 'PTDOB', 'PTDOBYY', 'PTRACCAT', 'PTETHCAT']

# Read the raw demographics export
patient_demo = pd.read_csv('./files/PTDEMOG_28Mar2024.csv')

# Subset the required fields into an independent staging copy
patient_demo_staging = patient_demo.loc[:, required_columns].copy()

In [None]:
# Build the PERSON staging columns from the ADNI demographic fields.

# Standard OMOP gender concepts only cover female/male, so keep the rows whose
# PTGENDER code is 1 or 2. The explicit .copy() makes the filtered frame
# independent, so the column assignments below do not write into a slice of
# the previous frame (avoids pandas' SettingWithCopyWarning).
patient_demo_staging = patient_demo_staging[patient_demo_staging['PTGENDER'].isin([1, 2])].copy()

# Assign an integer value for each unique PatientID.
# factorize() numbers the distinct PTIDs from 0 in order of first appearance;
# shift by one so person_id starts at 1.
patient_demo_staging['person_id'], _ = pd.factorize(patient_demo_staging['PTID'])
patient_demo_staging['person_id'] = patient_demo_staging['person_id'] + 1

# Codify gender - PTGENDER code 2 (female) -> 8532, code 1 (male) -> 8507.
# Cast to int so the column is typed like the other *_concept_id columns.
gender_codification = {2: 8532, 1: 8507}
patient_demo_staging['gender_concept_id'] = (
    patient_demo_staging['PTGENDER'].replace(gender_codification).astype(int)
)

# Birth year and month.
# NOTE(review): the month is taken from the first two characters of PTDOB, so
# this assumes PTDOB starts with a two-digit month (e.g. 'MM/YYYY') and has no
# missing values - confirm against the ADNI export.
patient_demo_staging['year_of_birth'] = patient_demo_staging['PTDOBYY']
patient_demo_staging['month_of_birth'] = patient_demo_staging['PTDOB'].str.slice(0, 2).astype(int)

# Race category: PTRACCAT codes 1-5 map to concept ids; any other value -> 0
race_codification = {1: 8657, 2: 8515, 3: 8557, 4: 8516, 5: 8527}
patient_demo_staging['race_concept_id'] = (
    patient_demo_staging['PTRACCAT'].map(lambda x: race_codification.get(x, 0)).astype(int)
)

# Ethnicity: PTETHCAT codes 1-2 map to concept ids; any other value -> 0
ethnicity_codification = {1: 38003563, 2: 38003564}
patient_demo_staging['ethnicity_concept_id'] = (
    patient_demo_staging['PTETHCAT'].map(lambda x: ethnicity_codification.get(x, 0)).astype(int)
)

# Keep a single row per PTID (the first one encountered)
patient_demo_staging = patient_demo_staging.drop_duplicates(subset='PTID', keep='first')

# Source name
patient_demo_staging['source'] = 'ADNI'

patient_demo_staging.head()

In [None]:
# Insert the staged demographics into the OMOP PERSON table
cursor = conn.cursor()

sql = '''
    INSERT INTO dbo.person (person_id, gender_concept_id, year_of_birth, month_of_birth, race_concept_id, ethnicity_concept_id, gender_source_value) 
    VALUES (?,?,?,?,?,?,?)
    '''

# Staging columns in the same order as the INSERT column list;
# the raw PTGENDER code is stored as gender_source_value.
person_columns = [
    'person_id',
    'gender_concept_id',
    'year_of_birth',
    'month_of_birth',
    'race_concept_id',
    'ethnicity_concept_id',
    'PTGENDER',
]

# One parameter tuple per staged row. itertuples() keeps each column's own
# dtype instead of up-casting every row to a common type.
person_rows = list(patient_demo_staging[person_columns].itertuples(index=False, name=None))

try:
    # executemany() rejects an empty parameter list, so skip it when nothing is staged
    if person_rows:
        cursor.executemany(sql, person_rows)
    conn.commit()
except Exception:
    # Discard the partial load; otherwise the rows inserted before the failure
    # stay pending on this connection and a later conn.commit() would persist them.
    conn.rollback()
    raise

In [None]:
# Create registry_idmap table if it doesn't exist and needed
# This will keep track of which OMOP Person_id belongs to ADNI PTID

# The OBJECT_ID guard makes the DDL a no-op when the table is already there,
# so this cell can be re-run without a "table already exists" error.
ddl_statement = """
IF OBJECT_ID('dbo.registry_idmap', 'U') IS NULL
CREATE TABLE dbo.registry_idmap(
    source_id varchar(250) NOT NULL,
    person_id integer NOT NULL,
    source_name varchar(250)
);
"""

cursor.execute(ddl_statement)
conn.commit()

sql = '''
    INSERT INTO dbo.registry_idmap (source_id, person_id, source_name) 
    VALUES (?,?,?)
    '''

# One (PTID, person_id, source) tuple per staged patient.
# NOTE: the table has no uniqueness constraint, so re-running this insert
# appends the same mappings again.
idmap_rows = list(
    patient_demo_staging[['PTID', 'person_id', 'source']].itertuples(index=False, name=None)
)

try:
    # executemany() rejects an empty parameter list, so skip it when nothing is staged
    if idmap_rows:
        cursor.executemany(sql, idmap_rows)
    conn.commit()
except Exception:
    # Discard the partial load so a later commit on this connection cannot persist it
    conn.rollback()
    raise

In [None]:
# Read both tables back into DataFrames to verify the load
registry_sql = "SELECT * FROM registry_idmap"
person_sql = "SELECT * FROM person"

df_registry_idmap = pd.read_sql_query(registry_sql, conn)
df_person = pd.read_sql_query(person_sql, conn)

# Done with the database: release the cursor, then the connection
cursor.close()
conn.close()

# Display the id mapping that was read back
df_registry_idmap