In [1]:
import duckdb
import pandas as pd

In [2]:
con = duckdb.connect(database='/workspaces/practice/terminology.db')

In [3]:
con.execute('create schema if not exists medispan')

<duckdb.DuckDBPyConnection at 0xffff5365f1b0>

In [4]:
# Create the ddauom table
con.execute("""
CREATE OR REPLACE TABLE medispan.ddauom (
  unit_of_measure_id VARCHAR(5),
  transaction_code VARCHAR(1),
  unit_of_measure_description VARCHAR(50),
  unit_of_measure_type_code VARCHAR(2),
  reserve VARCHAR(22)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/DDA/USAENG/DB/DDAAUOM', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    unit_of_measure_id = line[0:5]
    transaction_code = line[5:6]
    unit_of_measure_description = line[11:56].strip()
    unit_of_measure_type_code = line[56:58]
    reserve = line[58:80]

    # Insert the fields into the ddauom table using parameterized queries
    con.execute(
        "INSERT INTO medispan.ddauom VALUES (?, ?, ?, ?, ?)",
        (
            unit_of_measure_id,
            transaction_code,
            unit_of_measure_description,
            unit_of_measure_type_code,
            reserve
        )
    )

con.execute('select * from medispan.ddauom limit 5').fetchdf()

Unnamed: 0,unit_of_measure_id,transaction_code,unit_of_measure_description,unit_of_measure_type_code,reserve
0,1,,mgs,,
1,2,,milligram,,
2,3,,milligrams,,
3,4,,mcgs,,
4,5,,microgram,,


In [5]:
# Create the ddadict table
con.execute("""
CREATE OR REPLACE TABLE medispan.ddadict (
  field_identifier VARCHAR(4),
  field_description VARCHAR(35),
  field_type VARCHAR(1),
  field_length INTEGER,
  implied_decimal_flag VARCHAR(1),
  decimal_places INTEGER,
  field_validation_flag VARCHAR(1),
  field_abbreviation_flag VARCHAR(1),
  reserve VARCHAR(16)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/DDA/USAENG/DB/DDADICT', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    field_identifier = line[0:4]
    field_description = line[4:39]
    field_type = line[39]
    field_length = int(line[40:43])
    implied_decimal_flag = line[43]
    decimal_places = int(line[44:46])
    field_validation_flag = line[46]
    field_abbreviation_flag = line[47]
    reserve = line[48:64]

    # Insert the fields into the ddadict table using parameterized queries
    con.execute(
        "INSERT INTO medispan.ddadict VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            field_identifier,
            field_description,
            field_type,
            field_length,
            implied_decimal_flag,
            decimal_places,
            field_validation_flag,
            field_abbreviation_flag,
            reserve
        )
    )

# Test the ddadict table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.ddadict LIMIT 5').fetchdf()

Unnamed: 0,field_identifier,field_description,field_type,field_length,implied_decimal_flag,decimal_places,field_validation_flag,field_abbreviation_flag,reserve
0,A001,Record Type,C,3,N,0,N,N,
1,A004,Reserve-1,C,1,N,0,N,N,
2,A005,Sequence Number,N,3,N,0,N,N,
3,A008,Reserve-2,C,1,N,0,N,N,
4,A009,Comment Marker,C,1,N,0,N,N,


In [6]:
con.execute("""
CREATE or replace TABLE medispan.ddadose (
  dose_id VARCHAR,
  transaction_code CHAR(1),
  profile_id VARCHAR,
  generic_product_id CHAR(14),
  reserve_1 CHAR(6),
  route_id VARCHAR,
  dose_type_code CHAR(2),
  daily_dose_low NUMERIC,
  daily_dose_low_uom_id VARCHAR,
  daily_dose_high NUMERIC,
  daily_dose_high_uom_id VARCHAR,
  daily_dose_form_low NUMERIC,
  daily_dose_form_low_uom_id VARCHAR,
  daily_dose_form_high NUMERIC,
  daily_dose_form_high_uom_id VARCHAR,
  max_daily_dose NUMERIC,
  max_daily_dose_uom_id VARCHAR,
  max_daily_dose_form NUMERIC,
  max_daily_dose_form_uom_id VARCHAR,
  max_single_dose NUMERIC,
  max_single_dose_uom_id VARCHAR,
  max_single_dose_form NUMERIC,
  max_single_dose_form_uom_id VARCHAR,
  max_lifetime_dose NUMERIC,
  max_lifetime_dose_uom_id VARCHAR,
  max_lifetime_dose_form NUMERIC,
  max_lifetime_dose_form_uom_id VARCHAR,
  frequency_low NUMERIC,
  frequency_high NUMERIC,
  max_frequency NUMERIC,
  duration_low NUMERIC,
  duration_high NUMERIC,
  max_duration NUMERIC,
  drug_half_life_low NUMERIC,
  drug_half_life_high NUMERIC,
  drug_half_life_uom_code CHAR(2),
  reserve_2 CHAR(66)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/DDA/USAENG/DB/DDADOSE', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    dose_id = line[0:10]
    transaction_code = line[10:11]
    profile_id = line[11:21]
    generic_product_id = line[21:35]
    reserve_1 = line[35:41]
    route_id = line[41:46]
    dose_type_code = line[46:48]
    daily_dose_low = float(line[48:57])
    daily_dose_low_uom_id = line[57:62]
    daily_dose_high = float(line[62:71])
    daily_dose_high_uom_id = line[71:76]
    daily_dose_form_low = float(line[76:85])
    daily_dose_form_low_uom_id = line[85:90]
    daily_dose_form_high = float(line[90:99])
    daily_dose_form_high_uom_id = line[99:104]
    max_daily_dose = float(line[104:113])
    max_daily_dose_uom_id = line[113:118]
    max_daily_dose_form = float(line[118:127])
    max_daily_dose_form_uom_id = line[127:132]
    max_single_dose = float(line[132:141])
    max_single_dose_uom_id = line[141:146]
    max_single_dose_form = float(line[146:155])
    max_single_dose_form_uom_id = line[155:160]
    max_lifetime_dose = float(line[160:169])
    max_lifetime_dose_uom_id = line[169:174]
    max_lifetime_dose_form = float(line[174:183])
    max_lifetime_dose_form_uom_id = line[183:188]
    frequency_low = float(line[188:197])
    frequency_high = float(line[197:206])
    max_frequency = float(line[206:215])
    duration_low = float(line[215:224])
    duration_high = float(line[224:233])
    max_duration = float(line[233:242])
    drug_half_life_low = float(line[242:247])
    drug_half_life_high = float(line[247:252])
    drug_half_life_uom_code = line[252:254]
    reserve_2 = line[254:320]

    # Insert the fields into the ddadose table using parameterized queries
    con.execute(
        "INSERT INTO medispan.ddadose VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            dose_id,
            transaction_code,
            profile_id,
            generic_product_id,
            reserve_1,
            route_id,
            dose_type_code,
            daily_dose_low,
            daily_dose_low_uom_id,
            daily_dose_high,
            daily_dose_high_uom_id,
            daily_dose_form_low,
            daily_dose_form_low_uom_id,
            daily_dose_form_high,
            daily_dose_form_high_uom_id,
            max_daily_dose,
            max_daily_dose_uom_id,
            max_daily_dose_form,
            max_daily_dose_form_uom_id,
            max_single_dose,
            max_single_dose_uom_id,
            max_single_dose_form,
            max_single_dose_form_uom_id,
            max_lifetime_dose,
            max_lifetime_dose_uom_id,
            max_lifetime_dose_form,
            max_lifetime_dose_form_uom_id,
            frequency_low,
            frequency_high,
            max_frequency,
            duration_low,
            duration_high,
            max_duration,
            drug_half_life_low,
            drug_half_life_high,
            drug_half_life_uom_code,
            reserve_2
        )
    )

# Test the ddadose table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.ddadose LIMIT 5').fetchdf()

Unnamed: 0,dose_id,transaction_code,profile_id,generic_product_id,reserve_1,route_id,dose_type_code,daily_dose_low,daily_dose_low_uom_id,daily_dose_high,...,frequency_low,frequency_high,max_frequency,duration_low,duration_high,max_duration,drug_half_life_low,drug_half_life_high,drug_half_life_uom_code,reserve_2
0,46,,260,44505050100520,,24,4,50000.0,1,50000.0,...,10000.0,10000.0,0.0,10000.0,999990000.0,0.0,0.0,0.0,99,...
1,47,,1349,44505050100520,,24,4,50000.0,1,50000.0,...,10000.0,10000.0,0.0,10000.0,999990000.0,0.0,0.0,0.0,99,...
2,50,,260,44505050100330,,24,4,50000.0,1,50000.0,...,10000.0,10000.0,0.0,10000.0,999999999.0,0.0,0.0,0.0,99,...
3,51,,1349,44505050100330,,24,4,50000.0,1,50000.0,...,10000.0,10000.0,0.0,10000.0,999999999.0,0.0,0.0,0.0,99,...
4,161,,269,28100010100305,,24,4,25000.0,4,60000.0,...,10000.0,10000.0,0.0,10000.0,999999999.0,0.0,0.0,0.0,99,...


In [7]:
con.execute("""
CREATE or replace TABLE medispan.ddadpp (
    profile_id       VARCHAR(10),
    transaction_code CHAR(1),
    indication_code  VARCHAR(7),
    special_condition_code VARCHAR(7),
    age_type_code CHAR(2),
    age_days_low     INT,
    age_days_high    INT,
    additional_age_type_code CHAR(2),
    additional_age_days_low INT,
    additional_age_days_high INT,
    renal_function_msmt_type_code CHAR(2),
    renal_function_msmt_low NUMERIC,
    renal_function_msmt_high NUMERIC,
    renal_function_msmt_uom_code CHAR(2),
    weight_category_low NUMERIC,
    weight_category_high NUMERIC,
    weight_category_uom_code CHAR(2),
    renal_function_msmt_type_2_code CHAR(2),
    reserve          VARCHAR(41)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/DDA/USAENG/DB/DDADPP', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    profile_id = line[0:10]
    transaction_code = line[10:11]
    indication_code = line[11:18]
    special_condition_code = line[18:25]
    age_type_code = line[25:27]
    age_days_low = int(line[27:32])
    age_days_high = int(line[32:37])
    additional_age_type_code = line[37:39]
    additional_age_days_low = int(line[39:44])
    additional_age_days_high = int(line[44:49])
    renal_function_msmt_type_code = line[49:51]
    renal_function_msmt_low = float(line[51:59])
    renal_function_msmt_high = float(line[59:67])
    renal_function_msmt_uom_code = line[67:69]
    weight_category_low = float(line[69:78])
    weight_category_high = float(line[78:87])
    weight_category_uom_code = line[87:89]
    renal_function_msmt_type_2_code = line[89:91]
    reserve = line[91:132]

    # Insert the fields into the ddadpp table using parameterized queries
    con.execute(
        "INSERT INTO medispan.ddadpp VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            profile_id,
            transaction_code,
            indication_code,
            special_condition_code,
            age_type_code,
            age_days_low,
            age_days_high,
            additional_age_type_code,
            additional_age_days_low,
            additional_age_days_high,
            renal_function_msmt_type_code,
            renal_function_msmt_low,
            renal_function_msmt_high,
            renal_function_msmt_uom_code,
            weight_category_low,
            weight_category_high,
            weight_category_uom_code,
            renal_function_msmt_type_2_code,
            reserve
        )
    )

# Test the ddadpp table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.ddadpp LIMIT 5').fetchdf()

Unnamed: 0,profile_id,transaction_code,indication_code,special_condition_code,age_type_code,age_days_low,age_days_high,additional_age_type_code,additional_age_days_low,additional_age_days_high,renal_function_msmt_type_code,renal_function_msmt_low,renal_function_msmt_high,renal_function_msmt_uom_code,weight_category_low,weight_category_high,weight_category_uom_code,renal_function_msmt_type_2_code,reserve
0,2,,1,0,1,4380,99999,99,0,0,99,0.0,0.0,99,0.0,9999.0,,,\n
1,9,,11,0,1,4380,99999,99,0,0,99,0.0,0.0,99,0.0,9999.0,,,\n
2,11,,11,0,1,23725,99999,99,0,0,99,0.0,0.0,99,0.0,9999.0,,,\n
3,12,,11,0,1,6570,23724,99,0,0,99,0.0,0.0,99,0.0,9999.0,,,\n
4,14,,11,799,1,6570,99999,99,0,0,99,0.0,0.0,99,0.0,9999.0,,,\n


In [8]:
# Create the ddauom table
con.execute("""
CREATE OR REPLACE TABLE medispan.ddauom (
  unit_of_measure_id VARCHAR,
  transaction_code VARCHAR(1),
  unit_of_measure_description VARCHAR(50),
  unit_of_measure_type_code VARCHAR(2),
  reserve VARCHAR(22)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/DDA/USAENG/DB/DDAUOM', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    unit_of_measure_id = int(line[0:5])
    transaction_code = line[5]
    unit_of_measure_description = line[6:56].strip()
    unit_of_measure_type_code = line[56:58]
    reserve = line[58:80]

    # Insert the fields into the ddauom table using parameterized queries
    con.execute(
        "INSERT INTO medispan.ddauom VALUES (?, ?, ?, ?, ?)",
        (
            unit_of_measure_id,
            transaction_code,
            unit_of_measure_description,
            unit_of_measure_type_code,
            reserve
        )
    )

# Test the ddauom table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.ddauom LIMIT 5').fetchdf()

Unnamed: 0,unit_of_measure_id,transaction_code,unit_of_measure_description,unit_of_measure_type_code,reserve
0,1,,mg,1,
1,2,,mcg,1,
2,3,,mg/kg,1,
3,4,,mcg/kg,1,
4,5,,drop,2,


In [9]:
# Create the ddaval table
con.execute("""
CREATE OR REPLACE TABLE medispan.ddaval (
  field_identifier VARCHAR(4),
  field_value VARCHAR(15),
  language_code INTEGER,
  value_description VARCHAR(40),
  value_abbreviation VARCHAR(15),
  reserve VARCHAR(20)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/DDA/USAENG/DB/DDAVAL', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    field_identifier = line[0:4]
    field_value = line[4:19]
    language_code = int(line[19:21])
    value_description = line[21:61]
    value_abbreviation = line[61:76]
    reserve = line[76:96]

    # Insert the fields into the ddaval table using parameterized queries
    con.execute(
        "INSERT INTO medispan.ddaval VALUES (?, ?, ?, ?, ?, ?)",
        (
            field_identifier,
            field_value,
            language_code,
            value_description,
            value_abbreviation,
            reserve
        )
    )

# Test the ddaval table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.ddaval LIMIT 5').fetchdf()

Unnamed: 0,field_identifier,field_value,language_code,value_description,value_abbreviation,reserve
0,C040,C,1,Character,,
1,C040,N,1,Number,,
2,C044,N,1,No,,
3,C044,Y,1,Yes,,
4,C047,N,1,No,,


In [10]:
con.close()