In [1]:
import duckdb
import pandas as pd

In [2]:
# Open a DuckDB connection to the terminology.db database file.
# `con` is the connection every later cell executes its statements on.
con = duckdb.connect(database='/workspaces/practice/terminology.db')

In [3]:
# Make sure the `medispan` schema exists before tables are created in it
# (IF NOT EXISTS makes this cell safe to re-run).
con.execute('create schema if not exists medispan')

<duckdb.DuckDBPyConnection at 0xffff3081f1b0>

In [4]:
# Rebuild medispan.mixi10 from the MIXI10 file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mixi10 table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mixi10 (
  icd10_type_code VARCHAR(1),
  icd10_unformatted_code VARCHAR(20),
  transaction_code VARCHAR(1),
  icd10_formatted_code VARCHAR(20),
  icd10_description VARCHAR(100),
  reserve VARCHAR(50)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/ICD10/USAENG/DB/FIXED/MIXI10', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mixi10 VALUES (?, ?, ?, ?, ?, ?)",
                (
                    line[0],        # icd10_type_code
                    line[1:21],     # icd10_unformatted_code
                    line[21],       # transaction_code
                    line[22:42],    # icd10_formatted_code
                    line[42:142],   # icd10_description
                    line[142:192],  # reserve
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mixi10 table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mixi10 LIMIT 5').fetchdf()

Unnamed: 0,icd10_type_code,icd10_unformatted_code,transaction_code,icd10_formatted_code,icd10_description,reserve
0,1,A00,,A00,Cholera ...,...
1,1,A000,,A00.0,"Cholera due to Vibrio cholerae 01, biovar chol...",...
2,1,A001,,A00.1,"Cholera due to Vibrio cholerae 01, biovar elto...",...
3,1,A009,,A00.9,"Cholera, unspecified ...",...
4,1,A01,,A01,Typhoid and paratyphoid fevers ...,...


In [5]:
# Rebuild medispan.mixmmap from the MIXMMAP file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mixmmap table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mixmmap (
  medical_condition_code VARCHAR,
  icd10_type_code VARCHAR(1),
  icd10_unformatted_code VARCHAR(20),
  transaction_code VARCHAR(1),
  medcond_to_icd10_flag VARCHAR(1),
  icd10_to_medcond_flag VARCHAR(1),
  reserve VARCHAR(33)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/ICD10/USAENG/DB/FIXED/MIXMMAP', 'r') as file:
        for line in file:
            # A completely empty line carries no record; without this guard
            # the all-slice extraction below would insert a junk row for it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mixmmap VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    line[0:7],    # medical_condition_code
                    line[7:8],    # icd10_type_code
                    line[8:28],   # icd10_unformatted_code
                    line[28:29],  # transaction_code
                    line[29:30],  # medcond_to_icd10_flag
                    line[30:31],  # icd10_to_medcond_flag
                    line[31:64],  # reserve
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mixmmap table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mixmmap LIMIT 5').fetchdf()

Unnamed: 0,medical_condition_code,icd10_type_code,icd10_unformatted_code,transaction_code,medcond_to_icd10_flag,icd10_to_medcond_flag,reserve
0,1,1,L29,,Y,Y,
1,1,1,L298,,N,Y,
2,1,1,L299,,Y,Y,
3,2,1,R630,,Y,Y,
4,3,1,L745,,N,Y,


In [6]:
# Rebuild medispan.mcmclass from the MCMCLASS file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file (e.g. int() rejecting a malformed field) rolls
# back to the previous state instead of leaving a partially loaded table,
# and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmclass table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmclass (
  medical_condition_code VARCHAR,
  parent_medical_condition_code VARCHAR,
  relationship_type INTEGER,
  reserve VARCHAR(15),
  transaction_code VARCHAR(1)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMCLASS', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmclass VALUES (?, ?, ?, ?, ?)",
                (
                    line[0:7],         # medical_condition_code
                    line[7:14],        # parent_medical_condition_code
                    int(line[14:16]),  # relationship_type
                    line[16:31],       # reserve
                    line[31],          # transaction_code
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmclass table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmclass LIMIT 5').fetchdf()

Unnamed: 0,medical_condition_code,parent_medical_condition_code,relationship_type,reserve,transaction_code
0,1,3207,1,,
1,2,1658,1,,
2,3,2865,1,,
3,4,4151,1,,
4,5,3517,1,,


In [7]:
# Rebuild medispan.mcmcond from the MCMCOND file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file (e.g. int() rejecting a malformed field) rolls
# back to the previous state instead of leaving a partially loaded table,
# and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmcond table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmcond (
  medical_condition_code VARCHAR,
  condition_type INTEGER,
  classification_only_flag VARCHAR(1),
  gender_code VARCHAR(1),
  pregnancy_code VARCHAR(1),
  lactation_code VARCHAR(1),
  from_age INTEGER,
  through_age INTEGER,
  age_units_code VARCHAR(1),
  duration_code VARCHAR,
  reserve VARCHAR(41),
  transaction_code VARCHAR(1)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMCOND', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmcond VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    line[0:7],         # medical_condition_code
                    int(line[7:9]),    # condition_type
                    line[9],           # classification_only_flag
                    line[10],          # gender_code
                    line[11],          # pregnancy_code
                    line[12],          # lactation_code
                    int(line[13:16]),  # from_age
                    int(line[16:19]),  # through_age
                    line[19],          # age_units_code
                    line[20:22],       # duration_code
                    line[22:63],       # reserve
                    line[63],          # transaction_code
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmcond table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmcond LIMIT 5').fetchdf()


Unnamed: 0,medical_condition_code,condition_type,classification_only_flag,gender_code,pregnancy_code,lactation_code,from_age,through_age,age_units_code,duration_code,reserve,transaction_code
0,0,4,N,B,N,N,0,999,Y,99,,
1,1,2,N,B,N,N,0,999,Y,1,,
2,2,2,N,B,N,N,0,999,Y,1,,
3,3,2,N,B,N,N,0,999,Y,1,,
4,4,2,N,B,N,N,0,999,Y,1,,


In [8]:
# Rebuild medispan.mcmdict from the MCMDICT file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file (e.g. int() rejecting a malformed field) rolls
# back to the previous state instead of leaving a partially loaded table,
# and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmdict table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmdict (
  field_identifier VARCHAR(4),
  field_description VARCHAR(35),
  field_type VARCHAR(1),
  field_length INTEGER,
  implied_decimal_flag VARCHAR(1),
  decimal_places INTEGER,
  field_validation_flag VARCHAR(1),
  field_abbreviation_flag VARCHAR(1),
  reserve VARCHAR(16)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMDICT', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmdict VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    line[0:4].strip(),   # field_identifier
                    line[4:39].strip(),  # field_description
                    line[39],            # field_type
                    int(line[40:43]),    # field_length
                    line[43],            # implied_decimal_flag
                    int(line[44:46]),    # decimal_places
                    line[46],            # field_validation_flag
                    line[47],            # field_abbreviation_flag
                    line[48:64],         # reserve
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmdict table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmdict LIMIT 5').fetchdf()


Unnamed: 0,field_identifier,field_description,field_type,field_length,implied_decimal_flag,decimal_places,field_validation_flag,field_abbreviation_flag,reserve
0,A001,Record Type,C,3,N,0,N,N,
1,A004,Reserve-1,C,1,N,0,N,N,
2,A005,Sequence Number,N,3,N,0,N,N,
3,A008,Reserve-2,C,1,N,0,N,N,
4,A009,Comment Marker,C,1,N,0,N,N,


In [9]:
# Rebuild medispan.mcminm from the MCMINM file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcminm table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcminm (
  icd_9_code VARCHAR(20),
  icd_9_name VARCHAR(50),
  reserve VARCHAR(25),
  transaction_code VARCHAR(1)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMINM', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcminm VALUES (?, ?, ?, ?)",
                (
                    line[0:20].strip(),   # icd_9_code
                    line[20:70].strip(),  # icd_9_name
                    line[70:95].strip(),  # reserve
                    line[95],             # transaction_code
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcminm table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcminm LIMIT 5').fetchdf()

Unnamed: 0,icd_9_code,icd_9_name,reserve,transaction_code
0,1,CHOLERA,,
1,10,CHOLERA D/T VIB CHOLERAE,,
2,11,CHOLERA D/T VIB EL TOR,,
3,19,CHOLERA NOS,,
4,2,TYPHOID/PARATYPHOID FEV,,


In [10]:
# Rebuild medispan.mcmmdds from the MCMMDDS file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmmdds table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmmdds (
  medical_condition_code VARCHAR,
  disease_code VARCHAR,
  reserve_1 VARCHAR(14),
  medical_condition_to_disease_flag VARCHAR(1),
  disease_to_medical_condition_flag VARCHAR(1),
  reserve_2 VARCHAR(34),
  transaction_code VARCHAR(1)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMMDDS', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmmdds VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    line[0:7],    # medical_condition_code
                    line[7:13],   # disease_code
                    line[13:27],  # reserve_1
                    line[27],     # medical_condition_to_disease_flag
                    line[28],     # disease_to_medical_condition_flag
                    line[29:63],  # reserve_2
                    line[63],     # transaction_code
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmmdds table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmmdds LIMIT 5').fetchdf()

Unnamed: 0,medical_condition_code,disease_code,reserve_1,medical_condition_to_disease_flag,disease_to_medical_condition_flag,reserve_2,transaction_code
0,0,0,,Y,Y,,
1,0,1000,,N,Y,,
2,1,352000,,Y,Y,,
3,5,256000,,Y,N,,
4,6,153510,,Y,Y,,


In [11]:
# Rebuild medispan.mcmmicd from the MCMMICD file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmmicd table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmmicd (
  medical_condition_code VARCHAR,
  icd9_code VARCHAR(20),
  medical_condition_to_icd9_flag VARCHAR(1),
  icd9_to_medical_condition_flag VARCHAR(1),
  reserve VARCHAR(34),
  transaction_code VARCHAR(1)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMMICD', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmmicd VALUES (?, ?, ?, ?, ?, ?)",
                (
                    line[0:7],           # medical_condition_code
                    line[7:27].strip(),  # icd9_code
                    line[27],            # medical_condition_to_icd9_flag
                    line[28],            # icd9_to_medical_condition_flag
                    line[29:63],         # reserve
                    line[63],            # transaction_code
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmmicd table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmmicd LIMIT 5').fetchdf()

Unnamed: 0,medical_condition_code,icd9_code,medical_condition_to_icd9_flag,icd9_to_medical_condition_flag,reserve,transaction_code
0,1,698,Y,Y,,
1,1,6984,N,Y,,
2,1,6988,Y,Y,,
3,1,6989,Y,Y,,
4,2,7830,Y,Y,,


In [12]:
# Rebuild medispan.mcmmsct from the MCMMSCT file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmmsct table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmmsct (
  medical_condition_code VARCHAR,
  snomed_ct_concept_id VARCHAR,
  transaction_code VARCHAR(1),
  mcm_snomed_relationship_type VARCHAR(2),
  reserve VARCHAR(20)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMMSCT', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmmsct VALUES (?, ?, ?, ?, ?)",
                (
                    line[0:7],    # medical_condition_code
                    line[7:25],   # snomed_ct_concept_id
                    line[25],     # transaction_code
                    line[26:28],  # mcm_snomed_relationship_type
                    line[28:48],  # reserve
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmmsct table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmmsct LIMIT 5').fetchdf()

Unnamed: 0,medical_condition_code,snomed_ct_concept_id,transaction_code,mcm_snomed_relationship_type,reserve
0,1,109252001,,3,
1,1,201024003,,3,
2,1,238549003,,3,
3,1,238695001,,3,
4,1,239102001,,3,


In [13]:
# Rebuild medispan.mcmname from the MCMNAME file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmname table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmname (
  medical_condition_code VARCHAR,
  country_code VARCHAR,
  language_code VARCHAR,
  name_type_code VARCHAR,
  medical_condition_name VARCHAR(58),
  reserve VARCHAR(24),
  transaction_code VARCHAR(1)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMNAME', 'r') as file:
        for line in file:
            # A completely empty line carries no record; skip it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmname VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    line[0:7],            # medical_condition_code
                    line[7:9],            # country_code
                    line[9:11],           # language_code
                    line[11:13],          # name_type_code
                    line[13:71].strip(),  # medical_condition_name
                    line[71:95].strip(),  # reserve
                    line[95],             # transaction_code
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmname table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmname LIMIT 5').fetchdf()

Unnamed: 0,medical_condition_code,country_code,language_code,name_type_code,medical_condition_name,reserve,transaction_code
0,0,1,1,1,,,
1,1,1,1,1,Pruritus,,
2,1,1,1,2,Itching Skin,,
3,1,1,1,3,Itching,,
4,1,1,1,4,Itch,,


In [14]:
# Rebuild medispan.mcmval from the MCMVAL file.
# The DDL and every INSERT run inside a single transaction, so a failure
# part-way through the file rolls back to the previous state instead of
# leaving a partially loaded table, and rows are not committed one by one.
con.execute("BEGIN TRANSACTION")
try:
    # Create the mcmval table
    con.execute("""
CREATE OR REPLACE TABLE medispan.mcmval (
  field_identifier VARCHAR(4),
  field_value VARCHAR(15),
  language_code VARCHAR(2),
  value_description VARCHAR(40),
  value_abbreviation VARCHAR(15),
  reserve VARCHAR(20)
)
""")

    # Stream the file line by line instead of loading it all with readlines()
    with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230201/MCM/USAENG/DB/MCMVAL', 'r') as file:
        for line in file:
            # A completely empty line carries no record; without this guard
            # the all-slice extraction below would insert a junk row for it
            if not line.rstrip('\r\n'):
                continue

            # Slice the record at its fixed column offsets and insert it
            # using a parameterized query
            con.execute(
                "INSERT INTO medispan.mcmval VALUES (?, ?, ?, ?, ?, ?)",
                (
                    line[0:4],            # field_identifier
                    line[4:19].strip(),   # field_value
                    line[19:21],          # language_code
                    line[21:61].strip(),  # value_description
                    line[61:76].strip(),  # value_abbreviation
                    line[76:96].strip(),  # reserve
                )
            )

    con.execute("COMMIT")
except BaseException:
    # Undo the partial load (also on a kernel interrupt), then re-raise
    con.execute("ROLLBACK")
    raise

# Test the mcmval table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.mcmval LIMIT 5').fetchdf()

Unnamed: 0,field_identifier,field_value,language_code,value_description,value_abbreviation,reserve
0,C040,C,1,Character,,
1,C040,N,1,Number,,
2,C044,N,1,No,,
3,C044,Y,1,Yes,,
4,C047,N,1,No,,


In [15]:
# Close the DuckDB connection held in `con`; cells that use `con` must run before this one.
con.close()