In [1]:
import duckdb
import pandas as pd

In [2]:
con = duckdb.connect(database='/workspaces/practice/terminology.db')

In [3]:
con.execute('create schema if not exists medispan')

<duckdb.DuckDBPyConnection at 0xffff3aeab3f0>

In [4]:
# Create the rxxcncpt table
con.execute("""
CREATE OR REPLACE TABLE medispan.rxxcncpt (
  concept_type_identifier VARCHAR(5),
  transaction_code VARCHAR(1),
  description VARCHAR(34),
  reserve VARCHAR(24)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/RXX/USAENG/DB/FIXED/RXXCNCPT', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    concept_type_identifier = line[0:5].strip()
    transaction_code = line[5]
    description = line[6:40].strip()
    reserve = line[40:64].strip()

    # Insert the fields into the rxxcncpt table using parameterized queries
    con.execute(
        "INSERT INTO medispan.rxxcncpt VALUES (?, ?, ?, ?)",
        (
            concept_type_identifier,
            transaction_code,
            description,
            reserve
        )
    )

# Test the rxxcncpt table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.rxxcncpt LIMIT 5').fetchdf()

Unnamed: 0,concept_type_identifier,transaction_code,description,reserve
0,1,,DRUG NAME,
1,3,,ROUTED DOSE FORM DRUG,
2,4,,DISPENSABLE DRUG,
3,5,,GENERIC PRODUCT,
4,6,,GENERIC PRODUCT PACKAGE CODE CORE,


In [5]:
# Create the rxxdict table
con.execute("""
CREATE OR REPLACE TABLE medispan.rxxdict (
  field_identifier VARCHAR(4),
  field_description VARCHAR(35),
  field_type VARCHAR(1),
  field_length INTEGER,
  implied_decimal_flag VARCHAR(1),
  decimal_places INTEGER,
  field_validation_flag VARCHAR(1),
  field_abbreviation_flag VARCHAR(1),
  reserve VARCHAR(16)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/RXX/USAENG/DB/FIXED/RXXDICT', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    field_identifier = line[0:4]
    field_description = line[4:39].strip()
    field_type = line[39]
    field_length = int(line[40:43])
    implied_decimal_flag = line[43]
    decimal_places = int(line[44:46])
    field_validation_flag = line[46]
    field_abbreviation_flag = line[47]
    reserve = line[48:64]

    # Insert the fields into the rxxdict table using parameterized queries
    con.execute(
        "INSERT INTO medispan.rxxdict VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            field_identifier,
            field_description,
            field_type,
            field_length,
            implied_decimal_flag,
            decimal_places,
            field_validation_flag,
            field_abbreviation_flag,
            reserve
        )
    )

# Test the rxxdict table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.rxxdict LIMIT 5').fetchdf()

Unnamed: 0,field_identifier,field_description,field_type,field_length,implied_decimal_flag,decimal_places,field_validation_flag,field_abbreviation_flag,reserve
0,A001,Record Type,C,3,N,0,N,N,
1,A004,Reserve-1,C,1,N,0,N,N,
2,A005,Sequence Number,N,3,N,0,N,N,
3,A008,Reserve-2,C,1,N,0,N,N,
4,A009,Comment Marker,C,1,N,0,N,N,


In [6]:
con.execute("""
CREATE OR REPLACE TABLE medispan.rxxexcon (
  external_source VARCHAR(10),
  external_source_code VARCHAR(30),
  transaction_code VARCHAR(1),
  umls_concept_id VARCHAR(12),
  external_type VARCHAR(10),
  external_source_set VARCHAR(10),
  rxnorm_code VARCHAR(10),
  reserve VARCHAR(13)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/RXX/USAENG/DB/FIXED/RXXEXCON', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    external_source = line[0:10].strip()
    external_source_code = line[10:40].strip()
    transaction_code = line[40]
    umls_concept_id = line[41:53].strip()
    external_type = line[53:63].strip()
    external_source_set = line[63:73].strip()
    rxnorm_code = line[73:83].strip()
    reserve = line[83:96].strip()

    # Insert the fields into the rxxexcon table using parameterized queries
    con.execute(
        "INSERT INTO medispan.rxxexcon VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
        (
            external_source,
            external_source_code,
            transaction_code,
            umls_concept_id,
            external_type,
            external_source_set,
            rxnorm_code,
            reserve
        )
    )

# Test the rxxexcon table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.rxxexcon LIMIT 5').fetchdf()


Unnamed: 0,external_source,external_source_code,transaction_code,umls_concept_id,external_type,external_source_set,rxnorm_code,reserve
0,RXNORM,1000000,,C2930060,SBD,MONTHLY,1000000,
1,RXNORM,1000001,,C2930061,SCD,MONTHLY,1000001,
2,RXNORM,1000002,,C2930062,BN,MONTHLY,1000002,
3,RXNORM,1000003,,C2930063,SBDC,MONTHLY,1000003,
4,RXNORM,1000004,,C2930064,SBDF,MONTHLY,1000004,


In [7]:
# Create the rxxval table
con.execute("""
CREATE OR REPLACE TABLE medispan.rxxval (
  field_identifier VARCHAR(4),
  field_value VARCHAR(15),
  language_code VARCHAR(2),
  value_description VARCHAR(40),
  value_abbreviation VARCHAR(15),
  reserve VARCHAR(20)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/RXX/USAENG/DB/FIXED/RXXVAL', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    field_identifier = line[0:4]
    field_value = line[4:19]
    language_code = line[19:21]
    value_description = line[21:61]
    value_abbreviation = line[61:76]
    reserve = line[76:96]

    # Insert the fields into the rxxval table using parameterized queries
    con.execute(
        "INSERT INTO medispan.rxxval VALUES (?, ?, ?, ?, ?, ?)",
        (
            field_identifier,
            field_value,
            language_code,
            value_description,
            value_abbreviation,
            reserve
        )
    )

# Test the rxxval table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.rxxval LIMIT 5').fetchdf()

Unnamed: 0,field_identifier,field_value,language_code,value_description,value_abbreviation,reserve
0,C040,C,1,Character,,
1,C040,N,1,Number,,
2,C044,N,1,No,,
3,C044,Y,1,Yes,,
4,C047,N,1,No,,


In [8]:
con.execute("""
CREATE OR REPLACE TABLE medispan.rxxvrdsc (
  variance_identifier VARCHAR,
  transaction_code VARCHAR(1),
  variance_short_description VARCHAR(15),
  variance_description VARCHAR(50),
  reserve VARCHAR(25)
)
""")

with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/RXX/USAENG/DB/FIXED/RXXVRDSC', 'r') as file:
    lines = file.readlines()

for line in lines:
    variance_identifier = line[0:5]
    transaction_code = line[5]
    variance_short_description = line[6:21]
    variance_description = line[21:71]
    reserve = line[71:96]

    con.execute(
        "INSERT INTO medispan.rxxvrdsc VALUES (?, ?, ?, ?, ?)",
        (
            variance_identifier,
            transaction_code,
            variance_short_description,
            variance_description,
            reserve
        )
    )

con.execute('SELECT * FROM medispan.rxxvrdsc LIMIT 5').fetchdf()

Unnamed: 0,variance_identifier,transaction_code,variance_short_description,variance_description,reserve
0,4061,,SALT,Salt ...,
1,4062,,PRES-UOM,Prescription unit of measure ...,
2,4063,,PKG,Packaging ...,
3,4064,,DOSE-FORM,Dose Form ...,
4,4065,,VEHICLE,Vehicle ...,


In [9]:
# Create the rxxxref table
con.execute("""
CREATE OR REPLACE TABLE medispan.rxxxref (
  external_source VARCHAR(10),
  external_source_code VARCHAR(30),
  concept_type_identifier VARCHAR,
  concept_value VARCHAR(20),
  transaction_code VARCHAR(1),
  match_type VARCHAR(2),
  umls_concept_id VARCHAR(12),
  rxnorm_code VARCHAR(10),
  reserve VARCHAR(22)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/RXX/USAENG/DB/FIXED/RXXXREF', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    external_source = line[0:10].strip()
    external_source_code = line[10:40].strip()
    concept_type_identifier = line[40:42]
    concept_value = line[42:62].strip()
    transaction_code = line[62]
    match_type = line[63:65].strip()
    umls_concept_id = line[65:77].strip()
    rxnorm_code = line[77:87].strip()
    reserve = line[87:109].strip()

    # Insert the fields into the rxxxref table using parameterized queries
    con.execute(
        "INSERT INTO medispan.rxxxref VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            external_source,
            external_source_code,
            concept_type_identifier,
            concept_value,
            transaction_code,
            match_type,
            umls_concept_id,
            rxnorm_code,
            reserve
        )
    )

con.execute('SELECT * FROM medispan.rxxxref LIMIT 5').fetchdf()

Unnamed: 0,external_source,external_source_code,concept_type_identifier,concept_value,transaction_code,match_type,umls_concept_id,rxnorm_code,reserve
0,RXNORM,,0,1100,,,5,,
1,RXNORM,,0,11000,,,6,,
2,RXNORM,,0,1100006,,,6,,
3,RXNORM,,0,110001,,,6,,
4,RXNORM,,0,1100010,,,6,,


In [10]:
# Create the rxxxref table
con.execute("""
CREATE OR REPLACE TABLE medispan.rxxxvar (
  external_source VARCHAR(10),
  external_source_code VARCHAR(30),
  concept_type_id VARCHAR,
  concept_value VARCHAR(20),
  transaction_code VARCHAR(1),
  match_type VARCHAR(2),
  umls_concept_id VARCHAR(12),
  rxnorm_code VARCHAR(10),
  reserve VARCHAR(22)
)
""")

# Open the file and read its content
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/RXX/USAENG/DB/FIXED/RXXXVAR', 'r') as file:
    lines = file.readlines()

# Loop through each line and extract the fields
for line in lines:
    external_source = line[0:10]
    external_source_code = line[10:40]
    concept_type_id = line[40:45]
    concept_value = line[45:65]
    transaction_code = line[65]
    match_type = line[66:68]
    umls_concept_id = line[68:80]
    rxnorm_code = line[80:90]
    reserve = line[90:112]

    # Insert the fields into the rxxxvar table using parameterized queries
    con.execute(
        "INSERT INTO medispan.rxxxvar VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            external_source,
            external_source_code,
            concept_type_id,
            concept_value,
            transaction_code,
            match_type,
            umls_concept_id,
            rxnorm_code,
            reserve
        )
    )

# Test the rxxxref table by selecting the first 5 rows
con.execute('SELECT * FROM medispan.rxxxvar LIMIT 5').fetchdf()

Unnamed: 0,external_source,external_source_code,concept_type_id,concept_value,transaction_code,match_type,umls_concept_id,rxnorm_code,reserve
0,RXNORM,1000001,4,158150,0,40,61,,\n
1,RXNORM,1000001,5,36994503450330,0,40,61,,\n
2,RXNORM,1000001,6,69983,0,40,61,,\n
3,RXNORM,1000024,4,13351,0,40,62,,\n
4,RXNORM,1000024,5,16800020101810,0,40,62,,\n


In [11]:
con.close()