In [1]:
import duckdb
import pandas as pd

In [2]:
# Connect to the DuckDB database file; every later cell issues its SQL through `con`.
con = duckdb.connect(database='/workspaces/practice/terminology.db')

In [3]:
# Make sure the `medispan` schema exists before the table-creating cells run.
# IF NOT EXISTS keeps this cell safe to re-execute.
con.execute('create schema if not exists medispan')

<duckdb.DuckDBPyConnection at 0xffffa461f770>

In [4]:
# Create the ahfcon table (CREATE OR REPLACE, so re-running this cell starts from an empty table)
con.execute("""
CREATE or replace TABLE medispan.ahfcon (
  concept_type_id INTEGER,
  transaction_code VARCHAR(1),
  description VARCHAR(35),
  reserve VARCHAR(24)
)
""")

# Parse the fixed-width AHFCON file into one tuple per record.
# Column offsets used below: [0:5] concept type id, [5:6] transaction code,
# [6:41] description, [41:65] reserve.
ahfcon_rows = []
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/AHF/USAENG/DB/FIXED/AHFCON', 'r') as file:
    # Iterate the file lazily instead of materialising every line with readlines().
    for line in file:
        record = line.rstrip('\n')
        # A blank line (e.g. a trailing newline at end of file) carries no record
        # and would otherwise make int() below raise ValueError.
        if not record.strip():
            continue
        ahfcon_rows.append((
            int(record[0:5]),
            record[5:6],
            record[6:41].strip(),
            record[41:65].strip(),
        ))

# Insert all parsed records with one parameterized executemany() call instead of
# one execute() call per line. Parsing finishes before anything is inserted, so a
# malformed line no longer leaves the table half-loaded.
# Guard: executemany() expects at least one parameter set, so skip it for an empty file.
if ahfcon_rows:
    con.executemany(
        "INSERT INTO medispan.ahfcon VALUES (?, ?, ?, ?)",
        ahfcon_rows
    )

con.execute('select * from medispan.ahfcon limit 5').fetchdf()

Unnamed: 0,concept_type_id,transaction_code,description,reserve
0,1,,DRUG NAME,
1,2,,ROUTED DRUG,
2,3,,ROUTED DOSE FORM DRUG,
3,4,,DISPENSABLE DRUG,
4,5,,GPI,


In [5]:
# Create the ahfdesc table (CREATE OR REPLACE, so re-running this cell starts from an empty table)
con.execute("""
CREATE or replace TABLE medispan.ahfdesc (
  ahfs_cd VARCHAR(10),
  transaction_cd VARCHAR(1),
  ahfs_level_cd VARCHAR(2),
  ahfs_description VARCHAR(80),
  reserve VARCHAR(51)
)
""")

# Parse the fixed-width AHFDESC file into one tuple per record.
# Column offsets used below: [0:10] AHFS code, [10:11] transaction code,
# [11:13] AHFS level code, [13:93] description, [93:144] reserve.
# NOTE(review): unlike the ahfcon/ahfval loaders, these fields keep their space
# padding. That is left unchanged on purpose: the ahfdrug loader stores ahfs_cd
# padded the same way, so trimming only one side would break equality joins.
# Confirm with downstream queries before trimming either table.
ahfdesc_rows = []
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/AHF/USAENG/DB/FIXED/AHFDESC', 'r') as file:
    # Iterate the file lazily instead of materialising every line with readlines().
    for line in file:
        # Drop the line terminator so it can never leak into a stored field.
        record = line.rstrip('\n')
        # Skip blank lines (e.g. a trailing newline at end of file).
        if not record.strip():
            continue
        ahfdesc_rows.append((
            record[0:10],
            # Slice rather than index: a short record yields '' instead of IndexError.
            record[10:11],
            record[11:13],
            record[13:93],
            record[93:144],
        ))

# Insert all parsed records with one parameterized executemany() call instead of
# one execute() call per line.
# Guard: executemany() expects at least one parameter set, so skip it for an empty file.
if ahfdesc_rows:
    con.executemany(
        "INSERT INTO medispan.ahfdesc VALUES (?, ?, ?, ?, ?)",
        ahfdesc_rows
    )

con.execute('select * from medispan.ahfdesc limit 5').fetchdf()

Unnamed: 0,ahfs_cd,transaction_cd,ahfs_level_cd,ahfs_description,reserve
0,4,,2,Antihistamine Drugs ...,...
1,400,,4,Antihistamine Drugs ...,...
2,40000,,6,Antihistamine Drugs ...,...
3,4000000,,8,Antihistamine Drugs ...,...
4,404,,4,First Generation Antihistamines ...,...


In [6]:
# Create the ahfdict table (CREATE OR REPLACE, so re-running this cell starts from an empty table)
con.execute("""
CREATE OR REPLACE TABLE medispan.ahfdict (
  field_id VARCHAR(4),
  field_description VARCHAR(35),
  field_type VARCHAR(1),
  field_length INTEGER,
  implied_decimal_flag VARCHAR(1),
  decimal_places INTEGER,
  field_validation_flag VARCHAR(1),
  field_abbreviation_flag VARCHAR(1),
  reserve VARCHAR(16)
)
""")

# Parse the fixed-width AHFDICT file into one tuple per record.
# Column offsets used below: [0:4] field id, [4:39] field description,
# [39:40] field type, [40:43] field length, [43:44] implied decimal flag,
# [44:46] decimal places, [46:47] validation flag, [47:48] abbreviation flag,
# [48:64] reserve.
# NOTE(review): text fields are stored with their space padding, as before;
# the ahfcon/ahfval loaders trim theirs. Confirm which convention is wanted.
ahfdict_rows = []
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/AHF/USAENG/DB/FIXED/AHFDICT', 'r') as file:
    # Iterate the file lazily instead of materialising every line with readlines().
    for line in file:
        # Drop the line terminator so it can never leak into a stored field.
        record = line.rstrip('\n')
        # A blank line (e.g. a trailing newline at end of file) carries no record
        # and would otherwise make int() below raise ValueError.
        if not record.strip():
            continue
        ahfdict_rows.append((
            record[0:4],
            record[4:39],
            # One-character fields are sliced rather than indexed so that a
            # short record cannot raise IndexError.
            record[39:40],
            int(record[40:43]),
            record[43:44],
            int(record[44:46]),
            record[46:47],
            record[47:48],
            record[48:64],
        ))

# Insert all parsed records with one parameterized executemany() call instead of
# one execute() call per line. Parsing finishes before anything is inserted, so a
# malformed line no longer leaves the table half-loaded.
# Guard: executemany() expects at least one parameter set, so skip it for an empty file.
if ahfdict_rows:
    con.executemany(
        "INSERT INTO medispan.ahfdict VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
        ahfdict_rows
    )

con.execute('select * from medispan.ahfdict limit 5').fetchdf()

Unnamed: 0,field_id,field_description,field_type,field_length,implied_decimal_flag,decimal_places,field_validation_flag,field_abbreviation_flag,reserve
0,A001,Record Type,C,3,N,0,N,N,
1,A004,Reserve-1,C,1,N,0,N,N,
2,A005,Sequence Number,N,3,N,0,N,N,
3,A008,Reserve-2,C,1,N,0,N,N,
4,A009,Comment Marker,C,1,N,0,N,N,


In [7]:
# Create the ahfdrug table (CREATE OR REPLACE, so re-running this cell starts from an empty table)
con.execute("""
CREATE or replace TABLE medispan.ahfdrug (
  drug_id VARCHAR(20),
  concept_type_id INTEGER,
  country_cd VARCHAR(2),
  ahfs_cd VARCHAR(10),
  transaction_cd VARCHAR(1),
  ahfs_limiter_cd INTEGER,
  reserve VARCHAR(25)
)
""")

# Parse the fixed-width AHFDRUG file into one tuple per record.
# Column offsets used below: [0:20] drug id, [20:25] concept type id,
# [25:27] country code, [27:37] AHFS code, [37:38] transaction code,
# [38:39] AHFS limiter code, [39:64] reserve.
# NOTE(review): text fields (including ahfs_cd) keep their space padding, which
# matches how the ahfdesc loader stores ahfs_cd. Trim both or neither, otherwise
# equality joins between the two tables stop matching.
ahfdrug_rows = []
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/AHF/USAENG/DB/FIXED/AHFDRUG', 'r') as file:
    # Iterate the file lazily instead of materialising every line with readlines().
    for line in file:
        # Drop the line terminator so it can never leak into a stored field.
        record = line.rstrip('\n')
        # A blank line (e.g. a trailing newline at end of file) carries no record
        # and would otherwise make int() below raise ValueError.
        if not record.strip():
            continue
        ahfdrug_rows.append((
            record[0:20],
            int(record[20:25]),
            record[25:27],
            record[27:37],
            # One-character fields are sliced rather than indexed so that a
            # short record cannot raise IndexError.
            record[37:38],
            int(record[38:39]),
            record[39:64],
        ))

# Insert all parsed records with one parameterized executemany() call instead of
# one execute() call per line. Parsing finishes before anything is inserted, so a
# malformed line no longer leaves the table half-loaded.
# Guard: executemany() expects at least one parameter set, so skip it for an empty file.
if ahfdrug_rows:
    con.executemany(
        "INSERT INTO medispan.ahfdrug VALUES (?, ?, ?, ?, ?, ?, ?)",
        ahfdrug_rows
    )

con.execute('select * from medispan.ahfdrug limit 5').fetchdf()

Unnamed: 0,drug_id,concept_type_id,country_cd,ahfs_cd,transaction_cd,ahfs_limiter_cd,reserve
0,2091155,7,1,94000000,,1,
1,2094755,7,1,94000000,,1,
2,2094855,7,1,94000000,,1,
3,2120001,7,1,78000000,,1,
4,2140701,7,1,8300800,,2,


In [8]:
# Create the ahfval table (CREATE OR REPLACE, so re-running this cell starts from an empty table)
con.execute("""
CREATE or replace TABLE medispan.ahfval (
  field_id VARCHAR(4),
  field_value VARCHAR(15),
  language_cd VARCHAR(2),
  value_description VARCHAR(40),
  value_abbreviation VARCHAR(15),
  reserve VARCHAR(20)
)
""")

# Parse the fixed-width AHFVAL file into one tuple per record.
# Column offsets used below: [0:4] field id, [4:19] field value,
# [19:21] language code, [21:61] value description,
# [61:76] value abbreviation, [76:96] reserve.
ahfval_rows = []
with open('/workspaces/practice/terminology/medispan/CD148480D_MO_20230301/AHF/USAENG/DB/FIXED/AHFVAL', 'r') as file:
    # Iterate the file lazily instead of materialising every line with readlines().
    for line in file:
        # Drop the line terminator so it can never leak into the unstripped
        # field_id / language_cd slices.
        record = line.rstrip('\n')
        # Skip blank lines: nothing here calls int(), so a blank line would not
        # raise but would silently insert a junk row.
        if not record.strip():
            continue
        ahfval_rows.append((
            record[0:4],
            record[4:19].strip(),
            record[19:21],
            record[21:61].strip(),
            record[61:76].strip(),
            record[76:96].strip(),
        ))

# Insert all parsed records with one parameterized executemany() call instead of
# one execute() call per line.
# Guard: executemany() expects at least one parameter set, so skip it for an empty file.
if ahfval_rows:
    con.executemany(
        "INSERT INTO medispan.ahfval VALUES (?, ?, ?, ?, ?, ?)",
        ahfval_rows
    )

con.execute('select * from medispan.ahfval limit 5').fetchdf()

Unnamed: 0,field_id,field_value,language_cd,value_description,value_abbreviation,reserve
0,C040,C,1,Character,,
1,C040,N,1,Number,,
2,C044,N,1,No,,
3,C044,Y,1,Yes,,
4,C047,N,1,No,,


In [9]:
# Close the database connection now that every table has been loaded.
con.close()