In [7]:
import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from dotenv import load_dotenv
from datetime import datetime

In [8]:
def parse_llt_file(file_path):
    """Parse the MedDRA LLT file and return a DataFrame"""
    columns = [
        'llt_code', 'llt_name', 'pt_code', 
        'llt_whoart_code', 'llt_harts_code', 'llt_costart_sym',
        'llt_icd9_code', 'llt_icd9cm_code', 'llt_icd10_code',
        'llt_currency', 'llt_jart_code', 'null_field'
    ]
    
    df = pd.read_csv(
        file_path, 
        sep='$', 
        names=columns,
        on_bad_lines='skip',
        encoding='latin1'
    )
    
    for col in df.columns:
        if df[col].dtype == 'object':
            df[col] = df[col].apply(lambda x: None if pd.isna(x) or x == '' else x)
    
    # Add standard fields for the database
    df['created_at'] = datetime.now()
    df['updated_at'] = datetime.now()
    df['language'] = 'en'
    df['version'] = 28.0
    
    df = df.replace({np.nan: None})
    
    return df

In [9]:
parse_llt_file('/home/juanes/Downloads/MedDRA_28_0_ENglish/MedAscii/llt.asc')

Unnamed: 0,llt_code,llt_name,pt_code,llt_whoart_code,llt_harts_code,llt_costart_sym,llt_icd9_code,llt_icd9cm_code,llt_icd10_code,llt_currency,llt_jart_code,null_field,created_at,updated_at,language,version
0,10000001,Ventilation pneumonitis,10081988,,,,,,,N,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
1,10000002,11-beta-hydroxylase deficiency,10000002,,,,,,,Y,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
2,10000003,11-oxysteroid activity incr,10033315,,,,,,,N,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
3,10000004,11-oxysteroid activity increased,10033315,,,,,,,Y,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
4,10000005,17 ketosteroids urine,10000005,,,,,,,Y,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89769,10092088,Out of specification result not investigated,10092086,,,,,,,Y,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
89770,10092089,Missing batch production record,10092085,,,,,,,Y,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
89771,10092090,Out of specification result invalidated withou...,10092086,,,,,,,Y,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
89772,10092091,Out of specification result invalidated withou...,10092086,,,,,,,Y,,,2025-07-13 14:23:20.792125,2025-07-13 14:23:20.792485,en,28.0
