In [1]:
import os
import pandas as pd
import shutil
import datetime as dt
import csv
import re

# Date stamp (DD_MM_YYYY) embedded in the names of the generated CSV files.
today = dt.date.today().strftime("%d_%m_%Y")

# Move to the parent folder before reading 'sampledata.csv' by relative path.
# os.chdir() returns None, so the resulting working directory is captured
# separately instead of storing the call's return value.
# NOTE: this cell is not idempotent - every re-run moves one level further up.
os.chdir('../')
directory = os.getcwd()

df = pd.read_csv('sampledata.csv')

In [2]:
# Input files to archive, and the archive folders they are copied into.
target_files = ['ADT_sample.txt', 'Sample_ORU.txt', 'sampledata.csv']
dirs_to_make = ['Archive', 'Archive/Original', 'Archive/Modified']

# exist_ok=True keeps this cell safe to re-run once the folders exist.
# The loop variable is deliberately not called `dir`, which would shadow
# the builtin dir() for the rest of the notebook session.
for archive_dir in dirs_to_make:
    os.makedirs(archive_dir, exist_ok=True)

# Copy each input file into Archive/Original/, stopping at the first file
# that is missing (files listed before it have already been copied).
for target_file in target_files:
    if not os.path.exists(target_file):
        print(f'File {target_file} does not exist. Ensure all files are present.')
        break
    print(f'File {target_file} exists. Copying file to Archive/Original/ folder...')
    shutil.copy(target_file, './Archive/Original/')



File ADT_sample.txt exists. Copying file to Archive/Original/ folder...
File Sample_ORU.txt exists. Copying file to Archive/Original/ folder...
File sampledata.csv exists. Copying file to Archive/Original/ folder...


In [3]:
# Preview the first five rows of the loaded CSV data.
df.head(n=5)

Unnamed: 0,#,id,site_id,service_location,message_type,message_time,message_id,account_number,discharge_disposition,financial_class,...,bill_amount,patient_drivers_license_number,guarantor_first_name,guarantor_last_name,guarantor_middle_name,guarantor_address_1,guarantor_address_2,guarantor_city,guarantor_state,guarantor_zip
0,1,30056263,C22,1,ADT-A08,19:00.0,5885975,CA00001,,PPO,...,700,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
1,2,30056267,C22,1,ADT-A08,44:00.0,5885976,CA00001,,PPO,...,727,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
2,3,30056269,C22,1,ADT-A08,14:00.0,5885977,CA00001,,PPO,...,728,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
3,4,30058986,C22,1,ADT-A08,30:00.0,5886054,CA00001,,PPO,...,912,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
4,5,30630702,I99,1,ADT-A04,48:28.0,5886293,CA00003,,Self-pay,...,984,,QINEVAAcuteprone,Printegratedtest,A,951 S Hebron Ave,,EVANSVILLE,IN,47714


In [4]:
def parse_and_save_csv(df, message_type, output_dir='./Archive/Modified', date_stamp=None):
    """Filter rows by message type, save them to a dated CSV, and return them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string ``message_type`` column.
    message_type : str
        Text to look for in ``message_type``. It is handed to
        ``Series.str.contains``, which treats it as a regular expression by
        default. It is also used as the prefix of the output file name.
    output_dir : str, optional
        Folder the CSV is written to; created if it does not exist.
    date_stamp : str, optional
        Date text embedded in the file name. When omitted, the notebook-level
        ``today`` value is used.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, so later edits to it do not
        touch (or warn about) the source frame.
    """
    stamp = today if date_stamp is None else date_stamp

    # na=False drops rows whose message_type is missing instead of
    # propagating NaN into the boolean mask.
    is_match = df['message_type'].str.contains(message_type, na=False)
    parsed_df = df[is_match].copy()

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f'{message_type}_{stamp}_Modified_file.csv')
    parsed_df.to_csv(output_path, index=False)
    return parsed_df

# Select the ADT rows, write them to the dated CSV, then display them.
adt_df = parse_and_save_csv(df, message_type='ADT')
adt_df


Unnamed: 0,#,id,site_id,service_location,message_type,message_time,message_id,account_number,discharge_disposition,financial_class,...,bill_amount,patient_drivers_license_number,guarantor_first_name,guarantor_last_name,guarantor_middle_name,guarantor_address_1,guarantor_address_2,guarantor_city,guarantor_state,guarantor_zip
0,1,30056263,C22,1,ADT-A08,19:00.0,5885975,CA00001,,PPO,...,700,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
1,2,30056267,C22,1,ADT-A08,44:00.0,5885976,CA00001,,PPO,...,727,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
2,3,30056269,C22,1,ADT-A08,14:00.0,5885977,CA00001,,PPO,...,728,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
3,4,30058986,C22,1,ADT-A08,30:00.0,5886054,CA00001,,PPO,...,912,,Testing,MU,,12345,SOMEWHERE,VALENCIA,CA,91355
4,5,30630702,I99,1,ADT-A04,48:28.0,5886293,CA00003,,Self-pay,...,984,,QINEVAAcuteprone,Printegratedtest,A,951 S Hebron Ave,,EVANSVILLE,IN,47714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191,192,66085,I71,1,ADT-A02,28:35.0,3352202,CA00001,,P,...,861,,,,,,,,,
192,193,66088,I71,1,ADT-A02,28:53.0,3352205,CA00002,,P,...,485,,,,,,,,,
197,198,59157,I71,1,ADT-A04,05:10.0,3345345,CA00005,,P,...,771,,Uner,Upttest,,,,,IN,
198,199,59158,I71,1,ADT-A02,05:11.0,3345346,CA00006,,P,...,614,,,,,,,,,


In [5]:
# Select the ORU rows, write them to the dated CSV, then display them.
oru_df = parse_and_save_csv(df, message_type='ORU')
oru_df

Unnamed: 0,#,id,site_id,service_location,message_type,message_time,message_id,account_number,discharge_disposition,financial_class,...,bill_amount,patient_drivers_license_number,guarantor_first_name,guarantor_last_name,guarantor_middle_name,guarantor_address_1,guarantor_address_2,guarantor_city,guarantor_state,guarantor_zip
120,121,2231379,I71,1,ORU-R01,57:35.0,3358130,CA00001,,P,...,661,,,,,,,,TN,


In [6]:
def _first_segment_fields(lines, segment_id):
    """Return the '|'-split fields of the first line starting with segment_id, or None."""
    for line in lines:
        if line.startswith(segment_id):
            return line.split('|')
    return None


def _component(fields, field_index, component_index=0):
    """Return one '^'-separated component of a field, or None when either index is absent."""
    if len(fields) <= field_index:
        return None
    components = fields[field_index].split('^')
    if len(components) <= component_index:
        return None
    return components[component_index]


def extract_msg_fields(message, bill_amount=1234, date_of_service=None):
    """Extract patient and guarantor fields from a pipe-delimited message file.

    Parameters
    ----------
    message : str
        Path of the text file holding the message (one segment per line).
    bill_amount : optional
        Value stored under ``bill_amount``; defaults to 1234.
    date_of_service : str, optional
        Value stored under ``date_of_service``. When omitted, the
        notebook-level ``today`` value is used.

    Returns
    -------
    dict
        Patient keys are present only when a ``PID`` line exists, guarantor
        keys only when a ``GT1`` line exists. Any individual field or
        component that is missing from the segment is reported as ``None``
        instead of raising ``IndexError``.
    """
    # Read everything up front so the file is closed before parsing starts.
    with open(message, 'r') as msg_file:
        lines = msg_file.read().split('\n')

    extracted_data = {}

    # --- PID segment: patient details -------------------------------------
    pid_fields = _first_segment_fields(lines, 'PID')
    if pid_fields:
        # Field 5 is read as last^first^middle.
        extracted_data['patient_last_name'] = _component(pid_fields, 5, 0)
        extracted_data['patient_first_name'] = _component(pid_fields, 5, 1)
        extracted_data['patient_middle_name'] = _component(pid_fields, 5, 2)
        # Field 11 is the address; component 0 is the street line and
        # component 3 the state.
        extracted_data['patient_address_1'] = _component(pid_fields, 11, 0)
        extracted_data['patient_state'] = _component(pid_fields, 11, 3)
        # The account number is the whole of field 3, not just a component.
        extracted_data['account_number'] = pid_fields[3] if len(pid_fields) > 3 else None
        extracted_data['bill_amount'] = bill_amount
        extracted_data['date_of_service'] = today if date_of_service is None else date_of_service
        # Field 13 may be absent on short segments; search an empty string
        # in that case so the result is simply None.
        phone_field = pid_fields[13] if len(pid_fields) > 13 else ''
        phone_match = re.search(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', phone_field)
        extracted_data['patient_phone_number'] = phone_match.group(0) if phone_match else None

    # --- GT1 segment: guarantor details -----------------------------------
    gt1_fields = _first_segment_fields(lines, 'GT1')
    if gt1_fields:
        # NOTE(review): the guarantor name is read as first^last^middle while
        # the patient name above is read as last^first^middle - confirm this
        # matches the layout of the incoming messages.
        extracted_data['guarantor_first_name'] = _component(gt1_fields, 3, 0)
        extracted_data['guarantor_last_name'] = _component(gt1_fields, 3, 1)
        extracted_data['guarantor_middle_name'] = _component(gt1_fields, 3, 2)
        extracted_data['guarantor_address'] = _component(gt1_fields, 5, 0)

    return extracted_data

        
# Pull the fields out of the archived ADT sample message and show them.
adt_parsed = extract_msg_fields(message='./Archive/Original/ADT_sample.txt')
print(adt_parsed)

# Same for the archived ORU sample message.
oru_parsed = extract_msg_fields(message='./Archive/Original/Sample_ORU.txt')
print(oru_parsed)

{'patient_last_name': 'MOUSE', 'patient_first_name': 'MICKEY', 'patient_middle_name': 'LITTLE', 'patient_address': '100 WALDO AVE', 'patient_state': 'CA', 'account_number': '1395357', 'bill_amount': 1234, 'date_of_service': '17_08_2023', 'patient_phone_number': '(800)545-0000', 'guarantor_first_name': 'MINNIE', 'guarantor_last_name': 'MOUSE', 'guarantor_middle_name': 'ANN', 'guarantor_address': '100 SNOW WHITE LANE'}
{'patient_last_name': 'DUCK', 'patient_first_name': 'DONALD', 'patient_middle_name': 'L', 'patient_address': '1111 MATTERHORN RD', 'patient_state': 'FL', 'account_number': 'SA00401418', 'bill_amount': 1234, 'date_of_service': '17_08_2023', 'patient_phone_number': '999-999-9999'}


In [7]:
# Turn each parsed message into a one-row frame and append it beneath the
# matching rows taken from the CSV, renumbering the index from zero.
adt_txt_row = pd.DataFrame([adt_parsed])
adt_df_from_txt = pd.concat([adt_df, adt_txt_row], ignore_index=True)

oru_txt_row = pd.DataFrame([oru_parsed])
oru_df_from_txt = pd.concat([oru_df, oru_txt_row], ignore_index=True)


In [8]:
# Display the ORU rows together with the row appended from the parsed message.
oru_df_from_txt

Unnamed: 0,#,id,site_id,service_location,message_type,message_time,message_id,account_number,discharge_disposition,financial_class,...,guarantor_last_name,guarantor_middle_name,guarantor_address_1,guarantor_address_2,guarantor_city,guarantor_state,guarantor_zip,patient_address,date_of_service,patient_phone_number
0,121.0,2231379.0,I71,1.0,ORU-R01,57:35.0,3358130.0,CA00001,,P,...,,,,,,TN,,,,
1,,,,,,,,SA00401418,,,...,,,,,,,,1111 MATTERHORN RD,17_08_2023,999-999-9999


In [9]:
# Write the combined frames to the dated CSV files in Archive/Modified.
# These are the same file names parse_and_save_csv wrote earlier, so the
# earlier filtered-only files are replaced.
adt_output_path = f'./Archive/Modified/ADT_{today}_Modified_file.csv'
adt_df_from_txt.to_csv(adt_output_path, index=False)

oru_output_path = f'./Archive/Modified/ORU_{today}_Modified_file.csv'
oru_df_from_txt.to_csv(oru_output_path, index=False)