
# #QuickGrab 

#### For cases where I open the file for the ONLY purpose of copying important code.

*This markdown cell and the following code cell were added after the rest of this script was finished.*


In [2]:
from pj_funcs import *

# Import past few days' PHESS data
# NOTE(review): pd is not imported in this cell; presumably it arrives via the star import above -- confirm
file = pd.read_csv('../data/raw/export.csv', encoding = 'Cp1252')

# Implement my custom ADT-parsing function.
# Timed=True makes the function print its total runtime when it finishes.
df = NSSP_Element_Grabber(file,Timed=True)

100.2149727344513


# NSSP Element Grabber Explanation

### The Main Function

In [None]:
def NSSP_Element_Grabber(data,Timed = True, Priority_only=False, outfile='None'):
    '''
    Creates dataframe of important elements from PHESS data.

    Every HL7 message in `data` is parsed with hl7.parse.  For each message,
    the 'Code' string of every active reader-file row (Ignore == 0, ordered
    by Group_Order) is executed once with exec().  Those code strings see the
    names `m`, `z`, `col_name`, `df` and `obj` defined in the loop below.

    Parameters
    ----------
    data: pandas DataFrame, required, from PHESS sql pull
        Must contain the columns MESSAGE, FACILITY_NAME,
        PATIENT_VISIT_NUMBER and PATIENT_MRN.

    Timed:  Default is True.  Prints total runtime at end.
    Priority_only:  Default is False.
        If True, only gives priority 1 or 2 elements, followed by the four
        columns copied straight from `data`.
    outfile:  Default is 'None' (a real None is accepted as well):
        Replace with file name for dataframe to be written to as csv
        (written under ../data/processed/).
        DO NOT INCLUDE .csv IF YOU CHOOSE TO MAKE ONE

    Returns
    -------
    dataframe

    Requirements
    ------------
    - import pandas as pd
    - import numpy as np
    - import time
    - import hl7
    '''
    # Start our runtime clock.
    start_time = time.time()

    # Read in reader file as pandas dataframe
    reader = pd.read_excel('../data/processed/NSSP_Element_Reader.xlsx')

    # Create empty dataframe with the columns we want interpreted from reader file
    df = pd.DataFrame(columns=reader['Processed Column'])

    # Create a few extra columns straight from our data file
    df['MESSAGE'] = data['MESSAGE']
    df['FACILITY_NAME'] = data['FACILITY_NAME']
    df['PATIENT_VISIT_NUMBER'] = data['PATIENT_VISIT_NUMBER']
    df['PATIENT_MRN'] = data['PATIENT_MRN']

    # Create a subset of rows from our reader file.  Only ones to loop through.
    # Order by 'Group_Order' so that some run before others that rely on previous.
    reader_sub = reader[reader.Ignore == 0].sort_values('Group_Order')

    # Loop through all data rows
    for z in np.arange(0,len(data)):

        # Locate our message and decipher it using the hl7 parser
        message = df['MESSAGE'][z]
        m = hl7.parse(message)

        # For each row in our reader file subset
        for j in np.arange(0,len(reader_sub)):

            # Initialize object.  Don't want one recycled from last loop
            obj=''

            # Choose the row we will use from the reader file
            row = reader_sub.iloc[j]

            # Identify element name we're working with.  Also a column name in output dataframe
            col_name = str(row['Processed Column'])

            # Identify code from our reader file we use to find the element in the HL7 message
            subcode = row['Code']

            # Run the snippet exactly once.  The previous form probed it with
            # NoError(exec, ...) and then exec'd it again on success, which
            # (assuming NoError calls its function) ran every snippet twice.
            ### NOTE:  calling locals and globals allows you to access all home-grown functions
            try:
                exec(subcode,globals(), locals())
            except Exception:
                # Deliberate best-effort: a snippet that fails on this message
                # leaves whatever cells it had not yet written as null.
                pass

    # End time stopwatch
    end_time = time.time()

    # Unless they did not want it, print runtime
    if Timed != False:
        print(end_time-start_time)

    # If they only want priority elements:
    if Priority_only==True:
        # MESSAGE, FACNAME, PATIENT_VN, PATIENT_MRN.  Selected by name because
        # snippets may append new columns, so these are not guaranteed to be
        # the last four columns any more.
        extra_cols = ['MESSAGE','FACILITY_NAME','PATIENT_VISIT_NUMBER','PATIENT_MRN']
        # find all cols we want from reader file. Priority cols
        priority_cols = reader['Processed Column'][(reader['Priority'] == 1.0)|(reader['Priority'] == 2.0)]
        # Priority columns first, then the pass-through columns
        df = df.loc[:,list(priority_cols)+extra_cols]

    # If they want an output file...
    if outfile is not None and outfile!='None':
        # Specify output path and add csv bit.
        outpath = '../data/processed/'+outfile+'.csv'
        # No index
        df.to_csv(outpath, index=False)

    # return the dataframe!
    return df

# README

#### Above, we have the main function that I use to parse an HL7 message for NSSP Priority Elements.  For each message in a dataset, I loop through a separate dataframe (the reader file) that holds the code I use to locate an element within an HL7 message.  The exec() function allows me to access this code and implement it. 

#### All material below mostly describes each Element's code.





Core concepts to understand.
* I use an Excel Document as a 'reader' file.  Within this file:
    * Each row represents an NSSP Priority Element OR something to help support an NSSP element.  Supporting info can be a data type description, the data source of a specific element, or even a separate element within the list of a hierarchically defined element.  
    * Important columns are:
        * Processed Column - Name of Element.  Will be in outputted dataframe.
        * HL7 Processed Element Description - Describes steps for how to locate an element within the HL7 message.
        * <b>Code - The python code as a string that will be executed in our main HL7 parsing function.</b>
        * Priority - Defined by NSSP.  1,2, or NaN (for supporting elements)
        * Group_Order - The order in which to find elements.  Elements that require supporting element information have a later order.
        * Ignore - 0 or 1.  1 indicates it WILL be ignored.  0 indicates that we will call it in our main HL7 Parsing function.  When ignore=0, the row contains valid Code.
   
* The code in our excel file is read in as a string in Python using a pandas.read_excel() function.  
    * The string is then interpreted as code using the exec('string') function.

* The code in the excel file is ugly and not annotated so that the exec() function doesn't have to repeatedly interpret new lines and comments.
    * The code is copied here. (How it looks)
    * An <u>annotated</u> version is also kept here (Annotated Version)

* If you are confused as to how an element is attained, use `CTRL+F` to search for it specifically in this document

<hr style="border:1px solid gray"> </hr>


\* Functions that work for multiple elements have example codes.  You may see something say 

`Field = ['ABC']`

This is just an example, there is no field called ABC

## Before the following code is executed in our main function's loop, we have already defined the following terms:
* m = hl7.parse(message)
* z = integer representing the row index
* col_name = current element being coded.  Also column name


<hr style="border:5px solid gray"> </hr>


## DI_One()

This function is the simplest situation:  where the HL7 Element is found in one described location directly inputted from the message.

### Direct Input from HL7 - One Element


16/43 fit this criteria

<ul>
    <li>Visit_ID </li>
    <li>Treating_Facility_ID   </li>
    <li>Admit_Date_Time (only take 1st 12 chars for datetime uniformity) </li>
    <li>Patient_Class_Code   </li>
    <li>Patient_Zip   </li>
    <li>Processing_ID   </li>
    <li>Trigger_Event   </li>
    <li>Message_Date_Time   </li>
    <li>Recorded_Date_Time   </li>
    <li>Discharge_Disposition   </li>
    <li>Discharge_Date_Time   </li>
    <li>Administrative_Sex   </li>
    <li>Patient_City   </li>
    <li>Patient_State  </li>
    <li>Patient_Country   </li>
    <li>Version_ID   </li>

</ul>

#### All of the mentioned elements above use the following function DI_One(ind,m,df,z,col_name) 

for more info on the function, run the following cell

In [None]:
DI_One?

In [None]:
########################
# How it looks  
########################
# Un-annotated form, as the code is kept in the reader file.
# 'ABC' is only a placeholder; there is no field called ABC.

ind = ['ABC',1,1,1,1]
DI_One(ind,m,df,z,col_name) 

In [None]:
########################
# Annotated Version 
########################

# Choose exact HL7 path to element, beginning with the field.
# ('ABC' is only a placeholder; there is no field called ABC.)
ind = ['ABC',1,1,1,1]

# Execute DI_One() using (index path, parsed message, dataframe, row z, column name)
DI_One(ind,m,df,z,col_name) 

### NOTE:  DI_One updates the respective cell in the dataframe 'df'.  See the function's description (DI_One?) for more detail.

### Direct Input from HL7 - 1st non-null of Two Elements (NO CONCATENATION)

Here we describe a situation where we have been given two locations.  If an element exists in the first location, we use it to define our element.  Otherwise, search the second location and do the same thing.

3/43 

<ul>
    <li>Sending_Facility_ID  </li>
    <li>First_Patient_ID  </li>
    <li>Admit_Reason_Code  </li>
</ul>

In [None]:
########################
# How it looks
########################

ind, ind2 = ['ABC', 0, 0, 0, 0], ['DEF', 1, 1, 1, 1]
obj = DI_One(ind, m, df, z, col_name)
if not len(obj):
    DI_One(ind2, m, df, z, col_name)

In [None]:
########################
# Annotated Version
########################

# Full index paths, in order of preference:
#   ind  = 1st choice in the hierarchy
#   ind2 = 2nd choice in the hierarchy
# ('ABC' / 'DEF' are placeholders, not real fields.)
ind, ind2 = ['ABC', 0, 0, 0, 0], ['DEF', 1, 1, 1, 1]

# Try the preferred location first and keep what DI_One hands back
obj = DI_One(ind, m, df, z, col_name)

# Nothing came back (zero length)?  Then repeat the pull with the 2nd path.
if not len(obj):
    DI_One(ind2, m, df, z, col_name)


### NOTE:  DI_One updates the respective cell in the dataframe 'df'.  See the function's description for more detail.

## DI_One_CONC()

### Direct Input from HL7 - One Element (WITH CONCATENATION)

There is only one case where this is used directly.  It is when we pull our information directly from one location, but the field may repeat, which requires concatenating elements.

1/43

<ul>
    <li>Diagnosis_Type  </li>
</ul>

#### The mentioned element above uses the following function DI_One_CONC(field,ind,m,df,z,col_name) 

for more info on the function, run the following cell

In [None]:
DI_One_CONC?

In [None]:
########################
# How it looks  
########################
# Un-annotated form, as the code is kept in the reader file.
# 'ABC' is only a placeholder; there is no field called ABC.

field = ['ABC']
ind = [0,0,0]
DI_One_CONC(field,ind,m,df,z,col_name)

In [None]:
########################
# Annotated Version 
########################

# Specify field as a 1-element list ('ABC' is only a placeholder)
field = ['ABC']

# Specify the rest of the indices
ind = [0,0,0]

# Execute DI_One_CONC() using (field, rest_of_index, message, dataframe, row_z, column_name)
DI_One_CONC(field,ind,m,df,z,col_name)


### NOTE:  DI_One_CONC updates the respective cell in the dataframe 'df'.  See the function's description (DI_One_CONC?) for more detail.

### Direct Input from HL7 - 1st non-null of Two Elements (WITH CONCATENATION)

Similar to our other '1st non-null of two elements' definition, but this time with the option of a repeating field whose elements can be concatenated.

6/43

<ul>
    <li>Admit_Reason_Description  </li>
    <li>Diagnosis_Code  </li>
    <li>Diagnosis_Description  </li>
    <li>Race_Code      </li>
    <li>Ethnicity_Code       </li>
    <li>Ethnicity_Description       </li>
</ul>

In [None]:
########################
# How it looks
########################

field, ind = ['ABC'], [0, 0, 0]
field2, ind2 = ['DEF'], [1, 1, 1]
obj = DI_One_CONC(field, ind, m, df, z, col_name)
if not len(obj):
    DI_One_CONC(field2, ind2, m, df, z, col_name)

In [None]:
########################
# Annotated Version
########################

# 1st choice in the hierarchy.  Concatenation is needed, so the field and the
# remaining indices are given separately.
field, ind = ['ABC'], [0, 0, 0]

# 2nd choice in the hierarchy, split the same way.
# ('ABC' / 'DEF' are placeholders, not real fields.)
field2, ind2 = ['DEF'], [1, 1, 1]

# Pull the element from the preferred location: path 'field' / 'ind'
obj = DI_One_CONC(field, ind, m, df, z, col_name)

# If that came back empty (zero length), pull from path 'field2' / 'ind2' instead
if not len(obj):
    DI_One_CONC(field2, ind2, m, df, z, col_name)

### NOTE:  DI_One_CONC updates the respective cell in the dataframe 'df'.  See the function's description for more detail.

# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# ONE BY ONE


### For each NSSP Priority element below, we describe a unique code on how to access it from an HL7 message.  Some pieces of code will write multiple elements to our output dataframe at once:

#### Example:  Reported_Age_Units is found at the same time as Reported_Age for the sake of saving computing runtime
# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx



### C_Unique_Patient_ID
--------------------
Field must be populated.  If not populated, record is sent to the Exceptions table.

Scan the following fields/ HL7 segments and select the first non-null value:
* Medical_Record_Number
* Patient ID (PID-2.1) [Legacy]
* First_Patient_ID (PID-3)
* Patient_Account_Number (PID-18) 
* Visit_Number (PV1-19)


\* Also outputs C_Unique_Patient_ID_Data_source

In [None]:
########################
# How it looks
########################

for col in ['Medical_Record_Number', 'Patient_ID', 'First_Patient_ID', 'Patient_Account_Number', 'Visit_ID']:
    var = df.loc[z, col]
    if not (var == var):
        continue
    df.loc[z, col_name] = var
    df.loc[z, col_name + '_Data_Source'] = col
    break

In [None]:
########################
# Annotated Version
########################


# Walk the candidate columns in hierarchical order:  first choice -> last choice
for col in ['Medical_Record_Number', 'Patient_ID', 'First_Patient_ID', 'Patient_Account_Number', 'Visit_ID']:

    # Value of this candidate element in the current row, z
    var = df.loc[z, col]

    # NaN is the one value that does not equal itself, so a failed
    # self-comparison means the cell is null: move on to the next candidate.
    if not (var == var):
        continue

    # First non-null candidate wins: fill this row's C_Unique_Patient_ID cell,
    # remember which column supplied it, and stop searching.
    df.loc[z, col_name] = var
    df.loc[z, col_name + '_Data_Source'] = col
    break

### Facility_Type_Code
--------------------
OBX-5 Segment where
* OBX-3 Observation Identifier SS003^Facility/visit type
* OBX-2="CWE"

Populate this field with the OBX-5.1 value (standard code) if it exists. ELSE use the OBX-5.4 (local code) value.

If more than one OBX segment is sent that contains facility type information, only the FIRST OBX segment will be considered.

 \*Also outputs Facility_Type_Code_OBX2
 \*Also outputs Facility_Type_Code_OBX3

In [None]:
########################
# How it looks
########################

if NoError(index, m, 'OBX'):
    pattern = re.compile('SS003', re.M | re.I)
    for obx in m['OBX']:
        if pattern.search(str(obx)) is None:
            continue
        two = str(obx[2])
        three = str(obx[3])
        five = obx[5]
        if len(str(five[0])) > 0:
            while type(five) is not str:
                five = five[0]
        else:
            five = str(five[3])
        if len(five) > 0:
            df.loc[z, col_name] = five
            df.loc[z, col_name + '_OBX2'] = two
            df.loc[z, col_name + '_OBX3'] = three
            break


In [None]:
########################
# Annotated Version
########################

# Facility_Type_Code = OBX-5 of the FIRST OBX segment whose text mentions 'SS003'.
# That segment's OBX-2 and OBX-3 are also stored, in <col_name>_OBX2 and
# <col_name>_OBX3, for the option to later do a data validity test.

# Check to see we can index OBX
if NoError(index,m,'OBX'):

    # Compile the key pattern once, before looping (case-insensitive)
    pattern = re.compile('SS003',re.M|re.I)

    # Loop through all repeated OBX segments
    for obx in m['OBX']:

        # Skip any segment whose string form does not contain the pattern
        if pattern.search(str(obx)) is None:
            continue

        # Pick out OBX2, OBX3, OBX5
        two = str(obx[2])
        three = str(obx[3])
        five = obx[5]

        # If five[0] is non-empty, index it with a 0 until it is a string
        if len(str(five[0])) > 0:
            while type(five) is not str:
                five = five[0]

        # Otherwise, choose five[3]
        else:
            five = str(five[3])

        # Write OBX5 only if it is non-empty, together with OBX2 and OBX3.
        if len(five)>0:
            df.loc[z,col_name] = five
            df.loc[z,col_name+'_OBX2'] = two
            # Fixed: this line used the undefined name 'three2', which raised a
            # NameError and left <col_name>_OBX3 unwritten.
            df.loc[z,col_name+'_OBX3'] = three

            # "If more than one OBX segment is sent that contains facility type information, only the FIRST OBX segment will be considered"
            break
                
                

### Chief_Complaint_Text
--------------------
OBX-5 segments where:
* OBX-3 Observation Identifier is 8661-1 and/or 11292-0
* OBX-2 = "TX" or "CWE" or "CW"

Select all non-null values and concatenate:
IF OBX-2="TX" then Chief_Complaint_Text = OBX-5.1
IF OBX-2="CWE" or "CW" then Chief_Complaint_Text = concatenate(OBX-5.9, OBX-5.2, OBX-5.5)

 \*Also outputs Chief_Complaint_Text_OBX2

\*Also outputs Chief_Complaint_Text_OBX3

# IMPORTANT NOTE - 

Reading the description above, we see that there is a chance that we will have multiple chief complaint fields.  If this is the case, we will have to concatenate the multiple fields.  We will denote this field concatenation with the '|' character.

There is also a chance that if OBX-2 = CW/CWE we will have to concatenate multiple subcomponents.  We will denote this sub-concatenation with a '^' character.

While it is rare for the Chief Complaint field to repeat, there is a chance that we will end up with a chief_complaint_text similar to the example below.

`CC_Text = OBX.1.5.9^OBX.1.5.2^OBX.1.5.5 | OBX.4.5.9^OBX.4.5.2^OBX.4.5.5`

Above the repeating field is OBX|1| and OBX|4|

In [None]:
########################
# How it looks
########################

pattern = re.compile('(8661-1)|(11292-0)', re.M | re.I)
fives, twos, threes = [], [], []
if NoError(index, m, 'OBX'):
    for obx in index(m, 'OBX'):
        if pattern.search(str(obx)) is None:
            continue
        if not (NoError(index, obx, 2) & NoError(index, obx, 3) & NoError(index, obx, 5)):
            continue
        two = str(obx[2])
        three = str(obx[3])
        if two in ('CW', 'CWE'):
            five = obx[5]
            pieces = []
            for sub in (8, 1, 4):
                if NoError(index_n, five, [0, sub]):
                    pieces.append(str(index_n(five, [0, sub])))
                else:
                    pieces.append('')
            fife_dog = '^'.join(pieces)
            if len(fife_dog.replace('^', '')) > 0:
                fives.append(fife_dog)
                twos.append(two)
                threes.append(three)
        elif two == 'TX':
            five = obx[5]
            while type(five) is not str:
                five = five[0]
            fives.append(five)
            twos.append(two)
            threes.append(three)

    entry_5 = '|'.join(fives)
    entry_2 = '|'.join(twos)
    entry_3 = '|'.join(threes)

    if len(entry_5) > 0:
        df.loc[z, col_name] = entry_5
        df.loc[z, col_name + '_OBX2'] = entry_2
        df.loc[z, col_name + '_OBX3'] = entry_3

In [None]:
########################
# Annotated Version
########################

# Observation identifiers that mark a chief-complaint OBX segment, compiled once
pattern = re.compile('(8661-1)|(11292-0)', re.M | re.I)

# One entry per matching segment; each list is merged into a single string at the end
fives, twos, threes = [], [], []

# Check to see if OBX can be indexed into
if NoError(index, m, 'OBX'):

    # Loop through the repeated OBX segments
    for obx in index(m, 'OBX'):

        # Skip segments whose string form contains neither identifier
        if pattern.search(str(obx)) is None:
            continue

        # Skip segments where OBX2, OBX3 or OBX5 cannot be indexed
        if not (NoError(index, obx, 2) & NoError(index, obx, 3) & NoError(index, obx, 5)):
            continue

        # Define OBX2,3
        two = str(obx[2])
        three = str(obx[3])

        # If OBX2 == (CW or CWE): concatenate OBX-5.9, OBX-5.2, OBX-5.5
        if two in ('CW', 'CWE'):
            five = obx[5]

            # Collect the three parts in that order (positions 8, 1, 4 under
            # index 0 of OBX5).  A part that cannot be indexed stays ''.
            pieces = []
            for sub in (8, 1, 4):
                if NoError(index_n, five, [0, sub]):
                    pieces.append(str(index_n(five, [0, sub])))
                else:
                    pieces.append('')

            # Concatenated version (by '^' character) of OBX-5.9,OBX-5.2,OBX-5.5
            fife_dog = '^'.join(pieces)

            # Keep it only if it isn't just a string of carets
            if len(fife_dog.replace('^', '')) > 0:
                fives.append(fife_dog)
                twos.append(two)
                threes.append(three)

        # If OBX2 == TX: take OBX-5.1
        elif two == 'TX':
            five = obx[5]

            # Index OBX5 with a 0 until it is a string
            while type(five) is not str:
                five = five[0]

            fives.append(five)
            twos.append(two)
            threes.append(three)

    # After all repeated segments are looped through, concatenate the collected
    # entries with the '|' character
    entry_5 = '|'.join(fives)
    entry_2 = '|'.join(twos)
    entry_3 = '|'.join(threes)

    # If we have a non-zero length entry for OBX-5, write the OBX2, OBX3 and
    # OBX5 strings to their respective dataframe locations.
    if len(entry_5) > 0:
        df.loc[z, col_name] = entry_5
        df.loc[z, col_name + '_OBX2'] = entry_2
        df.loc[z, col_name + '_OBX3'] = entry_3

### C_FacType_Patient_Class
--------------------
Perform calculation based on another database variable.

IF Facility_Type_Code is null then C_FacType_Patient_Class is null
ELSE IF Facility_Type_Code=261QE0002X then C_FacType_Patient_Class=E
ELSE IF Facility_Type_Code=1021-5 then C_FacType_Patient_Class=I
ELSE IF Facility_Type_Code=261QM2500X then C_FacType_Patient_Class=O
ELSE IF Facility_Type_Code=261QP2300X then C_FacType_Patient_Class=O
ELSE IF Facility_Type_Code=261QU0200X then C_FacType_Patient_Class=O

 \*Also outputs C_FacType_Patient_Class_Data_Source

In [None]:
########################
# How it looks
########################

fac_code = str(df.loc[z, 'Facility_Type_Code'])
class_by_fac_code = {
    '261QE0002X': 'E',
    '1021-5': 'I',
    '261QM2500X': 'O',
    '261QP2300X': 'O',
    '261QU0200X': 'O',
}
if fac_code in class_by_fac_code:
    df.loc[z, col_name] = class_by_fac_code[fac_code]

df.loc[z, col_name + '_Data_Source'] = 'Facility_Type_Code'

In [None]:
########################
# Annotated Version
########################

# Locate facility type code from our row (previously attained), as a string
fac_code = str(df.loc[z, 'Facility_Type_Code'])

# Facility type code -> patient class letter:
#   261QE0002X                             -> 'E'
#   1021-5                                 -> 'I'
#   261QM2500X / 261QP2300X / 261QU0200X   -> 'O'
class_by_fac_code = {
    '261QE0002X': 'E',
    '1021-5': 'I',
    '261QM2500X': 'O',
    '261QP2300X': 'O',
    '261QU0200X': 'O',
}

# Any other code (including a null one) leaves C_FacType_Patient_Class untouched
if fac_code in class_by_fac_code:
    df.loc[z, col_name] = class_by_fac_code[fac_code]

# All Data sources stem from Facility_Type_Code.  Assign this data_source in
# dataframe (done on every row, whether or not a class was assigned).
df.loc[z, col_name + '_Data_Source'] = 'Facility_Type_Code'

### C_Patient_Class
--------------------
Calculated patient class is set to: 
1) Patient_Class_Code (PV1-2) if it is non-null AND valid according to the PHIN standard (D, E, I, V, B, O, P, R)
2) ELSE use a mapped Patient_Class_Code (PV1-2), if non-null AND not valid AND one of the following mapped values: 
    - Emergency: E 
    - Inpatient: I 
    - Observation: V
     - EMER: E 
    - EMERGENCY: E
    - INPATIENT: I
    - HOS: I
    - OBSERVE: V
    - Emergency Department: E
    - Outpatient: O
    - ICU: I
     - Obsv: V
     - OUTPATIENT: O
     - ER: E
3) ELSE use C_FacType_Patient_Class
4) ELSE assign class value based on the inferred patient class associated with the primary entry on the MFT (C_MFT_Patient_Class).  (Requires a look-up to the MFT table for the entry flagged as primary with Site_ID-C_Facility_ID; return the Inferred_Patient_Class value matching that entry)

 \*Also outputs C_Patient_Class_Data_Source

In [None]:
########################
# How it looks 
########################

# Fixed: a keyword-mapped class is no longer overwritten by C_FacType_Patient_Class.
class_code = (df.loc[z,'Patient_Class_Code'])
assigned = False
if class_code == class_code:
    class_code = str(class_code).upper()
    if class_code in ['D', 'E', 'I', 'V', 'B', 'O', 'P', 'R']:
        df.loc[z,col_name] = class_code
        df.loc[z,col_name+'_Data_Source'] = 'Patient_Class_Code'
        assigned = True
    else:
        dicts1 = ['OBS','EMER','ER','INPATIENT','ICU','OUTPATIENT']
        dicts2 = ['V','E','E','I','I','O']
        for keyword, letter in zip(dicts1, dicts2):
            if keyword in class_code:
                df.loc[z,col_name] = letter
                df.loc[z,col_name+'_Data_Source'] = 'Patient_Class_Code_Mapping'
                assigned = True
                break
if not assigned:
    C_FacType_Patient_Class = (df.loc[z,'C_FacType_Patient_Class'])
    if C_FacType_Patient_Class == C_FacType_Patient_Class:
        df.loc[z,col_name] = C_FacType_Patient_Class
        df.loc[z,col_name+'_Data_Source'] = 'C_FacType_Patient_Class'



In [None]:
########################
# Annotated Version
########################

# C_Patient_Class is taken from the first source that yields a value:
#   1) Patient_Class_Code, when it is already a valid letter
#   2) Patient_Class_Code mapped through a keyword list
#   3) C_FacType_Patient_Class
# Fixed: step 3 used to run even after step 2 had succeeded (the inner 'break'
# only left the keyword loop), so a mapped class was overwritten whenever
# C_FacType_Patient_Class was non-null.  Step 3 is now a true fallback.

# Locate Patient_Class_Code from current row in dataframe (previously attained)
class_code = (df.loc[z,'Patient_Class_Code'])

# Becomes True as soon as step 1 or step 2 writes a value
assigned = False

# if class code is non-null (NaN is the only value that does not equal itself)...
if class_code == class_code:

    # Compare in uppercase
    class_code = str(class_code).upper()

    # Step 1: if it is one of these letters...
    if class_code in ['D', 'E', 'I', 'V', 'B', 'O', 'P', 'R']:

        # Then C_Patient_Class = Patient_Class_Code.  Record the data source.
        df.loc[z,col_name] = class_code
        df.loc[z,col_name+'_Data_Source'] = 'Patient_Class_Code'
        assigned = True

    # Step 2: Patient_Class_Code isn't one of those letters, so try keywords
    else:

        # dicts1 are keywords, tested in this order
        dicts1 = ['OBS','EMER','ER','INPATIENT','ICU','OUTPATIENT']

        # dicts2 has same length and gives the letter each keyword maps to
        dicts2 = ['V','E','E','I','I','O']

        # NOTE(review): the written mapping also lists "HOS: I", which has no
        # keyword here -- confirm whether that is intentional.
        for keyword, letter in zip(dicts1, dicts2):

            # If that keyword is somewhere in the Patient_Class_Code entry...
            if keyword in class_code:

                # Then C_Patient_Class = the letter the keyword maps to
                df.loc[z,col_name] = letter

                # Record our data source and stop at the first matching keyword
                df.loc[z,col_name+'_Data_Source'] = 'Patient_Class_Code_Mapping'
                assigned = True
                break

# Step 3: only when neither step above produced a value
if not assigned:

    # Locate C_FacType_Patient_Class from current row in dataframe (previously attained)
    C_FacType_Patient_Class = (df.loc[z,'C_FacType_Patient_Class'])

    # If it is non-null...
    if C_FacType_Patient_Class == C_FacType_Patient_Class:

        # Then C_Patient_Class = C_FacType_Patient_Class.  Record data source.
        df.loc[z,col_name] = C_FacType_Patient_Class
        df.loc[z,col_name+'_Data_Source'] = 'C_FacType_Patient_Class'

# If nothing matched, we can't find the element.  Cell will remain as np.NaN


 
### C_Death
--------------------
Set to "Yes" IF:
* PID-30.1 (Patient_Death_Indicator) = First letter of "Y" and/or
* PID-29.1 is not null and/or
* PV1-36.1 contains "20," "22," "23," "24," "25," "26," "27," "28," "29," "40," "41," or "42"

ELSE set to "No"

 \*Also outputs C_Death_Data_Source

In [None]:
########################
# How it looks
########################

# Fixed: a Discharge_Disposition match is no longer overwritten with 'No'.
death_source = None
PDI = df.loc[z,'Patient_Death_Indicator']
if PDI == PDI and str(PDI)[:1].upper() == 'Y':
    death_source = 'Patient_Death_Indicator'
if death_source is None:
    Death_DT = df.loc[z,'Patient_Death_DateTime']
    if Death_DT == Death_DT:
        death_source = 'Patient_Death_DateTime'
if death_source is None:
    Dis_Disp = df.loc[z,'Discharge_Disposition']
    if Dis_Disp == Dis_Disp:
        dead_keys = ['20','22','23','24','25','26','27','28','29','40','41','42']
        disposition = str(Dis_Disp)
        for key in dead_keys:
            if key in disposition:
                death_source = 'Discharge_Disposition'
                break
if death_source is None:
    df.loc[z,col_name] = 'No'
else:
    df.loc[z,col_name] = 'Yes'
    df.loc[z,col_name+'_Data_Source'] = death_source
    

In [None]:
########################
# Annotated Version
########################

# C_Death is 'Yes' when any of the three sources below indicates death, else 'No'.
# Fixed: a Discharge_Disposition match used to write 'Yes' without flagging that
# an answer had been found, so the final step overwrote it with 'No' (while the
# data source stayed recorded).  The answer is now written exactly once, at the end.
# Also fixed: an empty Patient_Death_Indicator string no longer raises an
# IndexError ([:1] instead of [0]).

# Name of the column that proved the death; None while nothing has matched
death_source = None

# Locate PDI from dataframe row (already solved for)
PDI = df.loc[z,'Patient_Death_Indicator']

# If it is non-null and its first letter in uppercase is Y
if PDI == PDI and str(PDI)[:1].upper() == 'Y':
    death_source = 'Patient_Death_Indicator'

# Otherwise look at the death date/time (already solved for)
if death_source is None:
    Death_DT = df.loc[z,'Patient_Death_DateTime']

    # Any non-null value counts
    if Death_DT == Death_DT:
        death_source = 'Patient_Death_DateTime'

# Otherwise look at the discharge disposition (already solved for)
if death_source is None:
    Dis_Disp = df.loc[z,'Discharge_Disposition']

    # If it is non-null...
    if Dis_Disp == Dis_Disp:

        # Discharge disposition keys that indicate death
        dead_keys = ['20','22','23','24','25','26','27','28','29','40','41','42']

        # Plain loop on purpose: this snippet runs under exec() with separate
        # globals/locals, where a generator expression could not see
        # snippet-level names.
        disposition = str(Dis_Disp)
        for key in dead_keys:

            # If the discharge disposition contains this key, the patient died
            if key in disposition:
                death_source = 'Discharge_Disposition'
                break

# Write the result once.
if death_source is None:

    # Nothing indicated death.  Record 'No'.  Since we didn't really use a
    # data source, don't record one.
    df.loc[z,col_name] = 'No'
else:

    # Record 'Yes' together with the source that established it
    df.loc[z,col_name] = 'Yes'
    df.loc[z,col_name+'_Data_Source'] = death_source
    

### Medical_Record_Number
--------------------
Direct input from HL7 message, select the FIRST non-null PID-3.1 value 
WHERE
PID-3.5 = "MR" (Medical Record Number)

In [None]:
########################
# How it looks
########################

# Write PID-3.1 to the dataframe only when PID-3.5 marks it as a medical record number.
mrn_path = ['PID',0,3,0,0]
id_type = Index_pull(['PID',0,3,0,4,0],m)
if id_type == 'MR':
    DI_One(mrn_path,m,df,z,col_name)

In [None]:
########################
# Annotated Version
########################

# Index path for PID-3.1 (the identifier value itself)
mrn_path = ['PID',0,3,0,0]

# Pull PID-3.5 (the identifier type code) out of the message
id_type = Index_pull(['PID',0,3,0,4,0],m)

# Only when the type code is exactly 'MR'...
if id_type == 'MR':

    # ...let DI_One() pull PID-3.1 and write it into our dataframe cell
    DI_One(mrn_path,m,df,z,col_name)

### Age_Reported
--------------------
OBX-5 segment where OBX-3 observation identifier of 21612-7 AGE-REPORTED (LOINC) and OBX-2 Value Type="NM"

NOTE: If the Age Reported measurement reported does not "fit" into the decimal(6,2) datatype, the measurement will not be stored in Age_Reported. The string value of the measurement will still be stored in Str_Age_Reported.

 \*Also outputs Age_Reported_OBX2

\*Also outputs Age_Reported_OBX3


# Important Note:

When I tested my code on over 1,000 sample PHESS data rows, none of them fit into the decimal(6,2) datatype.  An example of this datatype would be 0025.00 for the age of 25.  Again, I could not find any cases where I saw this.  

My decision is to instead list off the integer value in OBX-5.  Just using intuition.



In [None]:
########################
# How it looks
########################

# Age_Reported: OBX-5 (plus OBX-2, OBX-3 and the OBX-6.2 units) of every OBX
# segment whose text mentions LOINC 21612-7.  A later match overwrites an earlier one.
if NoError(index,m,'OBX'):
    five = ''
    age_code = re.compile('21612-7',re.M|re.I)
    for obx in m['OBX']:
        if not age_code.findall(str(obx)):
            continue
        two = str(obx[2])
        three = str(obx[3])
        five = obx[5]
        if two.upper() == 'NM':
            # Drill into the first element until a plain string is reached
            while type(five) != str:
                five = five[0]
        units = Index_pull([6,0,1],obx)
        if len(five) == 0:
            continue
        df.loc[z,col_name] = five
        df.loc[z,col_name+'_OBX2'] = two
        df.loc[z,col_name+'_OBX3'] = three

        df.loc[z,'Age_Units_Reported'] = units
        df.loc[z,'Age_Units_Reported_OBX2'] = two
        df.loc[z,'Age_Units_Reported_OBX3'] = three


In [None]:
########################
# Annotated Version
########################

# Only continue if the message can be indexed by 'OBX'
if NoError(index,m,'OBX'):

    # Initialize OBX-5
    five = ''

    # Compile the LOINC code for AGE-REPORTED as a RegEx pattern
    age_code = re.compile('21612-7',re.M|re.I)

    # Walk through every repeated OBX segment
    for obx in m['OBX']:

        # Skip this segment if its string form never mentions the code
        if not age_code.findall(str(obx)):
            continue

        # Locate OBX-2, OBX-3 and OBX-5
        two = str(obx[2])
        three = str(obx[3])
        five = obx[5]

        # If OBX-2 reflects the numeric datatype...
        if two.upper() == 'NM':

            # ...keep taking element 0 of OBX-5 until it is a plain string
            while type(five) != str:
                five = five[0]

        # Pull out the units (OBX-6.2); '' when they cannot be indexed
        units = Index_pull([6,0,1],obx)

        # Nothing to record if OBX-5 has zero length
        if len(five) == 0:
            continue

        # Write OBX-5,2,3 into the Age_Reported related cells
        df.loc[z,col_name] = five
        df.loc[z,col_name+'_OBX2'] = two
        df.loc[z,col_name+'_OBX3'] = three

        # Write the units and OBX-2,3 into the Age_Units_Reported related cells
        df.loc[z,'Age_Units_Reported'] = units
        df.loc[z,'Age_Units_Reported_OBX2'] = two
        df.loc[z,'Age_Units_Reported_OBX3'] = three

### C_Patient_County
--------------------
Return the first non-null value from:
* PID-11.9
* PID-12.1

 \*Also outputs C_Patient_County_Data_Source

In [None]:
########################
# How it looks
########################

# C_Patient_County: first non-empty value of PID-11.9, then PID-12.1.
pid119 = Index_pull(['PID',0,11,0,8],m)

if len(pid119) > 0:
    df.loc[z,col_name] = pid119
    df.loc[z,col_name+'_Data_Source'] = 'County_Code'

else:
    pid121 = Index_pull(['PID',0,12,0,0],m)
    if len(pid121) > 0:
        # Store the PID-12.1 value itself (pid119 is empty in this branch)
        df.loc[z,col_name] = pid121
        df.loc[z,col_name+'_Data_Source'] = 'PID-12.1'

In [None]:
########################
# Annotated Version
########################

# Look at PID-11.9 WITHOUT actually appending it to a dataframe
pid119 = Index_pull(['PID',0,11,0,8],m)

# If PID-11.9 has non-zero length...
if len(pid119) > 0:
    
    # then C_Patient_County = PID-11.9
    df.loc[z,col_name] = pid119
    
    # Record Data Source
    df.loc[z,col_name+'_Data_Source'] = 'County_Code'

# Otherwise...
else:
    
    # Look at PID-12.1 WITHOUT actually appending it to a dataframe
    pid121 = Index_pull(['PID',0,12,0,0],m)
    
    # If PID-12.1 has non-zero length...
    if len(pid121) > 0:
        
        # then C_Patient_County = PID-12.1
        # (pid119 is known to be empty here, so it must not be the value written)
        df.loc[z,col_name] = pid121
        
        # Record Data Source
        df.loc[z,col_name+'_Data_Source'] = 'PID-12.1'

### C_Patient_Age
--------------------
Select the first non-null value from:
* C_Visit_Date - Birth_Date**
* Age_Reported 
* Age_Calculated 

** when performing the calculation, store the numeric year value if the value >= 2 years (round down to the nearest integer); else, store the number of months and round down to the nearest integer (i.e., 4.9 rounds to 4; 5.2 rounds to 5). If the number resolved is greater than 150 years, the Birth_Date_Time column will be set to NULL and the calculation will continue down the hierarchy to Age_Reported or Age_Calculated. If the value resolved is less than 1 month, store 0. 
Otherwise, we use the age and companion units from the message as hierarchically defined.

 \*Also outputs C_Patient_Age_Data_Source

In [None]:
########################
# How it looks
########################


# C_Patient_Age: first usable value of (C_Visit_Date - Birth_Date), Age_Reported, Age_Calculated.
c_visit_date = df.loc[z,'C_Visit_Date']
birth = df.loc[z,'Birth_Date_Time']

# Set once an age has been written, so the fallbacks only run when needed
age_found = False

if (c_visit_date == c_visit_date) and (birth == birth):
    c_visit_date = pd.to_datetime(c_visit_date[:8])
    birth = pd.to_datetime(birth[:8])
    days_old = (c_visit_date - birth).days
    years = int(days_old / 365.25)
    if years < 2:
        df.loc[z,col_name] = int(days_old / 30.4375)
        df.loc[z,col_name+'_Data_Source'] = 'C_Visit_Date - Birth_Date'
        df.loc[z,'C_Patient_Age_Units'] = 'Months'
        df.loc[z,'C_Patient_Age_Units_Data_Source'] = 'C_Visit_Date - Birth_Date'
        age_found = True
    elif years < 150:
        df.loc[z,col_name] = years
        df.loc[z,col_name+'_Data_Source'] = 'C_Visit_Date - Birth_Date'
        df.loc[z,'C_Patient_Age_Units'] = 'Years'
        df.loc[z,'C_Patient_Age_Units_Data_Source'] = 'C_Visit_Date - Birth_Date'
        age_found = True
    # An implausible age (150+ years) is not stored; continue down the hierarchy.

if not age_found:
    age_reported = df.loc[z,'Age_Reported']
    age_units_reported = df.loc[z,'Age_Units_Reported']

    if (age_reported == age_reported) and (age_units_reported == age_units_reported):
        df.loc[z,col_name] = age_reported
        df.loc[z,col_name+'_Data_Source'] = 'Age_Reported'
        df.loc[z,'C_Patient_Age_Units'] = age_units_reported
        df.loc[z,'C_Patient_Age_Units_Data_Source'] = 'Age_Units_Reported'

    else:
        cal_age = df.loc[z,'Age_Calculated']

        if cal_age == cal_age:
            df.loc[z,col_name] = cal_age
            df.loc[z,col_name+'_Data_Source'] = 'Age_Calculated'
        
    

In [None]:
########################
# Annotated Version
########################

# Locate our C_Visit_Date and Birth_Date_Time
c_visit_date = df.loc[z,'C_Visit_Date']
birth = df.loc[z,'Birth_Date_Time']

# Flag that flips to True once an age has been written.
# It lets the fallbacks below run whenever the date calculation gave nothing usable,
# not only when one of the two dates was null.
age_found = False

# If both of these are non-null (x == x is False only for NaN)...
if (c_visit_date == c_visit_date) and (birth == birth):
    
    # Only take the YYYYMMDD values of these and convert to datetime
    c_visit_date = pd.to_datetime(c_visit_date[:8])
    birth = pd.to_datetime(birth[:8])
    
    # Take the difference between the two datetimes in days
    days_old = (c_visit_date - birth).days
    
    # Divide by days in a year, and round down to integer
    years = int(days_old / 365.25)
    
    # If they're less than 2 years old
    if years < 2:
        
        # Then C_Patient_Age should be age in months...calculate this similarly to years, but divide by days in an avg month
        df.loc[z,col_name] = int(days_old / 30.4375)
        
        # Record Data Source
        df.loc[z,col_name+'_Data_Source'] = 'C_Visit_Date - Birth_Date'
        
        # Record Age units as months and record that Data Source
        df.loc[z,'C_Patient_Age_Units'] = 'Months'
        df.loc[z,'C_Patient_Age_Units_Data_Source'] = 'C_Visit_Date - Birth_Date'
        age_found = True
        
    # If they are at least 2 and less than 150 years old...
    elif years < 150:
        
        # record age, data source, age units, age units data source
        df.loc[z,col_name] = years
        df.loc[z,col_name+'_Data_Source'] = 'C_Visit_Date - Birth_Date'
        df.loc[z,'C_Patient_Age_Units'] = 'Years'
        df.loc[z,'C_Patient_Age_Units_Data_Source'] = 'C_Visit_Date - Birth_Date'
        age_found = True
    
    # Otherwise the birth date gives an implausible age (150+ years).
    # Store nothing here and continue down the hierarchy.

# Either a date was null or the calculated age was implausible...
if not age_found:
    
    # Find age_reported and age_units_reported
    age_reported = df.loc[z,'Age_Reported']
    age_units_reported = df.loc[z,'Age_Units_Reported']
    
    # If both of these are non-null
    if (age_reported == age_reported) and (age_units_reported == age_units_reported):
        
        # Then C_Patient_Age = Age_Reported.  Document this data source
        df.loc[z,col_name] = age_reported
        df.loc[z,col_name+'_Data_Source'] = 'Age_Reported'
        
        # Then C_Patient_Age_Units = Age_Units_Reported.  Document this data source
        df.loc[z,'C_Patient_Age_Units'] = age_units_reported
        df.loc[z,'C_Patient_Age_Units_Data_Source'] = 'Age_Units_Reported'
        
    # Now either age_reported or age_units_reported was null...
    else:
        
        # Locate Age_Calculated in our dataframe row
        cal_age = df.loc[z,'Age_Calculated']
        
        # If it is non-null...
        if cal_age == cal_age:
            
            # Then C_Patient_Age = Age_Calculated.  Record data source.
            df.loc[z,col_name] = cal_age
            df.loc[z,col_name+'_Data_Source'] = 'Age_Calculated'
    
    # If nothing works, we cannot find it and C_Patient_Age will be np.NaN

### Age Calculated

OBX-5 value of the OBX segment whose OBX-3 observation identifier is '29553-5'

NOTE: If the Age Calculated measurement reported does not "fit" into the decimal(6,2) datatype, the measurement will not be stored in Age_Calculated. The string value of the measurement will still be stored in Str_Age_Calculated.

In [None]:
########################
# How it looks
########################

# Age_Calculated: OBX-5 of every OBX segment whose text mentions LOINC 29553-5.
# A later match overwrites an earlier one.
if NoError(index,m,'OBX'):
    five = ''
    calc_age_code = re.compile('29553-5',re.M|re.I)
    for obx in m['OBX']:
        if calc_age_code.search(str(obx)) is None:
            continue
        five = obx[5]
        # Drill into the first element until a plain string is reached
        while type(five) != str:
            five = five[0]
        if five:
            df.loc[z,col_name] = five

In [None]:
########################
# Annotated Version
########################

# Only continue if the message can be indexed by 'OBX'
if NoError(index,m,'OBX'):

    # Initialize OBX-5
    five = ''

    # Compile the code we are looking for as a RegEx pattern
    calc_age_code = re.compile('29553-5',re.M|re.I)

    # Walk through every repeated OBX segment
    for obx in m['OBX']:

        # Skip this segment if its string form never mentions the code
        if calc_age_code.search(str(obx)) is None:
            continue

        # Index OBX-5
        five = obx[5]

        # Keep taking element 0 until it is a plain string
        while type(five) != str:
            five = five[0]

        # A non-empty string means we've found it!  Write it to our dataframe.
        if five:
            df.loc[z,col_name] = five

### C_Chief_Complaint
--------------------
Hierarchically defined (select first non-null):
* Chief_Complaint_Text
* Admit_Reason_Description

 \*Also outputs C_Chief_Complaint_Data_Source

In [None]:
########################
# How it looks
########################

# C_Chief_Complaint: first non-null of Chief_Complaint_Text, Admit_Reason_Description.
cc_text = df.loc[z,'Chief_Complaint_Text']

if not (cc_text == cc_text):
    admit_reason = df.loc[z,'Admit_Reason_Description']
    if admit_reason == admit_reason:
        df.loc[z,col_name] = admit_reason
        df.loc[z,col_name+'_Data_Source'] = 'Admit_Reason_Description'

else:
    df.loc[z,col_name] = cc_text
    df.loc[z,col_name+'_Data_Source'] = 'Chief_Complaint_Text'


In [None]:
########################
# Annotated Version
########################

# Read Chief_Complaint_Text from our dataframe row
cc_text = df.loc[z,'Chief_Complaint_Text']

# If it is null (x == x is False only for NaN)...
if not (cc_text == cc_text):

    # Fall back to Admit_Reason_Description from our dataframe row
    admit_reason = df.loc[z,'Admit_Reason_Description']

    # If that one is non-null...
    if admit_reason == admit_reason:

        # Then C_Chief_Complaint = Admit_Reason_Description.  Record data source
        df.loc[z,col_name] = admit_reason
        df.loc[z,col_name+'_Data_Source'] = 'Admit_Reason_Description'

# Otherwise (Chief_Complaint_Text was non-null)
else:

    # C_Chief_Complaint = Chief_Complaint_Text.  Record data source
    df.loc[z,col_name] = cc_text
    df.loc[z,col_name+'_Data_Source'] = 'Chief_Complaint_Text'


### Chief_Complaint_Code
--------------------
OBX-5.1, OBX-5.4 segments where:
* OBX-3 Observation Identifier is 8661-1 and/or 11292-0
* OBX-2 = "CWE" or "CW"

Select first non-null value from segments OBX-5.1, OBX-5.4 and concatenate if repeating

 \*Also outputs Chief_Complaint_Code_OBX2

\*Also outputs Chief_Complaint_Code_OBX3

# IMPORTANT NOTE - 

Reading the description above, we see that there is a chance that we will have multiple chief complaint code fields.  If this is the case, we will have to concatenate the multiple fields.  We will denote this field concatenation with the '|' character.

There is also a chance that we will have to concatenate multiple subcomponents.  We will denote this sub-concatenation with a '^' character.

While it is rare for the Chief Complaint Code field to repeat, there is a chance that we will end up with a chief_complaint_code similar to the example below.

`CC_Code = OBX.1.5.1^OBX.1.5.4|OBX.4.5.1^OBX.4.5.4`

In the example above, the repeating OBX segments are OBX|1| and OBX|4|.

In [None]:
########################
# How it looks
########################

# Chief_Complaint_Code: OBX-5.1^OBX-5.4 of every CW/CWE OBX segment that mentions
# 8661-1 or 11292-0, joined across segments with '|'.
cc_codes = re.compile('(8661-1)|(11292-0)',re.M|re.I)
two = three = five = ''

# One (OBX-5 text, OBX-2, OBX-3) tuple per qualifying segment
hits = []
if NoError(index,m,'OBX'):
    for obx in index(m,'OBX'):
        two = three = five = ''
        if cc_codes.search(str(obx)) is None:
            continue
        if not (NoError(index,obx,2) & NoError(index,obx,3) & NoError(index,obx,5)):
            continue
        two = str(obx[2])
        three = str(obx[3])
        if two not in ('CW','CWE'):
            continue
        five = obx[5]
        pt1 = str(index_n(five,[0,0])) if NoError(index_n,five,[0,0]) else ''
        pt2 = str(index_n(five,[0,3])) if NoError(index_n,five,[0,3]) else ''
        fife_dog = pt1 + '^' + pt2
        if fife_dog.replace('^',''):
            hits.append((fife_dog,two,three))

    entry_5 = '|'.join(hit[0] for hit in hits)
    entry_2 = '|'.join(hit[1] for hit in hits)
    entry_3 = '|'.join(hit[2] for hit in hits)

    if entry_5:
        df.loc[z,col_name] = entry_5
        df.loc[z,col_name+'_OBX2'] = entry_2
        df.loc[z,col_name+'_OBX3'] = entry_3

In [None]:
########################
# Annotated Version
########################

# Compile the two observation identifiers we look for as one RegEx pattern
cc_codes = re.compile('(8661-1)|(11292-0)',re.M|re.I)

# One (OBX-5 text, OBX-2, OBX-3) tuple is collected per qualifying OBX segment.
# They are merged into '|'-separated strings at the end.
hits = []

# Only continue if the message can be indexed by 'OBX'
if NoError(index,m,'OBX'):

    # Walk through every repeated OBX segment
    for obx in index(m,'OBX'):

        # Initialize OBX2,3,5 for this segment
        two = three = five = ''

        # Skip the segment if its string form mentions neither identifier
        if cc_codes.search(str(obx)) is None:
            continue

        # Skip the segment unless OBX2, OBX3 and OBX5 can all be indexed
        if not (NoError(index,obx,2) & NoError(index,obx,3) & NoError(index,obx,5)):
            continue

        # Define OBX2,3
        two = str(obx[2])
        three = str(obx[3])

        # Skip the segment unless OBX2 is exactly 'CW' or 'CWE'
        if two not in ('CW','CWE'):
            continue

        five = obx[5]

        # OBX-5.1 if it can be indexed, otherwise an empty string
        pt1 = str(index_n(five,[0,0])) if NoError(index_n,five,[0,0]) else ''

        # OBX-5.4 if it can be indexed, otherwise an empty string
        pt2 = str(index_n(five,[0,3])) if NoError(index_n,five,[0,3]) else ''

        # Concatenate OBX-5.1 and OBX-5.4 with the '^' character
        fife_dog = pt1 + '^' + pt2

        # Keep it only if something other than '^' characters is left
        if fife_dog.replace('^',''):
            hits.append((fife_dog,two,three))

    # Join the collected entries by the '|' character into one string each
    entry_5 = '|'.join(hit[0] for hit in hits)
    entry_2 = '|'.join(hit[1] for hit in hits)
    entry_3 = '|'.join(hit[2] for hit in hits)

    # If we have a non-empty entry_5...add OBX-5,2,3 to dataframe
    if entry_5:
        df.loc[z,col_name] = entry_5
        df.loc[z,col_name+'_OBX2'] = entry_2
        df.loc[z,col_name+'_OBX3'] = entry_3

### C_Patient_Age_Years
--------------------
Resolve the value returned in C_Patient_Age into a normalized Years value. The age of the patient should round down to the nearest Integer.

* If C_Patient_Age_Units begins with an M we will divide that value by 12 and round down.
* If C_Patient_Age_Units begins with a W we will divide that value by 52 and round down.
* If C_Patient_Age_Units begins with a D we will divide that value by 365 and round down.
* If C_Patient_Age_Units begins with a Y or ANNUM we will round down to the nearest whole number.

NOTE: The age calculation does not account for leap years at this time. This means the C_Patient_Age may be incorrect for patients with close birth dates and visit dates.

 \*Also outputs C_Patient_Age_Years_Data_Source

In [None]:
########################
# How it looks
########################

# C_Patient_Age_Years: C_Patient_Age normalised to whole years using C_Patient_Age_Units.
C_Patient_Age = df.loc[z,'C_Patient_Age']
sources = 'C_Patient_Age, C_Patient_Age_Units'

# First letter of the units -> how many of that unit make up one year
units_per_year = {'M': 12, 'W': 52, 'D': 365}

if (C_Patient_Age == C_Patient_Age):
    C_Patient_Age_Units = df.loc[z,'C_Patient_Age_Units']
    units = str(C_Patient_Age_Units).upper()
    # Match on how the units BEGIN ('Years', 'Months', ...), not on the whole string
    if units.startswith(('Y','ANNUM')):
        df.loc[z,col_name] = C_Patient_Age
        df.loc[z,col_name+'_Data_Source'] = sources
    elif units[:1] in units_per_year:
        df.loc[z,col_name] = int(float(C_Patient_Age) / units_per_year[units[:1]])
        df.loc[z,col_name+'_Data_Source'] = sources
        

In [None]:
########################
# Annotated Code
########################

# Locate C_Patient_Age from our dataframe row
C_Patient_Age = df.loc[z,'C_Patient_Age']

# Pre-record the source to later add to data_source dataframe cell
sources = 'C_Patient_Age, C_Patient_Age_Units'

# First letter of the units -> how many of that unit make up one year
# (Months / 12, Weeks / 52, Days / 365)
units_per_year = {'M': 12, 'W': 52, 'D': 365}

# If C_Patient_Age is non-null (x == x is False only for NaN)...
if (C_Patient_Age == C_Patient_Age):
    
    # Then locate the associated units in our dataframe row
    C_Patient_Age_Units = df.loc[z,'C_Patient_Age_Units']
    
    # Uppercase string copy so we can test how the units BEGIN.
    # Comparing the whole string to 'Y' or 'M' would never match 'Years' or 'Months'.
    units = str(C_Patient_Age_Units).upper()
    
    # If the units value begins with 'Y' or 'ANNUM'...
    if units.startswith(('Y','ANNUM')):
        
        # This means that the age is already in years.  Write to dataframe.  Record source.
        df.loc[z,col_name] = C_Patient_Age
        df.loc[z,col_name+'_Data_Source'] = sources
        
    # Elif the units value begins with 'M', 'W' or 'D'...
    elif units[:1] in units_per_year:
        
        # Divide by the number of units in a year and round down to int for age in years.
        # float() first so an age stored as a string such as '18.0' still converts.
        # Record data source.
        df.loc[z,col_name] = int(float(C_Patient_Age) / units_per_year[units[:1]])
        df.loc[z,col_name+'_Data_Source'] = sources

## The supporting functions

In [None]:
from pj_funcs import *

############################################################################################################

def NoError(func, *args, **kw):
    '''
    Report whether calling func(*args, **kw) completes without raising.
    The value returned by func is discarded.
    
    Parameters
    ----------
    func: callable, required
    *args: positional arguments passed straight to func
    **kw: keyword arguments passed straight to func
    
    Returns
    -------
    bool
        True if the call finished normally.
        False if the call raised any Exception.
        
    Requirements
    ------------
    -none
    '''
    succeeded = True
    try:
        func(*args, **kw)
    except Exception:
        succeeded = False
    return succeeded
    
    
############################################################################################################

def index_n(m,ind):
    '''
    Drill into 'm' one level per entry of 'ind', in order.
    An empty 'ind' returns 'm' itself.  Any error raised while
    indexing is passed on to the caller.
    
    Parameters
    ----------
    m: type varies, required, anything that supports [] indexing
    ind: list, required, one key/position per nesting level
    
    Returns
    -------
    m[ind[0]][ind[1]][ind[...]][ind[n]]
     
    Requirements
    ------------
    -none
    
    '''
    for key in ind:
        m = m[key]
    return m

#############################################################################################################

def Index_pull(ind,m):
    
    '''
    Fetch the element of message 'm' found at the index path 'ind',
        always as a string.  Never raises for a path that cannot be followed.
    
    Parameters
    ----------
    ind: list, required, full index path as list indicating HL7 location.
    m: hl7 type object, required, m = hl7.parse(some_message)
    
    Returns
    -------
    Str
        '' if 'm' cannot be indexed by the full path.
        The value one level above the last index, if that value is already a str.
        Otherwise str() of the fully indexed value.
     
    Requirements
    ------------
    -NoError from pj_funcs.py
    -index_n from pj_funcs.py
    -hl7
    
    '''
    
    # Path cannot be followed: report "nothing found" as an empty string
    if not NoError(index_n,m,ind):
        return ''
    
    # One level above the target.  When this is already a plain string, the last
    #   index would only pick a piece out of it, so the whole string is returned.
    parent = index_n(m,ind[:-1])
    if type(parent) == str:
        return parent
    
    # Normal case: the exact, fully indexed value
    return str(index_n(m,ind))

############################################################################################################

def Index_pull_CONC(field,rest_index,m):
    '''
    Collect the same element from every repetition of a field and return the
        non-empty values as one string joined by '|' characters.
    
    Example: consider the case of Ethnicity Code where a patient may have multiple selected ethnicities.
        For our example we will assume this element is always located in PID-22.1.
    
            print(Index_pull_CONC(['PID'], [22,0,0], m))
                Ethnicity1|Ethnicity2
        
        Note:  Ethnicity1 and Ethnicity2 are pulled from PID|x|-22.1 and PID|y|-22.1 respectively where
            x,y are different repetitions of the repeated field.
        
    
    Parameters
    ----------
    field: list (with one element), required, for non-empty return choose valid 3 letter HL7 field
    rest_index: list, required, integer list indicating where to find the element inside each repetition.
    m: hl7 type object, required, m = hl7.parse(some_message)
    
    Returns
    -------
    Str
        Values joined by '|'.  '' when the field is missing or nothing non-empty was found.
     
    Requirements
    ------------
    -NoError from pj_funcs.py
    -index_n from pj_funcs.py
    -index (used here but defined outside this cell)
    -hl7
    
    '''
    
    # Name of the field, e.g. 'PID'
    field_str = field[0]
    
    # Field not present in the message: nothing to concatenate
    if not NoError(index,m,field_str):
        return ''
    
    # Non-empty values found so far, in repetition order
    pieces = []
    
    # Visit every repetition of the field
    for rep in range(len(m[field_str])):
        
        # Full path = field name + repetition number + remaining indices
        path = field+[rep]+rest_index
        
        # Skip repetitions that cannot be indexed by the full path
        if not NoError(index_n,m,path):
            continue
        
        # Same rule as Index_pull: if one level above the target is already a
        #   plain string, take that whole string; otherwise take the exact value.
        parent = index_n(m,path[:-1])
        if type(parent) == str:
            piece = parent
        else:
            piece = str(index_n(m,path))
        
        # Only keep values with non-zero length
        if len(piece)>0:
            pieces.append(piece)
    
    # Joining leaves no trailing '|' to clean up.  No pieces gives ''.
    return '|'.join(pieces)

############################################################################################################


def DI_One(ind,m,df,z,col_name):
    
    '''
    Pull the element of 'm' at index path 'ind' and, when it is non-empty,
        write it into the dataframe cell df.loc[z,col_name].
    
    Parameters
    ----------
    ind: list, required, complete index path (as list) to desired element
    m: hl7 type object, required, m = hl7.parse(some_message)
    df:  pandas DataFrame, required
    z:  int, required, valid integer row index of df
    col_name: str, required, valid column in df
    
    Returns
    -------
    Str
        The element as returned by Index_pull ('' when nothing was found)
        
    Output
    ------
    Updates dataframe only when the element is non-empty
        df.loc[z,col_name] = Element
     
    Requirements
    ------------
    -Index_pull from pj_funcs.py
    -Pandas
    -hl7
    
    '''
    
    # Index_pull always hands back a string; '' means nothing was found
    element = Index_pull(ind,m)
    
    # Leave the cell untouched when there is nothing to write
    if element:
        df.loc[z,col_name] = element
    
    return element

####################################################################

def DI_One_CONC(field,ind,m,df,z,col_name):
    
    '''
    Pull the '|'-concatenated element of 'm' across the repetitions of 'field'
        (located by 'ind' inside each repetition) and, when it is non-empty,
        write it into the dataframe cell df.loc[z,col_name].
    
    Parameters
    ----------
    field: list (with one element), required, for non-empty return choose valid 3 letter HL7 field
    ind: list, required, index path (as list) to the desired element inside each repetition
    m: hl7 type object, required, m = hl7.parse(some_message)
    df:  pandas DataFrame, required
    z:  int, required, valid integer row index of df
    col_name: str, required, valid column in df
    
    Returns
    -------
    Str
        Concatenated element separated by '|', as returned by Index_pull_CONC
        
    Output
    ------
    Updates dataframe only when the concatenated element is non-empty
        df.loc[z,col_name] = Concatenated_Element
     
    Requirements
    ------------
    -Index_pull_CONC from pj_funcs.py
    -Pandas
    -hl7
    
    '''
    
    # Index_pull_CONC hands back one string for all repetitions
    joined = Index_pull_CONC(field,ind,m)
    
    # Leave the cell untouched when the string has zero length
    if len(joined) != 0:
        df.loc[z,col_name] = joined
    
    return joined