# In [1]:
import pandas as pd

# Inclusive (min, max) bounds per measurement label. A row whose value lies
# outside its label's bounds is discarded by clean_relevant_events. Labels
# that do not appear here (e.g. Anion Gap, Base Excess, Basophils,
# Eosinophils, Magnesium, Monocytes, Phosphate, "Protein, Total") are never
# range-filtered.
# NOTE(review): label strings must match the data exactly; 'Asparate' is kept
# as written in the original code -- presumably the spelling used by the
# source data, confirm before "correcting" it.
_RELEVANT_EVENT_LIMITS = {
    'Heart Rate': (30, 220),
    'Temperature Celsius': (25, 45),
    'Arterial Blood Pressure mean': (15, 250),
    'Non Invasive Blood Pressure mean': (15, 250),
    'Arterial Blood Pressure diastolic': (15, 200),
    'Non Invasive Blood Pressure diastolic': (15, 200),
    'Arterial Blood Pressure systolic': (30, 250),
    'Non Invasive Blood Pressure systolic': (30, 250),
    'Alanine Aminotransferase (ALT)': (0, 20000),
    'Asparate Aminotransferase (AST)': (0, 20000),
    'Urea Nitrogen': (2, 200),
    'Calcium, Total': (0, 21),
    'C-Reactive Protein': (0, 1000),
    'pO2': (0, 1000),
    'Platelet Count': (0, 1000),
    'Glucose': (0, 2000),
    'Hemoglobin': (2, 25),
    'INR(PT)': (0.5, 15),
    'Lactate': (0.1, 30),
    'Lymphocytes': (0.2, 100),
    'Neutrophils': (0.2, 100),
    'pCO2': (0, 200),
    'pH': (6.6, 7.8),
    'PT': (5, 100),
    'PTT': (5, 200),
    'Red Blood Cells': (1, 8),
    'RDW': (5, 40),
    'SpO2': (0, 100),
    'Albumin': (0.5, 6.5),
    'Bilirubin, Total': (0.1, 70),
    'Creatinine': (0.1, 28),
    'Bicarbonate': (0, 9999),
    'Calculated Total CO2': (0, 9999),
    'Hematocrit': (5, 75),
    'Potassium': (0.05, 13),
    'Sodium': (90, 215),
    'Alkaline Phosphatase': (0.1, 9999),
    'Chloride': (0.1, 9999),
    'MCH': (0.1, 9999),
    'MCHC': (0.1, 9999),
    'MCV': (0.1, 9999),
}


def clean_relevant_events(table, model_type):
    """Remove null and out-of-range measurements from an events table.

    Rows whose ``value`` is null are dropped first. Then, for every label
    listed in ``_RELEVANT_EVENT_LIMITS``, rows whose value is strictly below
    the minimum or strictly above the maximum are dropped (the bounds
    themselves are kept). Rows with any other label are left untouched.

    The caller's DataFrame is not modified; surviving rows keep their
    original index labels and order.

    Args:
        table: DataFrame with at least a ``label`` and a ``value`` column.
        model_type: ``'a'`` or ``'b'`` writes the cleaned table to
            ``C:/Temp/submission_tables/<model_type>_relevant_events_clean.csv``;
            any other value skips the CSV export.

    Returns:
        The cleaned DataFrame.
    """
    print("starting cleaning relevant events")
    print('original length: ',len(table))
    table = table.dropna(subset=['value'])  # remove rows where value is null
    print('after droping nans: ',table.shape)

    # Work on positionally indexed copies of the two columns. Removing rows by
    # index *label* (DataFrame.drop) deletes every row that shares the label,
    # so a table with a non-unique index would lose valid rows of other
    # measurements. Positions are always unique.
    labels = table['label'].reset_index(drop=True)
    values = table['value'].reset_index(drop=True)
    out_of_range = pd.Series(False, index=values.index, dtype=bool)

    for label in labels.unique():
        limits = _RELEVANT_EVENT_LIMITS.get(label)
        if limits is None:
            # No limits defined for this label: keep all of its rows.
            continue
        min_value, max_value = limits
        # Compare only this label's rows, so values belonging to other labels
        # never take part in the comparison.
        label_values = values[labels == label]
        violating = label_values[
            (label_values < min_value) | (label_values > max_value)
        ]
        out_of_range.loc[violating.index] = True

    # A plain boolean array selects by position, independent of the index.
    table = table.loc[~out_of_range.to_numpy()]

    print('length after clean up: ', len(table))
    if model_type == 'a':
        print("done cleaning relevant_events table")
        table.to_csv("C:/Temp/submission_tables/a_relevant_events_clean.csv", encoding='utf-8')
        print("done creating a_relevant_events_clean.csv")
    if model_type == 'b':
        print("done cleaning b_relevant_events table")
        table.to_csv("C:/Temp/submission_tables/b_relevant_events_clean.csv", encoding='utf-8')
        print("done creating b_relevant_events_clean.csv")
    return table


    
