In [1]:
from io import StringIO

from joblib import dump, load
import pandas as pd

## Creating a function for model predictions on new data

In [3]:
# Scaler object persisted with joblib; predict_readmission calls its transform().
# NOTE(review): joblib.load is pickle-based per joblib's docs — only load trusted files.
SCALER_PATH = 'model_resources/scaler.joblib'

scaler = load(SCALER_PATH)

In [4]:
# Classifier persisted with joblib; predict_readmission calls its predict_proba().
# NOTE(review): "rfc" presumably stands for a random-forest classifier — confirm.
# NOTE(review): joblib.load is pickle-based per joblib's docs — only load trusted files.
MODEL_PATH = 'model_resources/rfc.joblib'

model = load(MODEL_PATH)

In [22]:
def predict_readmission(data):
    """Scale one encoded record and return the model's class probabilities.

    Parameters
    ----------
    data : object exposing ``reshape``
        Feature values for a single sample. They are reshaped into one row
        (``reshape(1, -1)``) before being handed to the module-level ``scaler``.
        NOTE(review): presumably the feature order must match the order used
        when ``scaler`` and ``model`` were fit — confirm against training code.

    Returns
    -------
    The result of ``model.predict_proba`` for that single scaled row.
    """
    single_row = data.reshape(1, -1)
    scaled_row = scaler.transform(single_row)
    return model.predict_proba(scaled_row)


## Creating a function to parse data from the front end

In [6]:
import json

# Sample payload used to exercise the parsing functions in this notebook.
# It is serialized straight to a JSON string so that `test_json` is only ever a str.
test_json = json.dumps({
    "gender": "Male",
    "age": "70-80",
    "time_in_hospital": 15,
    "num_lab_procedures": 3,
    "num_procedures": 30,
    "num_medications": 20,
    # diagnosis fields are sent as text labels
    "diag_1": "infectious and parasitic diseases",
    "diag_2": "endocrine, nutritional, metabolic & immunity disorders",
    "diag_3": "diseases of the circulatory system",
    "number_diagnoses": 3,
    "max_glu_serum": "None",
    "A1Cresult": "None",
    "diabetesMed": "No",
    "num_meds_changed": 10,
    "total_hosp_visits": 10,
    # race_* indicator fields (exactly one is 1 in this sample)
    "race_AfricanAmerican": 1,
    "race_Asian": 0,
    "race_Caucasian": 0,
    "race_Hispanic": 0,
    "race_Other": 0,
    # admission_type_id_* indicator fields
    "admission_type_id_Elective": 1,
    "admission_type_id_Emergency": 0,
    "admission_type_id_Other": 0,
    # discharge_disposition_id_* indicator fields
    "discharge_disposition_id_Discharged to Home": 1,
    "discharge_disposition_id_Other": 0,
    # admission_source_id_* indicator fields
    "admission_source_id_Emergency Room": 0,
    "admission_source_id_Other": 1,
    "admission_source_id_Referral": 0,
})

In [14]:
def clean_json(json_data):
    """Encode the categorical fields of a JSON payload as integer codes.

    The payload is parsed, every field listed in the encoding tables below is
    replaced by its integer code, and every value (encoded or not) is wrapped
    in a one-element list before the result is serialized back to JSON.
    ``create_input_data`` in this notebook feeds that string to
    ``pd.read_json``. Fields are emitted in the same order they arrive in.

    Parameters
    ----------
    json_data : str
        JSON text containing a single object that maps field names to values.

    Returns
    -------
    str
        JSON text of the form ``{"field": [value], ...}``.

    Raises
    ------
    ValueError
        If ``json_data`` is not valid JSON (raised by ``json.loads``).
    TypeError
        If the decoded payload is not a JSON object.
    KeyError
        If a categorical field carries a label missing from its encoding
        table; the message names the field and the accepted labels.
    """
    record = json.loads(json_data)
    if not isinstance(record, dict):
        raise TypeError(
            "clean_json expects a JSON object mapping field names to values, "
            "got {}".format(type(record).__name__)
        )

    # Using the encoding steps used in the training of our model to encode data received from our dashboard
    age = {
        "0-10": 0,
        "10-20": 1,
        "20-30": 2,
        "30-40": 3,
        "40-50": 4,
        "50-60": 5,
        "60-70": 6,
        "70-80": 7,
        "80-90": 8,
        "90-100": 9,
    }

    gender = {
        "Female": 0,
        "Male": 1,
    }

    # NOTE(review): several labels below are misspelled ('subcutaneuous',
    # 'muscuskeletal', 'condtions'). They are lookup keys, so they presumably
    # have to match what the front end sends — verify before correcting them.
    diag_dict = {
        'infectious and parasitic diseases': 1,
        'neoplasms': 2,
        'endocrine, nutritional, metabolic & immunity disorders': 3,
        'diseases of the blood and blood-forming organisms': 4,
        'mental disorder': 5,
        'diseases of the nervous system and sense organs': 6,
        'diseases of the circulatory system': 7,
        'diseases of the respiratory system': 8,
        'diseases of the digestive system': 9,
        'diseases of the genitourinary system': 10,
        'complications of pregnancy, childbirth, and the puerperium': 11,
        'diseases of the skin and subcutaneuous tissue': 12,
        'diseases of the muscuskeletal system and connective tissue': 13,
        'congenital anomalies': 14,
        'certain condtions originating in the perinatal period': 15,
        'symptoms, signs and ill-defined conditions': 16,
        'injury and poisoning': 17,
    }

    A1Cresult = {
        "None": 0,
        "Norm": 1,
        ">7": 2,
        ">8": 3,
    }

    diabetesMed = {
        "No": 0,
        "Yes": 1,
    }

    serum_amounts = {
        "None": 0,
        "Norm": 1,
        ">200": 2,
        ">300": 3,
    }

    # Field name -> encoding table; fields not listed here pass through unchanged.
    columns_to_clean = {
        'gender': gender,
        'age': age,
        'diag_1': diag_dict,
        'diag_2': diag_dict,
        'diag_3': diag_dict,
        'max_glu_serum': serum_amounts,
        'A1Cresult': A1Cresult,
        'diabetesMed': diabetesMed,
    }

    encoded = {}
    for field, value in record.items():
        codes = columns_to_clean.get(field)
        if codes is not None:
            try:
                value = codes[value]
            except KeyError:
                # Same exception type as a plain dict lookup, but the message
                # says which field was wrong and what it should have been.
                raise KeyError(
                    "unrecognized value {!r} for field {!r}; expected one of {}".format(
                        value, field, list(codes)
                    )
                ) from None
        # Each value becomes a one-element list in the serialized output.
        encoded[field] = [value]

    return json.dumps(encoded)


In [15]:
def create_input_data(json_data):
    """Turn a column-oriented JSON payload into a 2-D array of values.

    Parameters
    ----------
    json_data : str
        JSON text such as ``{"field": [value], ...}`` (the format produced by
        ``clean_json`` in this notebook). Anything that is not literal JSON
        text is handed to ``pd.read_json`` unchanged, so paths and file-like
        objects keep working as before.

    Returns
    -------
    numpy.ndarray
        The values of the parsed DataFrame, one column per payload field.
        NOTE(review): columns presumably follow the payload's key order, and
        the model is fed by position — confirm the order matches training.
    """
    source = json_data
    # Literal JSON text is wrapped in a buffer: passing the raw string to
    # read_json is deprecated in recent pandas releases.
    if isinstance(json_data, str) and json_data.lstrip().startswith(("{", "[")):
        source = StringIO(json_data)

    # turn json into a pandas df
    df = pd.read_json(source)

    return df.to_numpy()
    

In [20]:
# Run the sample payload through both preprocessing steps:
# encode the categorical fields first, then build the array handed to the model.
encoded_json = clean_json(test_json)
input_ = create_input_data(encoded_json)

In [27]:
# Predict on the sample input and show the first (only) row of probabilities as a tuple.
sample_proba = predict_readmission(input_)
tuple(sample_proba[0])

(0.48333333333333334, 0.5166666666666667)

In [25]:
# Length in characters of the serialized sample payload (test_json is the JSON string from json.dumps).
len(test_json)

824