## UNIX deployed version
- **Model**: Logistic regression
- **Encoding**: One-hot (sparse)
- **Model explainer**: LIME (Local Interpretable Model-Agnostic Explanations)
- **Server address**: http://[ip]:5000/predict


#### Variables consumed by the API
[[redacted]]


#### Variables consumed by the model
[[redacted]]

In [None]:
# Deployment configuration (UNIX values are active).
# Directory prefix for the serialized encoders, model and explainer; file names
# are appended with plain string concatenation, so keep the trailing slash.
source_files = '/local_home/ml/output/'
# Host and port handed to app.run() at the bottom of the file.
server_ip = [ip]
server_port = 5000

# Alternative settings for a Windows deployment; uncomment to use them
# instead of the UNIX values above.
# #### windows >>>>
# source_files = 'Automation/output/'
# server_ip = [ip]
# server_port = 8000

import ast
import datetime
import json
import os
import re

import dill
import numpy as np
import pandas as pd
from flask import Flask, request, render_template, jsonify, make_response, abort
from sklearn.externals import joblib


# Flask application object; the /predict route and the error handlers below
# are registered on it.
app = Flask(__name__)

############ Error Handling ############
class BaseError(Exception):
    """Base class for the API's errors.

    Attributes:
        code: HTTP status code returned with the error body.
        message: Short error message.
        detail: Longer description of what went wrong.
        status: Free-form status tag (stored, but not part of the JSON body).
        exceptioncode: Application-specific error code, or None.
    """

    def __init__(self, code=400, message='', detail='', status='', exceptioncode=None):
        Exception.__init__(self)
        self.code = code
        self.message = message
        self.status = status
        self.detail = detail
        self.exceptioncode = exceptioncode

    def __str__(self):
        """Return the message (or, failing that, the detail).

        Read from the attributes at call time, because subclasses assign
        ``message``/``detail`` after ``__init__`` has run; without this,
        ``str(err)`` in logs and tracebacks is always empty.
        """
        return str(self.message or self.detail)

    def to_dict(self):
        """Return the error as a Flask JSON response under the "exceptions" key.

        Despite the name, the result is what ``jsonify`` returns, not a plain
        dict. The "type" field is always "E".
        """
        payload = {
            "type": "E",
            "code": self.code,
            "message": self.message,
            "detail": self.detail,
            "exceptioncode": self.exceptioncode,
        }
        return jsonify({"exceptions": payload})

class ValidationError(BaseError):
    """HTTP 400 error for a missing or invalid request parameter.

    Args:
        message: Short error message placed in the response body.
        detail: With ``custom_msg == 0`` this is the parameter name and is
            wrapped as "Mandatory input parameter <detail> is invalid.".
            Otherwise it is used verbatim as the full detail text.
        custom_msg: 0 to wrap ``detail`` in the standard sentence, any other
            value to use ``detail`` as-is.
    """

    # The default detail is already a complete sentence; wrapping it would
    # produce "Mandatory input parameter Mandatory input parameter ...".
    _DEFAULT_DETAIL = 'Mandatory input parameter Id is missing in the Request.'

    def __init__(self, message='X', detail=_DEFAULT_DETAIL, custom_msg=0):
        BaseError.__init__(self)
        self.code = 400
        self.status = ''
        self.exceptioncode = 4000
        self.message = message
        if custom_msg == 0 and detail != self._DEFAULT_DETAIL:
            # str() keeps non-string names (e.g. integer column labels) from
            # raising TypeError during concatenation.
            self.detail = 'Mandatory input parameter ' + str(detail) + ' is invalid.'
        else:
            self.detail = detail

class ServerError(BaseError):
    """HTTP 500 error with status 'SERVER_ERROR'.

    Args:
        message: Short error message placed in the response body.
    """

    def __init__(self, message='Internal server error'):
        # Hand the fixed code/status to the base initializer instead of
        # overwriting its defaults afterwards; detail and exceptioncode keep
        # the base defaults ('' and None).
        BaseError.__init__(self, code=500, message=message, status='SERVER_ERROR')

@app.errorhandler(ServerError)
@app.errorhandler(ValidationError)
def handle_error(error):
    """Turn a raised ServerError/ValidationError into a (JSON body, status) pair."""
    body = error.to_dict()
    status_code = error.code
    return body, status_code


# First single-quoted token in an exception's repr; the handlers below report
# it as the name of the offending parameter.
_QUOTED_TOKEN = re.compile("'([^']*)'")


def _quoted_token(exc):
    """Return the first single-quoted token in ``repr(exc)``, or None if absent."""
    found = _QUOTED_TOKEN.search(repr(exc))
    return found.group(1) if found else None


def _raise_request_error(exc, fallback_detail=None):
    """Raise the API error that corresponds to ``exc``; never returns.

    If ``repr(exc)`` contains a quoted token, it is reported as an invalid
    parameter (ValidationError). Otherwise ``fallback_detail`` is used as the
    ValidationError detail when given, else a ServerError is raised.
    """
    key = _quoted_token(exc)
    if key is not None:
        raise ValidationError(detail=str(key), custom_msg=0)
    if fallback_detail is not None:
        raise ValidationError(detail=fallback_detail, custom_msg=1)
    raise ServerError()


@app.route('/predict', methods=['POST'])
def predict():
    """Score the records in the POSTed JSON body and explain each decision.

    Stages: load the body into a DataFrame, validate/clean it, label-encode
    and one-hot encode it with the serialized encoders under ``source_files``,
    predict with the logistic model, then attach the top LIME features.

    Returns:
        A JSON response ``{"prediction": [...]}`` with one record per input
        row holding 'x', 'decision', 'confidence' and
        'significantVariableInfo'.

    Raises:
        ValidationError: a parameter is missing, null or invalid.
        ServerError: a later stage failed without naming a parameter.
    """
    ################# data-loading #################
    try:
        test_json = request.get_json()
        if isinstance(test_json, str):
            data = pd.read_json(test_json, orient='records')
        else:
            data = pd.DataFrame.from_dict(test_json, orient='columns')

        api_collist = [[redacted]]
        data = data[api_collist]

    except Exception as e:
        _raise_request_error(
            e,
            fallback_detail='Request body could not be parsed into the expected input parameters.',
        )

    model_collist = [[redacted]]
    data = data[model_collist]

    ################# data-preprocessing #################

    #convert empty values to null
    data = data.replace(r'^\s*$', np.nan, regex=True)

    #removing whitespace
    data['A'] = data['A'].str.strip()
    data['B'] = data['B'].str.strip()
    data['C'] = data['C'].str.strip()

    #null values data-validation
    for column in data:
        if data[column].isnull().any():
            raise ValidationError(detail='Mandatory input parameter '+ str(column) +' contains null value', custom_msg=1)

    #drop duplicate records
    # Re-number the rows afterwards: the explanation loop below addresses rows
    # by label 0..n-1, and a gapped index would make it append new rows.
    data = data.drop_duplicates(subset=['D'], keep='first').reset_index(drop=True)

    #change
    # NOTE(review): 'E' is overwritten with the computed value and then dropped,
    # so that value is discarded -- confirm the intended target column.
    try:
        data['E'] =  pd.to_datetime(data['E'], format="%Y-%m-%d %H:%M:%S")
        data['E'] = (datetime.datetime.now() - data['E']).astype('<m8[Y]')
        data = data.drop('E', axis=1)
    except Exception as e:
        m = re.search("time data (.*) doesn't match format specified", str(e))
        if m:
            detail = 'datetime format '+ str(m.group(1)) +' is invalid. Correct format is YYYY-MM-DD HH:MM:SS'
        else:
            # The error text did not carry the offending value.
            detail = 'datetime format is invalid. Correct format is YYYY-MM-DD HH:MM:SS'
        raise ValidationError(detail=detail, custom_msg=1)

    #combine 3 request_type INPATIENT categories into single category
    try:
        data['x'] = data['x'].replace(['x-x', 'x-x x', 'x-x'], 'x')
        data['x'] = data['x'].str.upper()
    except Exception:
        raise ValidationError(detail='x')

    #invalid-datatypes
    # NOTE(review): the two casts below look identical here -- presumably they
    # target different columns in the unredacted source; verify.
    try:
        data['x'] = data['x'].astype(np.int64)
    except Exception:
        raise ValidationError(detail='x')

    try:
        data['x'] = data['x'].astype(np.int64)
    except Exception:
        raise ValidationError(detail='x')

    proc_ids = data.pop('x')


    ################# data-encoding #################

    # Positional indices of the label-encoded columns; each has its own
    # serialized encoder named '<index>.joblib'.
    categorical_features = [0,1,2,3,4,5,6,7,8,9,10,11,13]
    categorical_names = {}
    for feature in categorical_features:
        try:
            le = joblib.load(source_files+str(feature)+'.joblib')
            data.iloc[:, feature] = le.transform(data.iloc[:, feature].values)
            categorical_names[feature] = le.classes_
        except Exception:
            class_namex = list(data.columns)[feature]
            raise ValidationError(detail = str(class_namex))
    data = data.astype(float)

    try:
        #onehot encoding
        encoder = joblib.load(source_files+'ohe.joblib')
        encoded_data = encoder.transform(data)
    except Exception as e:
        app.logger.exception('one-hot encoding failed')
        _raise_request_error(e)


    ################# data-prediction #################
    try:
        logmodel = joblib.load(source_files+'logistic_model.joblib')
        # Reuse the matrix encoded above instead of transforming again.
        predictions = logmodel.predict(encoded_data).astype(float)
        class_probs = logmodel.predict_proba(encoded_data).astype(float)
        # Confidence is the probability of the predicted class: column 1 when
        # the prediction is 1, column 0 otherwise.
        chosen_prob = np.where(predictions == 1, class_probs[:, 1], class_probs[:, 0])
        mydict = {0: 'x', 1: 'y'}
        predictions = [mydict.get(n, n) for n in predictions]
        confidence = np.round(chosen_prob*100,2)

        df = pd.DataFrame({'x':proc_ids, 'decision':predictions, 'confidence':confidence})
    except Exception as e:
        app.logger.exception('prediction failed')
        _raise_request_error(e)


    ################# data-explanation #################
    try:
        # The explainer is unpickled from a local deployment file; never point
        # this path at untrusted data.
        with open(source_files+'logistic_lime_explainer.pickle', 'rb') as f:
            explainer = dill.load(f)

        def predict_fn(x):
            return logmodel.predict_proba(encoder.transform(x)).astype(float)

        for i in range(len(df)):
            exp = explainer.explain_instance(data.iloc[i], predict_fn, num_features=18)
            y = pd.DataFrame(exp.as_list(), columns=['key', 'value'])
            # Keep the five features with the highest weights.
            scores = y.sort_values(by='value', ascending=False).reset_index(drop=True).iloc[0:5]
            # Split each "name=value" feature description into its two parts.
            keyval = scores["key"].str.split("=", n=1, expand=True)
            #df.loc[i,'significantVariableInfo'] = [scores.set_index('key')['value'].to_dict()]
            df.loc[i,'significantVariableInfo'] = [keyval.set_index(0)[1].to_dict()]
    except Exception as e:
        app.logger.exception('explanation failed')
        _raise_request_error(e)


    ################# Sending-back response in JSON #################
    try:
        # to_json emits JSON, so parse it as JSON: literal_eval rejects
        # null/true/false and does not decode JSON string escapes.
        response_json_dict = json.loads(df.to_json(orient="records"))
        return jsonify(prediction = response_json_dict)
    except Exception as e:
        app.logger.exception('building the response failed')
        _raise_request_error(e)


# Serve the app on the configured host/port only when this file is run
# directly, not when it is imported.
if __name__ == '__main__':
    app.run(host=server_ip, port=server_port)