# Imports

In [19]:
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt


import xgboost as xgb

import sklearn.metrics
import pickle

from IPython.core.display import HTML
pd.options.mode.chained_assignment = None  # default='warn'

In [20]:
# from google.colab import drive
# drive.mount('/content/drive')

# Helper functions

In [21]:
def jupyter_settings():
    %matplotlib inline
    %pylab inline
    plt.style.use( 'bmh' )
    plt.rcParams['figure.figsize'] = [25, 12]
    plt.rcParams['font.size'] = 24
    display( HTML( '<style>.container { width:100% !important; }</style>') )
    sns.set()
jupyter_settings()

Populating the interactive namespace from numpy and matplotlib


In [22]:
# import sys 
# path_to_module = '/content/drive/MyDrive/health_insurance_cross_sell-main/health_insurance_cross_sell'
# sys.path.append(path_to_module)

In [23]:
from functions import performace
from Healthinsurance.healthinsurance import HealthInsurance


# Load data

In [24]:
# Read webapp/data/test_raw.csv into a DataFrame; the path is relative to the
# notebook's working directory.
test_df = pd.read_csv('webapp/data/test_raw.csv')


In [25]:
test_df.head()

Unnamed: 0,id,gender,age,region_code,policy_sales_channel,previously_insured,annual_premium,vintage,vehicle_age,vehicle_damage,response
0,196319,Male,50,33.0,124.0,0,40841.0,154,> 2 Years,1,0
1,377373,Male,40,8.0,124.0,1,2630.0,145,1-2 Year,0,0
2,96687,Female,65,41.0,109.0,0,26336.0,246,1-2 Year,1,0
3,303533,Male,60,28.0,124.0,0,52717.0,139,1-2 Year,1,0
4,256233,Male,36,36.0,26.0,0,39176.0,73,1-2 Year,1,1


In [26]:
test_df.shape

(114090, 11)

In [27]:
test_df.dtypes

id                        int64
gender                   object
age                       int64
region_code             float64
policy_sales_channel    float64
previously_insured        int64
annual_premium          float64
vintage                   int64
vehicle_age              object
vehicle_damage            int64
response                  int64
dtype: object

# Data Preparation

In [28]:
# Independent copy of test_df; df9 is the frame the API tester cell serialises
# into the request payload.
df9 = test_df.copy()

In [29]:
# mms_age = pickle.load(open('/content/drive/MyDrive/health_insurance_cross_sell-main/health_insurance_cross_sell/features/age_scaler.pkl','rb'))
# mms_vintage = pickle.load(open('/content/drive/MyDrive/health_insurance_cross_sell-main/health_insurance_cross_sell/features/vintage_scaler.pkl','rb'))
# se = pickle.load(open('/content/drive/MyDrive/health_insurance_cross_sell-main/health_insurance_cross_sell/features/annual_premium_scaler.pkl','rb'))


In [30]:
# model = pickle.load(open('/content/drive/MyDrive/health_insurance_cross_sell-main/health_insurance_cross_sell/models/xgb_model_final.pkl','rb'))

# Deploy

## HealthInsurance class

In [31]:
import pickle
import numpy  as np
import pandas as pd

class HealthInsurance:
    """Pre-processing and scoring pipeline for the health-insurance cross-sell model.

    The stages are meant to be chained in this order:
    ``data_cleaning`` -> ``feature_engineering`` -> ``data_preparation`` ->
    ``get_prediction``. The three transformation stages work on a copy of
    their input, so the caller's frame is not modified by them.
    """

    # Default location of the pickled artefacts (same value the class has
    # always used); pass ``home_path`` to ``__init__`` to override it.
    DEFAULT_HOME_PATH = 'Users/Lavin/Documents/Comunidade DS/health_insurance_cross_sell/'

    def __init__( self, home_path=None ):
        """Load the three pickled scalers from ``<home_path>/features``.

        Parameters
        ----------
        home_path : str, optional
            Project root containing the ``features`` directory. Defaults to
            ``DEFAULT_HOME_PATH``.

        Raises
        ------
        OSError
            If one of the scaler files cannot be opened.
        """
        import os

        self.home_path = self.DEFAULT_HOME_PATH if home_path is None else home_path
        features_dir = os.path.join( self.home_path, 'features' )

        self.annual_premium_scaler = self._load_pickle( os.path.join( features_dir, 'annual_premium_scaler.pkl' ) )
        self.age_scaler            = self._load_pickle( os.path.join( features_dir, 'age_scaler.pkl' ) )
        self.vintage_scaler        = self._load_pickle( os.path.join( features_dir, 'vintage_scaler.pkl' ) )

    @staticmethod
    def _load_pickle( file_path ):
        """Unpickle ``file_path``, closing the file handle afterwards."""
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at artefacts produced by this project.
        with open( file_path, 'rb' ) as handle:
            return pickle.load( handle )

    @staticmethod
    def _encode_vehicle_damage( value ):
        """Return 1 for 'Yes' (any casing) or an already-encoded 1, else 0."""
        if isinstance( value, str ):
            return 1 if value.strip().lower() == 'yes' else 0
        return 1 if value == 1 else 0

    def data_cleaning( self, df1 ):
        """Normalise the column names to the lower-case names used downstream.

        Columns are matched by name (case-insensitively), so the column order
        of the input does not matter and extra columns are kept as they are.
        ``response`` is optional. When the names are not recognised but the
        frame has exactly eleven columns, the legacy positional renaming is
        applied.

        Parameters
        ----------
        df1 : pandas.DataFrame
            Raw input rows.

        Returns
        -------
        pandas.DataFrame
            A copy of ``df1`` with normalised column names.

        Raises
        ------
        ValueError
            If the columns can be mapped neither by name nor by position.
        """
        # 1.1. Rename Columns
        cols_new = ['id', 'gender', 'age','region_code', 'previously_insured', 'vehicle_age',
                    'vehicle_damage', 'annual_premium', 'policy_sales_channel', 'vintage', 'response']

        df1 = df1.copy()
        normalized = [str( col ).strip().lower() for col in df1.columns]
        required = set( cols_new ) - {'response'}

        if required.issubset( normalized ):
            # Rename by name: a positional rename would attach the wrong
            # label to the data whenever the input order differs from cols_new.
            df1.columns = normalized
        elif len( normalized ) == len( cols_new ):
            # Legacy behaviour for frames whose headers are not recognised.
            df1.columns = cols_new
        else:
            raise ValueError(
                'Cannot map input columns {} onto expected columns {}'.format( list( df1.columns ), cols_new ) )

        return df1


    def feature_engineering( self, df2 ):
        """Encode ``vehicle_damage`` and ``gender`` as integers.

        ``vehicle_damage``: 'Yes' or an already-encoded 1 becomes 1, anything
        else 0. ``gender``: 'Female' -> 1, 'Male' -> 0, other values are left
        unchanged.

        Returns a transformed copy of ``df2``.
        """
        df2 = df2.copy()

        # Vehicle Damage Number
        df2['vehicle_damage'] = df2['vehicle_damage'].apply( self._encode_vehicle_damage )

        # Gender
        df2['gender'] = df2['gender'].apply(lambda x: 1 if x=='Female' else (0 if x=='Male' else x))

        return df2


    def data_preparation( self, df5 ):
        """Scale the numeric features, encode ``vehicle_age`` and select the model columns.

        Returns a new frame restricted to the selected columns; ``response``
        is included only when the input carries it.
        """
        df5 = df5.copy()

        # Min-max scalers (each column uses the scaler stored for it)
        df5['age'] = self.age_scaler.transform(df5[['age']].values)

        df5['vintage'] = self.vintage_scaler.transform(df5[['vintage']].values)

        # Standard scaler
        df5['annual_premium'] = self.annual_premium_scaler.transform(df5[['annual_premium']].values)

        # vehicle_age ordinal scale (labels outside the mapping become NaN)
        vehicle_age_dict = {'> 2 Years':3, '1-2 Year':2, '< 1 Year':1}
        df5['vehicle_age'] = df5['vehicle_age'].map(vehicle_age_dict)

        # Feature Selection
        # NOTE(review): 'id' and 'response' are forwarded to the model as in
        # the original code; confirm this matches the columns used in training.
        final_columns = ['id',  'age', 'region_code', 'policy_sales_channel',
              'previously_insured', 'annual_premium', 'vintage', 'vehicle_age',
              'vehicle_damage', 'response']

        # Unlabelled (production) requests have no 'response' column.
        selected = [col for col in final_columns
                    if col != 'response' or 'response' in df5.columns]

        return df5[ selected ]


    def get_prediction( self, model, original_data, test_data ):
        """Score ``test_data`` and return ``original_data`` plus the score as JSON.

        Parameters
        ----------
        model : object
            Anything exposing ``predict_proba``.
        original_data : pandas.DataFrame
            Frame that receives the ``prediction`` column (modified in place).
        test_data : pandas.DataFrame
            Prepared features, row-aligned with ``original_data``.

        Returns
        -------
        str
            JSON array of records.
        """
        # model prediction
        pred = np.asarray( model.predict_proba( test_data ) )

        # predict_proba yields one column per class; a single DataFrame column
        # cannot hold that matrix, so keep the last column only.
        # presumably the positive class for this binary model - TODO confirm
        if pred.ndim == 2:
            pred = pred[:, -1]

        # join prediction into original data
        original_data['prediction'] = pred

        return original_data.to_json( orient='records', date_format='iso' )

## API Handler

In [32]:
import pickle
import pandas as pd
from flask import Flask, request, Response
from Healthinsurance.healthinsurance import HealthInsurance

# loading model
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# model artefacts produced by this project.
path = ''
with open( path + 'models/xgb_model_final.pkl', 'rb' ) as model_file:
    model = pickle.load( model_file )

# initialize API
app = Flask( __name__ )

@app.route( '/predict', methods=['POST'] )
def health_insurance_predict():
    """Score the records posted as JSON and return them with a prediction.

    The request body is either a single JSON object (one record) or a JSON
    array of objects (several records). An empty body yields an empty JSON
    object. The reply is a JSON array of records produced by
    ``HealthInsurance.get_prediction``.
    """
    test_json = request.get_json()

    # Nothing to score: answer with an empty JSON object.
    if not test_json:
        return Response( '{}', status=200, mimetype='application/json' )

    if isinstance( test_json, dict ): # unique example
        test_raw = pd.DataFrame( test_json, index=[0] )

    else: # multiple example
        test_raw = pd.DataFrame( test_json, columns=test_json[0].keys() )

    # Instantiate HealthInsurance class
    pipeline = HealthInsurance()

    # Run the pipeline on the rows received in THIS request (test_raw), not
    # on a module-level frame: a notebook global such as test_df does not
    # exist when the handler runs as a standalone service.

    # data cleaning
    df1 = pipeline.data_cleaning( test_raw )

    # feature engineering
    df2 = pipeline.feature_engineering( df1 )

    # data preparation
    df3 = pipeline.data_preparation( df2 )

    # prediction
    df_response = pipeline.get_prediction( model, test_raw, df3 )

    # Flask labels a bare string as text/html; declare the JSON content type.
    return Response( df_response, status=200, mimetype='application/json' )

if __name__ == '__main__':
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the
    # interactive debugger to the network; disable debug outside local use.
    app.run( '0.0.0.0', debug=True )

XGBoostError: [12:16:55] C:/Users/Administrator/workspace/xgboost-win64_release_1.6.0/src/learner.cc:1059: Check failed: header == serialisation_header_: 
  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.


## API tester

In [None]:
#pip install requests==2.25.1

In [None]:
import requests
import json

# Serialise every row of df9 as one JSON object inside a JSON array; this
# string is the request body sent to the prediction API.
data = json.dumps( df9.to_dict( orient='records' ) )

In [34]:
# API call
# NOTE(review): the handler defined in this notebook registers the route
# '/predict'; confirm that the service behind this URL really exposes
# '/HealthInsurance/predict'.
url = 'http://192.168.0.5:5000/HealthInsurance/predict'
# Alternative endpoints:
# url = 'http://0.0.0.0:5000/predict'
# url = 'https://health-insurance-model.herokuapp.com/predict'

header = {'Content-type':'application/json'}

# POST the JSON payload built in the previous cell and report the HTTP status.
r = requests.post( url, data=data, headers=header )
print( 'Status code {}'.format( r.status_code ) )

Status code 500
