In [1]:
from sqlalchemy import create_engine, text
import pandas as pd
import requests
def getting_data_from_AWS(query: str, 
                          host: str, 
                          port:int, 
                          database: str, 
                          user: str, 
                          password: str): 
    """Downloads dataframe based on query pulling data from AWS RDS instance. 

    Args:
        query (str): SQL query to retrieve data 
        host (str): AWS host string
        port (str): AWS port 
        database (str): AWS database name
        user (str): AWS user
        password (str): AWS password
    
    Returns:
        pd.DataFrame with the ouput of query
    
    Example usage:
        getting_data_from_AWS(query = "SELECT * FROM TABLE" (with triple "s)
                              host = 'XXX.XXX.XXX.rds.amazonaws.com', 
                              port = 5432, 
                              database = 'postgres', 
                              user = 'super_safe_user', 
                              password = 'super_safe_pass'):
    """
    # Creating connection
    conn_db = create_engine(f'postgresql://{user}:{password}@{host}:{port}/{database}',
                            pool_pre_ping=True)

    # Running query and importing it 
    with conn_db.begin() as conn:
        df = pd.read_sql(sql=text(query), 
                         con=conn)


    return df;

In [2]:
host = "insuranceproject.ce08lquzezfq.ca-central-1.rds.amazonaws.com"
port = 5432
database = "postgres"
user = "sebmatecho"
password = "bandadatosa"

query = """
   SELECT * FROM insurance
"""

df_raw = getting_data_from_AWS(query = query,
                               host = host,  
                               port = port,  
                               database = database, 
                               user = user,
                               password = password)


In [231]:
df_raw.dtypes

gender                   object
age                     float64
driving_license         float64
region_code             float64
previously_insured      float64
vehicle_age              object
vehicle_damage           object
annual_premium          float64
policy_sales_channel    float64
vintage                 float64
response                float64
dtype: object

In [206]:
import pandas as pd 
import pickle
import os
from  pathlib import Path
import pandas as pd
from pathlib import Path
import pickle 
import os

class HealthInsurance(): 
    def __init__(self)->None:
        base_path = Path.cwd()
        self.home = base_path/'parameters'
        self.age_transform = pickle.load(open(os.path.join(self.home,'age_transform.pkl'),'rb'))
        self.annual_premium_transform = pickle.load(open(os.path.join(self.home,'annual_premium_transform.pkl'),'rb'))
        self.gender_transform = pickle.load(open(os.path.join(self.home,'gender_transform.pkl'),'rb'))
        self.policy_sales_channel_transform = pickle.load(open(os.path.join(self.home,'policy_sales_channel_transform.pkl'),'rb'))
        self.region_code_transform = pickle.load(open(os.path.join(self.home,'region_code_transform.pkl'),'rb'))
        self.vintage_transform = pickle.load(open(os.path.join(self.home,'vintage_transform.pkl'),'rb'))
        
    def data_cleaning(self,df1): 
                
        df1.columns = ['gender', 'age', 'driving_license', 'region_code', 'previously_insured',
       'vehicle_age', 'vehicle_damage', 'annual_premium','policy_sales_channel', 'vintage']
        
        df1['age'] = df1['age'].astype(int)
        df1['driving_license'] = df1['driving_license'].astype(int)
        df1['region_code'] = df1['region_code'].astype(int)
        df1['previously_insured'] = df1['previously_insured'].astype(int)
        df1['vintage'] = df1['vintage'].astype(int)
        
        return df1
    
    def feature_engineering(self,df2): 
        df2['vehicle_damage'] = df2['vehicle_damage'].apply( lambda x: 1 if x == 'Yes' else 0 )
        df2['vehicle_age'] =  df2['vehicle_age'].apply( lambda x: 'over_2_years' if x == '> 2 Years' else 'between_1_2_year' if x == '1-2 Year' else 'below_1_year' )
        return df2
    
    def data_preparation(self, df3): 
        df3['annual_premium'] = self.annual_premium_transform.transform( df3[['annual_premium']].values )
        df3['age'] = self.age_transform.transform( df3[['age']].values )
        df3['vintage'] = self.vintage_transform.transform( df3[['vintage']].values )
        df3['gender'] = df3['gender'].map( self.gender_transform )
        df3['region_code'] = df3['region_code'].map( self.region_code_transform )
        df3 = pd.get_dummies( df3, prefix='vehicle_age', columns=['vehicle_age'] )
        df3['policy_sales_channel'] = df3['policy_sales_channel'].map( self.policy_sales_channel_transform )
        
        cols_selected = ['annual_premium', 'vintage', 'age', 'region_code', 'vehicle_damage', 'previously_insured',
                 'policy_sales_channel']
    
        return df3[cols_selected]
    
    def prediction(self,model, df4, df5): 
        pred = model.predict_proba(df4)
        df5['prediction'] = pred[:, 0]
        return df5


In [207]:
home_path = Path.cwd()
# home_path = Path('C:/Users/Windows/Desktop/Repos/insurance_prediction/')
model_path = home_path /'models'/'models'/ 'logistic_regression.pkl'
model = joblib.load(model_path)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [208]:
df_test = df_raw.sample(2,random_state=42).drop('response', axis =1)
df_final = df_test.copy()
testing_class = HealthInsurance()
# testing_class
df1 = testing_class.data_cleaning(df_test)
# df1
df2 = testing_class.feature_engineering(df1)
# df2
df3 = testing_class.data_preparation(df2)
# df3
df4 = testing_class.prediction(model,df3,df_final)
df4


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Unnamed: 0,gender,age,driving_license,region_code,previously_insured,vehicle_age,vehicle_damage,annual_premium,policy_sales_channel,vintage,prediction
418628,Female,21.0,1.0,28.0,0.0,< 1 Year,Yes,29005.0,160.0,63.0,0.667582
385400,Female,48.0,1.0,17.0,0.0,1-2 Year,Yes,28855.0,4.0,280.0,0.783452


In [4]:
data = json.dumps(df_raw.sample(10).drop('response', axis = 1).to_dict( orient='records' ))

# API Call
# url = 'http://127.0.0.1:8000/predict/'
url = 'https://health-insurance-api-oanp.onrender.com/predict'
header = {'Content-type': 'application/json' }

r = requests.post( url, data=data, headers=header )
print( f'[Info] Status Code {r.status_code}' )

pd.DataFrame(json.loads(r.json()))

[Info] Status Code 200


Unnamed: 0,gender,age,driving_license,region_code,previously_insured,vehicle_age,vehicle_damage,annual_premium,policy_sales_channel,vintage,prediction
0,Male,21,1,36,0,< 1 Year,No,40371,152,48,0.992662
1,Male,23,1,3,1,< 1 Year,No,29535,152,198,0.999629
2,Male,67,1,28,0,1-2 Year,Yes,30569,26,283,0.916689
3,Male,41,1,8,1,1-2 Year,No,47916,124,92,0.999717
4,Male,44,1,6,0,1-2 Year,Yes,2630,124,18,0.768253
5,Male,54,1,28,0,1-2 Year,Yes,39127,26,31,0.387894
6,Female,24,1,41,1,< 1 Year,No,23923,152,20,0.99987
7,Male,43,1,19,1,1-2 Year,No,37714,124,299,0.999648
8,Female,68,1,33,1,1-2 Year,No,2630,138,151,0.999908
9,Female,38,1,28,0,1-2 Year,Yes,59422,122,173,0.792545


In [None]:
pd.DataFrame(json.loads(r.json()))