In [1]:
# Data Handling
import pickle
import numpy as np
import pandas as pd
from pydantic import BaseModel

In [2]:
# Server
import uvicorn
from fastapi import FastAPI

In [3]:
# Modeling
from xgboost import XGBClassifier

In [4]:
# ASGI application object; the /predict route defined further down registers on it.
app = FastAPI()

In [5]:
# Load the pickled classifier from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so this
# file must come from a trusted source.
# The context manager closes the file handle even if unpickling fails.
with open('xgboost_classifier.pickle', 'rb') as model_file:
    classifier_model = pickle.load(model_file)

In [6]:
# Column order of the feature vector handed to the model: convert_dummies
# emits one value per name, in exactly this order. The `sales_*` names are the
# one-hot columns generated from the categorical `sales` field.
# NOTE(review): the order presumably has to match the columns the pickled
# model was fitted on — confirm against the training code.
features_columns = ['satisfaction_level', 'last_evaluation', 'number_project',
       'average_montly_hours', 'time_spend_company', 'Work_accident', 'left',
       'promotion_last_5years', 'sales_RandD', 'sales_accounting', 'sales_hr',
       'sales_management', 'sales_marketing', 'sales_product_mng',
       'sales_sales', 'sales_support', 'sales_technical']

In [7]:
def convert_dummies(to_predict, expected_model_columns, categorical_variables):
    """Build the model's feature vector from a single record.

    Each categorical key in the record is swapped for a one-hot style key
    named ``<variable>_<value>`` set to 1. The record is then projected onto
    ``expected_model_columns``; any column absent from the record becomes 0.

    Args:
        to_predict: Mapping of field name to value for one observation.
            It is copied, never modified.
        expected_model_columns: Column names defining the order of the output.
        categorical_variables: Keys of ``to_predict`` to one-hot encode.

    Returns:
        A NumPy array with one entry per expected column.

    Raises:
        KeyError: If a categorical variable is missing from ``to_predict``.
    """
    record = to_predict.copy()
    for variable in categorical_variables:
        # Drop the raw categorical entry and flag its one-hot column instead.
        category = record.pop(variable)
        record['{categorical}_{value}'.format(categorical=variable, value=category)] = 1

    return np.array([record.get(column, 0) for column in expected_model_columns])

In [8]:
class Data(BaseModel):
    """Request body accepted by the ``/predict`` endpoint.

    Apart from ``sales``, every field corresponds to an entry of
    ``features_columns``; ``sales`` is one-hot encoded into the ``sales_*``
    columns before prediction.
    """
    satisfaction_level: float
    last_evaluation: float
    number_project: float
    average_montly_hours: float
    time_spend_company: float
    Work_accident: float
    # `left` is one of the model's feature columns but was not declared here,
    # so a value supplied by the caller was dropped by pydantic and the model
    # always received 0. It defaults to 0.0 so requests that omit it behave
    # exactly as before.
    left: float = 0.0
    promotion_last_5years: float
    # Categorical value; expanded into the one-hot `sales_<value>` column.
    sales: str
    # NOTE(review): `salary` is disabled as an input; the prediction output
    # looks like a salary level, so it is presumably the target — confirm.
    #salary: str

In [9]:
@app.post("/predict")
def predict_api(data: Data):
    """Return the classifier's prediction for one request payload.

    The payload is turned into a plain dict, its ``sales`` field is one-hot
    encoded and the values are arranged in ``features_columns`` order; the
    resulting vector is passed to the model as a single-row batch.

    Returns:
        A dict with the single key ``"prediction"`` holding the model output
        for that row.
    """
    # One-hot encode and order the request fields the way the model expects
    feature_vector = convert_dummies(
        to_predict=data.dict(),
        expected_model_columns=features_columns,
        categorical_variables=['sales'],
    )

    # Present the vector as a batch of one row before predicting
    single_row = feature_vector.reshape(1, -1)
    predicted = classifier_model.predict(single_row)

    return {"prediction": predicted[0]}


In [10]:
# Load the HR dataset and preview its first rows.
df = pd.read_csv('../app/data/HR_comma_sep.csv')
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [11]:
# Take the first CSV row as a plain dict to use as a sample input.
to_predict = df.iloc[0, :].to_dict()
to_predict

{'satisfaction_level': 0.38,
 'last_evaluation': 0.53,
 'number_project': 2,
 'average_montly_hours': 157,
 'time_spend_company': 3,
 'Work_accident': 0,
 'left': 1,
 'promotion_last_5years': 0,
 'sales': 'sales',
 'salary': 'low'}

In [12]:
# Encode the sample row; keys that are not listed in features_columns
# (e.g. 'salary') are left out of the resulting vector.
features_array = convert_dummies(
    to_predict=to_predict,
    expected_model_columns=features_columns,
    categorical_variables=['sales'])
features_array

array([  0.38,   0.53,   2.  , 157.  ,   3.  ,   0.  ,   1.  ,   0.  ,
         0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   1.  ,   0.  ,
         0.  ])

In [13]:
# Pass the batch positionally, as predict_api does: the name of predict()'s
# first parameter differs between XGBoost releases (`data` vs `X`), so the
# `data=` keyword fails on newer versions.
classifier_model.predict(features_array.reshape(1, -1))

array(['low'], dtype=object)

In [14]:
# Invoke the endpoint function directly (no HTTP round trip) using the
# values of the first CSV row.
predict_api(
    Data(
        satisfaction_level=0.38,
        last_evaluation=0.53,
        number_project=2,
        average_montly_hours=157,
        time_spend_company=3,
        Work_accident=0,
        left=1,
        promotion_last_5years=0,
        sales='sales',
    )
)

{'prediction': 'low'}