In [1]:
#import libraries

import pandas as pd
import numpy as np
from flask import Flask, request, render_template
import joblib
from sklearn.preprocessing import StandardScaler
import logging
import lightgbm as lgb

# configure logging: the root logger emits INFO-level records and above
logging.basicConfig(level=logging.INFO)

# Flask application object; the route decorators below register handlers on it
app = Flask(__name__)

# NOTE(review): this newly constructed, unfitted StandardScaler is not referenced
# by any code visible in this notebook (preprocessing uses `attrition_scaler`);
# confirm nothing else needs it before removing.
scaler = StandardScaler()

# load the persisted model and scaler; the relative paths resolve against the
# current working directory
# NOTE(review): joblib.load unpickles arbitrary objects, so these files must come
# from a trusted source.
attrition_model = joblib.load('ibm_lgm_clf_model.pkl')
attrition_scaler = joblib.load('ibm_scaler.pkl')


@app.route('/')
def home():
    """Render the `index.html` template for requests to the root URL."""
    landing_page = render_template('index.html')
    return landing_page

# function to convert income category
def convert_income_category(income):
    """Map a monthly income to an ordinal income category (1-4).

    Categories are delimited by the bin edges 1009, 3000, 7000, 10000 and
    14000. An income equal to an interior edge belongs to the lower category
    (e.g. 3000 -> 1, 3001 -> 2).

    Incomes outside the bin range are clamped to the nearest category:
    anything below the first edge maps to 1 and anything above the last edge
    maps to 4.

    Args:
        income: Monthly income as a number.

    Returns:
        int: The category label, one of 1, 2, 3 or 4.
    """
    bins = [1009, 3000, 7000, 10000, 14000]
    labels = [1, 2, 3, 4]

    # incomes past the top edge belong to the highest bracket, not the lowest
    if income > bins[-1]:
        return labels[-1]

    # pair each label with its (lower, upper) edges; the first match wins, so a
    # value sitting exactly on a shared edge gets the lower category
    for label, lower, upper in zip(labels, bins, bins[1:]):
        if lower <= income <= upper:
            return label

    # anything left is below the first edge
    return labels[0]

def convert_age_to_onehot(age):
    """One-hot encode an age into the four `AgeGroup_*` indicator entries.

    Ages in 18-30, 31-40 and 41-50 (bounds inclusive) select the matching
    group; every other value falls through to `AgeGroup_51-60`.

    Returns:
        dict: All four group names as keys, with True for the selected group
        and False for the others.
    """
    bounded_groups = (
        (18, 30, 'AgeGroup_18-30'),
        (31, 40, 'AgeGroup_31-40'),
        (41, 50, 'AgeGroup_41-50'),
    )
    fallback_group = 'AgeGroup_51-60'

    # first bounded range containing the age wins; otherwise use the fallback
    chosen = next(
        (name for low, high, name in bounded_groups if low <= age <= high),
        fallback_group,
    )

    group_names = [name for _, _, name in bounded_groups] + [fallback_group]
    encoded = {}
    for name in group_names:
        encoded[name] = (name == chosen)
    return encoded


def convert_marital_status_to_onehot(marital_status):
    """One-hot encode a marital status value.

    Produces a `MaritalStatus_<category>` entry for each of Single, Married
    and Divorced. An entry is True only where `marital_status` equals that
    category, so an unrecognised status yields all False.
    """
    encoded = {}
    for category in ('Single', 'Married', 'Divorced'):
        encoded['MaritalStatus_' + category] = (marital_status == category)
    return encoded

# function to preprocess input data
def preprocess_input(input_data):
    """Turn validated form values into the feature mapping used for prediction.

    Steps, in order:
      1. Replace the raw income stored under 'IncomeCategory' with its
         category from `convert_income_category`.
      2. Add the `AgeGroup_*` and `MaritalStatus_*` one-hot entries.
      3. Remove the raw 'Age' and 'MaritalStatus' entries.
      4. Scale the five numerical columns with `attrition_scaler` and store
         the scaled values under their column names.

    Args:
        input_data (dict): Must contain 'Age', 'IncomeCategory' (the raw
            income) and 'MaritalStatus'. Any other numerical column that is
            missing is scaled as 0. The dict is modified in place.

    Returns:
        dict: The same `input_data` object. 'IncomeCategory' is removed and
        re-inserted by the scaling step, so it appears after the one-hot keys.
    """
    logging.info(f'content of input_data: {input_data}')

    # convert income category
    input_data['IncomeCategory'] = convert_income_category(input_data.get('IncomeCategory'))

    # convert age to one-hot encoding
    input_data.update(convert_age_to_onehot(input_data.get('Age')))

    logging.info(f'new content of input_data: {input_data}')

    # perform one-hot encoding for the marital status
    input_data.update(convert_marital_status_to_onehot(input_data.get('MaritalStatus')))

    logging.info(f'new content of input_data: {input_data}')

    # drop the raw columns that the one-hot entries replace
    input_data.pop('Age')
    input_data.pop('MaritalStatus')

    # Take the income category out of the dict but keep its value for scaling.
    # Reading it back with `.get(column, 0)` after removal would always scale 0,
    # so the submitted income would have no effect on the prediction.
    # NOTE(review): removing and re-inserting puts 'IncomeCategory' last, and
    # `predict` builds the feature array from the dict's value order; confirm
    # this order matches the columns the model was trained on.
    income_category = input_data.pop('IncomeCategory')

    # numerical features for normalization, in the order passed to the scaler
    normalization_columns = ['EnvironmentSatisfaction', 'JobSatisfaction', 'YearsAtCompany', 'IncomeCategory', 'WorkLifeBalance']

    # extract numerical features for transformation (missing columns default to 0)
    numerical_features = [
        income_category if column == 'IncomeCategory' else input_data.get(column, 0)
        for column in normalization_columns
    ]

    # apply the already-loaded scaler to a single-row 2D input
    transformed_features = attrition_scaler.transform([numerical_features])

    # update the input_data with transformed values
    for column, scaled_value in zip(normalization_columns, transformed_features[0]):
        input_data[column] = scaled_value

    return input_data

# route for handling predictions
@app.route('/predict', methods=['POST'])
def predict():
    """Handle a POST of the prediction form and render the outcome.

    The form is parsed (every field except 'MaritalStatus' as an integer),
    validated, preprocessed with `preprocess_input`, and passed to
    `attrition_model`. The form field 'IncomeCategory' carries the raw monthly
    income at this point.

    Returns:
        The rendered `index.html` template with exactly one of these variables:
        - `age_error`: Age missing or outside 18-60.
        - `income_error`: income missing or outside 1000-15000.
        - `years_at_company_error`: YearsAtCompany missing, negative, or not
          leaving more than 16 years when subtracted from Age.
        - `attrition_prediction`: the prediction message on success.
        - `other_error`: a field was not an integer, or any other exception
          occurred.
    """
    try:
        # Parse the submitted form: every field except MaritalStatus must be an
        # integer. Only this step maps ValueError to the "must be integers"
        # message, so a ValueError raised later by the scaler or the model is
        # not misreported as bad user input.
        try:
            form_data = {
                key: value if key == 'MaritalStatus' else int(value)
                for key, value in request.form.items()
            }
        except ValueError as ve:
            # log the ValueError
            logging.error(f'valueerror during prediction: {ve}')

            return render_template('index.html', other_error='Age, income, and years at the company must be integers')

        # log the types of values in form_data
        logging.info(f'types of values in form_data: {[type(value) for value in form_data.values()]}')

        # log the raw input data
        logging.info(f'raw input data: {form_data}')

        # validate age input
        age = form_data.get('Age')
        if age is None or not (18 <= age <= 60):
            return render_template('index.html', age_error='Age must be between 18 and 60')

        # validate monthly income
        income = form_data.get('IncomeCategory')
        if income is None or not (1000 <= income <= 15000):
            return render_template('index.html', income_error='Monthly income must be between $1,000 and $15,000')

        # validate years at the company
        years_at_company = form_data.get('YearsAtCompany')
        if years_at_company is None or not (age - years_at_company > 16):
            return render_template('index.html', years_at_company_error='Age minus years at company must be more than 16,')
        # a negative tenure would otherwise pass the check above
        if years_at_company < 0:
            return render_template('index.html', years_at_company_error='Years at company cannot be negative')

        # log validated age, income, and years at the company
        logging.info(f'validated age: {age}, validated income: {income}, validated years at company: {years_at_company}')

        # preprocess input data (modifies form_data in place and returns it)
        preprocessed_data = preprocess_input(form_data)

        # log data types after preprocessing
        logging.info(f'data types after preprocessing: {[(key, type(value)) for key, value in preprocessed_data.items()]}')

        # build a single-row 2D array for the model; column order follows the
        # dict's insertion order
        input_array = np.array([list(preprocessed_data.values())])

        logging.info(f'input array: {input_array}')

        # make predictions
        predictions = attrition_model.predict(input_array)
        probabilities = attrition_model.predict_proba(input_array)
        prediction_probability = probabilities[0][1]  # Probability of the positive class

        # provide a descriptive message based on the prediction and probability
        if predictions[0]:
            prediction_message = f'The employee is LIKELY to leave with a probability of {prediction_probability:.2%}'
        else:
            prediction_message = f'The employee is UNLIKELY to leave with a probability of {1 - prediction_probability:.2%}'
        # log the outcome
        logging.info(f'prediction outcome: {prediction_message}')

        # pass the prediction message to the template
        return render_template('index.html', attrition_prediction=prediction_message)

    except Exception as e:
        # log other exceptions together with the traceback
        logging.exception(f'an error occurred during prediction: {e}')

        return render_template('index.html', other_error='An error occurred. Please check your input.')


In [None]:
# Run the Flask app
# The guard is true when this code runs as the main module; the captured output
# ("Serving Flask app '__main__'") shows that is the case in this notebook.
# NOTE(review): app.run() keeps serving until it is interrupted, so this cell
# does not finish while the server is up; confirm that is intended for a notebook.
if __name__ == '__main__':
    # debug=False starts the server with debug mode off
    app.run(debug=False)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [28/Nov/2023 18:34:28] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [28/Nov/2023 18:34:28] "[36mGET /static/ibm_logo.png HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [28/Nov/2023 18:34:29] "[36mGET /static/light_bluemountain.jpg HTTP/1.1[0m" 304 -
INFO:root:types of values in form_data: [<class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'str'>]
INFO:root:raw input data: {'EnvironmentSatisfaction': 4, 'JobSatisfaction': 3, 'WorkLifeBalance': 3, 'YearsAtCompany': 12, 'IncomeCategory': 13000, 'Age': 40, 'MaritalStatus': 'Married'}
INFO:root:validated age: 40, validated income: 13000, validated years at company: 12
INFO:root:content of input_data: {'EnvironmentSatisfaction': 4, 'JobSatisfaction': 3, 'WorkLifeBalance': 3, 'YearsAtCompany': 12, 'IncomeCategory': 13000, 'Age': 40, 'MaritalStatus': 'Married'}
INFO:root:new conten