In [1]:
import pandas as pd
import gradio as gr
import joblib as jb
import plotly.express as px
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, ConfusionMatrixDisplay

In [2]:
# Locations of the held-out test set and the fitted model, relative to the notebook.
TEST_DATA_PATH = "./data/test_data.csv"
MODEL_PATH = "./model/model.pkl"

# Read the test rows into a DataFrame and restore the model object with joblib.
# NOTE(review): joblib.load unpickles the file, so only load model files you trust.
test_data = pd.read_csv(TEST_DATA_PATH)
model = jb.load(MODEL_PATH)

# Preview the first rows as the cell output.
test_data.head()

Unnamed: 0,RowNumber,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,5703,585,France,Male,36,7,0.0,2,1,0,94283.09,0
1,3668,525,Germany,Male,33,4,131023.76,2,0,0,55072.93,0
2,1618,557,Spain,Female,40,4,0.0,2,0,1,105433.53,0
3,5674,639,Spain,Male,34,5,139393.19,2,0,0,33950.08,0
4,4273,640,Spain,Female,34,3,77826.8,1,1,1,168544.85,0


In [3]:
# Separate the label column from the model inputs.
# "RowNumber" is dropped together with the label before the frame is passed to the model.
target = "Exited"
y = test_data[target]
X = test_data.drop(columns=["RowNumber", target])

# Keep only the second column of predict_proba for every row
# (presumably the probability of Exited == 1 — confirm against model.classes_).
prediction_probabilites = model.predict_proba(X)[:, 1]

In [10]:
def predict(probability_threshold, crc, cac):
    """
    Evaluate the precomputed probabilities at a given decision threshold.

    Reads the notebook-level ``prediction_probabilites`` (per-row model
    probabilities) and ``y`` (true labels); neither is modified.

    Parameters
    ----------
    probability_threshold: float
        Rows whose probability is >= this value are predicted as 1,
        all other rows as 0.
    crc: int or float
        Customer retention cost, charged once per row predicted as 1.
    cac: int or float
        Customer acquisition cost, charged once per row whose true label
        is 1 but which was predicted as 0.

    Returns
    -------
    fig: plotly figure
        Heatmap of the 2x2 confusion matrix (true value vs predicted value).
    accuracy, precision, recall: float
        Evaluation scores rounded to two decimals. Precision/recall are 0.0
        when they are undefined (no predicted / no actual positives).
    retention_cost: number
        (TP + FP) * crc
    acquisition_cost: number
        FN * cac
    total: number
        retention_cost + acquisition_cost
    amount_saved: number
        (TP + FN) * cac - total, i.e. the acquisition cost of every true
        positive row minus the total spent when the model is used.
    """

    # get predictions
    predictions = (prediction_probabilites >= probability_threshold).astype(int)

    # get confusion matrix; labels are pinned so the matrix is always 2x2,
    # even when only one class shows up in both y and the predictions
    matrix_array = confusion_matrix(y, predictions, labels=[0, 1])

    fig = px.imshow(
        matrix_array,
        labels=dict(x="Predicted Value", y="True Value"),
        x=["False", "True"],
        y=["False", "True"],
        text_auto=True,
        aspect="auto",
    )
    fig.update_xaxes(side="bottom")
    fig.update_traces(dict(showscale=False, coloraxis=None))

    # get metrics
    # builtin round() works for both Python floats and NumPy scalars, whereas
    # the `.round()` method only exists on the latter; zero_division=0 keeps
    # the 0.0 result for undefined scores without emitting a warning
    accuracy = round(float(accuracy_score(y, predictions)), 2)
    precision = round(float(precision_score(y, predictions, zero_division=0)), 2)
    recall = round(float(recall_score(y, predictions, zero_division=0)), 2)

    # get costs
    # row-major order of the 2x2 matrix is TN, FP, FN, TP
    _, false_pos, false_neg, true_pos = matrix_array.ravel()
    retention_cost = (true_pos + false_pos) * crc  # crc cost with model
    acquisition_cost = false_neg * cac  # cac with model
    total = retention_cost + acquisition_cost  # total spent on acquisition and retention
    amount_saved = (true_pos + false_neg) * cac - total

    return fig, accuracy, precision, recall, retention_cost, acquisition_cost, total, amount_saved

In [13]:
# Build the Gradio UI around `predict`: three inputs (threshold and the two
# unit costs) map positionally to its parameters, and the eight outputs map
# positionally to the values it returns.
demo = gr.Interface(
    fn=predict,
    inputs=[
        # explicit label so the UI does not fall back to the raw parameter name
        gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Probability Threshold"),
        gr.Number(value=50, label="Customer Retention Cost (€)"),
        gr.Number(value=200, label="Customer Acquisition Cost(€)"),
    ],
    outputs=[
        gr.Plot(label="True Value vs Predicted Value"),
        gr.Number(label="Accuracy Score"),
        gr.Number(label="Precision Score"),
        gr.Number(label="Recall Score"),
        gr.Number(label="Customer Retention Cost(€)"),
        gr.Number(label="Customer Acquisition Cost(€)"),
        gr.Number(label="Total Amount(€)"),
        gr.Number(label="Amount Saved(€)"),
    ],
)

demo.launch()


Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB
Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x7f5b7c5f3340>, 'http://127.0.0.1:7865/', None)