In [55]:
# Import Libraries 
import joblib
import pandas as pd
import numpy as np
import gradio as gr

In [71]:
# Load the preprocessor, the imputer and the Random Forest Classifier model
# NOTE(review): joblib.load unpickles the file contents, so these artifacts must
# come from a trusted source.
preprocessor = joblib.load("../model/preprocessor.joblib")
# NOTE(review): `imputer` is not referenced by any later visible cell.
imputer = joblib.load("../model/imputer.joblib")
# NOTE(review): `model` is only referenced from the commented-out churn_predict draft below.
model = joblib.load("../model/rf_model.joblib")
# Disabled attempt to load an additional artifact; kept as-is for reference.
# data_pre = joblib.load('../model/data_preprocessor.joblib', module=__import__('../notebook/lp2 - customer churn classification.ipynb'))

In [6]:
# Read the Excel data file (path is relative to this notebook's working directory).
# NOTE(review): `test` is not referenced by any later visible cell.
test = pd.read_excel('../notebook/data/Telco-churn-second-2000.xlsx')

In [7]:
# Column names, in order, used to label the values collected from the app's
# input widgets when they are assembled into a DataFrame.
expected_inputs = [
    "gender",
    "SeniorCitizen",
    "Partner",
    "Dependents",
    "tenure",
    "PhoneService",
    "MultipleLines",
    "InternetService",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
    "Contract",
    "PaperlessBilling",
    "PaymentMethod",
    "MonthlyCharges",
    "TotalCharges",
]

In [8]:
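# Earlier draft of churn_predict, kept disabled for reference: it ran the
# inputs through the preprocessor and the model and returned the prediction
# together with the input frame.
# def churn_predict(*args):
#     print(args)

#     # Convert the tuple to a DataFrame
#     df = pd.DataFrame([args], columns=expected_inputs)

#     preprocessed_df = preprocessor.transform(df)

#     pred = model.predict(preprocessed_df)[0]
#     return pred, df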

In [9]:
def churn_predict(*args):
    """Echo the submitted widget values back as a one-row DataFrame.

    Args:
        *args: The input values, positionally matched to ``expected_inputs``.

    Returns:
        pandas.DataFrame: A single row holding ``args``, with
        ``expected_inputs`` as the column labels.

    Note:
        Despite the name, no prediction is made here; the values are only
        tabulated.
    """
    # Log the raw values to the cell output.
    print(args)

    # Wrap the tuple in a list so it becomes one row rather than one column.
    rows = [args]
    return pd.DataFrame(rows, columns=expected_inputs)

In [10]:
# Define some variable limits and lists of options

# Headroom multiplier (the golden ratio). Each training-data maximum below is
# scaled by it so the sliders leave room for larger future values while still
# bounding the possible inputs.
GOLDEN_RATIO = 1.61803398875

max_tenure = GOLDEN_RATIO * 72  # 72 = maximum customer tenure in the training data
max_monthly_charges = GOLDEN_RATIO * 200  # 200 = maximum monthly charges taken from the training data
max_total_charges = GOLDEN_RATIO * 8684.8  # 8684.8 = maximum total charges in the training data

# To be used for the variables whose possible options are "Yes" or "No".
yes_or_no = ["Yes", "No"]

# Options for the add-on services that depend on having internet access
# (online security, online backup, device protection, tech support, streaming).
# Not the options of the "Internet Service" input itself.
internet_service_choices = ["Yes", "No", "No internet service"]


In [11]:
# ----- App Interface
# Layout, top to bottom: title, about text with the data dictionary, the input
# widgets grouped by theme, then the output table and the submit button.
with gr.Blocks() as app:

    # Title
    gr.Markdown("# ChurnPredict Pro")

    # About app and Data dictionary
    with gr.Row():
        # The description names the Random Forest model, matching the artifact
        # this notebook loads.
        gr.Markdown("""This app uses a machine learning model to predict whether or not a customer will churn based on inputs made by you, the user. The (Random Forest) model was trained and built based on the Telecom Churn Dataset. You may refer to the expander at the bottom for more information on the inputs.
                    """)
        # Expander for more info on columns.
        # Option labels are spelled exactly like the widget choices below.
        with gr.Accordion("Open for information on inputs"):
            gr.Markdown("""This app receives the following as inputs and processes them to return the prediction on whether a customer, given the inputs, will churn or not.
                    - Contract: The contract term of the customer (Month-to-month, One year, Two year)
                    - Dependents: Whether the customer has dependents or not (Yes, No)
                    - DeviceProtection: Whether the customer has device protection or not (Yes, No, No internet service)
                    - Gender: Whether the customer is a male or a female (Female, Male)
                    - InternetService: Customer's internet service provider (DSL, Fiber optic, No)
                    - MonthlyCharges: The amount charged to the customer monthly
                    - MultipleLines: Whether the customer has multiple lines or not (Yes, No, No phone service)
                    - OnlineBackup: Whether the customer has online backup or not (Yes, No, No internet service)
                    - OnlineSecurity: Whether the customer has online security or not (Yes, No, No internet service)
                    - PaperlessBilling: Whether the customer has paperless billing or not (Yes, No)
                    - Partner: Whether the customer has a partner or not (Yes, No)
                    - PaymentMethod: The customer's payment method (Electronic check, Mailed check, Bank transfer (automatic), Credit card (automatic))
                    - PhoneService: Whether the customer has a phone service or not (Yes, No)
                    - SeniorCitizen: Whether a customer is a senior citizen or not (Yes, No)
                    - StreamingMovies: Whether the customer has streaming movies or not (Yes, No, No internet service)
                    - StreamingTV: Whether the customer has streaming TV or not (Yes, No, No internet service)
                    - TechSupport: Whether the customer has tech support or not (Yes, No, No internet service)
                    - Tenure: Number of months the customer has stayed with the company
                    - TotalCharges: The total amount charged to the customer
                    """)

    # Phase 1: Receiving Inputs
    gr.Markdown("**Demographic Data**")
    with gr.Row():
        gender = gr.Dropdown(label="Gender", choices=["Female", "Male"], value="Female")
        SeniorCitizen = gr.Radio(label="Senior Citizen", choices=yes_or_no, value="No")
        Partner = gr.Radio(label="Partner", choices=yes_or_no, value="No")
        Dependents = gr.Radio(label="Dependents", choices=yes_or_no, value="No")

    with gr.Row():
        with gr.Column():
            gr.Markdown("**Contract and Tenure Data**")
            Contract = gr.Dropdown(label="Contract", choices=["Month-to-month", "One year", "Two year"], value="Month-to-month")
            tenure = gr.Slider(label="Tenure (months)", minimum=1, step=1, interactive=True, value=1, maximum= max_tenure)
        with gr.Column():
            gr.Markdown("**Phone Service Usage**")
            PhoneService = gr.Radio(label="Phone Service", choices=yes_or_no, value="Yes")
            MultipleLines = gr.Dropdown(label="Multiple Lines", choices=["Yes", "No", "No phone service"], value="No")

    # Internet Service Usage
    gr.Markdown("**Internet Service Usage**")
    with gr.Row():
        InternetService = gr.Dropdown(label="Internet Service", choices=["DSL", "Fiber optic", "No"], value="Fiber optic")
        OnlineSecurity = gr.Dropdown(label="Online Security", choices=internet_service_choices, value="No")
        OnlineBackup = gr.Dropdown(label="Online Backup", choices=internet_service_choices, value="No")
        DeviceProtection = gr.Dropdown(label="Device Protection", choices=internet_service_choices, value="No")
        TechSupport = gr.Dropdown(label="Tech Support", choices=internet_service_choices, value="No")
        StreamingTV = gr.Dropdown(label="TV Streaming", choices=internet_service_choices, value="No")
        StreamingMovies = gr.Dropdown(label="Movie Streaming", choices=internet_service_choices, value="No")

    # Billing and Payment
    gr.Markdown("**Charges (USD), Billing and Payment**")
    with gr.Row():
        MonthlyCharges = gr.Slider(label="Monthly Charges", step=0.05, maximum=max_monthly_charges)
        TotalCharges = gr.Slider(label="Total Charges", step=0.05, maximum=max_total_charges)
        PaperlessBilling = gr.Radio(label="Paperless Billing", choices=yes_or_no, value="Yes")
        PaymentMethod = gr.Dropdown(label="Payment Method", choices=["Electronic check", "Mailed check", "Bank transfer (automatic)", "Credit card (automatic)"], value="Electronic check")

    # Output Prediction
    # output = gr.Label("Awaiting Submission...")
    # The output is a table because churn_predict returns a DataFrame of the inputs.
    output = gr.Dataframe(headers=expected_inputs)
    submit_button = gr.Button("Submit")

    # The widgets are passed positionally to churn_predict, so this list must
    # stay in the same order as `expected_inputs`.
    submit_button.click(fn = churn_predict,
                        outputs = output,
                        inputs = [gender, SeniorCitizen, Partner, Dependents, tenure, PhoneService, MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport, StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod, MonthlyCharges, TotalCharges])

In [12]:
# Start the app server. share=True requests a public share link (the saved
# output below shows that request failing and a local URL being used).
# NOTE(review): debug=True appears to keep this cell running until interrupted,
# judging by the "Keyboard interruption" line in the saved output; confirm.
app.launch( share=True, debug=True)

Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


('Male', 'Yes', 'No', 'No', 24, 'Yes', 'Yes', 'DSL', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'Month-to-month', 'No', 'Mailed check', 155.9, 5676.25)
Keyboard interruption in main thread... closing server.




In [65]:
# Rebuild, as a one-row frame, the tuple of values that churn_predict printed
# during the app run above, so the preprocessing can be tried outside the app.
df = pd.DataFrame([('Male', 'Yes', 'No', 'No', 24, 'Yes', 'Yes', 'DSL', 'No', 'Yes', 'No', 'No', 'Yes', 'No', 'Month-to-month', 'No', 'Mailed check', 155.9, 5676.25)], columns=expected_inputs)

df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,Male,Yes,No,No,24,Yes,Yes,DSL,No,Yes,No,No,Yes,No,Month-to-month,No,Mailed check,155.9,5676.25


In [66]:
# Recode the Yes/No answer as the strings '1'/'0' (overwrites the column in `df`).
# NOTE(review): the replacement values are strings, not integers. One saved
# output further down is a ValueError about unknown category '1' in column 1,
# so confirm which encoding the preprocessor was fitted on.
df['SeniorCitizen'] = df['SeniorCitizen'].replace(['Yes', 'No'], ['1', '0'])

In [67]:
# Display the recoded column to check the replacement took effect.
df['SeniorCitizen']

0    1
Name: SeniorCitizen, dtype: object

In [68]:
# Display the dtype of every column of the one-row frame.
df.dtypes

gender               object
SeniorCitizen        object
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges        float64
dtype: object

In [72]:
# Load a sample CSV row for comparison with the frame built from the app inputs.
# NOTE(review): `data` is displayed here but not referenced by any later visible cell.
data = pd.read_csv('../notebook/data/sample.csv')

data

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,Female,0,No,No,5,Yes,No,Fiber optic,No,No,No,Yes,No,Yes,Month-to-month,Yes,Credit card (automatic),85.4,425.9


In [73]:
# Run the loaded preprocessor on the one-row frame `df`.
x = preprocessor.transform(df)

In [74]:

# Show the transformed feature row produced by the preprocessor.
print(x)

[[-0.3188839   3.05239766  1.52430216  0.          1.          0.
   0.          1.          0.          1.          0.          0.
   1.          0.          0.          1.          1.          0.
   0.          1.          0.          0.          0.          0.
   1.          1.          0.          0.          1.          0.
   0.          0.          0.          1.          1.          0.
   0.          1.          0.          0.          1.          0.
   0.          0.          0.          1.        ]]


In [46]:
# NOTE(review): the saved output of this cell is a ValueError (unknown category
# '1' in column 1). Its execution count (46) is lower than that of the cells
# above it, so it ran against an earlier kernel state and does not reflect a
# top-to-bottom run.
preprocessed_df = preprocessor.transform(df)

ValueError: Found unknown categories ['1'] in column 1 during transform

In [53]:
import joblib


def my_function(x):
    """Return ``x * 2``."""
    return x * 2


# Round-trip the function through a joblib file in the working directory.
joblib.dump(my_function, 'my_function.joblib')
loaded_function = joblib.load('my_function.joblib')

# Invoke the reloaded callable and show what it returns.
result = loaded_function(10)
print(result)

20
