In [1]:
# !pip install gradio

In [2]:
# !pip install xgboost

In [3]:
import gradio as gr

import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# from google.colab import drive
# drive.mount('/content/drive')

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
from joblib import load

In [7]:
# Load the held-out feature CSV so the model can be sanity-checked against it.
# NOTE(review): this reads the same file as `X_test` in the next cell, and
# `test` is not referenced again in the visible cells - candidate for removal.
test = pd.read_csv('./X-test.csv')

In [8]:
# Read the test features and test labels from their CSV files.
# NOTE(review): both names are rebound by the train_test_split call in the
# training cell further down, so these CSV-backed frames are discarded there.
X_test = pd.read_csv('./X-test.csv')
y_test = pd.read_csv('./y-test.csv')

In [9]:
# Read the training features and training labels from their CSV files.
# NOTE(review): both names are rebound by the train_test_split call in the
# training cell further down, so these CSV-backed frames are discarded there.
X_train = pd.read_csv('./X-train.csv')
y_train = pd.read_csv('./y-train.csv')

In [10]:
# Deserialize the saved object with joblib (presumably a fitted pipeline,
# judging by the file name - TODO confirm).
# NOTE(review): joblib.load unpickles arbitrary Python objects; only load this
# file if it comes from a trusted source.
# NOTE(review): `model` is not referenced again in the visible cells; the
# predictions below use the freshly fitted `pipeline` instead.
model = load('./FinalPipeline.pkl')

In [11]:
# Train the churn pipeline from the raw Telco CSV and predict on a held-out split.

# Load the raw data
df = pd.read_csv('./Telco-Customer-Churn.csv')

# 1. Drop the columns that are deliberately excluded from the model
drop_columns = ['customerID', 'gender', 'PaymentMethod', 'StreamingMovies']
df = df.drop(drop_columns, axis=1)

# 2. Convert TotalCharges to numeric; entries that cannot be parsed become NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# 3. Numeric columns to scale.
#    'tenure' is listed explicitly: ColumnTransformer drops every column that
#    is not assigned to a transformer (remainder='drop' is the default), so
#    leaving it out silently removed it from the model.
numeric_features = ['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges']
numeric_transformer = StandardScaler()

# 4. Categorical columns to one-hot encode ('Partner' added for the same
#    reason as 'tenure' above).
categorical_features = ['Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                        'InternetService', 'OnlineSecurity', 'OnlineBackup',
                        'DeviceProtection', 'TechSupport', 'StreamingTV',
                        'Contract', 'PaperlessBilling']
# The dense-output keyword was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4; support both.
try:
    categorical_transformer = OneHotEncoder(sparse_output=False)
except TypeError:
    categorical_transformer = OneHotEncoder(sparse=False)

# 5. Encode the target column (LabelEncoder assigns integer codes to the
#    sorted class labels)
target_column = 'Churn'
target_encoder = LabelEncoder()

# 6. Split into features X and encoded target y
X = df.drop(target_column, axis=1)
y = target_encoder.fit_transform(df[target_column])

# 7. Assemble preprocessing and the classifier into a single pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier())
])

# 8. Split into train and test sets (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 9. Fit the pipeline and predict on the held-out rows
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

y_pred

array([1, 0, 0, ..., 0, 0, 0])

In [12]:
# Feature-only copy of the raw data: same exclusions as the training frame,
# plus the target column itself.
drop_columns = ['customerID', 'gender', 'PaymentMethod', 'StreamingMovies', 'Churn']
df_ = pd.read_csv('./Telco-Customer-Churn.csv').drop(columns=drop_columns)

In [13]:
# Coerce TotalCharges to a numeric dtype; values that cannot be parsed become NaN.
df_ = df_.assign(TotalCharges=pd.to_numeric(df_['TotalCharges'], errors='coerce'))

In [14]:
df_.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   SeniorCitizen     7043 non-null   int64  
 1   Partner           7043 non-null   object 
 2   Dependents        7043 non-null   object 
 3   tenure            7043 non-null   int64  
 4   PhoneService      7043 non-null   object 
 5   MultipleLines     7043 non-null   object 
 6   InternetService   7043 non-null   object 
 7   OnlineSecurity    7043 non-null   object 
 8   OnlineBackup      7043 non-null   object 
 9   DeviceProtection  7043 non-null   object 
 10  TechSupport       7043 non-null   object 
 11  StreamingTV       7043 non-null   object 
 12  Contract          7043 non-null   object 
 13  PaperlessBilling  7043 non-null   object 
 14  MonthlyCharges    7043 non-null   float64
 15  TotalCharges      7032 non-null   float64
dtypes: float64(2), int64(2), object(12)
memory

In [15]:
X_test.head(0)

Unnamed: 0,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,Contract,PaperlessBilling,MonthlyCharges,TotalCharges


In [16]:
df_.head(0)

Unnamed: 0,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,Contract,PaperlessBilling,MonthlyCharges,TotalCharges


In [17]:
X_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1409 entries, 185 to 1161
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   SeniorCitizen     1409 non-null   int64  
 1   Partner           1409 non-null   object 
 2   Dependents        1409 non-null   object 
 3   tenure            1409 non-null   int64  
 4   PhoneService      1409 non-null   object 
 5   MultipleLines     1409 non-null   object 
 6   InternetService   1409 non-null   object 
 7   OnlineSecurity    1409 non-null   object 
 8   OnlineBackup      1409 non-null   object 
 9   DeviceProtection  1409 non-null   object 
 10  TechSupport       1409 non-null   object 
 11  StreamingTV       1409 non-null   object 
 12  Contract          1409 non-null   object 
 13  PaperlessBilling  1409 non-null   object 
 14  MonthlyCharges    1409 non-null   float64
 15  TotalCharges      1408 non-null   float64
dtypes: float64(2), int64(2), object(12)
memo

In [18]:
# True if any cell VALUE in the frame equals the string 'Partner'.
# NOTE(review): this does not test for a column named 'Partner'; use
# `'Partner' in df_.columns` if that was the intent.
df_.isin(['Partner']).any().any()


False

In [19]:
# pipeline.fit_transform(df_)

In [20]:
# Score every row of the feature-only frame with the pipeline fitted above.
y_pred2 = pipeline.predict(df_)

In [21]:
pipeline

In [22]:
# Refit the pipeline on the training split. Pipeline.fit returns the pipeline
# itself, so `model2` and `pipeline` are two names for the same fitted object.
model2 = pipeline.fit(X_train, y_train)

In [23]:
model2.predict(X_test)

array([1, 0, 0, ..., 0, 0, 0])

# **GRADIO BUILD**

In [32]:
# The model outputs 0 or 1, so this helper turns that class code into a
# human-readable message.
def classifier_1(result):
    """Translate a predicted class code into a churn message.

    The target is encoded with LabelEncoder, which numbers the sorted class
    labels, so 'No' becomes 0 and 'Yes' becomes 1: a prediction of 1 means the
    customer is expected to churn.

    Args:
        result: Predicted class code (0 or 1; NumPy integers are accepted).

    Returns:
        "Customer will churn" when ``result`` is 1, otherwise
        "Customer will not churn".
    """
    if result == 1:
        return "Customer will churn"
    else:
        return "Customer will not churn"

In [33]:
# List the feature columns the model expects, one per line; the Gradio inputs
# below have to be declared in this same order.

for column in X_test.columns:
  print (column)

SeniorCitizen
Partner
Dependents
tenure
PhoneService
MultipleLines
InternetService
OnlineSecurity
OnlineBackup
DeviceProtection
TechSupport
StreamingTV
Contract
PaperlessBilling
MonthlyCharges
TotalCharges


In [34]:
X_test.columns

Index(['SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService',
       'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
       'DeviceProtection', 'TechSupport', 'StreamingTV', 'Contract',
       'PaperlessBilling', 'MonthlyCharges', 'TotalCharges'],
      dtype='object')

In [35]:
# Take in user inputs
def main_function(SeniorCitizen, Partner, Dependents, tenure, PhoneService, MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport, StreamingTV, Contract, PaperlessBilling, MonthlyCharges, TotalCharges):
    """Predict churn for a single customer described by the Gradio form values.

    Each argument is one form field, named after the feature column it fills.
    The values are placed in a one-row DataFrame with those column names and
    passed to the notebook-level ``pipeline``.

    Returns:
        A one-element list ``[(predicted_class, message)]`` for the
        HighlightedText output: ``predicted_class`` is the model's 0/1
        prediction and ``message`` is the text from ``classifier_1``, which
        also serves as the highlight label.
    """
    feature_names = ['SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'Contract', 'PaperlessBilling', 'MonthlyCharges', 'TotalCharges']
    inputs = [SeniorCitizen, Partner, Dependents, tenure, PhoneService, MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport, StreamingTV, Contract, PaperlessBilling, MonthlyCharges, TotalCharges]

    input_df = pd.DataFrame([inputs], columns=feature_names)

    # Use the pipeline that was already fitted in the training cell. Refitting
    # here retrained the whole model on every request and mutated a shared
    # estimator while the app was serving.
    pred = pipeline.predict(input_df)

    output = classifier_1(pred[0])

    # Show the predicted class itself as the text, labelled with the message.
    return [(int(pred[0]), output)]

# Output component: highlighted text whose colour is looked up from the label
# (the message string) attached to each returned span.
output = gr.outputs.HighlightedText(
    color_map=dict([
        ("Customer will churn", "green"),
        ("Customer will not churn", "red"),
    ])
)



In [36]:
X_test['TotalCharges'].min() , X_test['TotalCharges'].max()

(18.9, 8672.45)

In [29]:
# X_test[""].value_counts()

In [37]:
# Create the Gradio interface.
# The inputs are declared in the same order as main_function's parameters.
# Every choice string must match a category value seen during training exactly
# (including case): the OneHotEncoder in the pipeline uses its default
# handle_unknown='error', so an unseen string raises at predict time.
# NOTE(review): `gr.inputs` is a legacy namespace that newer Gradio releases
# deprecate and later remove; migrate to the top-level components
# (gr.Dropdown, gr.Radio, gr.Slider) when upgrading.

# Value set shared by the internet add-on columns in the data.
internet_addon_choices = ['Yes', 'No', 'No internet service']

interface = gr.Interface(
    fn=main_function,
    inputs=[
        gr.inputs.Dropdown([0, 1], label="Is the customer a senior citizen? Select 1 for YES and 0 for NO"),
        gr.inputs.Dropdown(['No', 'Yes'], label='Partner'),
        gr.inputs.Dropdown(['No', 'Yes'], label='Dependents'),
        gr.inputs.Slider(label="Tenure: How long have you been using Vodafone?"),
        gr.inputs.Dropdown(['No', 'Yes'], label='Phone Service'),
        gr.inputs.Radio(['Yes', 'No', 'No phone service'], label="Multiple Lines"),
        gr.inputs.Dropdown(['DSL', 'Fiber optic', 'No'], label='Internet Service'),
        gr.inputs.Radio(internet_addon_choices, label="Online Security"),
        gr.inputs.Radio(internet_addon_choices, label="Online Backup"),
        gr.inputs.Radio(internet_addon_choices, label="Device Protection"),
        gr.inputs.Radio(internet_addon_choices, label="Tech Support"),
        gr.inputs.Radio(internet_addon_choices, label="TV Streaming"),
        gr.inputs.Dropdown(['Month-to-month', 'One year', 'Two year'], label='Contract'),
        gr.inputs.Dropdown(['No', 'Yes'], label='Paperless Billing'),
        gr.inputs.Slider(minimum=18, maximum=120, label="Monthly Charges"),
        gr.inputs.Slider(minimum=18, maximum=8675, label="Total Charges")
    ],
    outputs= output,
    title="Team Paris Customer Churn Prediction App",
    description="Let's Get Started With Some Predictions!"
)

interface.launch()



Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [38]:
X_test.head()

Unnamed: 0,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,Contract,PaperlessBilling,MonthlyCharges,TotalCharges
185,0,Yes,No,1,No,No phone service,DSL,No,No,No,No,No,Month-to-month,Yes,24.8,24.8
2715,0,No,No,41,Yes,Yes,No,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,Yes,25.25,996.45
3825,0,Yes,Yes,52,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,19.35,1031.7
1807,0,No,No,1,Yes,No,Fiber optic,No,No,Yes,No,No,Month-to-month,No,76.35,76.35
132,0,No,No,67,Yes,No,DSL,No,No,No,Yes,No,Two year,No,50.55,3260.1
