In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# Load the churn dataset; the path is relative to the notebook's working directory.
dat = pd.read_csv("Dataset/Churn-Data.csv")

In [4]:
# Display one randomly selected row as a quick look at the columns.
# No random_state is passed, so a different row may appear on each run.
dat.sample()

Unnamed: 0,cID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,TV_Streaming,Movie_Streaming,Contract,PaperlessBilling,Method_Payment,Charges_Month,TotalCharges,Churn
761,5440-VHLUL,Male,0,No,No,69,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,Yes,One year,Yes,Bank transfer (automatic),105.2,7386.05,No


In [None]:
# Print column names, non-null counts and dtypes to spot text columns and gaps.
dat.info()

In [None]:
# The customer identifier is not used as a model feature, so remove it.
# NOTE(review): the sample output above lists an 'Unnamed: 0' header; if that is a
# real column (not just the rendered index) it is still present after this — confirm.
dat.drop(columns='cID', inplace=True)

In [None]:
# Encode gender as an integer flag: Male -> 0, Female -> 1.
# Any value that is not a key of the mapping becomes NaN.
gender_codes = {'Male': 0, 'Female': 1}
dat['gender'] = dat['gender'].map(gender_codes)

In [None]:
# Encode Partner as 0 (No) / 1 (Yes); values outside the mapping become NaN.
partner_codes = dict(No=0, Yes=1)
dat['Partner'] = dat.Partner.map(partner_codes)

In [None]:
# Encode the two Yes/No columns as 0/1 flags with the same mapping.
# .map() yields NaN for any value that is not 'No' or 'Yes'.
for binary_col in ('PhoneService', 'Dependents'):
    dat[binary_col] = dat[binary_col].map({'No': 0, 'Yes': 1})

In [None]:
# Pie chart showing how customers are distributed across the MultipleLines categories.
counts = dat["MultipleLines"].value_counts()

colors = ["#2B299B", "#3B74C9", "#85DFE3"]

# Keep the first slice (the most frequent category, since value_counts sorts
# descending) in place and pull every other slice out slightly. The tuple is
# built from len(counts) so the chart still renders when the number of
# categories is not exactly three.
explode = tuple(0 if position == 0 else 0.1 for position in range(len(counts)))

# Explicit figure/axes instead of the implicit pyplot state, so every call
# below targets this chart.
fig, ax = plt.subplots(figsize=(20, 6))
counts.plot(kind='pie', ax=ax, fontsize=12, colors=colors, explode=explode, autopct='%.1f%%')

# Label the chart with what it actually shows (slices are percentages, not counts).
ax.set_title('MultipleLines')
ax.set_xlabel('MultipleLines', weight="bold", color="#2F0F5D", fontsize=14, labelpad=20)
ax.set_ylabel('Share of customers', weight="bold", color="#2F0F5D", fontsize=14, labelpad=20)
ax.legend(labels=counts.index, loc="best")
plt.show()

In [None]:
# Collapse MultipleLines to a binary flag: 'No phone service' and 'No' both
# map to 0, 'Yes' maps to 1. Values outside the mapping become NaN.
mapping_dict = dict([('No phone service', 0), ('No', 0), ('Yes', 1)])
dat['MultipleLines'] = dat.MultipleLines.map(mapping_dict)

In [None]:
# PhoneService was already encoded to 0/1 in an earlier cell. Calling .map()
# again with the string keys 'No'/'Yes' would find no matches and turn every
# value into NaN. .replace() only touches values that are still 'No'/'Yes' and
# leaves everything else as it is, so this cell is safe to run more than once.
dat['PhoneService'] = dat['PhoneService'].replace({'No': 0, 'Yes': 1})

In [None]:
from sklearn.preprocessing import LabelEncoder
# One shared encoder instance; the next cell re-fits it for each categorical column.
le = LabelEncoder()

In [None]:
# Replace each multi-valued text column with integer class codes.
# The shared encoder is re-fitted per column, so after the loop it only
# remembers the classes of the last column (Method_Payment).
for categorical_col in ('InternetService', 'Contract', 'Method_Payment'):
    dat[categorical_col] = le.fit_transform(dat[categorical_col])

In [None]:
# Treat the "no service" answers as a plain 'No' everywhere in the frame, so
# the remaining text columns contain only 'Yes' / 'No'.
dat.replace(['No internet service', 'No phone service'], 'No', inplace=True)

In [None]:
# Columns expected to hold only 'Yes' / 'No' (or values already encoded as 0/1).
yes_no_columns = ['Partner','Dependents','PhoneService','MultipleLines','OnlineSecurity','OnlineBackup',
                  'DeviceProtection','TechSupport','TV_Streaming','Movie_Streaming','PaperlessBilling','Churn']
yes_no_map = {'Yes': 1, 'No': 0}
for col in yes_no_columns:
    # Assign the result back to the frame. Calling an inplace method on
    # dat[col] is a chained assignment: pandas warns about it and it stops
    # updating `dat` once Copy-on-Write is the default.
    # Values that are not 'Yes'/'No' (e.g. columns already encoded as 0/1)
    # are passed through unchanged.
    dat[col] = dat[col].map(lambda value: yes_no_map.get(value, value))

In [None]:
# Count the TotalCharges entries that cannot be parsed as numbers
# (errors='coerce' turns them into NaN, which isna() then flags).
pd.to_numeric(dat['TotalCharges'], errors='coerce').isna().sum()

In [None]:
# Show the rows whose TotalCharges value is not numeric.
non_numeric_total = pd.to_numeric(dat['TotalCharges'], errors='coerce').isna()
dat[non_numeric_total]

In [None]:
# Row/column count before the rows with a blank TotalCharges are removed.
dat.shape

In [None]:
# TotalCharges is read as text, and some entries (e.g. a blank ' ') cannot be
# parsed as numbers. Convert the column to a numeric dtype and keep only the
# rows that parsed, instead of filtering on the literal ' ' and leaving the
# remaining values as strings.
# .assign() returns a new frame, so later column assignments on `dat` do not
# write into a slice of the original frame.
total_charges = pd.to_numeric(dat['TotalCharges'], errors='coerce')
dat = dat.loc[total_charges.notna()].assign(TotalCharges=total_charges)

In [None]:
# Row/column count after the filter, to see how many rows were dropped.
dat.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the three continuous columns with min-max scaling (default range
# 0..1) so they are comparable with the 0/1 flag columns.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split below, so test-set ranges influence the scaling — confirm this is intended.
cols_to_scale = ['tenure', 'Charges_Month', 'TotalCharges']
scaled_values = MinMaxScaler().fit_transform(dat[cols_to_scale])
dat[cols_to_scale] = scaled_values

In [None]:
# Spot-check five random rows of the scaled columns.
dat[cols_to_scale].sample(5)

In [None]:
def summary(dat):
    """Build a per-column overview of a DataFrame.

    Args:
        dat: the DataFrame to describe.

    Returns:
        A DataFrame indexed by the column names of ``dat`` with the columns
        ``dtypes`` (column dtype), ``missing#`` (number of NaN values),
        ``missing%`` (NaN count divided by the row count, i.e. a fraction
        between 0 and 1, not multiplied by 100), ``unique`` (number of
        distinct non-null values) and ``count`` (number of non-null values).
    """
    n_rows = len(dat)
    null_counts = dat.isna().sum()

    report = pd.DataFrame(dat.dtypes, columns=['dtypes'])
    report['missing#'] = null_counts
    report['missing%'] = null_counts / n_rows
    # Positional arrays: these are written in column order, without index alignment.
    report['unique'] = dat.nunique().to_numpy()
    report['count'] = dat.count().to_numpy()
    return report

# Render the per-column summary of the current frame (dtypes, missing values, cardinality).
summary(dat)

In [None]:
# Separate the target column from the feature matrix.
y = dat['Churn']
X = dat.drop(columns='Churn')

In [None]:

# Spot-check five random target values.
y.sample(5)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=5)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential

In [None]:
# Fully connected binary classifier: two hidden ReLU layers of 15 units each
# and a single sigmoid output unit.
# The input width is read from the training matrix instead of being
# hard-coded to 19, so the model keeps matching X_train if the set of feature
# columns changes.
n_features = X_train.shape[1]
model = Sequential([
    Dense(15, input_shape=(n_features,), activation='relu'),
    Dense(15, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (paired with the
# single sigmoid output), and accuracy reported as the metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train for 100 epochs on the training split.
# NOTE(review): no random seeds are set in the visible cells, so results may vary between runs.
model.fit(X_train, y_train, epochs=100)

In [None]:
# Report the loss and accuracy on the held-out test split.
model.evaluate(X_test, y_test)

In [None]:
# Raw sigmoid outputs of the model for the test split.
yp = model.predict(X_test)

# Turn each output into a hard label: strictly greater than 0.5 becomes 1,
# everything else becomes 0.
y_pred = [1 if probability > 0.5 else 0 for probability in yp]

In [None]:
def f1_score_tf(y_true, y_pred):
    """Compute the binary F1 score with TensorFlow ops.

    Args:
        y_true: ground-truth 0/1 labels (array-like or tensor, any numeric dtype).
        y_pred: predicted probabilities or 0/1 labels (array-like or tensor).
            Values are rounded to 0/1 with ``tf.round``, which rounds half to
            even, so an exact 0.5 counts as 0.

    Returns:
        The F1 score as a NumPy float32 scalar. A small epsilon is added to
        every denominator, so the result is 0 rather than NaN when there are
        no positive labels or predictions.
    """
    # Bring both inputs to float32 so integer labels can be combined with
    # float predictions, and flatten them so an (n, 1) prediction column and
    # an (n,) label vector are compared element-wise instead of being
    # broadcast to an (n, n) matrix.
    y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred_binary = tf.round(tf.reshape(tf.cast(y_pred, tf.float32), [-1]))

    # Confusion-matrix counts: the clipped differences are 1 only where the
    # prediction is 1 and the label 0 (fp), or the label 1 and the prediction 0 (fn).
    tp = tf.reduce_sum(y_true * y_pred_binary)
    fp = tf.reduce_sum(tf.clip_by_value(y_pred_binary - y_true, 0, 1))
    fn = tf.reduce_sum(tf.clip_by_value(y_true - y_pred_binary, 0, 1))

    eps = tf.keras.backend.epsilon()
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)

    f1 = 2 * (precision * recall) / (precision + recall + eps)
    return f1.numpy()

In [None]:
# Materialise the held-out labels as a NumPy array.
y_true = np.array(y_test)

In [None]:
# Wrap the labels and the thresholded predictions as TensorFlow constants.
# y_pred must be built from the predictions: building it from y_true would
# make every later metric compare the labels with themselves.
y_true = tf.constant(y_true)
y_pred = tf.constant(y_pred)

In [None]:
# Final arrays handed to the F1 helper: labels as int32 (taken from y_test),
# predictions as float32 (taken from the current y_pred).
y_true, y_pred = (
    np.array(y_test, dtype='int32'),
    np.array(y_pred, dtype='float32'),
)


In [None]:
# Score the test-set predictions with the custom F1 helper defined above.
f1_tf = f1_score_tf(y_true, y_pred)

In [None]:
# Show the resulting F1 value.
print("F1 Score (TensorFlow):", f1_tf)

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Persist the trained model to the working directory.
# NOTE(review): the .h5 suffix selects Keras' HDF5 format, which newer Keras
# releases describe as legacy in favour of `.keras` — confirm what downstream
# loaders expect before changing the file name.
model.save('model.h5')