# ***Importing important Dependancies***

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### ***DataSet Loading***

In [2]:
df=pd.read_csv('Churn_data.csv')

## ***Exploratory Data Analysis***

In [3]:
# Drop unnecessary columns (if needed)
df = df.drop(['Unnamed: 0', 'state', 'area.code'], axis=1)

In [4]:
# Change of coloumn Data Type object to int
df['day.charge'] = pd.to_numeric(df['day.charge'], errors='coerce')
df['eve.mins'] = pd.to_numeric(df['eve.mins'], errors='coerce')

In [5]:
df.dtypes

account.length      int64
voice.plan         object
voice.messages      int64
intl.plan          object
intl.mins         float64
intl.calls          int64
intl.charge       float64
day.mins          float64
day.calls           int64
day.charge        float64
eve.mins          float64
eve.calls           int64
eve.charge        float64
night.mins        float64
night.calls         int64
night.charge      float64
customer.calls      int64
churn              object
dtype: object

In [6]:
df.columns

Index(['account.length', 'voice.plan', 'voice.messages', 'intl.plan',
       'intl.mins', 'intl.calls', 'intl.charge', 'day.mins', 'day.calls',
       'day.charge', 'eve.mins', 'eve.calls', 'eve.charge', 'night.mins',
       'night.calls', 'night.charge', 'customer.calls', 'churn'],
      dtype='object')

In [7]:
# Convert categorical variables to numerical using Label Encoding
label_encoder = LabelEncoder()
df['voice.plan'] = label_encoder.fit_transform(df['voice.plan'])
df['intl.plan'] = label_encoder.fit_transform(df['intl.plan'])
df['churn'] = label_encoder.fit_transform(df['churn'])

## ***Feature Selection***

In [8]:
# Define features (X) and target variable (y)
X = df.drop('churn', axis=1)
y = df['churn']

In [9]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Impute missing values using SimpleImputer
imputer = SimpleImputer(strategy='mean')  # You can choose a different strategy based on your data
X_train_imputed = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns)
X_test_imputed = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns)

### ***Logistic Regression***

In [11]:
from sklearn.linear_model import LogisticRegression
# Logistic Regression Classifier
logreg_classifier = LogisticRegression()

# Train the model
logreg_classifier.fit(X_train_imputed, y_train)

# Predictions
y_pred = logreg_classifier.predict(X_test_imputed)

# Evaluation
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy of Logistic Regression: {accuracy*100}')
print('\nConfusion Matrix:\n', conf_matrix)
print('\nClassification Report:\n', report)

Accuracy of Logistic Regression: 86.0

Confusion Matrix:
 [[850  11]
 [129  10]]

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.99      0.92       861
           1       0.48      0.07      0.12       139

    accuracy                           0.86      1000
   macro avg       0.67      0.53      0.52      1000
weighted avg       0.81      0.86      0.81      1000



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### ***Random Forest Classifier***

In [12]:
# Initialize the Random Forest Classifier
model1 = RandomForestClassifier(random_state=42)

# Train the model
model1.fit(X_train_imputed, y_train)

# Make predictions on the test set
y_pred = model1.predict(X_test_imputed)

# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Display results
print(f'Accuracy: {accuracy*100}')
print('\nConfusion Matrix:\n', conf_matrix)
print('\nClassification Report:\n', classification_rep)


Accuracy: 95.8

Confusion Matrix:
 [[851  10]
 [ 32 107]]

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.98       861
           1       0.91      0.77      0.84       139

    accuracy                           0.96      1000
   macro avg       0.94      0.88      0.91      1000
weighted avg       0.96      0.96      0.96      1000



### ***Support Vector Machine***

In [13]:
from sklearn.svm import SVC
# Initialize the SVM Classifier
model = SVC()

# Train the model
model.fit(X_train_imputed, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_imputed)

# Evaluate the model performance
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Display results
print(f'Accuracy: {accuracy*100}')
print('\nConfusion Matrix:\n', conf_matrix)
print('\nClassification Report:\n', classification_rep)

Accuracy: 86.5

Confusion Matrix:
 [[861   0]
 [135   4]]

Classification Report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.93       861
           1       1.00      0.03      0.06       139

    accuracy                           0.86      1000
   macro avg       0.93      0.51      0.49      1000
weighted avg       0.88      0.86      0.81      1000



### ***Decision Tree Classifier Algorithm***

In [14]:
from sklearn.tree import DecisionTreeClassifier
# Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42)

# Train the model
dt_classifier.fit(X_train_imputed, y_train)

# Predictions
y_pred = dt_classifier.predict(X_test_imputed)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy of Decision Tree Classifier: {accuracy*100}")
print('\nConfusion Matrix:\n', conf_matrix)
print("Classification Report Decision Tree Classifier:\n", report)

Accuracy of Decision Tree Classifier: 93.30000000000001

Confusion Matrix:
 [[829  32]
 [ 35 104]]
Classification Report Decision Tree Classifier:
               precision    recall  f1-score   support

           0       0.96      0.96      0.96       861
           1       0.76      0.75      0.76       139

    accuracy                           0.93      1000
   macro avg       0.86      0.86      0.86      1000
weighted avg       0.93      0.93      0.93      1000



### ***K-Nearest Classifier***

In [15]:
from sklearn.neighbors import KNeighborsClassifier
# K-Nearest Neighbors (KNN) Classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn_classifier.fit(X_train_imputed, y_train)

# Predictions
y_pred = knn_classifier.predict(X_test_imputed)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy of K-Nearest Classifier: {accuracy*100}")
print("Classification Report of K-Nearest Classifier:\n", report)

Accuracy of K-Nearest Classifier: 89.3
Classification Report of K-Nearest Classifier:
               precision    recall  f1-score   support

           0       0.90      0.98      0.94       861
           1       0.78      0.32      0.46       139

    accuracy                           0.89      1000
   macro avg       0.84      0.65      0.70      1000
weighted avg       0.88      0.89      0.87      1000



### ***Naives Bayes Classifier***

In [16]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Multinomial Naive Bayes Classifier
nb_classifier = MultinomialNB()

# Train the model
nb_classifier.fit(X_train_imputed, y_train)

# Predictions
y_pred = nb_classifier.predict(X_test_imputed)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy of Naives Bayes Classifier: {accuracy*100}")
print("Classification Report Naives Bayes Classifier:\n", report)

Accuracy of Naives Bayes Classifier: 63.7
Classification Report Naives Bayes Classifier:
               precision    recall  f1-score   support

           0       0.92      0.63      0.75       861
           1       0.23      0.68      0.34       139

    accuracy                           0.64      1000
   macro avg       0.58      0.65      0.55      1000
weighted avg       0.83      0.64      0.69      1000



### ***Gaussian Naive Bayes***

In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes Classifier
gnb_classifier = GaussianNB()

# Train the model
gnb_classifier.fit(X_train_imputed, y_train)

# Predictions
y_pred = gnb_classifier.predict(X_test_imputed)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy of Gaussian Naive Bayes: {accuracy*100}")
print("Classification Report of Gaussian Naive Bayes:\n", report)

Accuracy of Gaussian Naive Bayes: 85.3
Classification Report of Gaussian Naive Bayes:
               precision    recall  f1-score   support

           0       0.93      0.90      0.91       861
           1       0.48      0.56      0.51       139

    accuracy                           0.85      1000
   macro avg       0.70      0.73      0.71      1000
weighted avg       0.86      0.85      0.86      1000



## ***Deep Learning***

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report

# Assume X_train, X_test, y_train, and y_test are already loaded and imputed

# Convert labels to one-hot encoding
y_train_encoded = to_categorical(y_train)
y_test_encoded = to_categorical(y_test)

# Min-Max Scaling
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the Neural Network model
model = Sequential()
model.add(Dense(128, input_shape=(X_train_scaled.shape[1],), activation='sigmoid'))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(y_train_encoded[0]), activation='softmax'))  # Adjust the number of output nodes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_scaled, y_train_encoded, epochs=20, batch_size=32, validation_split=0.1)

# Evaluate the model
_, accuracy = model.evaluate(X_test_scaled, y_test_encoded)
print(f"Accuracy: {accuracy}")

# Predictions
y_pred_encoded = model.predict(X_test_scaled)
y_pred_classes = y_pred_encoded.argmax(axis=1)

# Convert one-hot encoded labels back to original labels
y_test_classes = y_test_encoded.argmax(axis=1)

# Evaluation
accuracy = accuracy_score(y_test_classes, y_pred_classes)
report = classification_report(y_test_classes, y_pred_classes)

print(f"Accuracy of Sequential Model is: {accuracy*100}")
print("Classification Report:\n", report)





Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy: 0.8610000014305115
Accuracy of Sequential Model is: 86.1
Classification Report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.93       861
           1       0.00      0.00      0.00       139

    accuracy                           0.86      1000
   macro avg       0.43      0.50      0.46      1000
weighted avg       0.74      0.86      0.80      1000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
import pickle
pickle.dump(model1,open('model.pkl','wb'))

In [20]:
import sklearn
print(sklearn.__version__)

1.2.2
