In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

import tensorflow as tf

# Importing the data

In [None]:
# Read the churn-modelling CSV into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer='../input/churn-modelling/Churn_Modelling.csv')
df.head(n=5)

# Basic Data Exploration

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the frame.
df.describe()

In [None]:
# Print column names, dtypes and non-null counts of the raw frame.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Pairwise correlation between the numeric columns only.
# At this point the frame has not yet been cleaned: columns such as Surname are
# dropped, and non-numeric columns are encoded, only in later cells. pandas >= 2.0
# raises on non-numeric columns instead of silently skipping them, so restrict
# the computation to numeric dtypes explicitly.
corr = df.select_dtypes(include='number').corr()
corr

In [None]:
# Annotated heatmap of the correlation matrix, drawn on the freshly created axes
# (seaborn uses the current axes when none is passed).
fig, ax = plt.subplots(figsize=(15, 15))
ax = sns.heatmap(data=corr, annot=True, fmt='.2f', linewidths=1.0, cmap='YlGnBu');

In [None]:
# Preview the first rows again before columns are dropped.
df.head()

# Dropping some Columns


In [None]:
# Remove the RowNumber, CustomerId and Surname columns from the frame.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

In [None]:
# Preview the frame after the column drop.
df.head()

## Visualizing some columns

In [None]:
# Number of rows per Geography value.
# The column is passed by keyword: in seaborn >= 0.12 the first positional
# parameter of countplot is `data`, so a bare positional Series is no longer
# interpreted as the `x` variable.
sns.countplot(data=df, x='Geography');

In [None]:
# Number of rows per Gender value.
# The column is passed by keyword: in seaborn >= 0.12 the first positional
# parameter of countplot is `data`, so a bare positional Series is no longer
# interpreted as the `x` variable.
sns.countplot(data=df, x='Gender');

In [None]:
# Ages in ascending order, to eyeball the range of the column.
df['Age'].sort_values(ascending=True)

In [None]:
# Check the remaining columns and their dtypes before encoding.
df.info()

## Converting non-numeric columns into Numeric

In [None]:
# Replace every non-numeric column with its integer category codes.
# pd.Categorical sorts the distinct values, so codes follow that sorted order;
# missing values (if any) are encoded as -1.
# The previous version first stored an ordered-categorical copy of the column and
# then immediately overwrote it with the codes; that dead assignment is removed.
for label, content in df.items():
    if not pd.api.types.is_numeric_dtype(content):
        df[label] = pd.Categorical(content).codes

In [None]:
# Confirm the dtypes after the non-numeric columns were converted to codes.
df.info()

In [None]:
# Preview the encoded frame.
df.head()

In [None]:
# Row count for each encoded Geography code.
df['Geography'].value_counts()

In [None]:
# Row count for each encoded Gender code.
df['Gender'].value_counts()

# Splitting the data

In [None]:
# Target is the 'Exited' column; features are every other column.
y = df['Exited']
x = df.drop(columns='Exited')

In [None]:
# Hold out 20% of the rows as the test set.
# random_state makes the split (and every score computed from it) reproducible
# across kernel restarts; stratify=y keeps the share of each 'Exited' class the
# same in the training and test sets.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Building Models

# RandomForest

In [None]:
# Random forest of 1000 trees with a fixed seed, fitted on the training split.
ran_model = RandomForestClassifier(random_state=42, n_estimators=1000)
ran_model.fit(X=x_train, y=y_train)

In [None]:
# Class predictions of the random forest on the held-out test features.
ran_preds = ran_model.predict(X=x_test)

In [None]:
# Mean accuracy of the random forest on the test split.
ran_model.score(X=x_test, y=y_test)

In [None]:
# Per-class precision / recall / F1 for the random forest.
print(classification_report(y_true=y_test, y_pred=ran_preds))

In [None]:
# Confusion matrix of the random forest (rows: true class, columns: predicted class).
conf_mat = confusion_matrix(y_true=y_test, y_pred=ran_preds)
conf_mat

In [None]:
# Heatmap of the random-forest confusion matrix with integer cell annotations.
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    conf_mat,
    ax=ax,
    annot=True,
    fmt=".0f",
    linewidths=0.5,
    linecolor="red",
)
ax.set_xlabel("Predicted Values")
ax.set_ylabel("True Values")
plt.show();

# ANN

Standardizing the features, because neural networks train more reliably when all inputs are on a comparable scale.

In [None]:
# Standardize features to zero mean / unit variance for the neural network.
# The scaler is fitted on the training split only and the same learned mean and
# standard deviation are then applied to the test split. Re-fitting on the test
# set (fit_transform) would scale the two splits with different statistics and
# leak test-set information into preprocessing.
sc = StandardScaler()
ann_train = sc.fit_transform(x_train)
ann_test = sc.transform(x_test)

In [None]:
# Feed-forward binary classifier, declared as a single layer stack:
#   - two hidden layers of 32 ReLU units each (the first one also receives the input)
#   - one sigmoid output unit
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the
# reported metric.
ann.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit the network on the scaled training features for 100 epochs in mini-batches of 32.
ann.fit(x=ann_train, y=y_train, epochs=100, batch_size=32)

In [None]:
# Predicted probabilities for the test set (output of the sigmoid unit).
ann_prob = ann.predict(ann_test)

# Threshold at 0.5, then convert to a flat array of integer labels so the
# predictions have the same 1-D shape and integer label type as y_test
# (the raw comparison yields a 2-D boolean array). Keeping probabilities and
# labels under separate names also makes this cell safe to re-run.
ann_pred = (ann_prob > 0.5).astype(int).ravel()

# Confusion matrix (rows: true class, columns: predicted class) and accuracy.
cm = confusion_matrix(y_test, ann_pred)
print(cm)
accuracy_score(y_test, ann_pred)

In [None]:
# Per-class precision / recall / F1 for the neural network.
print(classification_report(y_true=y_test, y_pred=ann_pred))

In [None]:
# Heatmap of the neural-network confusion matrix with integer cell annotations.
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm,
    ax=ax,
    annot=True,
    fmt=".0f",
    linewidths=0.5,
    linecolor="red",
)
ax.set_xlabel("Predicted Values")
ax.set_ylabel("True Values")
plt.show();

# Plotting the scores 

In [None]:
# One-row table holding the test accuracy of each model (one column per model).
scores = pd.DataFrame(
    data={
        'RandomForest': ran_model.score(x_test, y_test),
        'ANN': accuracy_score(y_test, ann_pred),
    },
    index=[0],
)

In [None]:
# Bar chart comparing the models: transposing puts one bar per model on the x-axis.
score_ax = scores.transpose().plot.bar(figsize=(10, 10))
score_ax.set_title('Scores of all Model')
score_ax.set_xlabel('Model Name')
score_ax.set_ylabel('Scores');
 

# Please upvote if you like this notebook.