### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Display the installed TensorFlow version so the environment is recorded with the run.
tf.__version__

### Data Preprocessing

##### Importing the dataset

In [None]:
# Load the churn dataset from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/churn-modelling/Churn_Modelling.csv')

# Features: every column from position 3 up to (but not including) the last one.
# .copy() gives X its own data, so the column assignments made on X in later
# cells never operate on a slice of df (avoids SettingWithCopyWarning).
X = df.iloc[:, 3:-1].copy()

# Target: the last column of the file.
y = df.iloc[:, -1]

In [None]:
# Preview the first two rows of the feature frame.
X.head(2)

### Label Encoding Ordinal features or features with only 2 levels

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the distinct Gender labels first, then replace the column with their
# integer codes.
le = LabelEncoder()
le.fit(X["Gender"])
X['Gender'] = le.transform(X["Gender"])

### One Hot Encoding the Nominal features

In [None]:
# One-hot encode Geography with pandas. The result is a separate frame holding
# one indicator column per distinct Geography value.
#
# Alternative using scikit-learn (left disabled):
# from sklearn.compose import ColumnTransformer
# from sklearn.preprocessing import OneHotEncoder
# ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
# X = np.array(ct.fit_transform(X))

dumm = pd.get_dummies(data=X.loc[:, 'Geography'])



### If get_dummies method is used, we have to concatenate the actual dataframe and the dataframe with the dummies of the categorical features.

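### If OneHotEncoder (via ColumnTransformer with remainder='passthrough') was used, we need not concatenate, as it returns the encoded columns together with the remaining columns in a single array.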

In [None]:
# Append the indicator columns to the feature frame side by side (column-wise).
X = pd.concat(objs=[X, dumm], axis='columns')

In [None]:
# Remove the original text column now that its indicator columns are in X.
# The column is named via `columns=` because pandas 2.0 no longer accepts
# `axis` as a positional argument to DataFrame.drop.
X = X.drop(columns="Geography")

### Splitting the data into Train and Test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

### Neural networks compute weighted sums of their input values, so features on very different scales distort training; it is therefore essential to scale the data.

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits so no test information leaks into the fit.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

### ANN

##### Initializing ANN with input parameters

In [None]:
# Ask for the architecture interactively: number of hidden layers and their width.
n = int(input("Enter number of layers: "))
neuron = int(input("Enter the number of neurons per layer: "))

# Fail early with a clear message instead of silently building a model with no
# hidden layers (negative n) or handing an invalid width to Keras.
if n < 0:
    raise ValueError("number of layers must be >= 0, got {}".format(n))
if neuron < 1:
    raise ValueError("number of neurons per layer must be >= 1, got {}".format(neuron))

ann = tf.keras.models.Sequential()

# n identical fully connected ReLU layers; the loop index itself is not needed.
for layer_index in range(n):
    ann.add(tf.keras.layers.Dense(units=neuron, activation='relu'))

# Single sigmoid unit: the model outputs one value in (0, 1) per input row.
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

### Defining number of epochs and training the data

In [None]:
# Ask how many passes over the training data to run, then train in
# mini-batches of 32 rows.
epoch = int(input("Enter the number of epochs: "))
ann.fit(x=X_train, y=y_train, epochs=epoch, batch_size=32)

### <u>Checking the performance by comparing the predicted and actual results</u>

In [None]:
# Score the held-out rows; the sigmoid output layer yields one value in (0, 1) per row.
y_pred = ann.predict(x=X_test)

In [None]:
# Turn the test labels into a single-column 2-D array.
y_test = np.reshape(np.array(y_test), (-1, 1))

##### As the resultant values are between 0 and 1 (due to the sigmoid activation function), we have to define a threshold to convert a value to either 0 or 1.

In [None]:
from sklearn.preprocessing import binarize
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score,f1_score

# Sweep the decision threshold from 0.1 to 1.0 in steps of 0.1 and report the
# confusion matrix, sensitivity, specificity, accuracy and F1 score for each.
for i in range(1,11):
    threshold = i/10
    # `threshold` is keyword-only in current scikit-learn; passing it
    # positionally raises a TypeError. binarize maps values strictly greater
    # than the threshold to 1 and all other values to 0.
    y_pred2 = binarize(y_pred, threshold=threshold)
    cm2 = confusion_matrix(y_test, y_pred2)
    # scikit-learn layout: rows are true labels, columns are predicted labels.
    tn, fp, fn, tp = cm2[0,0], cm2[0,1], cm2[1,0], cm2[1,1]
    print ('With',threshold,'threshold the Confusion Matrix is ','\n',cm2,'\n',
            'with',tn+tp,'correct predictions and',fn,'Type II errors( False Negatives)','\n\n',
          'Sensitivity: ',tp/(float(tp+fn)),'Specificity: ',tn/(float(tn+fp)),'\n\n\n')
    print('The accuracy score is: ',accuracy_score(y_test,y_pred2))
    print('The f1 score is: ',f1_score(y_test,y_pred2))
    print('\n')

Threshold of 0.5 yields the best result. Hence,
- [values > 0.5] = 1
- [values <= 0.5] = 0

The resultant values can be used as the predicted output from the model.

In [None]:
# Convert the predicted values to hard 0/1 labels at the chosen cut-off.
# `threshold` must be passed by keyword in current scikit-learn; values
# strictly greater than 0.5 become 1, everything else becomes 0.
y_pred2 = binarize(y_pred, threshold=0.5)

In [None]:
# Show the first raw row of the dataset, for comparison with its scaled form.
df.head(1)

In [None]:
# Show the first row of the scaled test features.
X_test[0]

### Checking with the customer details, whether the customer will churn or not!

In [None]:
# One hand-written customer record as a single-row 2-D list.
# NOTE(review): the 12 values must follow the same column order as the frame
# `sc` was fitted on - confirm against X.columns before changing them.
new_customer = [[600,0,40,3,60000,2,1,1,50000,1,0,0]]

# Apply the scaling learned from the training data, then score the record.
new_customer_scaled = sc.transform(new_customer)
result = ann.predict(new_customer_scaled)

In [None]:
# Convert the single predicted value to a hard 0/1 label.
# `threshold` must be passed by keyword in current scikit-learn; a value
# strictly greater than 0.5 becomes 1, anything else becomes 0.
result_ = binarize(result, threshold=0.5)

In [None]:
# Display the thresholded prediction for the single customer.
result_

In [None]:
# Pull the single prediction out as a plain Python int. Indexing the scalar
# element directly avoids calling int() on a 1-element array, which newer
# NumPy releases deprecate.
prediction = int(result_[0, 0])

# Report a conclusion that matches the predicted class rather than always
# claiming the customer will stay.
if prediction == 0:
    print("As the value of the prediction is ",prediction," we can retain the customer as he is not likely to switch")
else:
    print("As the value of the prediction is ",prediction," the customer is likely to switch, so retention action is needed")

## <u>NOTE</u>
#### The number of layers and the number of neurons per layer depend on the problem; in this case, using more layers resulted in the model overfitting the training data.