## Import Libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Import visualization libraries 
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
def ignore_warn(*args, **kwargs):
    """No-op warning handler: accepts any arguments and discards them.

    Kept so that any later cell still referencing ``ignore_warn`` keeps
    working; it is no longer installed in place of ``warnings.warn``.
    """
    return None


# Silence noisy warnings (e.g. from matplotlib and seaborn) through the
# warnings filter instead of overwriting warnings.warn: the filter also
# covers code that imported `warn` directly or calls warn_explicit, and it
# leaves the standard library module untouched.
warnings.filterwarnings("ignore")

%matplotlib inline

# Model building libraries 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import keras
from keras.models import Sequential
from keras.layers import Dense
import os
# Walk the Kaggle input directory and print the full path of every file found
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Loading The Data

> Let's understand our data

In [None]:
# Load the churn CSV into a pandas DataFrame
csv_path = '../input/churn-modelling/Churn_Modelling.csv'
df = pd.read_csv(csv_path)

# Preview the first five rows
df.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple
df.shape

In [None]:
# Per-column summary: names, non-null counts and dtypes
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

## EDA 

> Let's visualize our data to understand it better

In [None]:
# Relation between Exited and Gender.
# Frequency table: one row per Exited value with the total number of customers
# and the split into Male / Female.
# A single crosstab replaces the per-row apply that re-scanned the whole frame
# for every (Exited, Gender) combination.
gender_counts = (
    pd.crosstab(df['Exited'], df['Gender'])
    .reindex(columns=['Male', 'Female'], fill_value=0)  # fixed column order, 0 if a gender is absent
    .rename_axis(columns=None)                          # drop the 'Gender' label on the column index
)
freq_table = (
    df.groupby(['Exited']).size().reset_index(name='Count')
    .join(gender_counts, on='Exited')
)

plt.figure(figsize=(12, 5))
sns.countplot(x='Exited', data=df, hue='Gender')
freq_table

In [None]:
# Frequency table: one row per country with the total number of customers and
# the split into Exited == 0 / Exited == 1 (columns '0' and '1').
# A single crosstab replaces the per-row apply that re-scanned the whole frame.
exit_counts = (
    pd.crosstab(df['Geography'], df['Exited'])
    .reindex(columns=[0, 1], fill_value=0)  # always expose both classes
    .rename(columns=str)                    # column labels '0' and '1'
    .rename_axis(columns=None)              # drop the 'Exited' label on the column index
)
freq_table = (
    df.groupby(['Geography']).size().reset_index(name='Count')
    .join(exit_counts, on='Geography')
)

# Initializing labels and sizes for the pie chart
labels = ['France', 'Germany', 'Spain']  # names on pie chart
# Customers per country, in the same order as `labels`
sizes = df['Geography'].value_counts().reindex(labels, fill_value=0).tolist()
# only "explode" the 2nd and 3rd slice (i.e. 'Germany', 'Spain')
explode = (0, 0.1, 0.1)
# add colors
colors = ['#FFA32F', '#378AFF', '#93F03B']

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))
# Left panel: churn counts per country
sns.countplot(x='Geography', data=df, hue='Exited', ax=ax[0])
# Right panel: share of customers per country.
# Text properties are case-sensitive on newer Matplotlib releases, so the key
# must be 'fontweight' ('fontWeight' is rejected as an unknown property).
ax[1].pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
    textprops={'fontsize': 15, 'fontweight': 500},
)
# Equal aspect ratio ensures that pie is drawn as a circle
ax[1].axis('equal')
plt.tight_layout()
freq_table

In [None]:
# Customers per country, split by gender
fig, ax = plt.subplots(figsize=(15, 5))
sns.countplot(data=df, x='Geography', hue='Gender', ax=ax)

In [None]:
# Histogram of credit scores (20 bins, bars slightly narrowed for readability)
fig, ax = plt.subplots(figsize=(10, 7))
df['CreditScore'].plot.hist(ax=ax, grid=True, bins=20, rwidth=0.9)
ax.set_xlabel('CreditScore')
# Keep only light horizontal grid emphasis
ax.grid(axis='y', alpha=0.75)

In [None]:
# Author's observation: the majority of people are between 30 and 40 years old
youngest = df['Age'].min()
oldest = df['Age'].max()
print('The smallest age is {} and the largest is {}'.format(youngest, oldest))

# Age distribution (histogram with a density estimate)
# NOTE(review): sns.distplot is deprecated in recent seaborn releases in favour
# of histplot/displot — consider migrating once the seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(10, 5))
sns.distplot(df['Age'], ax=ax)

In [None]:
# Corr heatmap
# Restrict to numeric columns explicitly: the frame still holds string columns
# (e.g. Surname, Geography, Gender) at this point, and DataFrame.corr() raises
# on those in pandas >= 2.0. select_dtypes works on old and new pandas alike.
# The matrix is computed once and reused for both the plot and the ranking.
corr_matrix = df.select_dtypes(include='number').corr()

plt.figure(figsize=(15, 7))
sns.heatmap(corr_matrix, vmin=-1, cmap='coolwarm', annot=True)

# Correlation of every numeric feature with the target, strongest positive first
corr_matrix['Exited'].sort_values(ascending=False)

## Data Preprocessing 

> Now let's preprocess our data so that it is ready for the model

In [None]:
# One-hot encode the two categorical features; drop_first removes one level
# from each so the remaining dummy columns are not redundant
geo = pd.get_dummies(df['Geography'], drop_first=True)
gender = pd.get_dummies(df['Gender'], drop_first=True)

# Prepend the dummy columns to the original frame (original columns keep their order)
df = pd.concat((geo, gender, df), axis='columns')

In [None]:
# Preview the frame now that the dummy columns have been prepended
df.head()

In [None]:
# Remove identifier columns and the raw categorical columns
# (the latter are already represented by their dummy columns)
unused_columns = ['RowNumber', 'CustomerId', 'Surname', 'Geography', 'Gender']
df = df.drop(columns=unused_columns)

In [None]:
# Preview the frame after the unused columns were dropped
df.head()

In [None]:
# Separate the features from the target.
# Selecting the target by name (rather than "last column") keeps X and y
# correct even if the column order of df changes upstream.
X = df.drop(columns='Exited').values
y = df['Exited'].values

In [None]:
# Splitting the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize our data.
# The scaler is fitted on the training split only and then applied unchanged to
# the test split; re-fitting on the test data would scale the two splits with
# different statistics and leak test information into preprocessing.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Building Model

> We will build an ANN using Keras 

In [None]:
# Fully connected binary classifier: two ReLU hidden layers (32 and 16 units)
# followed by a single sigmoid output unit.
# The input width is taken from the training matrix instead of a hard-coded 11,
# so the model stays valid if the feature set changes.
n_features = X_train.shape[1]

model = Sequential([
    Dense(32, activation='relu', kernel_initializer='normal', input_dim=n_features),
    Dense(16, activation='relu', kernel_initializer='normal'),
    Dense(1, activation='sigmoid', kernel_initializer='normal'),
])

# Compiling the ANN
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Fitting the model
# Trains on the training split for 100 epochs in mini-batches of 20 samples.
# No validation data is passed here, so only training metrics are tracked.
model.fit(X_train, y_train, batch_size = 20, epochs = 100)

In [None]:
# Predict on the test set and turn the sigmoid outputs into boolean class
# labels using a 0.5 decision threshold
prediction = model.predict(X_test) > 0.5

In [None]:
# NOTE(review): the original cell carried the remark "86.15%", presumably the
# accuracy observed on an earlier run — confirm before quoting it, since no
# random seed is fixed for the model training.
# The metric functions are imported once in the notebook's import cell.

print('Accuracy: {}% \n'.format(accuracy_score(y_test, prediction)* 100)) # Calculating accuracy
print('*'*100)
# Rows are true classes, columns are predicted classes
print(confusion_matrix(y_test, prediction))
# Per-class precision, recall and F1
print(classification_report(y_test, prediction))