In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
dataset = pd.read_csv('/content/Churn_Modelling.csv')

In [4]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
dataset.isna().sum()

Unnamed: 0,0
RowNumber,0
CustomerId,0
Surname,0
CreditScore,0
Geography,0
Gender,0
Age,0
Tenure,0
Balance,0
NumOfProducts,0


In [6]:
X = dataset.iloc[: , 3:13].values

In [7]:
y=dataset.iloc[: , -1].values

In [8]:
X[0]

array([619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
      dtype=object)

In [9]:
from sklearn.preprocessing import LabelEncoder

In [10]:
labelencoder = LabelEncoder()
X[: , 2] = labelencoder.fit_transform(X[: , 2])

In [11]:
X[0]

array([619, 'France', 0, 42, 2, 0.0, 1, 1, 1, 101348.88], dtype=object)

In [12]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [13]:
ct = ColumnTransformer(transformers = [('encoder' , OneHotEncoder() , [1])] , remainder='passthrough')

In [14]:
X = np.array(ct.fit_transform(X))

In [15]:
X[0]

array([1.0, 0.0, 0.0, 619, 0, 42, 2, 0.0, 1, 1, 1, 101348.88],
      dtype=object)

In [16]:
X = X[: ,1:] #to avoid dummy variable trap

In [17]:
X[0]

array([0.0, 0.0, 619, 0, 42, 2, 0.0, 1, 1, 1, 101348.88], dtype=object)

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
x_train , x_test , y_train , y_test = train_test_split(X , y, test_size=0.2 , random_state=0)

In [22]:
!pip install xgboost # install the xgboost library
from xgboost import XGBClassifier # import the XGBClassifier class



In [23]:
classifier = XGBClassifier()
classifier.fit(x_train , y_train)

In [24]:
y_pred = classifier.predict(x_test)

In [25]:
from sklearn.metrics import accuracy_score

In [26]:
accuracy_score(y_test , y_pred)

0.853

#GridSearch CV

In [27]:
from sklearn.model_selection import GridSearchCV

In [28]:
parameters = {
    'learning_rate':[0.1,0.15,0.2,0.25,0.3],
    'max_depth':[3,4,5,6,7],
    'gamma':[0.0 , 0.1,0.2,0.3, 0.4],
    'min_child_width':[1,2,3,4,5,6]
}

In [29]:
gridsearch = GridSearchCV(estimator=classifier , param_grid=parameters , scoring='neg_log_loss' , cv=10 , n_jobs=-1)

In [30]:
gridsearch = gridsearch.fit(x_train , y_train)

Parameters: { "min_child_width" } are not used.



In [31]:
gridsearch.best_estimator_

In [32]:
gridsearch.best_params_

{'gamma': 0.4, 'learning_rate': 0.15, 'max_depth': 3, 'min_child_width': 1}

In [33]:
gridsearch.best_score_

-0.33292938949717243

#Randomized Search CV

In [34]:
from sklearn.model_selection import RandomizedSearchCV

In [35]:
parameters = {
    'learning_rate':[0.1,0.15,0.2,0.25,0.3],
  'gamma':[0,0.1,0.2,0.3,0.4],
  'max_depth':[3,4,5,6,7],
  'min_child_weight':[1,2,3,4,5,6]
}

In [36]:
randomcv = RandomizedSearchCV(estimator=classifier , param_distributions=parameters , cv = 10 , n_jobs=-1)

In [37]:
randomcv.fit(x_train , y_train)

In [38]:
randomcv.best_estimator_

In [39]:
randomcv.best_params_

{'min_child_weight': 2, 'max_depth': 3, 'learning_rate': 0.25, 'gamma': 0.3}

In [40]:
randomcv.best_score_

0.86425