# Randomized Search CV

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [2]:
df=pd.read_csv("Churn_Modelling.csv")

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df.shape

(10000, 14)

In [5]:
# Separating independent and dependent Variable
x=df.iloc[:,3:13].values
y=df.iloc[:,-1].values
y

array([1, 0, 1, ..., 1, 1, 0], dtype=int64)

In [6]:
x

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [7]:
#Encoding Catogerical data into numerical-->Gender Male-1, Female-0
from sklearn.preprocessing import LabelEncoder
labelencoder=LabelEncoder()
x[:,2]=labelencoder.fit_transform(x[:,2])

In [8]:
x

array([[619, 'France', 0, ..., 1, 1, 101348.88],
       [608, 'Spain', 0, ..., 0, 1, 112542.58],
       [502, 'France', 0, ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 0, ..., 0, 1, 42085.58],
       [772, 'Germany', 1, ..., 1, 0, 92888.52],
       [792, 'France', 0, ..., 1, 0, 38190.78]], dtype=object)

In [9]:
# Encoding Geography Categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct=ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[1])],remainder='passthrough')
x=np.array(ct.fit_transform(x))

In [10]:
x[0]

array([1.0, 0.0, 0.0, 619, 0, 42, 2, 0.0, 1, 1, 1, 101348.88],
      dtype=object)

In [11]:
#Apply Dummy Trap
x = x[: ,1:]
x

array([[0.0, 0.0, 619, ..., 1, 1, 101348.88],
       [0.0, 1.0, 608, ..., 0, 1, 112542.58],
       [0.0, 0.0, 502, ..., 1, 0, 113931.57],
       ...,
       [0.0, 0.0, 709, ..., 0, 1, 42085.58],
       [1.0, 0.0, 772, ..., 1, 0, 92888.52],
       [0.0, 0.0, 792, ..., 1, 0, 38190.78]], dtype=object)

In [12]:
#Train test split
from sklearn.model_selection import train_test_split
x_train , x_test , y_train , y_test = train_test_split(x , y, test_size=0.2 , random_state=0)

In [13]:
#Applying XG boost classifier
from xgboost import XGBClassifier
classifier = XGBClassifier()
classifier.fit(x_train , y_train)
y_pred=classifier.predict(x_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0])

In [14]:
#Accuracy
from sklearn.metrics import accuracy_score
accuracy_score(y_test , y_pred)

0.8545

In [15]:
#Hyper Parametrer tuning For More Accurate Accuracy
from sklearn.model_selection import RandomizedSearchCV

In [16]:
parameters = {
    'learning_rate':[0.1,0.15,0.2,0.25,0.3],
    'max_depth':[3,4,5,6,7],
    'gamma':[0.0 , 0.1,0.2,0.3, 0.4],
    'min_child_width':[1,2,3,4,5,6]
}

In [17]:
randomcv = RandomizedSearchCV(estimator=classifier , param_distributions=parameters , cv = 10 , n_jobs=-1)

In [18]:
randomcv.fit(x_train , y_train)

Parameters: { "min_child_width" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




In [19]:
randomcv.best_estimator_

In [20]:
randomcv.best_params_

{'min_child_width': 3, 'max_depth': 3, 'learning_rate': 0.2, 'gamma': 0.0}

In [21]:
randomcv.best_score_

0.864625