### Importing Libraries

In [1]:
#Importing required libraries
import pandas as pd 
import numpy as np

import warnings
warnings.filterwarnings("ignore")

### Loading the dataset

In [2]:
#reading the data
data=pd.read_csv('data_cleaned.csv')

In [3]:
data.shape

(891, 25)

In [4]:
#first five rows of the data
data.head()

Unnamed: 0,Survived,Age,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,SibSp_0,SibSp_1,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


### Separating independent and dependent variables.

In [5]:
#independent variables
x = data.drop(['Survived'], axis=1)

#dependent variable
y = data['Survived']

### Creating the train and test dataset

In [6]:
#import the train-test split
from sklearn.model_selection import train_test_split

In [7]:
#divide into train and test sets
train_x,test_x,train_y,test_y = train_test_split(x,y, random_state = 101, stratify=y)

### Building a Decision Tree Model

In [8]:
#Importing Decision Tree Classifier 
from sklearn.tree import DecisionTreeClassifier

In [9]:
#creating a decision tree instance
clf = DecisionTreeClassifier(random_state=101)

In [10]:
clf.fit(train_x, train_y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=101, splitter='best')

In [11]:
clf.score(test_x, test_y)

0.7533632286995515

### Implementing Gridsearch

In [12]:
parameter_grid = {
    'max_depth' : [4,5,6,7],
    'max_features': [0.5, 0.7]
    }

In [13]:
from sklearn.model_selection import GridSearchCV
gridsearch = GridSearchCV(estimator=clf, param_grid=parameter_grid, scoring='neg_mean_squared_error', cv=5)

In [14]:
gridsearch.fit(train_x, train_y)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=DecisionTreeClassifier(class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort=False, random_state=101,
                                              splitter='best'),
             iid='warn', n_jobs=None,
             param_grid={'max_depth': [4, 5, 6, 7], 'max_features': [0.5, 0.7]},
             pre_dispatch='2*n_jobs', refit=True, return

In [15]:
gridsearch.best_params_

{'max_depth': 4, 'max_features': 0.7}

In [16]:
#creating a decision tree instance with new hyperparameter values
clf = DecisionTreeClassifier(random_state=101, max_depth=4, max_features=0.7)

In [17]:
clf.fit(train_x, train_y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=0.7, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=101, splitter='best')

In [18]:
clf.score(test_x, test_y)

0.7892376681614349

### Implementing Random Search

In [19]:
from sklearn.model_selection import RandomizedSearchCV

In [20]:
parameter_grid = {
    'max_depth' : [4,5,6,7,8,9],
    'max_features': [0.3,0.5, 0.7,0.9]
    }

In [21]:
randomsearch = RandomizedSearchCV(estimator=clf, param_distributions=parameter_grid, n_iter= 10, cv=5)

In [22]:
randomsearch.fit(train_x, train_y)

RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=DecisionTreeClassifier(class_weight=None,
                                                    criterion='gini',
                                                    max_depth=4,
                                                    max_features=0.7,
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    presort=False,
                                                    random_state=101,
                                                    splitter='best'),
   

In [23]:
randomsearch.best_params_

{'max_features': 0.3, 'max_depth': 9}

In [27]:
#creating a decision tree instance with random search hyperparameters
clf = DecisionTreeClassifier(random_state=101, max_depth=9, max_features=0.3)

In [28]:
clf.fit(train_x, train_y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=9,
                       max_features=0.3, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=101, splitter='best')

In [29]:
clf.score(test_x, test_y)

0.7713004484304933