# Titanic Project

### Import of Various Libraries

In [9]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd

In [10]:
df = sns.load_dataset('titanic')

In [11]:
df.shape

(891, 15)

In [12]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


### Exploratory Data Analysis - EDA

In [13]:
df.shape

(891, 15)

In [14]:
A = df[['survived','pclass', 'sex', 'age']]

In [15]:
A.shape

(891, 4)

In [16]:
A.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [17]:
A.shape

(714, 4)

In [18]:
X = A[['pclass', 'sex', 'age']]

In [19]:
y = A['survived']

In [20]:
X.shape

(714, 3)

In [21]:
y.shape

(714,)

In [22]:
X.describe()

Unnamed: 0,pclass,age
count,714.0,714.0
mean,2.236695,29.699118
std,0.83825,14.526497
min,1.0,0.42
25%,1.0,20.125
50%,2.0,28.0
75%,3.0,38.0
max,3.0,80.0


In [23]:
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 714 entries, 0 to 890
Data columns (total 3 columns):
pclass    714 non-null int64
sex       714 non-null object
age       714 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 22.3+ KB


### Data Preprocessing

In [24]:
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()

In [25]:
X['sex'] = lb.fit_transform(X['sex'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [26]:
X.head()

Unnamed: 0,pclass,sex,age
0,3,1,22.0
1,1,0,38.0
2,3,0,26.0
3,1,0,35.0
4,3,1,35.0


In [27]:
y.value_counts()

0    424
1    290
Name: survived, dtype: int64

### Decision Tree Classifier

In [28]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

  from numpy.core.umath_tests import inner1d


In [29]:
from sklearn.model_selection import train_test_split

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [31]:
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### Model Performance Evaluation:

def print_score(clf, X_train, y_train, X_test, y_test, train=True):
    '''
    print the accuracy score, classification report and confusion matrix of classifier
    '''
    if train:
        '''
        training performance
        '''
        print("Train Result:\n")
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_train, clf.predict(X_train))))
        print("Classification Report: \n {}\n".format(classification_report(y_train, clf.predict(X_train))))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_train, clf.predict(X_train))))

        res = cross_val_score(clf, X_train, y_train, cv=10, scoring='accuracy')
        print("Average Accuracy: \t {0:.4f}".format(np.mean(res)))
        print("Accuracy SD: \t\t {0:.4f}".format(np.std(res)))
        
    elif train==False:
        '''
        test performance
        '''
        print("Test Result:\n")        
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_test, clf.predict(X_test))))
        print("Classification Report: \n {}\n".format(classification_report(y_test, clf.predict(X_test))))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_test, clf.predict(X_test))))    

In [33]:
clf = DecisionTreeClassifier(random_state=42)

In [34]:
clf.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best')

In [35]:
print_score(clf, X_train, y_train, X_test, y_test, train=True)

Train Result:

accuracy score: 0.9158

Classification Report: 
              precision    recall  f1-score   support

          0       0.88      0.99      0.93       299
          1       0.98      0.81      0.88       200

avg / total       0.92      0.92      0.91       499


Confusion Matrix: 
 [[296   3]
 [ 39 161]]

Average Accuracy: 	 0.8054
Accuracy SD: 		 0.0700


In [36]:
print_score(clf, X_train, y_train, X_test, y_test, train=False)

Test Result:

accuracy score: 0.7721

Classification Report: 
              precision    recall  f1-score   support

          0       0.77      0.86      0.82       125
          1       0.77      0.64      0.70        90

avg / total       0.77      0.77      0.77       215


Confusion Matrix: 
 [[108  17]
 [ 32  58]]



### Bagging Classifier

In [37]:
bag_clf = BaggingClassifier(base_estimator=clf, n_estimators=1000,
                           bootstrap=True, n_jobs=-1,
                           random_state=42)

In [38]:
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=1000, n_jobs=-1, oob_score=False,
         random_state=42, verbose=0, warm_start=False)

### Model Performance Evaluation:

In [39]:
print_score(bag_clf, X_train, y_train, X_test, y_test, train = True)

Train Result:

accuracy score: 0.9158

Classification Report: 
              precision    recall  f1-score   support

          0       0.91      0.95      0.93       299
          1       0.92      0.86      0.89       200

avg / total       0.92      0.92      0.92       499


Confusion Matrix: 
 [[285  14]
 [ 28 172]]

Average Accuracy: 	 0.8014
Accuracy SD: 		 0.0736


In [40]:
print_score(bag_clf, X_train, y_train, X_test, y_test, train=False)

Test Result:

accuracy score: 0.7860

Classification Report: 
              precision    recall  f1-score   support

          0       0.80      0.84      0.82       125
          1       0.76      0.71      0.74        90

avg / total       0.78      0.79      0.78       215


Confusion Matrix: 
 [[105  20]
 [ 26  64]]



### Ensemble - Random Forest Classifier

In [41]:
from sklearn.ensemble import RandomForestClassifier

In [42]:
from sklearn.model_selection import train_test_split

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [44]:
def print_score(clf, X_train, y_train, X_test, y_test, train=True):
    '''
    print the accuracy score, classification report and confusion matrix of classifier
    '''
    if train:
        '''
        training performance
        '''
        print("Train Result:\n")
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_train, clf.predict(X_train))))
        print("Classification Report: \n {}\n".format(classification_report(y_train, clf.predict(X_train))))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_train, clf.predict(X_train))))

        res = cross_val_score(clf, X_train, y_train, cv=10, scoring='accuracy')
        print("Average Accuracy: \t {0:.4f}".format(np.mean(res)))
        print("Accuracy SD: \t\t {0:.4f}".format(np.std(res)))
        
    elif train==False:
        '''
        test performance
        '''
        print("Test Result:\n")        
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_test, clf.predict(X_test))))
        print("Classification Report: \n {}\n".format(classification_report(y_test, clf.predict(X_test))))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_test, clf.predict(X_test))))    

In [45]:
rf_clf = RandomForestClassifier(random_state=42)

In [46]:
rf_clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

### Model Performance Evaluation:

In [47]:
print_score(rf_clf, X_train, y_train, X_test, y_test, train=True)

Train Result:

accuracy score: 0.8894

Classification Report: 
              precision    recall  f1-score   support

          0       0.88      0.94      0.91       386
          1       0.90      0.81      0.85       256

avg / total       0.89      0.89      0.89       642


Confusion Matrix: 
 [[364  22]
 [ 49 207]]

Average Accuracy: 	 0.7944
Accuracy SD: 		 0.0272


In [48]:
print_score(rf_clf, X_train, y_train, X_test, y_test, train=False)

Test Result:

accuracy score: 0.8472

Classification Report: 
              precision    recall  f1-score   support

          0       0.81      0.92      0.86        38
          1       0.90      0.76      0.83        34

avg / total       0.85      0.85      0.85        72


Confusion Matrix: 
 [[35  3]
 [ 8 26]]



### Pipeline & Grid Search CV

In [49]:
from sklearn.pipeline import Pipeline

In [50]:
from sklearn.model_selection import GridSearchCV

In [51]:
rf_clf = RandomForestClassifier(random_state=42)

In [52]:
params_grid = {'max_depth': [3, None],
              'min_samples_split': [2, 3, 10],
              'min_samples_leaf': [1, 3, 10],
              'bootstrap': [True, False],
              'criterion': ['gini', 'entropy']}

In [53]:
grid_search = GridSearchCV(rf_clf, params_grid,
                          n_jobs=-1, cv=5,
                          verbose=1, scoring='accuracy')

In [54]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed:    5.8s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    8.7s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=42, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'max_depth': [3, None], 'min_samples_split': [2, 3, 10], 'min_samples_leaf': [1, 3, 10], 'bootstrap': [True, False], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=1)

### Model Performance Evaluation:

In [55]:
grid_search.best_score_

0.8130841121495327

In [56]:
grid_search.best_estimator_.get_params()

{'bootstrap': True,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 10,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 10,
 'n_jobs': 1,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}

In [57]:
print_score(grid_search, X_train, y_train, X_test, y_test, train = True)

Train Result:

accuracy score: 0.8240

Classification Report: 
              precision    recall  f1-score   support

          0       0.83      0.90      0.86       386
          1       0.82      0.71      0.76       256

avg / total       0.82      0.82      0.82       642


Confusion Matrix: 
 [[346  40]
 [ 73 183]]

Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.6s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.8s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.5s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.5s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.7s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.5s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.7s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.5s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  55 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.6s finished


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.8s finished


Average Accuracy: 	 0.7989
Accuracy SD: 		 0.0222


In [58]:
print_score(grid_search, X_train, y_train, X_test, y_test, train = False)

Test Result:

accuracy score: 0.7917

Classification Report: 
              precision    recall  f1-score   support

          0       0.78      0.84      0.81        38
          1       0.81      0.74      0.77        34

avg / total       0.79      0.79      0.79        72


Confusion Matrix: 
 [[32  6]
 [ 9 25]]



### Extra Tree Classifier

In [59]:
from sklearn.ensemble import ExtraTreesClassifier

In [60]:
xt_clf = ExtraTreesClassifier(random_state=42)

In [61]:
xt_clf.fit(X_train, y_train)

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=42, verbose=0, warm_start=False)

### Model Performance Evaluation:

In [62]:
print_score(xt_clf, X_train, y_train, X_test, y_test, train = True)

Train Result:

accuracy score: 0.8956

Classification Report: 
              precision    recall  f1-score   support

          0       0.88      0.96      0.92       386
          1       0.94      0.79      0.86       256

avg / total       0.90      0.90      0.89       642


Confusion Matrix: 
 [[372  14]
 [ 53 203]]

Average Accuracy: 	 0.7977
Accuracy SD: 		 0.0267


### Ada Boost Classifier

In [63]:
from sklearn.ensemble import AdaBoostClassifier

In [64]:
ada_clf = AdaBoostClassifier()

In [65]:
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)

### Model Performance Evaluation:

In [66]:
print_score(ada_clf, X_train, y_train, X_test, y_test, train = True)

Train Result:

accuracy score: 0.8178

Classification Report: 
              precision    recall  f1-score   support

          0       0.85      0.85      0.85       386
          1       0.77      0.77      0.77       256

avg / total       0.82      0.82      0.82       642


Confusion Matrix: 
 [[327  59]
 [ 58 198]]

Average Accuracy: 	 0.8005
Accuracy SD: 		 0.0355


In [67]:
print_score(ada_clf, X_train, y_train, X_test, y_test, train = False)

Test Result:

accuracy score: 0.8611

Classification Report: 
              precision    recall  f1-score   support

          0       0.87      0.87      0.87        38
          1       0.85      0.85      0.85        34

avg / total       0.86      0.86      0.86        72


Confusion Matrix: 
 [[33  5]
 [ 5 29]]



### Ensemble Method - Ada Boost with Random Forest Classifier

In [68]:
from sklearn.ensemble import RandomForestClassifier

In [69]:
ada_clf = AdaBoostClassifier(RandomForestClassifier())

In [70]:
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
          learning_rate=1.0, n_estimators=50, random_state=None)

### Model Performance Evaluation:

In [71]:
print_score(ada_clf, X_train, y_train, X_test, y_test, train = True)

Train Result:

accuracy score: 0.8956

Classification Report: 
              precision    recall  f1-score   support

          0       0.90      0.94      0.92       386
          1       0.90      0.84      0.86       256

avg / total       0.90      0.90      0.89       642


Confusion Matrix: 
 [[361  25]
 [ 42 214]]

Average Accuracy: 	 0.7882
Accuracy SD: 		 0.0293


In [72]:
print_score(ada_clf, X_train, y_train, X_test, y_test, train = False)

Test Result:

accuracy score: 0.8056

Classification Report: 
              precision    recall  f1-score   support

          0       0.79      0.87      0.82        38
          1       0.83      0.74      0.78        34

avg / total       0.81      0.81      0.80        72


Confusion Matrix: 
 [[33  5]
 [ 9 25]]



In [73]:
ada_clf = AdaBoostClassifier(base_estimator = RandomForestClassifier())

In [74]:
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
          learning_rate=1.0, n_estimators=50, random_state=None)

In [75]:
print_score(ada_clf, X_train, y_train, X_test, y_test, train = True)

Train Result:

accuracy score: 0.8956

Classification Report: 
              precision    recall  f1-score   support

          0       0.90      0.94      0.92       386
          1       0.90      0.84      0.86       256

avg / total       0.90      0.90      0.89       642


Confusion Matrix: 
 [[361  25]
 [ 42 214]]

Average Accuracy: 	 0.7822
Accuracy SD: 		 0.0329


In [76]:
print_score(ada_clf, X_train, y_train, X_test, y_test, train = False)

Test Result:

accuracy score: 0.8333

Classification Report: 
              precision    recall  f1-score   support

          0       0.80      0.92      0.85        38
          1       0.89      0.74      0.81        34

avg / total       0.84      0.83      0.83        72


Confusion Matrix: 
 [[35  3]
 [ 9 25]]



### Ggradient Boosting Classifier

In [77]:
from sklearn.ensemble import GradientBoostingClassifier

In [78]:
gbc_clf = GradientBoostingClassifier()
gbc_clf.fit(X_train, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

### Model Performance Evaluation:

In [79]:
print_score(gbc_clf, X_train, y_train, X_test, y_test, train = True)

Train Result:

accuracy score: 0.8692

Classification Report: 
              precision    recall  f1-score   support

          0       0.86      0.93      0.90       386
          1       0.88      0.77      0.82       256

avg / total       0.87      0.87      0.87       642


Confusion Matrix: 
 [[360  26]
 [ 58 198]]

Average Accuracy: 	 0.8007
Accuracy SD: 		 0.0210


In [80]:
print_score(gbc_clf, X_train, y_train, X_test, y_test, train = False)

Test Result:

accuracy score: 0.8333

Classification Report: 
              precision    recall  f1-score   support

          0       0.80      0.92      0.85        38
          1       0.89      0.74      0.81        34

avg / total       0.84      0.83      0.83        72


Confusion Matrix: 
 [[35  3]
 [ 9 25]]



### XGB Boost Classifier

In [82]:
import xgboost as xgb

In [83]:
xgb_clf = xgb.XGBClassifier(max_depth = 5, n_estimators = 10000, learning_rate=0.3,
                           n_jobs=-1)

In [84]:
xgb_clf.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.3, max_delta_step=0,
       max_depth=5, min_child_weight=1, missing=None, n_estimators=10000,
       n_jobs=-1, nthread=None, objective='binary:logistic',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=True, subsample=1)

### Model Performance Evaluation:

In [85]:
print_score(xgb_clf, X_train, y_train, X_test, y_test, train=True)

Train Result:



  if diff:


accuracy score: 0.8941



  if diff:


Classification Report: 
              precision    recall  f1-score   support

          0       0.88      0.95      0.92       386
          1       0.92      0.81      0.86       256

avg / total       0.90      0.89      0.89       642




  if diff:


Confusion Matrix: 
 [[367  19]
 [ 49 207]]



  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


Average Accuracy: 	 0.7900
Accuracy SD: 		 0.0311


  if diff:


In [86]:
print_score(xgb_clf, X_train, y_train, X_test, y_test, train = False)

Test Result:

accuracy score: 0.8472

Classification Report: 
              precision    recall  f1-score   support

          0       0.81      0.92      0.86        38
          1       0.90      0.76      0.83        34

avg / total       0.85      0.85      0.85        72


Confusion Matrix: 
 [[35  3]
 [ 8 26]]



  if diff:
  if diff:
  if diff:
