In [6]:
# Decision Tree, Random Forest, Bagging and Boosting

In [7]:
# import common libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [8]:
# Load the Titanic data, keep only complete rows, and remove the "Name"
# column before dummy coding.
# NOTE(review): dropna() runs on every column of the file, not just the
# features selected below, so a gap in an unused column also removes the
# row — confirm this is intended.
titan = (
    pd.read_csv("titanic.csv")
    .dropna()
    .drop(columns=["Name"])
)

# Dummy-code the categorical columns as integer indicators; drop_first=True
# omits the first level of each category.
titan = pd.get_dummies(titan, drop_first=True, dtype=int)

# Dependent variable (y) as a NumPy array
y = np.array(titan["Survival_Survived"])

# Independent variables (X) as a DataFrame subset
X = titan[["Age", "Gender_Male", "Class_2nd", "Class_3rd", "Fare"]]

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [9]:
# Single decision tree, splitting on the Gini index
from sklearn.tree import DecisionTreeClassifier

# Build the tree with a fixed seed and train it on the training split
# (fit() returns the fitted estimator, so the two steps are chained)
clf_gini = DecisionTreeClassifier(criterion='gini', random_state=42).fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred_dt = clf_gini.predict(X_test)


In [10]:
# Score the decision tree's test-set predictions:
# confusion matrix (rows = actual class, columns = predicted class) ...
conf_matrix = metrics.confusion_matrix(y_test, y_pred_dt)

# ... and the overall share of correct predictions
accuracy = metrics.accuracy_score(y_test, y_pred_dt)

# Report both results
print("\nAccuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)


Accuracy: 0.7197452229299363

Confusion Matrix:
 [[142  33]
 [ 55  84]]


In [11]:
# Bagging: an ensemble of decision trees

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# 100 decision trees as base learners; max_samples=1.0 sizes each tree's
# sample at 100% of the training rows, and random_state fixes the resampling
bagging_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=1.0,
    random_state=42,
)

# Fit the ensemble on the training split
bagging_clf.fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred_bag = bagging_clf.predict(X_test)

In [12]:
# Score the bagging ensemble's test-set predictions:
# confusion matrix (rows = actual class, columns = predicted class) ...
conf_matrix = metrics.confusion_matrix(y_test, y_pred_bag)

# ... and the overall share of correct predictions
accuracy = metrics.accuracy_score(y_test, y_pred_bag)

# Report both results
print("\nAccuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)


Accuracy: 0.7770700636942676

Confusion Matrix:
 [[150  25]
 [ 45  94]]


In [13]:
# Random forest
from sklearn.ensemble import RandomForestClassifier

# Build a 1000-tree forest with a fixed seed and train it on the training
# split (fit() returns the fitted estimator, so the two steps are chained)
clf = RandomForestClassifier(n_estimators=1000, random_state=42).fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred_rf = clf.predict(X_test)

In [14]:
# Score the random forest's test-set predictions:
# confusion matrix (rows = actual class, columns = predicted class) ...
conf_matrix = metrics.confusion_matrix(y_test, y_pred_rf)

# ... and the overall share of correct predictions
accuracy = metrics.accuracy_score(y_test, y_pred_rf)

# Report both results
print("\nAccuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)


Accuracy: 0.7802547770700637

Confusion Matrix:
 [[149  26]
 [ 43  96]]


In [15]:
# AdaBoost: boosting with decision trees as base learners

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Up to 100 boosting rounds at learning_rate=1.0, seeded for reproducibility.
# NOTE(review): the base tree has no depth limit, whereas AdaBoost is
# normally used with shallow weak learners (scikit-learn's default base
# estimator is a depth-1 tree) — confirm the unrestricted depth is intended.
ab_clf = AdaBoostClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    learning_rate=1.0,
    random_state=42,
)

# Fit the ensemble on the training split
ab_clf.fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred_ab = ab_clf.predict(X_test)

In [16]:
# Score the AdaBoost model's test-set predictions (y_pred_ab):
# confusion matrix (rows = actual class, columns = predicted class) ...
conf_matrix = metrics.confusion_matrix(y_test, y_pred_ab)

# ... and the overall share of correct predictions
accuracy = metrics.accuracy_score(y_test, y_pred_ab)

# Report both results
print("\nAccuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)


Accuracy: 0.7356687898089171

Confusion Matrix:
 [[142  33]
 [ 50  89]]


In [17]:
# Gradient boosting

from sklearn.ensemble import GradientBoostingClassifier

# 100 boosting stages of depth-1 trees, seeded for reproducibility.
# NOTE(review): learning_rate=1.0 applies each stage at full weight (no
# shrinkage) — confirm this is intended rather than a smaller rate.
gb_clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=42,
)

# Fit the model on the training split
gb_clf.fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred_gb = gb_clf.predict(X_test)

In [18]:
# Score the gradient boosting model's test-set predictions:
# confusion matrix (rows = actual class, columns = predicted class) ...
conf_matrix = metrics.confusion_matrix(y_test, y_pred_gb)

# ... and the overall share of correct predictions
accuracy = metrics.accuracy_score(y_test, y_pred_gb)

# Report both results
print("\nAccuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)


Accuracy: 0.7515923566878981

Confusion Matrix:
 [[150  25]
 [ 53  86]]
