# Machine Learning Decision Trees (Titanic Deaths)

In [109]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import joblib
from sklearn import tree
from sklearn.preprocessing import LabelEncoder

In [118]:
# Read the passenger table, then discard every row that has a missing value in
# any of the columns listed in `subset`.
titanic_file = pd.read_csv("titanic.csv").dropna(
    subset=["Sex", "Fare", "Pclass", "SibSp", "Age"]
)
# Constant kept from the original cell; none of the cells visible here read it.
TRIALS = 10

In [137]:
# Feature matrix: the passenger table minus the target ("Survived") and the
# columns that are not used as model inputs.
X = titanic_file.drop(columns=["Survived","Name","Ticket","PassengerId","Embarked","Cabin",
                               "Parch"])
y = titanic_file["Survived"]

# Replace the "Sex" column with an integer-encoded copy. The encoded column is
# assigned as a new column, so "Sex_n" ends up as the LAST column of X.
le_sex = LabelEncoder()
X["Sex_n"] = le_sex.fit_transform(X["Sex"])
X = X.drop(columns=["Sex"])

# Hold out 25% of the rows for evaluation. The fixed random_state makes the
# split (and therefore the reported accuracy) identical on every
# Restart & Run All; without it each run shuffles the rows differently.
X_training, X_testing, y_training, y_testing = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Baseline: a decision tree with default hyper-parameters. The tree is seeded as
# well, so the fitted model is reproducible.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_training, y_training)
prediction = model.predict(X_testing)
score = accuracy_score(y_testing, prediction)
# Last expression of the cell, so the hold-out accuracy is actually displayed.
score


In [138]:
# Refit the baseline tree on every available row (no hold-out) and persist it.
# random_state is fixed so that re-running the notebook on the same data writes
# the same model to disk instead of a possibly different tree each time.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)
joblib.dump(model, "Titanic_death_prediction_base.joblib")

['Titanic_death_prediction_base.joblib']

In [139]:
# Reload the persisted baseline tree from disk, rebinding `model` to the
# deserialised estimator. joblib.load unpickles the file, so only point it at
# files this notebook (or another trusted source) produced.
model = joblib.load(filename="Titanic_death_prediction_base.joblib")

In [140]:
# Write the baseline tree to a Graphviz DOT file.
# export_graphviz matches feature_names to features by position, so the labels
# are taken from the column order of X (the frame the model was fitted on)
# rather than from a hand-written list: "Sex_n" is the last column of X, and a
# list in any other order would label the tree's split nodes with the wrong
# feature.
tree.export_graphviz(model, out_file="Titanic_death_tree_base.dot",
                     feature_names=list(X.columns))

In [142]:
# Improved model: pick max_depth by cross-validated grid search.
# The estimator is seeded so the search result is reproducible across runs.
model = DecisionTreeClassifier(random_state=42)
parameters = {"max_depth": range(3, 20)}  # candidate depths 3..19
clf = GridSearchCV(model, parameters, n_jobs=4)
clf.fit(X=X_training, y=y_training)

# Evaluate the winning estimator on the held-out rows, which the search never saw.
cross_validated_model = clf.best_estimator_
prediction = cross_validated_model.predict(X_testing)
score = accuracy_score(y_testing, prediction)

# Persist the tuned tree and export it for visualisation. Feature labels are
# taken from the training frame's column order because export_graphviz assigns
# them by position; a hand-written list in a different order mislabels nodes.
joblib.dump(cross_validated_model,"Cross_validated_decision_tree_titanic_deaths.joblib")
tree.export_graphviz(cross_validated_model, out_file="Titanic_death_tree_cross_validated.dot",
                     feature_names=list(X_training.columns))
# Last expression of the cell, so the hold-out accuracy is actually displayed.
score