In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [4]:
titanic_df = pd.read_csv('../datasets/titanic/titanic_processed.csv')

titanic_df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
0,0,3,1,35.0,0,0,7.125,0,0,1
1,1,3,1,29.0,0,0,9.5,0,0,1
2,0,3,0,39.0,1,5,31.275,0,0,1
3,1,2,0,32.0,0,0,13.0,0,0,1
4,1,2,0,27.0,0,0,10.5,0,0,1


In [16]:
X = titanic_df.drop('Survived', axis=1)

Y = titanic_df['Survived']

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

In [22]:
def summarize_classification(y_test, y_pred):
    
    acc = accuracy_score(y_test, y_pred, normalize=True)
    num_acc = accuracy_score(y_test, y_pred, normalize=False)
    
    prec = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    
    print("accuracy_count : ", num_acc)
    print("accuracy_score : ", acc)
    print("precision_score : ", prec)
    print("recall_score : ", recall)
    print()

In [23]:
from sklearn.model_selection import GridSearchCV

parameters = {'max_depth' : [2, 4, 5, 7, 9, 10]}

grid_search = GridSearchCV(DecisionTreeClassifier(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'max_depth': 5}

In [24]:
decision_tree_model = DecisionTreeClassifier(\
        max_depth = grid_search.best_params_['max_depth']).fit(x_train, y_train)

In [25]:
y_pred = decision_tree_model.predict(x_test)

In [26]:
summarize_classification(y_test, y_pred)

accuracy_count :  107
accuracy_score :  0.7482517482517482
precision_score :  0.6617647058823529
recall_score :  0.7758620689655172



In [27]:
parameters = {
    'penalty' : ['l1', 'l2'],
    'C' : [0.1, 0.4, 0.8, 1, 2, 5]
}

grid_search = GridSearchCV(LogisticRegression(solver='liblinear'), parameters, cv=3, return_train_score=True)

grid_search.fit(x_train, y_train)

grid_search.best_params_

{'C': 5, 'penalty': 'l1'}

In [28]:
logistic_model = LogisticRegression(solver='liblinear',\
    penalty=grid_search.best_params_['penalty'],\
    C= grid_search.best_params_['C']).\
    fit(x_train,y_train)

In [29]:
y_pred = logistic_model.predict(x_test)

In [30]:
summarize_classification(y_test, y_pred)

accuracy_count :  114
accuracy_score :  0.7972027972027972
precision_score :  0.7457627118644068
recall_score :  0.7586206896551724

