In [13]:
from pydataset import data
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

#
# Getting the data

In [14]:
# Get the data
titanic = data('titanic')

# pick a random sample
titanic.sample(5)

Unnamed: 0,class,age,sex,survived
1221,3rd class,adults,women,no
93,1st class,adults,man,no
317,1st class,adults,women,no
13,1st class,adults,man,yes
1064,3rd class,adults,man,no


#
# Featuring engineering

In [15]:
titanic['survived'].value_counts()

survived
no     817
yes    499
Name: count, dtype: int64

In [3]:
# One Hot Enconding
titanic = pd.get_dummies(titanic, drop_first=True).astype(int)

display(titanic.sample(5))

Unnamed: 0,class_2nd class,class_3rd class,age_child,sex_women,survived_yes
336,1,0,0,0,1
156,0,0,0,0,0
1224,0,1,0,1,0
696,0,1,0,0,0
506,1,0,0,1,1


#
# Split the data

In [4]:
X = np.array(titanic.drop('survived_yes', axis=1))
y = np.array(titanic['survived_yes'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#
# Train the model

In [5]:
# train
model = LogisticRegression()
model.fit(X_train, y_train)

#
# Predictions

In [6]:
# Predictions
print('classe-1 child girl:  ', model.predict(np.array([[0, 0, 1, 1]]))[0])
print('class-3 adult-age male:  ', model.predict(np.array([[0, 0, 1, 1]]))[0])

classe-1 child girl:   1
class-3 adult-age male:   1


#
# Scoring

In [7]:
# scoring the model
model.score(X_test, y_test)

0.7954545454545454

In [8]:
predictions = (model.predict(X_test) > .5).astype(int)
np.sum(predictions == y_test) / len(y_test)

0.7954545454545454

#
# Comparing different optimizers (solvers)

In [10]:
# different types of optimizers
model_lbfgs = LogisticRegression(solver='lbfgs')
model_newton_cg = LogisticRegression(solver='newton-cg')
model_liblinear = LogisticRegression(solver='liblinear')
model_sag = LogisticRegression(solver='sag')
model_saga = LogisticRegression(solver='saga')

# training the models
model_lbfgs.fit(X, y)
model_newton_cg.fit(X, y)
model_liblinear.fit(X, y)
model_sag.fit(X, y)
model_saga.fit(X, y)

# predictions
lbfgs = model_lbfgs.score(X_test, y_test)
newton_cg = model_newton_cg.score(X_test, y_test)
liblinear = model_liblinear.score(X_test, y_test)
sag = model_sag.score(X_test, y_test)
saga = model_saga.score(X_test, y_test)

print('lbfgs: ', lbfgs)
print('newton_cg: ', newton_cg)
print('newton_cg: ', liblinear)
print('newton_cg: ', sag)
print('newton_cg: ', saga)

lbfgs:  0.7954545454545454
newton_cg:  0.7954545454545454
newton_cg:  0.7954545454545454
newton_cg:  0.7954545454545454
newton_cg:  0.7954545454545454
