# Titanic

## 0 - Collect

The Titanic dataset can be downloaded from the [Kaggle website](https://www.kaggle.com/c/titanic/data).

## 1 - Prepare

In [33]:
import pandas as pd

# Load the training and test passenger tables from the CSV files that sit
# next to this notebook.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('./titanic_train.csv', './titanic_test.csv')
)

## 2 - Visualize

In [34]:
import pandas_profiling

# Build a profiling report for the training frame and write it out as a
# standalone HTML file. `profile_report` is available on the DataFrame
# because `pandas_profiling` has been imported.
(
    train
    .profile_report()
    .to_file('./titanic_report.html')
)

## 3 - Selection

In [12]:
def _prepare_passengers(frame):
    """Drop passengers with no recorded age and one-hot encode ``Sex``.

    The function is safe to apply to a frame that has already been prepared:
    ``dropna`` has nothing left to remove, and the encoding step is skipped
    once ``Sex`` has been replaced by its dummy columns. Without that guard,
    running this cell a second time raised ``KeyError`` because ``train`` and
    ``test`` are rebound to their encoded versions below.

    Parameters
    ----------
    frame : pandas.DataFrame
        Passenger table containing an ``Age`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is not modified.
    """
    prepared = frame.dropna(subset=['Age'])
    if 'Sex' in prepared.columns:
        prepared = pd.get_dummies(prepared, columns=['Sex'])
    return prepared


# Apply the same preparation to both frames so their columns stay aligned.
train = _prepare_passengers(train)
test = _prepare_passengers(test)

# Columns fed to the models; the two Sex_* columns come from get_dummies.
feature_cols = ['Pclass', 'Parch', 'Age', 'Sex_female', 'Sex_male']

X = train[feature_cols]
y = train['Survived']

## 4 - Training

In [13]:
from sklearn.model_selection import train_test_split

# Hold out part of the labelled data for evaluation. No test_size is given,
# so scikit-learn's default split proportion applies; random_state pins the
# shuffle so the same rows land in each half on every run.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    random_state=0,
)

In [20]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training split (`fit` returns
# the estimator itself, so construction and fitting can be chained), then
# predict labels for the held-out rows.
logreg = LogisticRegression(solver='lbfgs').fit(X_train, y_train)
logreg_y_pred_target = logreg.predict(X_test)

In [24]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree random forest on the training split and predict labels for
# the held-out rows. The forest is randomised (bootstrap samples and feature
# sub-sampling), so it is seeded with the same value used for
# train_test_split; without a seed the metrics change on every run and the
# recorded results cannot be reproduced.
rfc = RandomForestClassifier(n_estimators=100, random_state=0)
rfc.fit(X_train, y_train)
rfc_y_pred_target = rfc.predict(X_test)

## 5 - Evaluation

In [39]:
from sklearn import metrics

def evaluate(y_test, y_pred_target):
    """Score predicted labels against the true labels.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred_target : array-like
        Labels predicted by a model for the same rows.

    Returns
    -------
    dict
        Keys ``accuracy``, ``precision`` and ``recall`` (in that order), each
        mapped to the score formatted as a string with two decimals.
    """
    scorers = (
        ('accuracy', metrics.accuracy_score),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score),
    )
    return {
        metric_name: '{:.2f}'.format(score_fn(y_test, y_pred_target))
        for metric_name, score_fn in scorers
    }

# Print one line of metrics per model, logistic regression first.
for _label, _predicted in (
    ('logreg', logreg_y_pred_target),
    ('rfc', rfc_y_pred_target),
):
    print(_label, evaluate(y_test, _predicted))

logreg {'accuracy': '0.82', 'precision': '0.79', 'recall': '0.78'}
rfc {'accuracy': '0.75', 'precision': '0.71', 'recall': '0.68'}


## 6 - Prediction

## 7 - Export

In [38]:
import joblib

# Persist what the Flask app needs: the ordered feature names and the fitted
# logistic-regression model. The target directory is assumed to exist
# already — TODO confirm.
features = X.columns.tolist()
model = logreg

joblib.dump(value=features, filename='../flask-sklearn/features.pkl')
# Kept as the last expression so the cell still displays the written path.
joblib.dump(value=model, filename='../flask-sklearn/logreg_model.pkl', compress=9)

['../flask-sklearn/logreg_model.pkl']