In [None]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.naive_bayes import GaussianNB 
from sklearn.neural_network import MLPClassifier

In [None]:
# Load the Titanic training CSV, then display the non-null count of every column
# so that columns with missing values are visible.
train = pd.read_csv(filepath_or_buffer='../input/titanic/train.csv')
train.count()

In [None]:
# Load the Titanic test CSV and display its summary statistics.
test = pd.read_csv(filepath_or_buffer='../input/titanic/test.csv')
test.describe()

In [None]:
# Read the reference labels, keep only rows whose PassengerId is above 891, and
# reduce them to the single 'Survived' column that the models are scored against.
# NOTE(review): on Kaggle, gender_submission.csv is an example submission rather
# than ground truth — confirm these labels are suitable for evaluation.
test_y = pd.read_csv(filepath_or_buffer='../input/titanic/gender_submission.csv')
test_y = test_y[test_y['PassengerId'] > 891]
test_columns = ['Survived']
test_y_new = test_y.filter(items=test_columns)
test_y_new.count()

In [None]:
# Impute missing values using statistics computed on the TRAINING set only.
# Filling the test set from its own mean (or an arbitrary 0 for Fare) makes the
# two splits go through different preprocessing and lets test-set information
# shape the features the models are evaluated on.
age_mean = train['Age'].mean()      # pandas skips NaN when computing the mean
fare_median = train['Fare'].median()

train['Age'] = train['Age'].fillna(age_mean)
test['Age'] = test['Age'].fillna(age_mean)
test['Fare'] = test['Fare'].fillna(fare_median)

# Non-null counts after imputation.
test.count()

In [None]:
# Pick the same raw columns from both splits; the training target is kept separately.
# NOTE(review): 'PassengerId' is carried along as a model feature here; a later cell
# relies on it being present to build the submission — confirm whether the models
# should really be trained on it.
train_columns = ['PassengerId', 'Sex', 'Pclass', 'Age', 'Fare']
test_columns = list(train_columns)

train_y_new = train['Survived']
train_x_new = train.filter(items=train_columns)
test_x_new = test.filter(items=test_columns)

test_x_new.count()

In [None]:
# One-hot encode the non-numeric columns of each split, dropping the first level of
# every encoded column. The two frames are encoded independently of each other.
train_dummies, test_dummies = (
    pd.get_dummies(frame, drop_first=True)
    for frame in (train_x_new, test_x_new)
)
test_dummies.head(n=3)

In [None]:
# Engineered flag: 1 for male passengers whose fare is below 20, 0 for everyone else.
train_dummies['male_fare'] = (
    (train_dummies['Sex_male'] == 1) & (train_dummies['Fare'] < 20)
).astype('int64')
test_dummies['male_fare'] = (
    (test_dummies['Sex_male'] == 1) & (test_dummies['Fare'] < 20)
).astype('int64')
test_dummies.count()

In [None]:
# Engineered flag: 1 for male passengers older than 6, 0 for everyone else.
train_dummies['male_age'] = (
    (train_dummies['Sex_male'] == 1) & (train_dummies['Age'] > 6)
).astype('int64')
test_dummies['male_age'] = (
    (test_dummies['Sex_male'] == 1) & (test_dummies['Age'] > 6)
).astype('int64')
# This result is discarded: only the last expression of a cell is displayed.
test_dummies.count()
# Distribution of the new flag in the training data.
train_dummies['male_age'].value_counts()

In [None]:
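# NOTE: disabled experiment. The test-set condition below uses `Age > 6` while the
# train-set condition uses `Pclass <= 2.5`; reconcile the two before re-enabling.
# train_dummies['male_Pclass'] = 0
# train_dummies.loc[(train_dummies['Pclass'] <= 2.5) & (train_dummies['male_age'] == 0), 'male_Pclass'] = 1
# test_dummies['male_Pclass'] = 0
# test_dummies.loc[(test_dummies['Age'] > 6) & (test_dummies['male_age'] == 0), 'male_Pclass'] = 1
# test_dummies.count()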

## Split Model

## Decision Tree

In [None]:
# Shallow decision tree: depth 2, at least 20 samples per leaf, and a node needs
# at least 20% of the samples before it may be split.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split; without a seed, ties between equally good splits can be broken
# differently from run to run, so the scores would not be reproducible.
clf_dec = DecisionTreeClassifier(
    max_depth=2,
    min_samples_leaf=20,
    min_samples_split=0.2,
    random_state=42,
)
# clf_dec = DecisionTreeClassifier()
clf_dec.fit(train_dummies, train_y_new)

# Predict once and reuse the predictions for both metrics.
dec_pred = clf_dec.predict(test_dummies)
print(accuracy_score(test_y_new, dec_pred))  # same value clf_dec.score(...) reports
print(f1_score(test_y_new, dec_pred))

In [None]:
# Feature importances of the fitted decision tree, indexed by feature name and
# sorted from least to most important.
(
    pd.Series(data=clf_dec.feature_importances_, index=clf_dec.feature_names_in_)
    .sort_values(ascending=True)
    .to_frame(name="Importance")
)

In [None]:
# plt.figure(figsize=(40,40))
# plot_tree(clf_dec, feature_names=train_dummies.columns, proportion=True, filled=True)
# plt.show()

## Logistic Regression

In [None]:
# Logistic regression with scikit-learn's default settings.
clf_ln = LogisticRegression()
clf_ln.fit(train_dummies, train_y_new)

# Accuracy first, then F1, both computed from a single set of predictions.
ln_pred = clf_ln.predict(test_dummies)
print(accuracy_score(test_y_new, ln_pred))
print(f1_score(test_y_new, ln_pred))

## Naive Bayes

In [None]:
# Gaussian naive Bayes with scikit-learn's default settings.
clf_gnb = GaussianNB()
clf_gnb.fit(train_dummies, train_y_new)

# Accuracy first, then F1, both computed from a single set of predictions.
gnb_pred = clf_gnb.predict(test_dummies)
print(accuracy_score(test_y_new, gnb_pred))
print(f1_score(test_y_new, gnb_pred))

## Neural Networks

In [None]:
# Multi-layer perceptron classifier; verbose=True prints the loss at each iteration.
# random_state is fixed because the network's weights are initialised randomly
# (and the default solver samples mini-batches randomly); without a seed the
# reported scores and the predictions written to the submission file change on
# every run.
# NOTE(review): the inputs are not scaled and max_iter is only 100, so the
# optimiser may stop before converging — consider scaling the features.
clf_neural = MLPClassifier(
    max_iter=100,
    verbose=True,
    tol=0.0000100,
    random_state=42,
)
clf_neural.fit(train_dummies, train_y_new)


In [None]:
# Score the fitted network: accuracy first, then F1, from one set of predictions.
neural_pred = clf_neural.predict(test_dummies)
print(accuracy_score(test_y_new, neural_pred))
print(f1_score(test_y_new, neural_pred))

## Final Transformations

In [None]:
# Build the submission frame from the identifier column only.
# Selecting 'PassengerId' explicitly (instead of dropping a hard-coded list of
# feature columns) keeps any feature that is later added to test_dummies from
# leaking into the submission file. .copy() gives an independent frame so adding
# a column does not touch test_dummies.
df_test = test_dummies[['PassengerId']].copy()
df_test["Survived"] = clf_neural.predict(test_dummies)
df_test.head()

In [None]:
# Write the submission file without the DataFrame index column.
df_test.to_csv(path_or_buf="./submission.csv", index=False)

In [None]:
# testando = pd.read_csv('./submission.csv')
# testando.head()