## Load Dataset

In [64]:
import pandas as pd
train = pd.read_csv('datasets/titanic/train.csv')
test = pd.read_csv('datasets/titanic/test.csv')

train.dtypes



PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [65]:
y = train['Survived']
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

### Remove as respostas (y) do dataset

In [66]:
X = train.drop('Survived',axis=1)
X.columns

Index(['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

## Pré-processamento
### Atributos desejados

In [67]:
from sklearn.base import BaseEstimator, TransformerMixin
class AtributosDesejados(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.colunasIndesejadas = ['PassengerId', 'Name', 'Ticket', 'Cabin']
        return self
    def transform(self, X, y=None):
        return X.drop(self.colunasIndesejadas,axis=1)

atributosDesejados = AtributosDesejados()
Xdrop = atributosDesejados.fit_transform(X)
Xdrop.columns

Index(['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'], dtype='object')

### Atributos Numéricos a partir dos atributos desejados (Xdrop)

In [68]:
from sklearn.base import BaseEstimator, TransformerMixin
class AtributosNumericos(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.colunasNumericas = X.select_dtypes(include='number').columns
        return self
    def transform(self, X, y=None):
        return X[self.colunasNumericas]

atributosNumericos = AtributosNumericos()
Xnum = atributosNumericos.fit_transform(Xdrop)
Xnum.columns

Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare'], dtype='object')

In [69]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

pipenum = Pipeline([
    ('atributos_numericos', AtributosNumericos()),
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

XnumLimpo = pipenum.fit_transform(Xnum)
XnumLimpo

array([[ 0.82737724, -0.56573646,  0.43279337, -0.47367361, -0.50244517],
       [-1.56610693,  0.66386103,  0.43279337, -0.47367361,  0.78684529],
       [ 0.82737724, -0.25833709, -0.4745452 , -0.47367361, -0.48885426],
       ...,
       [ 0.82737724, -0.1046374 ,  0.43279337,  2.00893337, -0.17626324],
       [-1.56610693, -0.25833709, -0.4745452 , -0.47367361, -0.04438104],
       [ 0.82737724,  0.20276197, -0.4745452 , -0.47367361, -0.49237783]])

### Atributos Categóricos a partir dos atributos desejados

In [70]:
from sklearn.base import BaseEstimator, TransformerMixin
class AtributosCategoricos(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.colunasCategoricas = X.select_dtypes(include='object').columns
        return self
    def transform(self, X, y=None):
        return X[self.colunasCategoricas]

atributosCategoricos = AtributosCategoricos()
Xcat = atributosCategoricos.fit_transform(Xdrop)
Xcat.columns


Index(['Sex', 'Embarked'], dtype='object')

In [71]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

pipecat = Pipeline([
    ('atributos_categoricos', AtributosCategoricos()),
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder())
])

XcatLimpo = pipecat.fit_transform(Xdrop)
XcatLimpo



<891x5 sparse matrix of type '<class 'numpy.float64'>'
	with 1782 stored elements in Compressed Sparse Row format>

### X Tratado
#### Pipeline para tratar os atributos numéricos e categóricos e unir as características

In [72]:
from sklearn.pipeline import FeatureUnion
unecaracteristicas = FeatureUnion([
    ('pipenum', pipenum),
    ('pipecat', pipecat)
])
Xtratado = unecaracteristicas.fit_transform(Xdrop)
Xtratado


<891x10 sparse matrix of type '<class 'numpy.float64'>'
	with 6237 stored elements in Compressed Sparse Row format>

#### Pipeline para tratar todo o X

In [73]:
from sklearn.pipeline import Pipeline

preproc = Pipeline([
    ('atributos_desejados', AtributosDesejados()),
    ('unecaracteristicas', unecaracteristicas)
])
Xtratado = preproc.fit_transform(X)
Xtratado

<891x10 sparse matrix of type '<class 'numpy.float64'>'
	with 6237 stored elements in Compressed Sparse Row format>

## Classificador

### DecisionTreeClassifier

In [74]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

pipetotal = Pipeline([
    ('preproc', preproc),
    ('arvore', DecisionTreeClassifier())
])

pipetotal.fit(X,y)
ypred = pipetotal.predict(X)
accuracy_score(y, ypred)

0.9797979797979798

In [75]:
pipetotal.fit(X,y)
ypred = pipetotal.predict(test)

In [76]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition_video.csv',index=False)

### DecisionTreeClassifier Com GridSearch

In [81]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

parametros_DTC = {
    'arvore__max_depth': [None] + list(range(1,20)),
    'arvore__criterion': ['gini', 'entropy'],
    'arvore__max_features': ['auto', 'sqrt', 'log2']
}

classificador_DTC = Pipeline([
    ('preproc', preproc),
    ('arvore', DecisionTreeClassifier())
])

classificador_DTC_Grid = GridSearchCV(classificador_DTC, param_grid=parametros_DTC)

classificador_DTC_Grid.fit(X,y)
ypred = classificador_DTC_Grid.predict(test)

np.sum(ypred==1)


128

In [82]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition_DTC_Grid.csv',index=False)

### LogisticRegression

In [16]:
import numpy as np
from sklearn.linear_model import LogisticRegression

parametros_LRC = {'clf__C': [0.01,0.1,1,10,100]}

classificador_LRC = Pipeline([
    ('preproc', preproc),
    ('clf', LogisticRegression())
])

classificador_LRC.fit(X,y)
ypred = classificador_LRC.predict(test)

np.sum(ypred==1)


156

In [17]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition_LRC.csv',index=False)

### LogisticRegression Com GridSearch

In [18]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

parametros_LRC = {'clf__C': [0.01,0.1,1,10,100]}

classificador_LRC = Pipeline([
    ('preproc', preproc),
    ('clf', LogisticRegression())
])

classificador_LRC_Grid = GridSearchCV(classificador_LRC, param_grid=parametros_LRC)

classificador_LRC_Grid.fit(X,y)
ypred = classificador_LRC_Grid.predict(test)

np.sum(ypred==1)


151

In [19]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition_LRC_Grid.csv',index=False)

### MLPClassifier Com GridSearch

In [20]:
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline

parametros_MLP = {
    'clf__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'clf__solver': ['lbfgs', 'sgd', 'adam'],
    'clf__learning_rate': ['constant', 'invscaling', 'adaptive'],
}

classificador_MLP = Pipeline([
    ('preproc', preproc),
    ('clf', MLPClassifier())
])


classificador_MLP_Grid = GridSearchCV(classificador_MLP, param_grid=parametros_MLP)
classificador_MLP_Grid.fit(X,y)
ypred = classificador_MLP_Grid.predict(test)

np.sum(ypred==1)


133

In [21]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition_MLP_Grid.csv',index=False)

## 2. Definindo classes numéricas para os categóricos

In [22]:
import pandas as pd
import numpy as np

train = pd.read_csv('datasets/titanic/train.csv')
test = pd.read_csv('datasets/titanic/test.csv')

train.columns


Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [23]:
test.columns

Index(['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [24]:
train

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [25]:
test

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [26]:
y = train['Survived']
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [27]:
train = train.drop(['PassengerId', 'Survived', 'Name'], axis=1)
train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,male,22.0,1,0,A/5 21171,7.2500,,S
1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,1,female,35.0,1,0,113803,53.1000,C123,S
4,3,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...
886,2,male,27.0,0,0,211536,13.0000,,S
887,1,female,19.0,0,0,112053,30.0000,B42,S
888,3,female,,1,2,W./C. 6607,23.4500,,S
889,1,male,26.0,0,0,111369,30.0000,C148,C


In [28]:
test = test.drop(['PassengerId', 'Name'], axis=1)
test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,male,34.5,0,0,330911,7.8292,,Q
1,3,female,47.0,1,0,363272,7.0000,,S
2,2,male,62.0,0,0,240276,9.6875,,Q
3,3,male,27.0,0,0,315154,8.6625,,S
4,3,female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...
413,3,male,,0,0,A.5. 3236,8.0500,,S
414,1,female,39.0,0,0,PC 17758,108.9000,C105,C
415,3,male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,3,male,,0,0,359309,8.0500,,S


In [29]:
train['Sex'] = train['Sex'].map({'male':1, 'female':0})
test['Sex'] = test['Sex'].map({'male':1, 'female':0})

train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,22.0,1,0,A/5 21171,7.2500,,S
1,1,0,38.0,1,0,PC 17599,71.2833,C85,C
2,3,0,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,1,0,35.0,1,0,113803,53.1000,C123,S
4,3,1,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...
886,2,1,27.0,0,0,211536,13.0000,,S
887,1,0,19.0,0,0,112053,30.0000,B42,S
888,3,0,,1,2,W./C. 6607,23.4500,,S
889,1,1,26.0,0,0,111369,30.0000,C148,C


In [30]:
test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,34.5,0,0,330911,7.8292,,Q
1,3,0,47.0,1,0,363272,7.0000,,S
2,2,1,62.0,0,0,240276,9.6875,,Q
3,3,1,27.0,0,0,315154,8.6625,,S
4,3,0,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...
413,3,1,,0,0,A.5. 3236,8.0500,,S
414,1,0,39.0,0,0,PC 17758,108.9000,C105,C
415,3,1,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,3,1,,0,0,359309,8.0500,,S


In [31]:
# replacing missing cabins with U (for Uknown)
train["Cabin"].fillna('U', inplace=True)
test["Cabin"].fillna('U', inplace=True)
# mapping each Cabin value with the cabin letter
train['Cabin'] = train['Cabin'].map(lambda c: c[0])
train['Cabin'] = train['Cabin'].map({'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'T': 8, 'U': 9})
test['Cabin'] = test['Cabin'].map(lambda c: c[0])
test['Cabin'] = test['Cabin'].map({'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'T': 8, 'U': 9})

train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,22.0,1,0,A/5 21171,7.2500,9,S
1,1,0,38.0,1,0,PC 17599,71.2833,3,C
2,3,0,26.0,0,0,STON/O2. 3101282,7.9250,9,S
3,1,0,35.0,1,0,113803,53.1000,3,S
4,3,1,35.0,0,0,373450,8.0500,9,S
...,...,...,...,...,...,...,...,...,...
886,2,1,27.0,0,0,211536,13.0000,9,S
887,1,0,19.0,0,0,112053,30.0000,2,S
888,3,0,,1,2,W./C. 6607,23.4500,9,S
889,1,1,26.0,0,0,111369,30.0000,3,C


In [32]:
test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,34.5,0,0,330911,7.8292,9,Q
1,3,0,47.0,1,0,363272,7.0000,9,S
2,2,1,62.0,0,0,240276,9.6875,9,Q
3,3,1,27.0,0,0,315154,8.6625,9,S
4,3,0,22.0,1,1,3101298,12.2875,9,S
...,...,...,...,...,...,...,...,...,...
413,3,1,,0,0,A.5. 3236,8.0500,9,S
414,1,0,39.0,0,0,PC 17758,108.9000,3,C
415,3,1,38.5,0,0,SOTON/O.Q. 3101262,7.2500,9,S
416,3,1,,0,0,359309,8.0500,9,S


In [33]:
set(train['Embarked'])

{'C', 'Q', 'S', nan}

In [34]:
train["Embarked"].fillna('U', inplace=True)
test["Embarked"].fillna('U', inplace=True)

In [35]:
set(train['Embarked'])

{'C', 'Q', 'S', 'U'}

In [36]:
set(test['Embarked'])

{'C', 'Q', 'S'}

In [37]:
train['Embarked'] = train['Embarked'].map({'C': 1, 'Q': 2, 'S': 3, 'U': 4})
test['Embarked'] = test['Embarked'].map({'C': 1, 'Q': 2, 'S': 3, 'U': 4})

In [38]:
set(train['Embarked'])

{1, 2, 3, 4}

In [39]:
set(test['Embarked'])

{1, 2, 3}

In [40]:
train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,22.0,1,0,A/5 21171,7.2500,9,3
1,1,0,38.0,1,0,PC 17599,71.2833,3,1
2,3,0,26.0,0,0,STON/O2. 3101282,7.9250,9,3
3,1,0,35.0,1,0,113803,53.1000,3,3
4,3,1,35.0,0,0,373450,8.0500,9,3
...,...,...,...,...,...,...,...,...,...
886,2,1,27.0,0,0,211536,13.0000,9,3
887,1,0,19.0,0,0,112053,30.0000,2,3
888,3,0,,1,2,W./C. 6607,23.4500,9,3
889,1,1,26.0,0,0,111369,30.0000,3,1


In [41]:
test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,34.5,0,0,330911,7.8292,9,2
1,3,0,47.0,1,0,363272,7.0000,9,3
2,2,1,62.0,0,0,240276,9.6875,9,2
3,3,1,27.0,0,0,315154,8.6625,9,3
4,3,0,22.0,1,1,3101298,12.2875,9,3
...,...,...,...,...,...,...,...,...,...
413,3,1,,0,0,A.5. 3236,8.0500,9,3
414,1,0,39.0,0,0,PC 17758,108.9000,3,1
415,3,1,38.5,0,0,SOTON/O.Q. 3101262,7.2500,9,3
416,3,1,,0,0,359309,8.0500,9,3


In [42]:
train_tickets = set(train['Ticket'])
values = list(range(len(train_tickets)))
map_train_tickets = {}
for i, k in enumerate(train_tickets):
    map_train_tickets[k] = i

train['Ticket'] = train['Ticket'].map(map_train_tickets)

test_tickets = set(test['Ticket'])
values = list(range(len(test_tickets)))
map_test_tickets = {}
for i, k in enumerate(test_tickets):
    map_test_tickets[k] = i

test['Ticket'] = test['Ticket'].map(map_test_tickets)

In [43]:
train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,22.0,1,0,215,7.2500,9,3
1,1,0,38.0,1,0,4,71.2833,3,1
2,3,0,26.0,0,0,326,7.9250,9,3
3,1,0,35.0,1,0,598,53.1000,3,3
4,3,1,35.0,0,0,504,8.0500,9,3
...,...,...,...,...,...,...,...,...,...
886,2,1,27.0,0,0,128,13.0000,9,3
887,1,0,19.0,0,0,166,30.0000,2,3
888,3,0,,1,2,318,23.4500,9,3
889,1,1,26.0,0,0,312,30.0000,3,1


In [44]:
test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,34.5,0,0,179,7.8292,9,2
1,3,0,47.0,1,0,186,7.0000,9,3
2,2,1,62.0,0,0,262,9.6875,9,2
3,3,1,27.0,0,0,67,8.6625,9,3
4,3,0,22.0,1,1,344,12.2875,9,3
...,...,...,...,...,...,...,...,...,...
413,3,1,,0,0,62,8.0500,9,3
414,1,0,39.0,0,0,181,108.9000,3,1
415,3,1,38.5,0,0,175,7.2500,9,3
416,3,1,,0,0,281,8.0500,9,3


In [45]:
train.fillna(0, inplace=True)
test.fillna(0, inplace=True)
train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,1,22.0,1,0,215,7.2500,9,3
1,1,0,38.0,1,0,4,71.2833,3,1
2,3,0,26.0,0,0,326,7.9250,9,3
3,1,0,35.0,1,0,598,53.1000,3,3
4,3,1,35.0,0,0,504,8.0500,9,3
...,...,...,...,...,...,...,...,...,...
886,2,1,27.0,0,0,128,13.0000,9,3
887,1,0,19.0,0,0,166,30.0000,2,3
888,3,0,0.0,1,2,318,23.4500,9,3
889,1,1,26.0,0,0,312,30.0000,3,1


## Colocando dentro do pipeline

In [58]:
import pandas as pd
import numpy as np

train = pd.read_csv('datasets/titanic/train.csv')
test = pd.read_csv('datasets/titanic/test.csv')

y = train['Survived']
X_train = train.drop(['PassengerId', 'Survived', 'Name'], axis=1)
X_test = test.drop(['PassengerId', 'Name'], axis=1)

### Pré-processamento

In [59]:
from sklearn.base import BaseEstimator, TransformerMixin
class PreProcessamento(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X, y=None):
        X['Sex'] = X['Sex'].map({'male':1, 'female':0})
        
        # replacing missing cabins with U (for Uknown)
        X["Cabin"].fillna('U', inplace=True)
        # mapping each Cabin value with the cabin letter
        X['Cabin'] = X['Cabin'].map(lambda c: c[0])
        X['Cabin'] = X['Cabin'].map({'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'T': 8, 'U': 9})
        
        X["Embarked"].fillna('U', inplace=True)
        X['Embarked'] = X['Embarked'].map({'C': 1, 'Q': 2, 'S': 3, 'U': 4})

        # Processa Tickets
        X_tickets = set(X['Ticket'])
        values = list(range(len(X_tickets)))
        map_X_tickets = {}
        for i, k in enumerate(X_tickets):
            map_X_tickets[k] = i
        X['Ticket'] = X['Ticket'].map(map_X_tickets)    

        X.fillna(0, inplace=True)

        X.loc[X['Age'] <= 16, 'Age'] = 0
        X.loc[(X['Age'] > 16) & (X['Age'] <= 32), 'Age'] = 1
        X.loc[(X['Age'] > 32) & (X['Age'] <= 48), 'Age'] = 2
        X.loc[(X['Age'] > 48) & (X['Age'] <= 64), 'Age'] = 3
        X.loc[X['Age'] > 64, 'Age'] = 4        

        return X

### MLPClassifier Com GridSearch 2 

In [48]:
from sklearn.pipeline import Pipeline
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

parametros_RFC = {
    'clf__max_depth': [None] + list(range(1,20)),
    'clf__criterion': ['gini', 'entropy'],
    'clf__max_features': ['auto', 'sqrt', 'log2']
}
parametros_ETC = {'clf__n_estimators': [10,20,30,40,50]}
parametros_KNN = {'clf__n_neighbors': [1,3,5,7,9]}
parametros_LRC = {'clf__C': [0.01,0.1,1,10,100]}

parametros_MLP = {
    'clf__activation': ['identity', 'logistic', 'tanh', 'relu'],
    'clf__solver': ['lbfgs', 'sgd', 'adam'],
    'clf__learning_rate': ['constant', 'invscaling', 'adaptive'],
}

classificador = Pipeline([
    ('PreProc', PreProcessamento()),
    ('scaler', StandardScaler()),
    ('clf', MLPClassifier())
])

classificador_Grid = GridSearchCV(classificador, param_grid=parametros_MLP)

classificador_Grid.fit(X_train,y)
ypred = classificador_Grid.predict(X_test)

np.sum(ypred==1)


161

In [49]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition-MLP_Grid2.csv',index=False)

### LogisticRegression Com GridSearch 2

In [53]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

parametros_LRC = {'clf__C': [0.01,0.1,1,10,100]}

classificador_LRC = Pipeline([
    ('PreProc', PreProcessamento()),
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression())
])

classificador_LRC_Grid = GridSearchCV(classificador_LRC, param_grid=parametros_LRC)

classificador_LRC_Grid.fit(X_train,y)
ypred = classificador_LRC_Grid.predict(X_test)

np.sum(ypred==1)


162

In [54]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition_LRC_Grid2.csv',index=False)

### DecisionTreeClassifier Com GridSearch 2

In [62]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

parametros_DTC = {
    'clf__max_depth': [None] + list(range(1,20)),
    'clf__criterion': ['gini', 'entropy'],
    'clf__max_features': ['auto', 'sqrt', 'log2']
}

classificador_DTC = Pipeline([
    ('PreProc', PreProcessamento()),
    ('scaler', StandardScaler()),
    ('clf', DecisionTreeClassifier())
])

classificador_DTC.fit(X_train,y)
ypred = classificador_DTC.predict(X_test)

np.sum(ypred==1)

TypeError: 'int' object is not subscriptable

In [61]:
result = test[['PassengerId']]
result['Survived'] = ypred
result.to_csv('titanic_competition_DTC_Grid2.csv',index=False)