In [26]:
import pandas as pd
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Read the training data from the working directory and preview the first rows.
df = pd.read_csv('train.csv')
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [50]:
def base_features(df):
    """Return the columns that are fed to the model without any processing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the 'SibSp' and 'Parch' columns.

    Returns
    -------
    pandas.DataFrame
        A two-column frame ('SibSp', 'Parch') with the same index as ``df``.
    """
    passthrough_cols = ['SibSp', 'Parch']
    return df[passthrough_cols]
# Wrap base_features as a pipeline step. validate=False (per the scikit-learn
# docs) skips array validation, so the function receives the DataFrame itself.
base_features_tf = FunctionTransformer(func=base_features, validate=False)

def dummies(df, levels=None):
    """One-hot encode Pclass, Sex and Embarked with a fixed column layout.

    A plain ``pd.get_dummies`` call derives its output columns from the values
    present in the frame it is given. A cross-validation fold or a test file
    that happens to lack one level would then yield a matrix with a different
    number (or order) of columns than the one the model was fitted on. Casting
    each column to a categorical with explicit levels makes the output schema
    independent of the rows that are passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the 'Pclass', 'Sex' and 'Embarked' columns.
    levels : dict, optional
        Mapping ``column name -> list of allowed values``. Entries override the
        built-in defaults for that column. Values not listed for a column are
        treated as missing and encode as an all-zero group.

    Returns
    -------
    pandas.DataFrame
        Indicator columns named ``<column>_<level>``, one per configured
        level, in the order Pclass, Sex, Embarked.
    """
    cols = ['Pclass', 'Sex', 'Embarked']

    # NOTE(review): default levels assume the usual coding of these columns
    # (classes 1-3, female/male, ports C/Q/S) -- confirm against the data.
    # They are listed in sorted order so that, when every level is present,
    # the column order matches what get_dummies produced before.
    known_levels = {
        'Pclass': [1, 2, 3],
        'Sex': ['female', 'male'],
        'Embarked': ['C', 'Q', 'S'],
    }
    if levels is not None:
        known_levels.update(levels)

    # Work on a copy so the caller's frame is never modified.
    encoded = df[cols].copy()
    for col in cols:
        encoded[col] = pd.Categorical(encoded[col], categories=known_levels[col])

    # get_dummies emits one column per category, including unobserved ones;
    # missing values (NaN) produce zeros across that column's group.
    return pd.get_dummies(encoded, columns=cols)
# Pipeline step that applies dummies() to the incoming DataFrame.
# validate=False is passed so the input is handed to the function unconverted.
dummies_tf = FunctionTransformer(func=dummies, validate=False)

def fare(df):
    """Return the 'Fare' column as a one-column frame with gaps filled.

    Missing fares are replaced by the median of the 'Fare' column of the
    frame that is passed in, so the fill value is recomputed on every call
    (training folds and the test file each use their own median).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Fare' column.

    Returns
    -------
    pandas.DataFrame
        Single column 'Fare' with the same index as ``df``.
    """
    fares = df['Fare']
    fill_value = fares.median()
    return fares.fillna(fill_value).to_frame()
# Pipeline step that applies fare() to the incoming DataFrame.
# validate=False is passed so the input is handed to the function unconverted.
fare_tf = FunctionTransformer(func=fare, validate=False)

def age(df):
    """Return the 'Age' column as a one-column frame with gaps filled.

    Missing ages are replaced by the median of the 'Age' column of the frame
    that is passed in; the fill value is therefore recomputed on every call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an 'Age' column.

    Returns
    -------
    pandas.DataFrame
        Single column 'Age' with the same index as ``df``.
    """
    ages = df['Age']
    fill_value = ages.median()
    return ages.fillna(fill_value).to_frame()
# Pipeline step that applies age() to the incoming DataFrame.
# validate=False is passed so the input is handed to the function unconverted.
age_tf = FunctionTransformer(func=age, validate=False)

In [45]:
# Concatenate the outputs of the individual feature transformers column-wise.
# Note: age_tf is defined in this notebook but is not part of this union, so
# the Age column does not reach the model.
fu = FeatureUnion([
    ('base_features_tf', base_features_tf),
    ('dummies_tf', dummies_tf),
    ('fare_tf', fare_tf)
])

# Seed the estimator: the grid search sets max_features, and gradient boosting
# then draws a random feature subset at each split. Without a fixed
# random_state the CV score and the predictions change from run to run.
model = GradientBoostingClassifier(random_state=42)

# Feature extraction followed by the classifier; the step names are used as
# prefixes for the grid-search parameter keys ('model__...').
pipe = Pipeline([
    ('fu', fu),
    ('model', model)
])

In [81]:
# Hyper-parameter grid. Keys use the '<step name>__<parameter>' form to reach
# the 'model' step of the pipeline.
params = {
    'model__n_estimators': [50, 100],
    'model__max_features': [5]
}
gs = GridSearchCV(pipe, param_grid=params)

# The full frame is passed as X; the feature functions in the pipeline pick
# the columns they need, and 'Survived' is supplied separately as the target.
gs.fit(df, df['Survived'])

# print(...) with a single argument works under both Python 2 and Python 3;
# the bare `print x` statement is a syntax error on Python 3.
print(gs.best_score_)
gs.best_params_

0.817059483726


{'model__max_features': 5, 'model__n_estimators': 50}

In [82]:
# Load the file to be scored and predict with the fitted grid search
# (GridSearchCV.predict uses the refit best estimator), then show the
# shape of the loaded frame as a sanity check.
test = pd.read_csv('test.csv')
pred = gs.predict(X=test)
test.shape

(418, 11)

In [83]:
# Sanity check: one prediction per row of the test frame is expected.
pred.shape

(418,)

In [84]:
# Store the predictions on the test frame, then write only the identifier and
# the predicted label to the submission file (no index column).
test['Survived'] = pred
test.loc[:, ['PassengerId', 'Survived']].to_csv('submission6.csv', index=False)