In [87]:
import pandas as pd

# Load the raw training and test tables from the working directory.
X_full = pd.read_csv('./train.csv')
X_test_full = pd.read_csv('./test.csv')

# Discard rows whose target is missing, then separate the target from the predictors.
X_full = X_full.dropna(axis=0, subset=['Survived'])
y = X_full['Survived']
X_full = X_full.drop(columns=['Survived'])

# Per-column dtype and distinct-value count, computed once and reused below.
col_dtypes = X_full.dtypes
low_cardinality = X_full.nunique() < 10

# Categorical features: text columns with fewer than 10 distinct values,
# which keeps the one-hot encoding applied later small.
categorical_cols = [col for col in X_full.columns
                    if col_dtypes[col] == 'object' and low_cardinality[col]]

# Numerical features: every int64/float64 column.
# NOTE(review): selection is by dtype only, so identifier-like numeric columns
# are kept as features too -- confirm that is intended.
numerical_cols = [col for col in X_full.columns
                  if col_dtypes[col] in ('int64', 'float64')]

my_cols = [*categorical_cols, *numerical_cols]

# Independent copies restricted to the selected columns (same columns for train and test).
X_train = X_full.loc[:, my_cols].copy()
X_test = X_test_full.loc[:, my_cols].copy()

In [90]:
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Numeric features: fill gaps with the column median and append a binary
# "was missing" indicator column for each feature that had gaps during fit.
numerical_transformer = SimpleImputer(strategy='median', add_indicator=True)

# Categorical features: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' encodes categories unseen during fit as
# all zeros instead of raising at predict time.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

# Survived is a class label, so a classifier is used.
# random_state is fixed so that repeated runs give the same forest.
model = RandomForestClassifier(n_estimators=165, max_depth=6, random_state=0)

# Bundling preprocessing with the model means imputation/encoding statistics
# are re-learned inside every CV fold (no leakage from held-out rows).
my_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', model),
])

# Estimate generalisation accuracy first. cross_val_score fits clones of the
# pipeline, so it neither needs nor modifies a fitted `my_pipeline`.
# cv and scoring are spelled out rather than left to library defaults.
scores = cross_val_score(my_pipeline, X_train, y, cv=5, scoring='accuracy')
print("Scores: ", scores)
print("Average CV score: ", scores.mean())

# Final model: fit on all labelled rows, then predict the test set.
my_pipeline.fit(X_train, y)
predictions = my_pipeline.predict(X_test)

Scores:  [0.80446927 0.82022472 0.82022472 0.79213483 0.84831461]
Average CV score:  0.8170736300295023


In [89]:
# Build the submission table: one row per test passenger with its predicted label.
# `predictions` is the array returned by my_pipeline.predict(X_test); the
# previously referenced `test_preds` is not defined anywhere in this notebook.
# PassengerId is read from the raw test frame so this cell does not depend on
# the ID column having been selected as a model feature.
output = pd.DataFrame({'PassengerId': X_test_full.PassengerId,
                       'Survived': predictions})
output.to_csv('submission.csv', index=False)