In [1]:
%load_ext autoreload
%autoreload 2

#load packages
import sys
import pandas as pd
import matplotlib
import numpy as np
import scipy as sp
import IPython
from IPython import display
import sklearn
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion

from sklearn_pandas import DataFrameMapper, cross_val_score

from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd
import numpy as np


class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Pipeline step that extracts a fixed set of DataFrame columns.

    Parameters
    ----------
    attribute_names
        Column label(s) used to index the incoming DataFrame in
        ``transform``.
    """

    def __init__(self, attribute_names):
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        """Do nothing (the selector has no state to learn) and return self."""
        return self

    def transform(self, X):
        """Return ``X[attribute_names]`` as its underlying ``.values`` array."""
        selected = X[self.attribute_names]
        return selected.values
    
    
class EmbarkedImputer(BaseEstimator, TransformerMixin):
    """Fill missing ``Embarked`` values with the mode of a reference group.

    The reference group is the rows of the frame being transformed whose
    ``Pclass`` is 1 and whose ``Fare`` lies strictly between 75 and 85.
    The most frequent ``Embarked`` value in that group replaces every
    missing ``Embarked`` entry.

    The input frame must provide the ``Fare``, ``Pclass`` and ``Embarked``
    columns.
    """

    def __init__(self):  # no *args or **kwargs
        pass

    def fit(self, X, y=None):
        """Return self; nothing is learned.

        ``y`` is accepted (and ignored) so that ``fit(X, y)`` and
        ``fit_transform(X, y)`` calls do not fail with a ``TypeError``.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with missing ``Embarked`` values filled."""
        # Work on a copy so the caller's frame is left untouched.
        df = X.copy()

        missing = df['Embarked'].isnull()
        if not missing.any():
            # Nothing to impute: skip the mode lookup entirely.
            return df

        in_reference_group = (
            (df['Fare'] > 75) & (df['Fare'] < 85) & (df['Pclass'] == 1)
        )
        modes = df.loc[in_reference_group, 'Embarked'].mode()
        if modes.empty:
            # No usable row in the reference group: fall back to the mode of
            # the whole column instead of failing on an empty Series lookup.
            modes = df['Embarked'].mode()

        if not modes.empty:
            df.loc[missing, 'Embarked'] = modes.iloc[0]

        return df
    
    
class GeneralImputer(BaseEstimator, TransformerMixin):
    """Fill missing values of a column with a per-group statistic.

    Parameters
    ----------
    col_impute
        Column label (or list of labels) whose missing values are filled.
    col_group
        Column label(s) passed to ``DataFrame.groupby`` to form the groups.
    impute_method : {'median', 'average'}, default 'median'
        Statistic computed within each group and used as the fill value.
    """

    def __init__(self, col_impute, col_group, impute_method='median'):  # no *args or **kwargs
        self.col_impute = col_impute
        self.col_group = col_group
        self.impute_method = impute_method

    def fit(self, X, y=None):
        """Return self; nothing is learned.

        ``y`` is accepted (and ignored) so that ``fit(X, y)`` and
        ``fit_transform(X, y)`` calls do not fail with a ``TypeError``.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with ``col_impute`` filled group by group.

        Raises
        ------
        ValueError
            If ``impute_method`` is neither ``'median'`` nor ``'average'``.
            (Previously ``np.nan`` was returned silently, which only failed
            later, in whichever step consumed the result.)
        """
        if self.impute_method == 'median':
            def fill_missing(series):
                return series.fillna(series.median())
        elif self.impute_method == 'average':
            def fill_missing(series):
                return series.fillna(series.mean())
        else:
            raise ValueError(
                "impute_method must be 'median' or 'average', got %r"
                % (self.impute_method,)
            )

        # Work on a copy so the caller's frame is left untouched.
        df = X.copy()
        grouped = df.groupby(self.col_group)
        df[self.col_impute] = grouped[self.col_impute].transform(fill_missing)
        return df

        
class TitleCreator(BaseEstimator, TransformerMixin):
    """Add a ``Title`` column derived from the ``Name`` column.

    The title is the first run of ASCII letters that is preceded by a space
    and followed by a period. Uncommon titles are collapsed into ``'Rare'``
    and a few variants are mapped onto ``'Miss'`` / ``'Mrs'``. Names with no
    match get a missing ``Title``.
    """

    # Titles that are grouped under the single label 'Rare'.
    _RARE_TITLES = ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major',
                    'Rev', 'Sir', 'Jonkheer', 'Dona']
    # Variant spellings folded into the common title they stand for.
    _TITLE_ALIASES = {'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'}

    def __init__(self):  # no *args or **kwargs
        pass

    def fit(self, X, y=None):
        """Return self; nothing is learned.

        ``y`` is accepted (and ignored) so that ``fit(X, y)`` and
        ``fit_transform(X, y)`` calls do not fail with a ``TypeError``.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the ``Title`` column added."""
        # Work on a copy so the caller's frame is left untouched.
        df = X.copy()

        # Raw string: the pattern is unchanged, but '\.' is no longer an
        # invalid escape sequence in a normal string literal.
        titles = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
        titles = titles.replace(self._RARE_TITLES, 'Rare')
        titles = titles.replace(self._TITLE_ALIASES)
        df['Title'] = titles

        return df
    

# Column groups consumed by the preprocessing pipelines below.
CAT_ATTRIBS = [
    'Sex',
    'Embarked',
    'Title',
]
NUMERICS_ATTRIBS = [
    'Pclass',
    'Age',
    'SibSp',
    'Parch',
    'Fare',
]

# Load the train and test sets; Age is read as float64 in both.
_READ_OPTIONS = {"dtype": {"Age": np.float64}}
train = pd.read_csv("../input/train.csv", **_READ_OPTIONS)
test = pd.read_csv("../input/test.csv", **_READ_OPTIONS)


# One LabelBinarizer per categorical column; DataFrameMapper applies each to
# its column and concatenates the results.
# NOTE(review): `my_mapper` was referenced but never defined in this cell;
# this definition is inferred from the imports and the step name — confirm
# it matches the intended encoding.
my_mapper = DataFrameMapper(
    [(column, LabelBinarizer()) for column in CAT_ATTRIBS]
)

# Categorical branch: fill missing Embarked, derive Title, then binarize.
# The transformer classes are the ones defined earlier in this cell; the
# `transformer_classes.` prefix referred to a name that is never imported.
categorical_data_pipeline = Pipeline([
    ('ebarked_imputer', EmbarkedImputer()),
    ('title_creator', TitleCreator()),
    ('label_binarizer_df', my_mapper),
])



# Numerical branch: fill Fare with the per-(Sex, Pclass) median and Age with
# the per-(Sex, Pclass) mean, keep the numeric columns, then standardize.
# The transformer classes are the ones defined earlier in this cell; the
# `transformer_classes.` prefix referred to a name that is never imported.
numerical_data_pipeline = Pipeline([
    ('fare_imputer', GeneralImputer(col_impute=['Fare'],
                                    col_group=['Sex', 'Pclass'],
                                    impute_method='median')),
    ('age_imputer', GeneralImputer(col_impute=['Age'],
                                   col_group=['Sex', 'Pclass'],
                                   impute_method='average')),  # median perhaps?
    ('selector', DataFrameSelector(NUMERICS_ATTRIBS)),
    ('std_scaler', StandardScaler()),
])



# Run both branches on the same input frame and concatenate their outputs
# column-wise (numerical features first, then categorical).
full_pipeline = FeatureUnion(transformer_list=[
    ("num_pipeline", numerical_data_pipeline),
    ("cat_pipeline", categorical_data_pipeline),
])

# Fit the preprocessing on the training data only ...
train_prepared = full_pipeline.fit_transform(train)
# ... and reuse the fitted steps for the test data. Calling fit_transform here
# would refit the scaler and the label binarizers on the test set, so the
# test features would be scaled and encoded differently from the features
# the model is trained on.
test_prepared = full_pipeline.transform(test)

# ##########################################
# # USE TPOT TO FIND A CLASSIFIER
# from tpot import TPOTClassifier
# tpot = TPOTClassifier(verbosity=2, max_time_mins=10)
# tpot.fit(train_prepared, train['Survived'])
# ##########################################

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Hyper-parameter combinations explored by the grid search.
param_grid = [
    {'bootstrap': [False, True],
     'n_estimators': [80, 100, 130],
     'max_features': [0.65, 0.7500000000000001],
     'min_samples_leaf': [10, 12],
     'min_samples_split': [3, 5, 7]
    },
]

# The estimator was referenced but never created in this cell. The name is
# kept as-is even though it is a classifier, in case other cells refer to it.
forest_reg = RandomForestClassifier()

# Score with accuracy: the estimator is a classifier, so a classification
# metric is the appropriate choice. (Assuming Survived is coded 0/1, the
# previous 'neg_mean_squared_error' equals accuracy - 1, so the ranking of
# candidates is the same.)
grid_search = GridSearchCV(forest_reg, param_grid, cv=5,
                           scoring='accuracy', refit=True)
grid_search.fit(train_prepared, train['Survived'])

# refit=True retrains the best parameter combination on all training data.
final_model = grid_search.best_estimator_

final_predictions = final_model.predict(test_prepared)
final_predictions



ImportError: No module named 'titanic_functions'