In [None]:
import pandas as pd
from pandas_profiling import ProfileReport
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the competition files. The train and test frames use the first CSV
# column as their index; the sample submission keeps the default index.
train = pd.read_csv(
    '../input/tabular-playground-series-apr-2021/train.csv',
    index_col=0,
)
X_test = pd.read_csv(
    '../input/tabular-playground-series-apr-2021/test.csv',
    index_col=0,
)
submission = pd.read_csv(
    '../input/tabular-playground-series-apr-2021/sample_submission.csv'
)

In [None]:
# Separate the feature columns from the 'Survived' target column.
X_train = train.drop(columns='Survived')
y_train = train['Survived']

# Train Profile Report

In [None]:
# Build the profiling report for the training features (non-explorative mode).
profile_train = ProfileReport(
    X_train,
    title="Pandas Profiling Train Dataset",
    explorative=False,
)

In [None]:
# Show the train profile report using pandas-profiling's notebook widget view.
profile_train.to_widgets()

# Test Profile Report

In [None]:
# Build the profiling report for the test features (non-explorative mode).
# The report must be computed from X_test: profiling X_train here would just
# duplicate the train report under a "Test Dataset" title.
profile_test = ProfileReport(
    X_test,
    title="Pandas Profiling Test Dataset",
    explorative=False,
)

In [None]:
# Show the test profile report using pandas-profiling's notebook widget view.
profile_test.to_widgets()

# Random Forest Baseline

In [None]:
# Keep the same subset of columns, in the same order, in both train and test.
feature_columns = ["Pclass", "Age", "Fare", "SibSp", "Parch", "Sex", "Embarked"]
X_train = X_train[feature_columns]
X_test = X_test[feature_columns]

In [None]:
# Categorical columns are one-hot encoded; categories that were not seen
# during fit are ignored at predict time instead of raising.
categorical_features = ['Embarked', 'Sex', 'Pclass']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Numeric columns are median-imputed and then standardized.
# 'SibSp' and 'Parch' are listed here because ColumnTransformer drops every
# column that is not assigned to a transformer (remainder='drop' is the
# default); leaving them out would silently discard two of the selected
# features before they reach the classifier.
numeric_features = ['Age', 'Fare', 'SibSp', 'Parch']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Route each column group to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numeric_transformer, numeric_features)])

# Fixed random_state for reproducible forests; n_jobs=-1 uses all cores.
clf = RandomForestClassifier(random_state=42, n_jobs=-1)

# Chain preprocessing and the classifier so the exact same transformations
# are applied at fit and predict time.
pipe = Pipeline(steps=[('preprocessor', preprocessor),
                       ('classifier', clf)])

pipe = pipe.fit(X_train, y_train)

In [None]:
# Predict on the test features, store the predictions in the submission
# frame's 'Survived' column, and write the frame to CSV without the index.
test_predictions = pipe.predict(X_test)
submission.loc[:, 'Survived'] = test_predictions
submission.to_csv('submission.csv', index=False)