## Imports

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd

# Load the train and test splits from CSV files in the working directory.
df_train, df_test = pd.read_csv("./train.csv"), pd.read_csv("./test.csv")

# Swap spaces for underscores in every column label so both frames share
# the same space-free column names.
df_train.columns = [label.replace(' ', '_') for label in df_train.columns]
df_test.columns = [label.replace(' ', '_') for label in df_test.columns]

In [2]:
# Target column, kept separately from the feature matrix.
df_train_y = df_train['Personality']

# Feature matrix: every training column except the target and the 'id' column.
df_train_X = df_train.drop(['Personality', 'id'], axis=1)

In [7]:
class Int64Converter(BaseEstimator, TransformerMixin):
    """Stateless transformer that casts its input to pandas' nullable ``Int64`` dtype.

    ``fit`` learns nothing; ``transform`` only changes the dtype.
    """

    def fit(self, X, y=None):
        """Return ``self`` unchanged; there is nothing to fit.

        Parameters
        ----------
        X : array-like
            Ignored.
        y : array-like, optional
            Ignored.
        """
        return self

    def transform(self, X):
        """Cast ``X`` to the ``Int64`` extension dtype.

        Parameters
        ----------
        X : pandas.DataFrame, pandas.Series or array-like
            Data to convert. Values are expected to be integer-valued
            (pandas refuses to cast fractional floats to ``Int64``).

        Returns
        -------
        pandas.DataFrame or pandas.Series
            ``X`` with ``Int64`` dtype. Non-pandas input is returned as a
            DataFrame.
        """
        # 'Int64' is a pandas extension dtype that NumPy does not recognise,
        # so array input (e.g. the output of a preceding scikit-learn step)
        # has to be wrapped in a pandas container before casting.
        if not isinstance(X, (pd.DataFrame, pd.Series)):
            X = pd.DataFrame(X)
        return X.astype('Int64')


time_spent_alone_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value=2)),
    # The integer cast must run before scaling: standardized values are
    # fractional, so casting them to Int64 afterwards would either fail or
    # throw away the scaled feature.
    ('int64_converter', Int64Converter()),
    ('scaler', StandardScaler()),
])


def _scaled_numeric_pipeline(fill_value):
    """Build a pipeline that fills missing values with ``fill_value``, then applies ``StandardScaler``."""
    return Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value=fill_value)),
        ('scaler', StandardScaler()),
    ])


def _onehot_categorical_pipeline():
    """Build a pipeline that fills missing values with ``'unknown'``, then one-hot encodes."""
    return Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='unknown')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])


# One independent pipeline instance per column, so each is fitted separately.
stage_fear_pipeline = _onehot_categorical_pipeline()

social_event_attendance_pipeline = _scaled_numeric_pipeline(3.5)

going_outside_pipeline = _scaled_numeric_pipeline(2.5)

drained_after_socializing_pipeline = _onehot_categorical_pipeline()

friends_circle_size_pipeline = _scaled_numeric_pipeline(5)

post_frequency_pipeline = _scaled_numeric_pipeline(2.5)

In [8]:
from sklearn.compose import ColumnTransformer

# Route each input column to its dedicated pipeline. The order of the entries
# determines the order of the transformed output blocks, so it is kept as-is.
preprocessor = ColumnTransformer(
    transformers=[
        ('col1', time_spent_alone_pipeline, ['Time_spent_alone']),
        ('col2', stage_fear_pipeline, ['Stage_fear']),
        ('col3', social_event_attendance_pipeline, ['Social_event_attendance']),
        ('col4', going_outside_pipeline, ['Going_outside']),
        ('col5', drained_after_socializing_pipeline, ['Drained_after_socializing']),
        ('col6', friends_circle_size_pipeline, ['Friends_circle_size']),
        ('col7', post_frequency_pipeline, ['Post_frequency']),
    ],
)