-
Notifications
You must be signed in to change notification settings - Fork 14
Closed
Description
# Minimal reproduction: fit a tiny random forest on a six-row frame whose two
# categorical columns are one-hot encoded and whose numeric columns are passed
# through unchanged.

# Toy dataset, laid out column by column (six rows).
data = pandas.DataFrame({
    'CAT1': ['a', 'b', 'a', 'a', 'a', 'a'],
    'CAT2': ['c', 'd', 'd', 'd', 'c', 'c'],
    'num1': [0.5, 0.4, 0.5, 0.55, 0.35, 0.5],
    'num2': [0.6, 0.8, 0.56, 0.56, 0.86, 0.68],
    'y': [0, 1, 0, 1, 0, 1],
})

# Columns routed through the one-hot encoder.
cat_cols = ['CAT1', 'CAT2']

# Feature matrix: every column except the target `y`.
train_data = data.drop(columns='y')

# Defined for completeness; it is not referenced by the ColumnTransformer
# below, so num1/num2 reach the classifier unscaled via the passthrough.
numeric_transformer = Pipeline([('scaler', StandardScaler())])

# NOTE(review): `sparse=` was renamed to `sparse_output=` in newer
# scikit-learn releases; it is kept as written to match whichever version
# this snippet targets -- confirm before running on a recent release.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(sparse=False, handle_unknown='ignore')),
])

# Only the categorical columns get a transformer; the remaining columns are
# forwarded untouched.
preprocessor = ColumnTransformer(
    [('cat', categorical_transformer, cat_cols)],
    remainder='passthrough',
)

# Preprocessing followed by a two-tree random forest.
pipe = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('rf', RandomForestClassifier(n_estimators=2)),
])

pipe.fit(train_data, data['y'])