In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_squared_error

# Single seed shared by every randomised component so the whole run
# (split, base forest, meta-learner forest) is reproducible.
RANDOM_STATE = 42

# Load the seaborn "tips" example dataset; the target is the 'tip' column
# and every remaining column is used as a feature.
tips_df = sns.load_dataset('tips')

X = tips_df.drop(columns=['tip'])
y = tips_df['tip']

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Numeric columns are standardised.
numeric_features = ['total_bill', 'size']
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler()),
])

# Categorical columns are one-hot encoded. handle_unknown='ignore' makes the
# encoder emit an all-zero row for a category value that was not present
# when fitting, instead of raising during predict().
categorical_features = ['sex', 'smoker', 'day', 'time']
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

# Base regressors whose predictions are combined by the stacking
# final estimator.
base_regressors = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE)),
    ('svm', SVR(kernel='rbf')),
]

# The final estimator is seeded as well; leaving it unseeded made the
# reported error change from one run to the next.
stacking_regressor = StackingRegressor(
    estimators=base_regressors,
    final_estimator=RandomForestRegressor(random_state=RANDOM_STATE),
)

# Preprocessing and model live in one pipeline so the transformers are
# fitted on the training split only and re-applied unchanged at predict time.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('stacking_regressor', stacking_regressor)])

pipeline.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = pipeline.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 1.3891316657142865
