In [1]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import make_regression

In [2]:
class RegressionPipeline:
    """Scale -> polynomial features -> linear regression, with optional grid search.

    Call ``train`` first, then ``predict``. ``predict`` refuses to run on an
    untrained instance instead of failing on a missing attribute.
    """

    def __init__(self):
        # Template pipeline. The step names ('scaler', 'poly', 'regressor') are
        # the prefixes used in param_grid keys such as 'poly__degree'.
        self.pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('poly', PolynomialFeatures(degree=2)),
            ('regressor', LinearRegression())
        ])
        # Estimator used by predict(); stays None until train() has succeeded.
        self.best_pipeline = None

    def train(self, X, y, param_grid=None, cv=5):
        """Fit the pipeline, optionally tuning it with a cross-validated grid search.

        Args:
            X: Training features, forwarded unchanged to ``fit``.
            y: Training targets, forwarded unchanged to ``fit``.
            param_grid: Optional grid of ``<step>__<parameter>`` candidates
                handed to ``GridSearchCV``. When falsy (``None`` or empty) the
                pipeline is fitted as configured, with no search.
            cv: Value forwarded as ``GridSearchCV(cv=...)``. Only used when
                ``param_grid`` is given. Defaults to 5.

        Returns:
            ``grid_search.best_params_`` when a grid search was run, otherwise
            ``None``.
        """
        if not param_grid:
            # No search requested: fit the template itself and use it directly.
            self.pipeline.fit(X, y)
            self.best_pipeline = self.pipeline
            return None

        grid_search = GridSearchCV(self.pipeline, param_grid, cv=cv)
        grid_search.fit(X, y)
        # Only replace the stored estimator once the search has completed.
        self.best_pipeline = grid_search.best_estimator_
        return grid_search.best_params_

    def predict(self, X):
        """Return predictions for ``X`` from the estimator stored by ``train``.

        Raises:
            AttributeError: If ``train`` has not been called successfully yet.
                The exception type matches what an untrained instance raised
                before; only the message is now actionable.
        """
        # getattr keeps this safe for instances built without running __init__.
        fitted = getattr(self, "best_pipeline", None)
        if fitted is None:
            raise AttributeError(
                "RegressionPipeline has not been trained yet; "
                "call train() before predict()."
            )
        return fitted.predict(X)

In [3]:
# Generate some sample data
# Seed the generator as well as the split: with only the split seeded, every
# fresh run drew different data and therefore a different fitted model.
X, y = make_regression(n_samples=100, n_features=2, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Candidate hyper-parameters for the grid search.
# Each keyword is '<pipeline step name>__<parameter>', matching the 'poly' and
# 'regressor' steps of the pipeline.
# NOTE(review): if the 'poly' step emits a constant bias column by default,
# fit_intercept=False may still effectively fit an intercept — confirm.
param_grid = dict(
    poly__degree=[1, 2, 3],
    regressor__fit_intercept=[True, False],
)

In [5]:
# Build the wrapper and run the grid search on the training split.
reg_pipeline = RegressionPipeline()
best_params = reg_pipeline.train(X_train, y_train, param_grid=param_grid)
print(f"Best parameters found:  {best_params}")

Best parameters found:  {'poly__degree': 3, 'regressor__fit_intercept': True}


In [6]:
# Make predictions
predictions = reg_pipeline.predict(X_test)
print(predictions)

# Score against the held-out targets: y_test was split off earlier but never
# used, so the raw predictions alone said nothing about model quality.
# For a pipeline ending in LinearRegression, score() should be R^2 — confirm.
test_r2 = reg_pipeline.best_pipeline.score(X_test, y_test)
print(f"Held-out R^2: {test_r2:.4f}")

[ 32.62433093   5.75770084 -37.53980323 -27.10215657  -6.91357607
   1.93923684  16.29665491 -14.67719154  14.95075201  11.31707244
  17.58467283   9.10053779  12.37004553 -15.12207965  24.87181304
  -7.27316691  -5.09480992   5.09675407  22.80621303   4.7507837 ]
