In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Load your data (assuming you have your data in a pandas DataFrame)
 data = pd.read_csv('housing.csv') 
  X = data.drop('target_column', axis=1)
  y = data['target_column']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define pipelines for different models
pipeline_lr = Pipeline([
    ('scaler', StandardScaler()),  # Feature scaling
    ('lr', LinearRegression())  # Linear regression
])

pipeline_rf = Pipeline([
    ('scaler', StandardScaler()),  # Feature scaling
    ('rf', RandomForestRegressor(random_state=42))  # Random Forest
])

pipeline_xgb = Pipeline([
    ('scaler', StandardScaler()),  # Feature scaling
    ('xgb', XGBRegressor(random_state=42))  # XGBoost regressor
])

# Define the parameter grid for GridSearchCV for all models
param_grid = [
    {
        'lr': [LinearRegression()]  # No hyperparameters to tune for linear regression
    },
    {
        'rf__n_estimators': [100, 200],
        'rf__max_depth': [5, 10, 15],
        'rf__min_samples_split': [2, 5, 10],
        'rf__min_samples_leaf': [1, 2, 4]
    },
    {
        'xgb__n_estimators': [100, 200],
        'xgb__max_depth': [3, 5, 7],
        'xgb__learning_rate': [0.01, 0.1, 0.2],
        'xgb__subsample': [0.8, 1.0]
    }
]

# Create a list of all pipelines
pipelines = [('Linear Regression', pipeline_lr),
             ('Random Forest', pipeline_rf),
             ('XGBoost', pipeline_xgb)]

# Perform GridSearchCV for each pipeline
best_models = {}
for name, pipeline in pipelines:
    grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error')
    grid_search.fit(X_train, y_train)
    best_models[name] = grid_search

# Now evaluate each model
for name, model in best_models.items():
    print(f"Best parameters for {name}: {model.best_params_}")
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{name} - Mean Squared Error: {mse}")
    print(f"{name} - R^2 Score: {r2}")
    print("-" * 40)


IndentationError: unexpected indent (3524910854.py, line 12)