In [2]:
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor

from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

import numpy as np

In [3]:
# Load the iris dataset and separate the feature matrix from the target vector.
data = load_iris()
X = data.data
y = data.target

In [11]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Hyper-parameter candidates explored by the grid search (3 x 3 = 9 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[10, 20, 30],
)

In [24]:
# Uncomment to read the GridSearchCV documentation: help(GridSearchCV)

In [12]:
# Sanity check: the training features and targets must have the same number of rows.
X_train.shape[0], y_train.shape[0]

(120, 120)

In [13]:
# Tune the random forest with 5-fold cross-validation over `param_grid`.
# Scoring is the *negated* MSE, so a larger (closer to zero) score is better.
# NOTE(review): the iris target appears to hold class labels, yet a regressor is
# fitted to it here -- confirm that treating the labels as numeric is intended.
model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search.fit(X_train, y_train)

In [14]:
# Keep a handle on the estimator selected by the grid search fitted above.
best_model = grid_search.best_estimator_
best_model  # bare last expression so the notebook displays the estimator

In [15]:
# Confirm which estimator class the grid search returned.
best_model.__class__

sklearn.ensemble._forest.RandomForestRegressor

In [18]:
# Mean squared error of the tuned forest on the training and validation splits.
mse_train = np.square(best_model.predict(X_train) - y_train).mean()
mse_val = np.square(best_model.predict(X_val) - y_val).mean()

In [20]:
# Report both errors, one per line.
print(
    f"Training Error: {mse_train}",
    f"Validation Error: {mse_val}",
    sep="\n",
)

Training Error: 0.006296666666666669
Validation Error: 0.0014799999999999998


In [21]:
# Standardize the features, project them onto the first two principal components,
# then fit a random forest on the reduced representation.
# NOTE(review): max_depth / n_estimators are hard-coded; presumably they mirror the
# grid-search winner -- confirm against `grid_search.best_params_`.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=2)),
    # Seed the forest like every other stochastic step in this notebook so the
    # PCA results are reproducible on Restart & Run All.
    ('regressor', RandomForestRegressor(max_depth=10, n_estimators=50, random_state=42))
])
pipeline.fit(X_train, y_train)

In [22]:
# Score the scaler -> PCA -> forest pipeline on both splits with mean squared error.
train_error_pca = np.square(pipeline.predict(X_train) - y_train).mean()
val_error_pca = np.square(pipeline.predict(X_val) - y_val).mean()

print(
    f"Training Error with PCA: {train_error_pca}",
    f"Validation Error with PCA: {val_error_pca}",
    sep="\n",
)

Training Error with PCA: 0.007316666666666666
Validation Error with PCA: 0.07569333333333332
