# Regression

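This notebook builds a small synthetic regression dataset (one feature, a linear signal plus Gaussian noise), splits it 80/20 into train and test sets, and then tunes two tree-ensemble regressors — Gradient Boosting and Random Forest — with a 5-fold cross-validated grid search. Each tuned model is scored on the held-out test set.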

## Imports, data, and functions

In [9]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import StandardScaler

### Dataset

In [None]:
# Seed every source of randomness so that Restart & Run All reproduces the
# same data, the same split, and therefore the same scores.
SEED = 42
rng = np.random.default_rng(SEED)

X = rng.random((100, 1))  # Feature: 100 samples, uniform on [0, 1)
y = 3 * X.squeeze() + 2 + rng.standard_normal(100)  # Target: linear signal + unit Gaussian noise
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, test_size=0.2, random_state=SEED
)

# NOTE(review): the scaler is fit on the training split only, but its
# transform is not applied in the cells visible here; the regressors below
# are trained on the unscaled X_train.
scaler = StandardScaler()
scaler.fit(X_train)

## Gradient Boosting Regressor

In [10]:
def GBR(n_estimators, learning_rate):
    """Grid-search a GradientBoostingRegressor and print its test-set scores.

    Reads the notebook-level variables ``X_train``, ``y_train``, ``X_test``
    and ``y_test``; they must be defined before this function is called.

    Parameters
    ----------
    n_estimators : list
        Candidate values for the ``n_estimators`` hyperparameter.
    learning_rate : list
        Candidate values for the ``learning_rate`` hyperparameter.

    Returns
    -------
    None
        The mean absolute error and R2 on the test split are printed.
    """
    gbr = GradientBoostingRegressor(random_state=1)
    search_space = {"n_estimators": n_estimators, "learning_rate": learning_rate}
    # Two metrics are recorded for every fold; refit="r2" makes r2 the one
    # that selects the best candidate.
    GS = GridSearchCV(
        estimator=gbr,
        param_grid=search_space,
        scoring=["r2", "neg_root_mean_squared_error"],
        refit="r2",
        cv=5,
        verbose=4,
    )
    GS.fit(X_train, y_train)
    best_model = GS.best_estimator_
    y_pred_test = best_model.predict(X_test)

    # Evaluate the model
    # The value comes from mean_absolute_error, so it is labelled as MAE.
    mae = mean_absolute_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)
    print(f'Mean Absolute Error Test: {mae}')
    print(f'R2 Test: {r2}')

# Search a 2x2 grid: two ensemble sizes crossed with two learning rates.
GBR(n_estimators=[250, 300], learning_rate=[0.4, 0.5])

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END learning_rate=0.4, n_estimators=250; neg_root_mean_squared_error: (test=-1.620) r2: (test=-0.481) total time=   0.0s
[CV 2/5] END learning_rate=0.4, n_estimators=250; neg_root_mean_squared_error: (test=-1.848) r2: (test=-0.211) total time=   0.0s
[CV 3/5] END learning_rate=0.4, n_estimators=250; neg_root_mean_squared_error: (test=-1.630) r2: (test=-1.352) total time=   0.0s
[CV 4/5] END learning_rate=0.4, n_estimators=250; neg_root_mean_squared_error: (test=-1.362) r2: (test=-0.521) total time=   0.0s
[CV 5/5] END learning_rate=0.4, n_estimators=250; neg_root_mean_squared_error: (test=-1.207) r2: (test=0.351) total time=   0.0s
[CV 1/5] END learning_rate=0.4, n_estimators=300; neg_root_mean_squared_error: (test=-1.620) r2: (test=-0.481) total time=   0.0s
[CV 2/5] END learning_rate=0.4, n_estimators=300; neg_root_mean_squared_error: (test=-1.848) r2: (test=-0.211) total time=   0.0s
[CV 3/5] END learning_rate=0.4,

## Random Forest Regressor

In [11]:
def RFR(n_estimators, max_depth):
    """Grid-search a RandomForestRegressor and print its test-set scores.

    Reads the notebook-level variables ``X_train``, ``y_train``, ``X_test``
    and ``y_test``; they must be defined before this function is called.

    Parameters
    ----------
    n_estimators : list
        Candidate values for the ``n_estimators`` hyperparameter.
    max_depth : list
        Candidate values for the ``max_depth`` hyperparameter.

    Returns
    -------
    None
        The mean absolute error and R2 on the test split are printed.
    """
    rf = RandomForestRegressor(random_state=1)
    search_space = {"n_estimators": n_estimators, "max_depth": max_depth}
    # Two metrics are recorded for every fold; refit="r2" makes r2 the one
    # that selects the best candidate.
    GS = GridSearchCV(
        estimator=rf,
        param_grid=search_space,
        scoring=["r2", "neg_root_mean_squared_error"],
        refit="r2",
        cv=5,
        verbose=4,
    )
    GS.fit(X_train, y_train)
    best_model = GS.best_estimator_
    y_pred_test = best_model.predict(X_test)

    # Evaluate the model
    # The value comes from mean_absolute_error, so it is labelled as MAE.
    mae = mean_absolute_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)
    print(f'Mean Absolute Error Test: {mae}')
    print(f'R2 Test: {r2}')

# Search a 2x2 grid: two forest sizes crossed with two depth limits.
RFR(n_estimators=[50, 100], max_depth=[20, 30])

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END max_depth=20, n_estimators=50; neg_root_mean_squared_error: (test=-1.438) r2: (test=-0.167) total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=50; neg_root_mean_squared_error: (test=-1.533) r2: (test=0.166) total time=   0.0s
[CV 3/5] END max_depth=20, n_estimators=50; neg_root_mean_squared_error: (test=-1.267) r2: (test=-0.422) total time=   0.0s
[CV 4/5] END max_depth=20, n_estimators=50; neg_root_mean_squared_error: (test=-1.378) r2: (test=-0.556) total time=   0.0s
[CV 5/5] END max_depth=20, n_estimators=50; neg_root_mean_squared_error: (test=-1.224) r2: (test=0.332) total time=   0.0s
[CV 1/5] END max_depth=20, n_estimators=100; neg_root_mean_squared_error: (test=-1.438) r2: (test=-0.166) total time=   0.0s
[CV 2/5] END max_depth=20, n_estimators=100; neg_root_mean_squared_error: (test=-1.577) r2: (test=0.119) total time=   0.0s
[CV 3/5] END max_depth=20, n_estimators=100; neg_root_mean_squared_error: