In [1]:
# Build a regression model with feature scaling, using gradient descent to minimize the cost function

In [2]:
# Libraries
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline

In [3]:
# Load the diabetes dataset and pull out the feature matrix and the target vector
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

In [6]:
# Split into train and test sets: 30% of the samples are held out for testing,
# and the fixed random_state makes the split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [16]:
# Scale features, then fit a linear model with stochastic gradient descent.
# StandardScaler standardizes the features, which helps gradient descent converge quickly.
# max_iter caps the number of epochs (passes over the training data); tol is the
# early-stopping tolerance: training stops once the loss stops improving by at
# least tol for several consecutive epochs (see n_iter_no_change), not when the
# loss itself drops below tol.
pipeline = make_pipeline(
    StandardScaler(),
    SGDRegressor(
        max_iter=1000,
        tol=1e-3,
        random_state=42,
    ),
)

In [18]:
# Fit the scaler and the regressor on the training split only
pipeline.fit(X=X_train, y=y_train)

In [19]:
# Predict targets for the test split using the fitted pipeline
y_pred = pipeline.predict(X=X_test)

In [20]:
# Evaluation: root mean squared error between the test targets and the predictions
rmse = np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=y_pred)
)
print(f'Root Mean Squared Error: {rmse}')

Root Mean Squared Error: 53.25047919166815


In [21]:
# Evaluate with the R^2 score (the pipeline's score method) on the test split
r_squared = pipeline.score(X=X_test, y=y_test)
print(f'R^2 score:{r_squared}')

R^2 score:0.47472177222595935
