# Train a Ridge Regression Model on the Diabetes Dataset

This notebook loads the Diabetes dataset bundled with scikit-learn, splits the data into training and validation sets, trains a Ridge regression model on the training set, validates the model on the held-out validation set using mean squared error, and saves the fitted model to disk with joblib.

In [1]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import joblib

## Load Data

In [2]:
# Fetch the dataset as a (features, target) pair rather than a Bunch object.
features_and_target = load_diabetes(return_X_y=True)
X, y = features_and_target

## Split Data into Training and Validation Sets

In [3]:
# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
splits = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = splits

# Bundle both partitions into one lookup keyed by split name, then by "X"/"y".
train_part = {"X": X_train, "y": y_train}
test_part = {"X": X_test, "y": y_test}
data = {"train": train_part, "test": test_part}

## Train Model on Training Set

In [4]:
# Regularisation strength handed to Ridge.
alpha = 0.5

# fit() returns the estimator itself, so construction and training can be
# chained; the bare `reg` on the last line makes the notebook display it.
train_split = data["train"]
reg = Ridge(alpha=alpha).fit(train_split["X"], train_split["y"])
reg

Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

## Validate Model on Validation Set

In [6]:
# Score the fitted model on the held-out split.
preds = reg.predict(data["test"]["X"])

# mean_squared_error's signature is (y_true, y_pred): pass the ground truth
# first. MSE happens to be symmetric, so the printed value is unchanged, but
# keeping the documented order avoids silently wrong results if this metric is
# later swapped for an asymmetric one (e.g. r2_score).
# The targets are read from `data` so features and labels come from the same
# place as the predict() call above.
print("mse: ", mean_squared_error(data["test"]["y"], preds))

mse:  3298.9096058070622


## Save Model

In [7]:
# Destination file for the serialized estimator (relative to the working directory).
model_name = "sklearn_regression_model.pkl"

# Left as the final expression so the notebook echoes joblib.dump's return
# value (the list of file names it wrote).
joblib.dump(reg, model_name)

['sklearn_regression_model.pkl']