# Ridge Regression
> Analysis of the diabetes dataset using ridge regression

In [None]:
import pickle, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from azureml.core import Workspace
from azureml.core.run import Run
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib

In [None]:
# Seed handed to train_test_split so the train/test partition is repeatable.
RANDOM_STATE = 42
# NOTE(review): not referenced in the visible cells; presumably the filename
# the trained model gets serialized to — confirm against the rest of the notebook.
MODEL_NAME = "mymodel.pkl"

In [None]:
# Handle to the current Azure ML run; the run.log(...) calls in the training
# cell (currently commented out) would record metrics through it.
# NOTE(review): presumably this yields an offline stub when the notebook is
# executed outside a submitted run — confirm against the azureml docs.
run = Run.get_context()

In [None]:
# The CSV is expected under "data/" in the parent of the current working directory.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_path = os.path.join(parent_dir, 'data/diabetes.csv')

In [None]:
# Load the raw CSV for a quick inspection.
df = pd.read_csv(data_path)
# DataFrame.info() writes its summary directly to stdout and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
df.info()
# Last expression of the cell: the notebook renders the first five rows.
df.head()

In [None]:
# Show pandas' descriptive statistics for df as the cell output.
df.describe()

In [None]:
# Fit a ridge regressor on the diabetes data and report its hold-out MSE.
# NOTE(review): the features come from scikit-learn's bundled copy of the
# dataset, not from the `df` read from CSV earlier — confirm that is intended.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the samples; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)
data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}}

# Candidate regularisation strengths 0.00, 0.05, ..., 0.95; one is drawn
# uniformly at random.
# NOTE(review): the draw is unseeded, so alpha and the reported MSE differ
# from run to run — seed it if reproducible results are wanted.
alphas = np.arange(0.0, 1.0, 0.05)
alpha = np.random.choice(alphas)

reg = Ridge(alpha=alpha)
reg.fit(data["train"]["X"], data["train"]["y"])
preds = reg.predict(data["test"]["X"])
# scikit-learn metrics take (y_true, y_pred). MSE is symmetric, so the value
# is unchanged, but the documented order keeps this consistent with other metrics.
mse = mean_squared_error(data["test"]["y"], preds)
print("Alpha is {0:.2f}, and MSE is {1:0.2f}".format(alpha, mse))

print("Logging values")
# Metric logging to the Azure ML run is left disabled, as in the original cell.
# run.log("alpha", alpha)
# run.log("mse", mse)