# Ridge Regression
Ridge regression is a type of regularized regression. This means that overfitting is reduced by penalizing complex models. 

In [15]:
#base case of ridge regression
from sklearn import datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

bcancer = datasets.load_breast_cancer()
X = bcancer.data
y = bcancer.target

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=42)

ridge=Ridge(alpha=0.4, normalize=True)
ridge.fit(X_train, y_train)
ridge_pred = ridge.predict(X_test)
print(ridge.score(X_test, y_test))

0.739282992042948


## The ridge algorithm uses an alpha parameter
It doesn't make a huge difference in this case, but this is a demonstration of hyperparameter tuning, in this case using the GridSearchCV object. Also available for this purpose is RandomizedSearchCV but not demonstrated.

In [16]:
from sklearn.model_selection import GridSearchCV
import numpy as np

ridge = Ridge(normalize=True)
param_grid = {'alpha': np.linspace(0,1,50)}
ridge_cv = GridSearchCV(ridge, param_grid, cv=5)
ridge_cv.fit(X_train,y_train)

print("Tuned Parameters: {}".format(ridge_cv.best_params_)) 
print("Best score is {}".format(ridge_cv.best_score_))

print(ridge_cv.score(X_test, y_test))

Tuned Parameters: {'alpha': 0.02040816326530612}
Best score is 0.7066652809265238
0.7439874221710989
