# Ridge Regression

---


IMPORTING DATA SCIENCE LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


IMPORT MACHINE LEARNING LIBRARIES AND CLASSES

In [None]:
from sklearn.model_selection import train_test_split               #for splitting the data into test and training data
from sklearn.compose import ColumnTransformer                       #for transforming the columns
from sklearn.impute import SimpleImputer                             #for imputing the missing values
from sklearn.preprocessing import OneHotEncoder                      #one hot encoding
from sklearn.preprocessing import MinMaxScaler                        #min-max scaling of features to a fixed range

from sklearn.datasets import load_diabetes

from sklearn.datasets import make_regression

import plotly.express as px
import plotly.graph_objects as go


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score                 # for accuracy score
from sklearn.model_selection import cross_val_score        # for cross validation score

from sklearn.linear_model import LinearRegression # Import the LinearRegression class
from sklearn.metrics import mean_squared_error, r2_score    # to find out the error functions
from sklearn.preprocessing import PolynomialFeatures , StandardScaler   # for the polynomial features and standard scaling

from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge   # ridge Regression

# Generating and Visualizing the Dataset

In [None]:
# Seed NumPy's global RNG so every run draws the same samples; without this
# the data (and therefore every fitted curve and score) changes on each run.
np.random.seed(2)

m = 100  # number of samples

# Single feature drawn uniformly from [-2, 3), kept as an (m, 1) column.
x1 = 5 * np.random.rand(m, 1) - 2

# Quadratic target in x1 plus standard-normal noise.
x2 = 0.7 * x1**2 - 2 * x1 + 1 + np.random.randn(m, 1)

# Quick visual check of the generated samples.
plt.scatter(x1, x2)
plt.show()

In [None]:
# Hold out 20% of the samples for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    x1,
    x2,
    test_size=0.2,
    random_state=2,
)

# Linear Regression

In [None]:
# Baseline model: plain linear regression trained on the training split only.
reg = LinearRegression()
reg.fit(X=X_train, y=y_train)

In [None]:
# Show the fitted coefficient(s) first, then the intercept, one per line.
for fitted_value in (reg.coef_, reg.intercept_):
    print(fitted_value)

In [None]:
# Predict on the held-out inputs using the model fitted on the training split.
y_pred = reg.predict(X_test)

# Report each metric once. The previous bare r2_score(...) call computed the
# score and discarded the result, so it has been removed.
print("R2 Score", r2_score(y_test, y_pred))
# RMSE is the square root of the mean squared error, in the units of the target.
print("RMSE ", np.sqrt(mean_squared_error(y_test, y_pred)))

# Ridge Regression with Polynomials

Here we add `alpha`, the regularization strength, so that the model performs Ridge regularization, i.e. L2 regularization.

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

def get_preds_ridge(x1, x2, alpha, degree=16):
    """Fit a polynomial Ridge model and return its predictions on the same inputs.

    The model is a two-step pipeline: polynomial feature expansion of ``x1``
    followed by Ridge (L2-regularized) regression on the expanded features.

    Parameters
    ----------
    x1 : array-like
        Input features; used both for fitting and for prediction.
    x2 : array-like
        Target values to fit against.
    alpha : float
        Regularization strength passed to ``Ridge``.
    degree : int, default=16
        Degree of the polynomial feature expansion. The default keeps the
        behavior of the earlier hard-coded value.

    Returns
    -------
    ndarray
        In-sample predictions, i.e. ``model.predict(x1)`` after fitting on
        ``(x1, x2)``.
    """
    model = Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('ridge', Ridge(alpha=alpha)),
    ])

    # Fit and predict on the same data: the caller wants the fitted curve
    # through the training points, not a held-out evaluation.
    model.fit(x1, x2)
    return model.predict(x1)

#  Plotting the figure with 3 different values of Alpha

We will draw a red curve for alpha = 0 (no regularization), a green curve for alpha = 20, and a blue curve for alpha = 200.

This shows how the shape of the fitted curve changes as we increase or decrease the value of alpha.

In [None]:
# plotting the data points

# Draw the raw samples as blue dots on a fresh 10x10 figure.
plt.figure(figsize=(10, 10))
plt.plot(x1, x2, 'b.', label='Datapoints')

# Axis titles (the two calls are independent of each other).
plt.ylabel('x2')
plt.xlabel('x1')


Now we plot the data points together with the three colored curves, one for each of the 3 values of alpha.

In [None]:
# Regularization strengths to compare, and the line color used for each one.
alphas = [0, 20, 200]
cs = ['r', 'g', 'b']

# Base layer: the raw samples as blue dots.
plt.figure(figsize=(10, 10))
plt.plot(x1, x2, 'b.', label='Datapoints')
plt.xlabel('x1')
plt.ylabel('x2')

# A line plot needs its x values in ascending order. The ordering depends only
# on x1, not on alpha, so compute it once and apply the same index to both the
# x values and each prediction vector.
order = np.argsort(x1[:, 0])
x_sorted = x1[order, 0]

for alpha, c in zip(alphas, cs):
    # In-sample predictions of the polynomial Ridge model for this alpha.
    preds = get_preds_ridge(x1, x2, alpha)
    plt.plot(x_sorted, preds[order], c, label=f'Alpha = {alpha}')

plt.legend()
plt.show()


In the above diagram we can see that the blue curve bends the least and simply follows the overall trend; this curve has the maximum regularization, with alpha at its largest value of 200 [ UNDERFITTING ]

The red curve twists too much because it has no regularization, i.e. an alpha value of 0 [ OVERFITTING ]

The green curve is a good balance between the blue curve and the red curve.