#  Ridge Regression 04
---

IMPORTING DATA SCIENCE LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


IMPORTING MACHINE LEARNING LIBRARIES AND CLASSES

In [None]:
from sklearn.model_selection import train_test_split               #for splitting the data into test and training data
from sklearn.compose import ColumnTransformer                       #for transforming the columns
from sklearn.impute import SimpleImputer                             #for imputing the missing values
from sklearn.preprocessing import OneHotEncoder                      #one hot encoding
from sklearn.preprocessing import MinMaxScaler                        #min-max scaling

from sklearn.datasets import load_diabetes

from sklearn.datasets import make_regression

import plotly.express as px
import plotly.graph_objects as go


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score                 # for accuracy score
from sklearn.model_selection import cross_val_score        # for cross validation score

from sklearn.linear_model import LinearRegression           # Import the LinearRegression class
from sklearn.metrics import mean_squared_error, r2_score    # to find out the error functions
from sklearn.preprocessing import PolynomialFeatures , StandardScaler   # for polynomial features and standard scaling

from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge   # ridge Regression

# Loading and Information about the Dataset

In [None]:
from sklearn.datasets import load_diabetes
# Load the scikit-learn diabetes dataset. The full object is kept because
# later cells read diabetes.feature_names for plot labels.
diabetes = load_diabetes()

# Feature matrix and target vector, pulled out one attribute at a time.
X = diabetes.data
y = diabetes.target

#  Train and test split

In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)

# 1. How Are Coefficients Affected?

In [None]:
coefs = []
r2_score = []

for i in [0 , 10 , 100 , 1000]:
  reg=Ridge(alpha = i)
  reg.fit(X_train , y_train)
  coefs.append(reg.coef_)
  r2_score.append(reg.score(X_test , y_test))

In [None]:
plt.figure(figsize = ( 14, 9))
plt.plot(221)
plt.bar(diabetes.feature_names,coefs[0])
plt.title('Alpha = 0 , r2_score = {}'.format(round(r2_score[0],2)))

plt.subplot(222)
plt.bar(diabetes.feature_names,coefs[1])
plt.title('Alpha = 10 , r2_score = {}'.format(round(r2_score[1],2)))

plt.subplot(223)
plt.bar(diabetes.feature_names ,coefs[2])
plt.title('Alpha = 100 , r2_score = {}'.format(round(r2_score[2],2)))

plt.subplot(224)
plt.bar(diabetes.feature_names ,coefs[3])
plt.title('Alpha = 1000 , r2_score = {}'.format(round(r2_score[3],2)))


# 2. Higher Coefficients Are Affected More

In [None]:
# Regularisation strengths to sweep, from no penalty up to a very strong one.
alpha = [0, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
coefs = []

# Refit Ridge for every strength and keep each coefficient vector as a
# plain Python list.
for strength in alpha:
    reg = Ridge(alpha=strength)
    reg.fit(X_train, y_train)
    fitted_coefficients = reg.coef_.tolist()
    coefs.append(fitted_coefficients)

In [None]:
# coefs holds one coefficient list per alpha; transposing gives one row per
# coefficient, with one column per alpha.
input_array = np.transpose(np.array(coefs))

In [None]:
plt.figure(figsize=(8, 5))

# Thick black reference line at zero, drawn across the whole alpha range.
zero_line = np.zeros(len(alpha))
plt.plot(alpha, zero_line, color='black', linewidth=5)

# One curve per feature: how its coefficient changes with alpha.
for feature_name, coef_path in zip(diabetes.feature_names, input_array):
    plt.plot(alpha, coef_path, label=feature_name)

plt.legend()

#  3. Impact on Bias and Variance

In [None]:
# Synthetic quadratic data set: m samples of y = 0.7*x^2 - 2*x + 3 plus
# standard-normal noise, with x drawn uniformly from [-2, 3).
m = 100
X = 5 * np.random.rand(m, 1) - 2
noise = np.random.randn(m, 1)
y = 0.7 * X ** 2 - 2 * X + 3 + noise

plt.scatter(X, y)
plt.show()

In [None]:
# Split the synthetic data 80/20 with a fixed seed. The shapes are derived
# from the arrays themselves (column matrix for X, flat vector for y) rather
# than hard-coding the sample count, so changing m above does not break this
# cell.
X_train, X_test, y_train, y_test = train_test_split(
    X.reshape(-1, 1),
    y.ravel(),
    test_size=0.2,
    random_state=2,
)

In [None]:
from sklearn.preprocessing import PolynomialFeatures
# Expand the feature into polynomial terms up to degree 15. The transformer
# is fitted on the training split only and then applied to both splits.
poly = PolynomialFeatures(degree=15)
poly.fit(X_train)

X_train = poly.transform(X_train)
X_test = poly.transform(X_test)

In [None]:
from mlxtend.evaluate import bias_variance_decomp

# 100 evenly spaced regularisation strengths between 0 and 30.
alphas = np.linspace(0, 30, 100)

# One entry per alpha, filled in the same order as `alphas`.
loss, bias, variance = [], [], []

for strength in alphas:
    reg = Ridge(alpha=strength)
    # bias_variance_decomp returns three values, unpacked here as the
    # average expected loss, average bias and average variance under MSE.
    decomposition = bias_variance_decomp(
        reg,
        X_train,
        y_train,
        X_test,
        y_test,
        loss='mse',
        random_seed=123,
    )
    mean_loss, mean_bias, mean_variance = decomposition
    loss.append(mean_loss)
    bias.append(mean_bias)
    variance.append(mean_variance)


In [None]:
# Draw the three decomposition curves against alpha on shared axes.
curves = (
    (loss, 'loss'),
    (bias, 'Bias'),
    (variance, 'Variance'),
)
for series, curve_label in curves:
    plt.plot(alphas, series, label=curve_label)

# Clip the y-axis to the 0-5 range.
plt.ylim(0, 5)
plt.xlabel('Alpha')
plt.legend()

# 4. Effect of Regularization on Loss Function

In [None]:
from sklearn.datasets import make_regression

# Single-feature synthetic regression problem with noise; the fixed
# random_state makes it reproducible.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=1,
    n_targets=1,
    noise=20,
    random_state=13,
)

plt.scatter(X, y)

from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit on the synthetic data; print the fitted slope
# and intercept.
# NOTE(review): presumably the source of the 2.29 constant used by cal_loss
# and predict in the following cells - confirm.
reg = LinearRegression().fit(X, y)
print(reg.coef_)
print(reg.intercept_)

In [None]:
def cal_loss(m, alpha):
    """Return the ridge loss for slope ``m`` and penalty strength ``alpha``.

    The loss is the sum of squared residuals of the line ``m * X - 2.29``
    against the module-level ``X`` and ``y``, plus the L2 penalty
    ``alpha * m * m``. The intercept is fixed at -2.29.
    """
    # y - (m * X - 2.29), written with the sign of the intercept folded in.
    residuals = y - m * X.ravel() + 2.29
    squared_error = np.sum(residuals ** 2)
    penalty = alpha * m * m
    return squared_error + penalty

In [None]:
def predict(m):
    """Return the predictions of the line with slope ``m`` for module-level ``X``.

    The intercept is fixed at -2.29, and the result has the same shape as
    ``X``.
    """
    scaled_inputs = m * X
    return scaled_inputs - 2.29

In [None]:
# Candidate slopes at which the ridge loss is evaluated.
m = np.linspace(-45, 100, 100)
plt.figure(figsize=(4, 6))

# One loss-versus-slope curve per regularisation strength.
for alpha_value in [0, 10, 20, 30, 40, 50, 100]:
    loss = [cal_loss(slope, alpha_value) for slope in m]
    plt.plot(m, loss, label='alpha = {}'.format(alpha_value))

plt.legend()
# The x-axis carries the slope values m; alpha is what distinguishes the
# curves in the legend, so the axis is labelled with m rather than 'Alpha'.
plt.xlabel('m')
plt.ylabel('Loss')
plt.show()